18f0e9130SKonstantin Belousov /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 48f0e9130SKonstantin Belousov * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 5d8a16b6aSKonstantin Belousov * Copyright (c) 2023 The FreeBSD Foundation 6d8a16b6aSKonstantin Belousov * 7d8a16b6aSKonstantin Belousov * Portions of this software were developed by 8d8a16b6aSKonstantin Belousov * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 9d8a16b6aSKonstantin Belousov * the FreeBSD Foundation. 108f0e9130SKonstantin Belousov * 118f0e9130SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 128f0e9130SKonstantin Belousov * modification, are permitted provided that the following conditions 138f0e9130SKonstantin Belousov * are met: 148f0e9130SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 158f0e9130SKonstantin Belousov * notice unmodified, this list of conditions, and the following 168f0e9130SKonstantin Belousov * disclaimer. 178f0e9130SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 188f0e9130SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 198f0e9130SKonstantin Belousov * documentation and/or other materials provided with the distribution. 208f0e9130SKonstantin Belousov * 218f0e9130SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 228f0e9130SKonstantin Belousov * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 238f0e9130SKonstantin Belousov * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
248f0e9130SKonstantin Belousov * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 258f0e9130SKonstantin Belousov * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 268f0e9130SKonstantin Belousov * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 278f0e9130SKonstantin Belousov * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 288f0e9130SKonstantin Belousov * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 298f0e9130SKonstantin Belousov * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 308f0e9130SKonstantin Belousov * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 318f0e9130SKonstantin Belousov */ 328f0e9130SKonstantin Belousov 338f0e9130SKonstantin Belousov #include <sys/param.h> 34c3d8a931SKonstantin Belousov #include <sys/kassert.h> 358f0e9130SKonstantin Belousov #include <sys/kernel.h> 368f0e9130SKonstantin Belousov #include <sys/lock.h> 378f0e9130SKonstantin Belousov #include <sys/mutex.h> 388f0e9130SKonstantin Belousov #include <sys/proc.h> 398f0e9130SKonstantin Belousov #include <sys/rangelock.h> 40c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h> 41c3d8a931SKonstantin Belousov #include <sys/smr.h> 429ef425e5SKonstantin Belousov #include <sys/sysctl.h> 438f0e9130SKonstantin Belousov 448f0e9130SKonstantin Belousov #include <vm/uma.h> 458f0e9130SKonstantin Belousov 46c3d8a931SKonstantin Belousov /* 479ef425e5SKonstantin Belousov * Immediately after initialization (subject to 'rangelock_cheat' 489ef425e5SKonstantin Belousov * below), and until a request comes that conflicts with granted ones 499ef425e5SKonstantin Belousov * based on type, rangelocks serve requests in the "cheating" mode. 509ef425e5SKonstantin Belousov * In this mode, a rangelock behaves like a sxlock, as if each request 519ef425e5SKonstantin Belousov * covered the whole range of the protected object. 
On receiving a
 * conflicting request (any request while a write request is
 * effective, or any write request while some read requests are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to keeping track of the precise
 * range of each new request.
 *
 * The normal sx(9) implementation is not reused in order not to bloat
 * the structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave the same as normal locks but do not drain.
679ef425e5SKonstantin Belousov */ 689ef425e5SKonstantin Belousov 69*53789621SKonstantin Belousov static int rangelock_cheat = 1; 709ef425e5SKonstantin Belousov SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN, 719ef425e5SKonstantin Belousov &rangelock_cheat, 0, 729ef425e5SKonstantin Belousov ""); 739ef425e5SKonstantin Belousov 749ef425e5SKonstantin Belousov #define RL_CHEAT_MASK 0x7 759ef425e5SKonstantin Belousov #define RL_CHEAT_CHEATING 0x1 769ef425e5SKonstantin Belousov /* #define RL_CHEAT_RLOCKED 0x0 */ 779ef425e5SKonstantin Belousov #define RL_CHEAT_WLOCKED 0x2 789ef425e5SKonstantin Belousov #define RL_CHEAT_DRAINING 0x4 799ef425e5SKonstantin Belousov 809ef425e5SKonstantin Belousov #define RL_CHEAT_READER 0x8 819ef425e5SKonstantin Belousov 829ef425e5SKonstantin Belousov #define RL_RET_CHEAT_RLOCKED 0x1100 839ef425e5SKonstantin Belousov #define RL_RET_CHEAT_WLOCKED 0x2200 849ef425e5SKonstantin Belousov 8575447afcSKonstantin Belousov static void 8675447afcSKonstantin Belousov rangelock_cheat_drain(struct rangelock *lock) 8775447afcSKonstantin Belousov { 8875447afcSKonstantin Belousov uintptr_t v; 8975447afcSKonstantin Belousov 9075447afcSKonstantin Belousov DROP_GIANT(); 9175447afcSKonstantin Belousov for (;;) { 9275447afcSKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 9375447afcSKonstantin Belousov if ((v & RL_CHEAT_DRAINING) == 0) 9475447afcSKonstantin Belousov break; 9575447afcSKonstantin Belousov sleepq_add(&lock->head, NULL, "ranged1", 0, 0); 9675447afcSKonstantin Belousov sleepq_wait(&lock->head, PRI_USER); 9775447afcSKonstantin Belousov sleepq_lock(&lock->head); 9875447afcSKonstantin Belousov } 9975447afcSKonstantin Belousov sleepq_release(&lock->head); 10075447afcSKonstantin Belousov PICKUP_GIANT(); 10175447afcSKonstantin Belousov } 10275447afcSKonstantin Belousov 1039ef425e5SKonstantin Belousov static bool 1049ef425e5SKonstantin Belousov rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock, 
1059ef425e5SKonstantin Belousov void **cookie) 1069ef425e5SKonstantin Belousov { 1079ef425e5SKonstantin Belousov uintptr_t v, x; 1089ef425e5SKonstantin Belousov 1099ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 1109ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1119ef425e5SKonstantin Belousov return (false); 1129ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 1139ef425e5SKonstantin Belousov drain: 1149ef425e5SKonstantin Belousov if (trylock) { 1159ef425e5SKonstantin Belousov *cookie = NULL; 1169ef425e5SKonstantin Belousov return (true); 1179ef425e5SKonstantin Belousov } 1189ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1199ef425e5SKonstantin Belousov drain1: 12075447afcSKonstantin Belousov rangelock_cheat_drain(lock); 1219ef425e5SKonstantin Belousov return (false); 1229ef425e5SKonstantin Belousov } 1239ef425e5SKonstantin Belousov 1249ef425e5SKonstantin Belousov switch (locktype) { 1259ef425e5SKonstantin Belousov case RL_LOCK_READ: 1269ef425e5SKonstantin Belousov for (;;) { 1279ef425e5SKonstantin Belousov if ((v & RL_CHEAT_WLOCKED) != 0) { 1289ef425e5SKonstantin Belousov if (trylock) { 1299ef425e5SKonstantin Belousov *cookie = NULL; 1309ef425e5SKonstantin Belousov return (true); 1319ef425e5SKonstantin Belousov } 1329ef425e5SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 1339ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1349ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 1359ef425e5SKonstantin Belousov x) != 0) 1369ef425e5SKonstantin Belousov goto drain1; 1379ef425e5SKonstantin Belousov sleepq_release(&lock->head); 1389ef425e5SKonstantin Belousov /* Possibly forgive passed conflict */ 13957cc80e6SKonstantin Belousov } else { 1409ef425e5SKonstantin Belousov x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER; 1419ef425e5SKonstantin Belousov x |= RL_CHEAT_CHEATING; 14257cc80e6SKonstantin Belousov if (atomic_fcmpset_acq_ptr(&lock->head, &v, 14357cc80e6SKonstantin Belousov x) != 0) 
1449ef425e5SKonstantin Belousov break; 14557cc80e6SKonstantin Belousov } 1469ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1479ef425e5SKonstantin Belousov return (false); 1489ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) 1499ef425e5SKonstantin Belousov goto drain; 1509ef425e5SKonstantin Belousov } 1519ef425e5SKonstantin Belousov *(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED; 1529ef425e5SKonstantin Belousov break; 1539ef425e5SKonstantin Belousov case RL_LOCK_WRITE: 1549ef425e5SKonstantin Belousov for (;;) { 1559ef425e5SKonstantin Belousov if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER || 1569ef425e5SKonstantin Belousov (v & RL_CHEAT_WLOCKED) != 0) { 1579ef425e5SKonstantin Belousov if (trylock) { 1589ef425e5SKonstantin Belousov *cookie = NULL; 1599ef425e5SKonstantin Belousov return (true); 1609ef425e5SKonstantin Belousov } 1619ef425e5SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 1629ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1639ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 1649ef425e5SKonstantin Belousov x) != 0) 1659ef425e5SKonstantin Belousov goto drain1; 1669ef425e5SKonstantin Belousov sleepq_release(&lock->head); 1679ef425e5SKonstantin Belousov /* Possibly forgive passed conflict */ 16857cc80e6SKonstantin Belousov } else { 1699ef425e5SKonstantin Belousov x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING; 17057cc80e6SKonstantin Belousov if (atomic_fcmpset_acq_ptr(&lock->head, &v, 17157cc80e6SKonstantin Belousov x) != 0) 1729ef425e5SKonstantin Belousov break; 17357cc80e6SKonstantin Belousov } 1749ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1759ef425e5SKonstantin Belousov return (false); 1769ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) 1779ef425e5SKonstantin Belousov goto drain; 1789ef425e5SKonstantin Belousov } 1799ef425e5SKonstantin Belousov *(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED; 1809ef425e5SKonstantin Belousov break; 1819ef425e5SKonstantin Belousov default: 
1829ef425e5SKonstantin Belousov __assert_unreachable(); 1839ef425e5SKonstantin Belousov break; 1849ef425e5SKonstantin Belousov } 1859ef425e5SKonstantin Belousov return (true); 1869ef425e5SKonstantin Belousov } 1879ef425e5SKonstantin Belousov 1889ef425e5SKonstantin Belousov static bool 1899ef425e5SKonstantin Belousov rangelock_cheat_unlock(struct rangelock *lock, void *cookie) 1909ef425e5SKonstantin Belousov { 1919ef425e5SKonstantin Belousov uintptr_t v, x; 1929ef425e5SKonstantin Belousov 1939ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 1949ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1959ef425e5SKonstantin Belousov return (false); 1969ef425e5SKonstantin Belousov 1979ef425e5SKonstantin Belousov MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED || 1989ef425e5SKonstantin Belousov (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED); 1999ef425e5SKonstantin Belousov 2009ef425e5SKonstantin Belousov switch ((uintptr_t)cookie) { 2019ef425e5SKonstantin Belousov case RL_RET_CHEAT_RLOCKED: 2029ef425e5SKonstantin Belousov for (;;) { 2039ef425e5SKonstantin Belousov MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER); 2049ef425e5SKonstantin Belousov MPASS((v & RL_CHEAT_WLOCKED) == 0); 2059ef425e5SKonstantin Belousov x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER; 2069ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 2079ef425e5SKonstantin Belousov if (x != 0) { 2089ef425e5SKonstantin Belousov x |= RL_CHEAT_DRAINING | 2099ef425e5SKonstantin Belousov RL_CHEAT_CHEATING; 2109ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, 2119ef425e5SKonstantin Belousov &v, x) != 0) 2129ef425e5SKonstantin Belousov break; 2139ef425e5SKonstantin Belousov } else { 2149ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 2159ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, 2169ef425e5SKonstantin Belousov &v, x) != 0) { 2179ef425e5SKonstantin Belousov sleepq_broadcast( 2189ef425e5SKonstantin Belousov &lock->head, 
2199ef425e5SKonstantin Belousov SLEEPQ_SLEEP, 0, 0); 2209ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2219ef425e5SKonstantin Belousov break; 2229ef425e5SKonstantin Belousov } 2239ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2249ef425e5SKonstantin Belousov } 2259ef425e5SKonstantin Belousov } else { 2269ef425e5SKonstantin Belousov x |= RL_CHEAT_CHEATING; 2279ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 2289ef425e5SKonstantin Belousov x) != 0) 2299ef425e5SKonstantin Belousov break; 2309ef425e5SKonstantin Belousov } 2319ef425e5SKonstantin Belousov } 2329ef425e5SKonstantin Belousov break; 2339ef425e5SKonstantin Belousov case RL_RET_CHEAT_WLOCKED: 2349ef425e5SKonstantin Belousov for (;;) { 2359ef425e5SKonstantin Belousov MPASS((v & RL_CHEAT_WLOCKED) != 0); 2369ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 2379ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 2389ef425e5SKonstantin Belousov atomic_store_ptr(&lock->head, 0); 2399ef425e5SKonstantin Belousov sleepq_broadcast(&lock->head, 2409ef425e5SKonstantin Belousov SLEEPQ_SLEEP, 0, 0); 2419ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2429ef425e5SKonstantin Belousov break; 2439ef425e5SKonstantin Belousov } else { 2449ef425e5SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, 2459ef425e5SKonstantin Belousov RL_CHEAT_CHEATING) != 0) 2469ef425e5SKonstantin Belousov break; 2479ef425e5SKonstantin Belousov } 2489ef425e5SKonstantin Belousov } 2499ef425e5SKonstantin Belousov break; 2509ef425e5SKonstantin Belousov default: 2519ef425e5SKonstantin Belousov __assert_unreachable(); 2529ef425e5SKonstantin Belousov break; 2539ef425e5SKonstantin Belousov } 2549ef425e5SKonstantin Belousov return (true); 2559ef425e5SKonstantin Belousov } 2569ef425e5SKonstantin Belousov 2579ef425e5SKonstantin Belousov static bool 2589ef425e5SKonstantin Belousov rangelock_cheat_destroy(struct rangelock *lock) 2599ef425e5SKonstantin Belousov { 2609ef425e5SKonstantin 
Belousov uintptr_t v; 2619ef425e5SKonstantin Belousov 2629ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 2639ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 2649ef425e5SKonstantin Belousov return (false); 2659ef425e5SKonstantin Belousov MPASS(v == RL_CHEAT_CHEATING); 2669ef425e5SKonstantin Belousov return (true); 2679ef425e5SKonstantin Belousov } 2689ef425e5SKonstantin Belousov 2699ef425e5SKonstantin Belousov /* 270c3d8a931SKonstantin Belousov * Implementation of range locks based on the paper 271c3d8a931SKonstantin Belousov * https://doi.org/10.1145/3342195.3387533 272c3d8a931SKonstantin Belousov * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020 273c3d8a931SKonstantin Belousov * Scalable Range Locks for Scalable Address Spaces and Beyond 274c3d8a931SKonstantin Belousov * by Alex Kogan, Dave Dice, and Shady Issa 275c3d8a931SKonstantin Belousov */ 276c3d8a931SKonstantin Belousov 277c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e); 278c3d8a931SKonstantin Belousov 279c3d8a931SKonstantin Belousov /* 280c3d8a931SKonstantin Belousov * rl_q_next links all granted ranges in the lock. We cannot free an 281c3d8a931SKonstantin Belousov * rl_q_entry while in the smr section, and cannot reuse rl_q_next 282c3d8a931SKonstantin Belousov * linkage since other threads might follow it even after CAS removed 283c3d8a931SKonstantin Belousov * the range. Use rl_q_free for local list of ranges to remove after 284c3d8a931SKonstantin Belousov * the smr section is dropped. 
285c3d8a931SKonstantin Belousov */ 2868f0e9130SKonstantin Belousov struct rl_q_entry { 287c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_next; 288c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_free; 2898f0e9130SKonstantin Belousov off_t rl_q_start, rl_q_end; 2908f0e9130SKonstantin Belousov int rl_q_flags; 291c3d8a931SKonstantin Belousov #ifdef INVARIANTS 292c3d8a931SKonstantin Belousov struct thread *rl_q_owner; 293c3d8a931SKonstantin Belousov #endif 2948f0e9130SKonstantin Belousov }; 2958f0e9130SKonstantin Belousov 2968f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone; 297c3d8a931SKonstantin Belousov static smr_t rl_smr; 2988f0e9130SKonstantin Belousov 299a3f10d08SKonstantin Belousov static void rangelock_free_free(struct rl_q_entry *free); 3008a5b2db3SKonstantin Belousov static void rangelock_noncheating_destroy(struct rangelock *lock); 301a3f10d08SKonstantin Belousov 3028f0e9130SKonstantin Belousov static void 3038f0e9130SKonstantin Belousov rangelock_sys_init(void) 3048f0e9130SKonstantin Belousov { 3058f0e9130SKonstantin Belousov rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), 306c3d8a931SKonstantin Belousov NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), 307c3d8a931SKonstantin Belousov UMA_ZONE_SMR); 308c3d8a931SKonstantin Belousov rl_smr = uma_zone_get_smr(rl_entry_zone); 3098f0e9130SKonstantin Belousov } 310c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL); 3118f0e9130SKonstantin Belousov 3128f0e9130SKonstantin Belousov static struct rl_q_entry * 313c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags) 3148f0e9130SKonstantin Belousov { 315c3d8a931SKonstantin Belousov struct rl_q_entry *e; 316ff1ae3b3SKonstantin Belousov struct thread *td; 3178f0e9130SKonstantin Belousov 318ff1ae3b3SKonstantin Belousov td = curthread; 319ff1ae3b3SKonstantin Belousov if (td->td_rlqe != NULL) { 320ff1ae3b3SKonstantin Belousov e = td->td_rlqe; 
321ff1ae3b3SKonstantin Belousov td->td_rlqe = NULL; 322ff1ae3b3SKonstantin Belousov } else { 323c3d8a931SKonstantin Belousov e = uma_zalloc_smr(rl_entry_zone, M_WAITOK); 324ff1ae3b3SKonstantin Belousov } 325c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 326c3d8a931SKonstantin Belousov e->rl_q_free = NULL; 327c3d8a931SKonstantin Belousov e->rl_q_start = start; 328c3d8a931SKonstantin Belousov e->rl_q_end = end; 329c3d8a931SKonstantin Belousov e->rl_q_flags = flags; 330c3d8a931SKonstantin Belousov #ifdef INVARIANTS 331c3d8a931SKonstantin Belousov e->rl_q_owner = curthread; 332c3d8a931SKonstantin Belousov #endif 333c3d8a931SKonstantin Belousov return (e); 3348f0e9130SKonstantin Belousov } 3358f0e9130SKonstantin Belousov 3368f0e9130SKonstantin Belousov void 337ff1ae3b3SKonstantin Belousov rangelock_entry_free(struct rl_q_entry *e) 338ff1ae3b3SKonstantin Belousov { 339ff1ae3b3SKonstantin Belousov uma_zfree_smr(rl_entry_zone, e); 340ff1ae3b3SKonstantin Belousov } 341ff1ae3b3SKonstantin Belousov 342ff1ae3b3SKonstantin Belousov void 3438f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock) 3448f0e9130SKonstantin Belousov { 345c3d8a931SKonstantin Belousov lock->sleepers = false; 3469ef425e5SKonstantin Belousov atomic_store_ptr(&lock->head, rangelock_cheat ? 
RL_CHEAT_CHEATING : 0); 3478f0e9130SKonstantin Belousov } 3488f0e9130SKonstantin Belousov 3498f0e9130SKonstantin Belousov void 3508f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock) 3518f0e9130SKonstantin Belousov { 352c3d8a931SKonstantin Belousov MPASS(!lock->sleepers); 3538a5b2db3SKonstantin Belousov if (!rangelock_cheat_destroy(lock)) 3548a5b2db3SKonstantin Belousov rangelock_noncheating_destroy(lock); 355e228961dSKonstantin Belousov DEBUG_POISON_POINTER(*(void **)&lock->head); 3568f0e9130SKonstantin Belousov } 3578f0e9130SKonstantin Belousov 358c3d8a931SKonstantin Belousov static bool 359c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e) 3608f0e9130SKonstantin Belousov { 361c3d8a931SKonstantin Belousov return (((uintptr_t)e & 1) != 0); 3628f0e9130SKonstantin Belousov } 3638f0e9130SKonstantin Belousov 364c3d8a931SKonstantin Belousov static struct rl_q_entry * 3655badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e) 3665badbeeaSKonstantin Belousov { 3675badbeeaSKonstantin Belousov return ((struct rl_q_entry *)((uintptr_t)e & ~1)); 3685badbeeaSKonstantin Belousov } 3695badbeeaSKonstantin Belousov 3705badbeeaSKonstantin Belousov static struct rl_q_entry * 371c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e) 3728f0e9130SKonstantin Belousov { 373c3d8a931SKonstantin Belousov MPASS(rl_e_is_marked(e)); 3745badbeeaSKonstantin Belousov return (rl_e_unmark_unchecked(e)); 3755badbeeaSKonstantin Belousov } 3765badbeeaSKonstantin Belousov 3775badbeeaSKonstantin Belousov static void 3785badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e) 3795badbeeaSKonstantin Belousov { 3805badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__) 3815badbeeaSKonstantin Belousov int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); 3825badbeeaSKonstantin Belousov MPASS(r == 0); 3835badbeeaSKonstantin Belousov #else 3845badbeeaSKonstantin Belousov atomic_set_ptr((uintptr_t 
*)&e->rl_q_next, 1); 3855badbeeaSKonstantin Belousov #endif 3862bb93f2dSColin Percival } 3872bb93f2dSColin Percival 388c3d8a931SKonstantin Belousov static struct rl_q_entry * 389c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p) 3908f0e9130SKonstantin Belousov { 391c3d8a931SKonstantin Belousov return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p)); 3928f0e9130SKonstantin Belousov } 3938f0e9130SKonstantin Belousov 3946c32d89eSKonstantin Belousov static bool 3956c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e) 3966c32d89eSKonstantin Belousov { 3976c32d89eSKonstantin Belousov return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); 3986c32d89eSKonstantin Belousov } 3996c32d89eSKonstantin Belousov 4005badbeeaSKonstantin Belousov static void 401a3f10d08SKonstantin Belousov rangelock_free_free(struct rl_q_entry *free) 402a3f10d08SKonstantin Belousov { 403a3f10d08SKonstantin Belousov struct rl_q_entry *x, *xp; 404a3f10d08SKonstantin Belousov struct thread *td; 405a3f10d08SKonstantin Belousov 406a3f10d08SKonstantin Belousov td = curthread; 407a3f10d08SKonstantin Belousov for (x = free; x != NULL; x = xp) { 408a3f10d08SKonstantin Belousov MPASS(!rl_e_is_marked(x)); 409a3f10d08SKonstantin Belousov xp = x->rl_q_free; 410a3f10d08SKonstantin Belousov MPASS(!rl_e_is_marked(xp)); 411a3f10d08SKonstantin Belousov if (td->td_rlqe == NULL) { 412a3f10d08SKonstantin Belousov smr_synchronize(rl_smr); 413a3f10d08SKonstantin Belousov td->td_rlqe = x; 414a3f10d08SKonstantin Belousov } else { 415a3f10d08SKonstantin Belousov uma_zfree_smr(rl_entry_zone, x); 416a3f10d08SKonstantin Belousov } 417a3f10d08SKonstantin Belousov } 418a3f10d08SKonstantin Belousov } 419a3f10d08SKonstantin Belousov 420a3f10d08SKonstantin Belousov static void 4215badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) 4228f0e9130SKonstantin Belousov { 423c3158008SKonstantin Belousov bool sleepers; 424c3158008SKonstantin Belousov 
425c3d8a931SKonstantin Belousov MPASS(lock != NULL && e != NULL); 426c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next))); 427c3d8a931SKonstantin Belousov MPASS(e->rl_q_owner == curthread); 4288f0e9130SKonstantin Belousov 4295badbeeaSKonstantin Belousov rl_e_mark(e); 430c3158008SKonstantin Belousov sleepers = lock->sleepers; 431c3d8a931SKonstantin Belousov lock->sleepers = false; 432c3158008SKonstantin Belousov if (sleepers) 433c3d8a931SKonstantin Belousov sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0); 4345badbeeaSKonstantin Belousov } 4355badbeeaSKonstantin Belousov 4365badbeeaSKonstantin Belousov void 4375badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie) 4385badbeeaSKonstantin Belousov { 4399ef425e5SKonstantin Belousov if (rangelock_cheat_unlock(lock, cookie)) 4409ef425e5SKonstantin Belousov return; 4419ef425e5SKonstantin Belousov 4425badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 4435badbeeaSKonstantin Belousov rangelock_unlock_int(lock, cookie); 444c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 4458f0e9130SKonstantin Belousov } 4468f0e9130SKonstantin Belousov 4478f0e9130SKonstantin Belousov /* 4485badbeeaSKonstantin Belousov * result: -1 if e1 before e2, or both locks are readers and e1 4495badbeeaSKonstantin Belousov * starts before or at e2 4505badbeeaSKonstantin Belousov * 1 if e1 after e2, or both locks are readers and e1 4515badbeeaSKonstantin Belousov * starts after e2 4525badbeeaSKonstantin Belousov * 0 if e1 and e2 overlap and at least one lock is writer 4538f0e9130SKonstantin Belousov */ 454c3d8a931SKonstantin Belousov static int 455c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2) 4568f0e9130SKonstantin Belousov { 4575badbeeaSKonstantin Belousov bool rds; 4585badbeeaSKonstantin Belousov 459c3d8a931SKonstantin Belousov if (e1 == NULL) 460c3d8a931SKonstantin Belousov return (1); 461c3d8a931SKonstantin Belousov if 
(e2->rl_q_start >= e1->rl_q_end) 462c3d8a931SKonstantin Belousov return (-1); 4635badbeeaSKonstantin Belousov rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2); 4645badbeeaSKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_start && rds) 4655badbeeaSKonstantin Belousov return (-1); 4665badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_end) 4675badbeeaSKonstantin Belousov return (1); 4685badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_start && rds) 4695badbeeaSKonstantin Belousov return (1); 470c3d8a931SKonstantin Belousov return (0); 4718f0e9130SKonstantin Belousov } 4728f0e9130SKonstantin Belousov 473c3d8a931SKonstantin Belousov static void 474c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock) 4758f0e9130SKonstantin Belousov { 476c3d8a931SKonstantin Belousov smr_exit(rl_smr); 477c3d8a931SKonstantin Belousov DROP_GIANT(); 478c3d8a931SKonstantin Belousov lock->sleepers = true; 479c3d8a931SKonstantin Belousov sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0); 480c3d8a931SKonstantin Belousov sleepq_wait(&lock->sleepers, PRI_USER); 481c3d8a931SKonstantin Belousov PICKUP_GIANT(); 482c3d8a931SKonstantin Belousov smr_enter(rl_smr); 483c3d8a931SKonstantin Belousov } 4848f0e9130SKonstantin Belousov 485c3d8a931SKonstantin Belousov static bool 486c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, 487c3d8a931SKonstantin Belousov struct rl_q_entry *new) 488c3d8a931SKonstantin Belousov { 4899467c1a6SKonstantin Belousov MPASS(!rl_e_is_marked(old)); 490c3d8a931SKonstantin Belousov return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old, 491c3d8a931SKonstantin Belousov (uintptr_t)new) != 0); 492c3d8a931SKonstantin Belousov } 4938f0e9130SKonstantin Belousov 4948a5b2db3SKonstantin Belousov static void 4958a5b2db3SKonstantin Belousov rangelock_noncheating_destroy(struct rangelock *lock) 4968a5b2db3SKonstantin Belousov { 4978a5b2db3SKonstantin Belousov struct rl_q_entry *cur, *free, *next, **prev; 
4988a5b2db3SKonstantin Belousov 4998a5b2db3SKonstantin Belousov free = NULL; 5008a5b2db3SKonstantin Belousov again: 5018a5b2db3SKonstantin Belousov smr_enter(rl_smr); 5028a5b2db3SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 5038a5b2db3SKonstantin Belousov cur = rl_q_load(prev); 5048a5b2db3SKonstantin Belousov MPASS(!rl_e_is_marked(cur)); 5058a5b2db3SKonstantin Belousov 5068a5b2db3SKonstantin Belousov for (;;) { 5078a5b2db3SKonstantin Belousov if (cur == NULL) 5088a5b2db3SKonstantin Belousov break; 5098a5b2db3SKonstantin Belousov if (rl_e_is_marked(cur)) 5108a5b2db3SKonstantin Belousov goto again; 5118a5b2db3SKonstantin Belousov 5128a5b2db3SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 5138a5b2db3SKonstantin Belousov if (rl_e_is_marked(next)) { 5148a5b2db3SKonstantin Belousov next = rl_e_unmark(next); 5158a5b2db3SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 5168a5b2db3SKonstantin Belousov #ifdef INVARIANTS 5178a5b2db3SKonstantin Belousov cur->rl_q_owner = NULL; 5188a5b2db3SKonstantin Belousov #endif 5198a5b2db3SKonstantin Belousov cur->rl_q_free = free; 5208a5b2db3SKonstantin Belousov free = cur; 5218a5b2db3SKonstantin Belousov cur = next; 5228a5b2db3SKonstantin Belousov continue; 5238a5b2db3SKonstantin Belousov } 5248a5b2db3SKonstantin Belousov smr_exit(rl_smr); 5258a5b2db3SKonstantin Belousov goto again; 5268a5b2db3SKonstantin Belousov } 5278a5b2db3SKonstantin Belousov 5288a5b2db3SKonstantin Belousov sleepq_lock(&lock->sleepers); 5298a5b2db3SKonstantin Belousov if (!rl_e_is_marked(cur)) { 5308a5b2db3SKonstantin Belousov rl_insert_sleep(lock); 5318a5b2db3SKonstantin Belousov goto again; 5328a5b2db3SKonstantin Belousov } 5338a5b2db3SKonstantin Belousov } 5348a5b2db3SKonstantin Belousov smr_exit(rl_smr); 5358a5b2db3SKonstantin Belousov rangelock_free_free(free); 5368a5b2db3SKonstantin Belousov } 5378a5b2db3SKonstantin Belousov 5385badbeeaSKonstantin Belousov enum RL_INSERT_RES { 5395badbeeaSKonstantin Belousov RL_TRYLOCK_FAILED, 
5405badbeeaSKonstantin Belousov RL_LOCK_SUCCESS, 5415badbeeaSKonstantin Belousov RL_LOCK_RETRY, 5425badbeeaSKonstantin Belousov }; 5435badbeeaSKonstantin Belousov 5445badbeeaSKonstantin Belousov static enum RL_INSERT_RES 5455badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 5465badbeeaSKonstantin Belousov struct rl_q_entry **free) 5475badbeeaSKonstantin Belousov { 5485badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 5495badbeeaSKonstantin Belousov 550a725d618SKonstantin Belousov again: 5515badbeeaSKonstantin Belousov prev = &e->rl_q_next; 5525badbeeaSKonstantin Belousov cur = rl_q_load(prev); 5535badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ 5545badbeeaSKonstantin Belousov for (;;) { 555e6651546SMark Johnston if (cur == NULL || cur->rl_q_start >= e->rl_q_end) 5565badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 5575badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 5585badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 5595badbeeaSKonstantin Belousov next = rl_e_unmark(next); 5605badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 5615badbeeaSKonstantin Belousov cur->rl_q_free = *free; 5625badbeeaSKonstantin Belousov *free = cur; 5635badbeeaSKonstantin Belousov cur = next; 5645badbeeaSKonstantin Belousov continue; 5655badbeeaSKonstantin Belousov } 566a725d618SKonstantin Belousov goto again; 567a725d618SKonstantin Belousov } 5685badbeeaSKonstantin Belousov if (rl_e_is_rlock(cur)) { 5695badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 5705badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 5715badbeeaSKonstantin Belousov continue; 5725badbeeaSKonstantin Belousov } 5735badbeeaSKonstantin Belousov if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 5745badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 5755badbeeaSKonstantin Belousov if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 
5765badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 5775badbeeaSKonstantin Belousov continue; 5785badbeeaSKonstantin Belousov } 5795badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 5805badbeeaSKonstantin Belousov if (trylock) { 5815badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 5825badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 5835badbeeaSKonstantin Belousov } 5845badbeeaSKonstantin Belousov rl_insert_sleep(lock); 5855badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 5865badbeeaSKonstantin Belousov } 5875badbeeaSKonstantin Belousov } 5885badbeeaSKonstantin Belousov } 5895badbeeaSKonstantin Belousov 5905badbeeaSKonstantin Belousov static enum RL_INSERT_RES 5915badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, 5925badbeeaSKonstantin Belousov bool trylock, struct rl_q_entry **free) 5935badbeeaSKonstantin Belousov { 5945badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 5955badbeeaSKonstantin Belousov 596a725d618SKonstantin Belousov again: 5979ef425e5SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 5985badbeeaSKonstantin Belousov cur = rl_q_load(prev); 5995badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* head is not marked */ 6005badbeeaSKonstantin Belousov for (;;) { 6015badbeeaSKonstantin Belousov if (cur == e) 6025badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 6035badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 6045badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 6055badbeeaSKonstantin Belousov next = rl_e_unmark(next); 6065badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 60740bffb7dSKonstantin Belousov cur->rl_q_free = *free; 6085badbeeaSKonstantin Belousov *free = cur; 6095badbeeaSKonstantin Belousov cur = next; 6105badbeeaSKonstantin Belousov continue; 6115badbeeaSKonstantin Belousov } 612a725d618SKonstantin Belousov goto again; 613a725d618SKonstantin Belousov } 6145badbeeaSKonstantin Belousov if 
(cur->rl_q_end <= e->rl_q_start) { 6155badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 6165badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 6175badbeeaSKonstantin Belousov continue; 6185badbeeaSKonstantin Belousov } 6195badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 620c4d8b246SKonstantin Belousov /* Reload after sleepq is locked */ 621c4d8b246SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 622c4d8b246SKonstantin Belousov if (rl_e_is_marked(next)) { 623c4d8b246SKonstantin Belousov sleepq_release(&lock->sleepers); 624c4d8b246SKonstantin Belousov goto again; 625c4d8b246SKonstantin Belousov } 6265badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 6275badbeeaSKonstantin Belousov if (trylock) { 6285badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 6295badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 6305badbeeaSKonstantin Belousov } 6315badbeeaSKonstantin Belousov rl_insert_sleep(lock); 6325badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 6335badbeeaSKonstantin Belousov } 6345badbeeaSKonstantin Belousov } 6355badbeeaSKonstantin Belousov 6365badbeeaSKonstantin Belousov static enum RL_INSERT_RES 637c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 638c3d8a931SKonstantin Belousov struct rl_q_entry **free) 639c3d8a931SKonstantin Belousov { 640c3d8a931SKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 641c3d8a931SKonstantin Belousov int r; 6428f0e9130SKonstantin Belousov 643c3d8a931SKonstantin Belousov again: 6449ef425e5SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 6455badbeeaSKonstantin Belousov cur = rl_q_load(prev); 6465badbeeaSKonstantin Belousov if (cur == NULL && rl_q_cas(prev, NULL, e)) 6475badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 6488f0e9130SKonstantin Belousov 6495badbeeaSKonstantin Belousov for (;;) { 6505badbeeaSKonstantin Belousov if (cur != NULL) { 651c3d8a931SKonstantin Belousov if 
/*
 * NOTE(review): continuation of rl_insert() — the lock-free, ordered
 * insert of entry 'e' into the queue:
 *  - if 'cur' itself became marked, restart the walk from the head;
 *  - help unlink a marked successor (logically deleted entry) via CAS,
 *    clearing rl_q_owner under INVARIANTS and deferring the free by
 *    chaining onto '*free';
 *  - rl_e_compare() orders the walk: -1 -> 'cur' precedes 'e', advance;
 *    0 -> ranges overlap: under the sleepq lock re-check that 'cur' is
 *    still live, then fail a trylock or sleep and restart ("e is still
 *    valid" because it was never linked in on this path); 1 -> link 'e'
 *    in front of 'cur' with a CAS; on success, an acquire fence orders
 *    subsequent loads, then the lock is validated by type via
 *    rl_r_validate()/rl_w_validate().  A lost CAS resets e->rl_q_next
 *    (it may also be left over from the empty-list fast path) and
 *    re-reads the link.
 *
 * rangelock_lock_int(): common lock path for all four public entry
 * points.  Tries the "cheat" fast path first (rangelock_cheat_lock()
 * returns the cookie directly when the lock is in cheating mode).
 * Otherwise loops while RL_LOCK_RETRY: allocate a queue entry, perform
 * the insert inside an SMR read section (smr_enter/smr_exit; the
 * unlinked entries on 'free' are presumably reclaimed SMR-deferred by
 * rangelock_free_free() — confirm against its definition above), and on
 * RL_TRYLOCK_FAILED chain the unused entry onto the free list and
 * return NULL to the caller.
 */
(rl_e_is_marked(cur)) 652c3d8a931SKonstantin Belousov goto again; 653c3d8a931SKonstantin Belousov 654c3d8a931SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 655c3d8a931SKonstantin Belousov if (rl_e_is_marked(next)) { 656c3d8a931SKonstantin Belousov next = rl_e_unmark(next); 657c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 658c3d8a931SKonstantin Belousov #ifdef INVARIANTS 659c3d8a931SKonstantin Belousov cur->rl_q_owner = NULL; 660c3d8a931SKonstantin Belousov #endif 661c3d8a931SKonstantin Belousov cur->rl_q_free = *free; 662c3d8a931SKonstantin Belousov *free = cur; 663c3d8a931SKonstantin Belousov cur = next; 664c3d8a931SKonstantin Belousov continue; 665c3d8a931SKonstantin Belousov } 666a725d618SKonstantin Belousov goto again; 667a725d618SKonstantin Belousov } 668c3d8a931SKonstantin Belousov } 669c3d8a931SKonstantin Belousov 6709467c1a6SKonstantin Belousov MPASS(!rl_e_is_marked(cur)); 671c3d8a931SKonstantin Belousov r = rl_e_compare(cur, e); 672c3d8a931SKonstantin Belousov if (r == -1) { 673c3d8a931SKonstantin Belousov prev = &cur->rl_q_next; 674c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 675c3d8a931SKonstantin Belousov } else if (r == 0) { 676c3d8a931SKonstantin Belousov sleepq_lock(&lock->sleepers); 677c3d8a931SKonstantin Belousov if (__predict_false(rl_e_is_marked(rl_q_load( 678c3d8a931SKonstantin Belousov &cur->rl_q_next)))) { 679c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 680c3d8a931SKonstantin Belousov continue; 681c3d8a931SKonstantin Belousov } 682e3680954SRick Macklem if (trylock) { 683c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 6845badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 685e3680954SRick Macklem } 686c3d8a931SKonstantin Belousov rl_insert_sleep(lock); 687c3d8a931SKonstantin Belousov /* e is still valid */ 688c3d8a931SKonstantin Belousov goto again; 689c3d8a931SKonstantin Belousov } else /* r == 1 */ { 690c3d8a931SKonstantin Belousov e->rl_q_next = cur; 691c3d8a931SKonstantin
Belousov if (rl_q_cas(prev, cur, e)) { 692c3d8a931SKonstantin Belousov atomic_thread_fence_acq(); 6935badbeeaSKonstantin Belousov return (rl_e_is_rlock(e) ? 6945badbeeaSKonstantin Belousov rl_r_validate(lock, e, trylock, free) : 6955badbeeaSKonstantin Belousov rl_w_validate(lock, e, trylock, free)); 696e3680954SRick Macklem } 697c3d8a931SKonstantin Belousov /* Reset rl_q_next in case we hit fast path. */ 698c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 699c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 700c3d8a931SKonstantin Belousov } 701c3d8a931SKonstantin Belousov } 702c3d8a931SKonstantin Belousov } 703c3d8a931SKonstantin Belousov 704c3d8a931SKonstantin Belousov static struct rl_q_entry * 7055badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, 7065badbeeaSKonstantin Belousov vm_ooffset_t end, int locktype) 707c3d8a931SKonstantin Belousov { 708a3f10d08SKonstantin Belousov struct rl_q_entry *e, *free; 7099ef425e5SKonstantin Belousov void *cookie; 7105badbeeaSKonstantin Belousov enum RL_INSERT_RES res; 711c3d8a931SKonstantin Belousov 7129ef425e5SKonstantin Belousov if (rangelock_cheat_lock(lock, locktype, trylock, &cookie)) 7139ef425e5SKonstantin Belousov return (cookie); 7145badbeeaSKonstantin Belousov for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { 715c3d8a931SKonstantin Belousov free = NULL; 7165badbeeaSKonstantin Belousov e = rlqentry_alloc(start, end, locktype); 717c3d8a931SKonstantin Belousov smr_enter(rl_smr); 718c3d8a931SKonstantin Belousov res = rl_insert(lock, e, trylock, &free); 719c3d8a931SKonstantin Belousov smr_exit(rl_smr); 7205badbeeaSKonstantin Belousov if (res == RL_TRYLOCK_FAILED) { 7215badbeeaSKonstantin Belousov MPASS(trylock); 722c3d8a931SKonstantin Belousov e->rl_q_free = free; 723c3d8a931SKonstantin Belousov free = e; 724c3d8a931SKonstantin Belousov e = NULL; 725c3d8a931SKonstantin Belousov } 726a3f10d08SKonstantin Belousov rangelock_free_free(free); 727ff1ae3b3SKonstantin
/*
 * NOTE(review): end of rangelock_lock_int() (returns the queue entry,
 * which doubles as the caller's lock cookie, or NULL on a failed
 * trylock), followed by the four public entry points.  Each wrapper
 * fixes the trylock flag and the lock type (RL_LOCK_READ or
 * RL_LOCK_WRITE) and returns the opaque cookie for rangelock_unlock():
 *   rangelock_rlock()    - blocking shared (read) lock of [start, end)
 *   rangelock_tryrlock() - non-blocking shared lock, NULL on failure
 *   rangelock_wlock()    - blocking exclusive (write) lock
 *   rangelock_trywlock() - non-blocking exclusive lock, NULL on failure
 * (Whether 'end' is inclusive or exclusive is not visible here —
 * confirm against rl_e_compare()/rlqentry_alloc() above.)
 *
 * Then rangelock_may_recurse() begins: it switches a lock out of the
 * "cheating" fast-path mode because, per the comment, cheat mode cannot
 * support a thread holding multiple ranges of the same lock at once.
 */
Belousov } 728c3d8a931SKonstantin Belousov return (e); 7298f0e9130SKonstantin Belousov } 7308f0e9130SKonstantin Belousov 7318f0e9130SKonstantin Belousov void * 732c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7338f0e9130SKonstantin Belousov { 7345badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); 735e3680954SRick Macklem } 736e3680954SRick Macklem 737e3680954SRick Macklem void * 738c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 739e3680954SRick Macklem { 7405badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); 7418f0e9130SKonstantin Belousov } 7428f0e9130SKonstantin Belousov 7438f0e9130SKonstantin Belousov void * 744c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7458f0e9130SKonstantin Belousov { 7469ef425e5SKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE)); 747e3680954SRick Macklem } 748e3680954SRick Macklem 749e3680954SRick Macklem void * 750c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 751e3680954SRick Macklem { 7525badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 7538f0e9130SKonstantin Belousov } 7543155f2f0SKyle Evans 7550b6b1c28SKonstantin Belousov /* 7560b6b1c28SKonstantin Belousov * If the caller asserts that it can obtain the range locks on the 7570b6b1c28SKonstantin Belousov * same lock simultaneously, switch to the non-cheat mode. Cheat mode 7580b6b1c28SKonstantin Belousov * cannot handle it, hanging in drain or trylock retries.
7590b6b1c28SKonstantin Belousov */ 7600b6b1c28SKonstantin Belousov void 7610b6b1c28SKonstantin Belousov rangelock_may_recurse(struct rangelock *lock) 7620b6b1c28SKonstantin Belousov { 7630b6b1c28SKonstantin Belousov uintptr_t v, x; 7640b6b1c28SKonstantin Belousov 7650b6b1c28SKonstantin Belousov v = atomic_load_ptr(&lock->head); 7660b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 7670b6b1c28SKonstantin Belousov return; 7680b6b1c28SKonstantin Belousov 7690b6b1c28SKonstantin Belousov sleepq_lock(&lock->head); 7700b6b1c28SKonstantin Belousov for (;;) { 7710b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) { 7720b6b1c28SKonstantin Belousov sleepq_release(&lock->head); 7730b6b1c28SKonstantin Belousov return; 7740b6b1c28SKonstantin Belousov } 7750b6b1c28SKonstantin Belousov 7760b6b1c28SKonstantin Belousov /* Cheating and locked, drain. */ 7770b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_WLOCKED) != 0 || 7780b6b1c28SKonstantin Belousov (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) { 7790b6b1c28SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 7800b6b1c28SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) { 7810b6b1c28SKonstantin Belousov rangelock_cheat_drain(lock); 7820b6b1c28SKonstantin Belousov return; 7830b6b1c28SKonstantin Belousov } 7840b6b1c28SKonstantin Belousov continue; 7850b6b1c28SKonstantin Belousov } 7860b6b1c28SKonstantin Belousov 7870b6b1c28SKonstantin Belousov /* Cheating and unlocked, clear RL_CHEAT_CHEATING.
*/ 7880b6b1c28SKonstantin Belousov x = 0; 7890b6b1c28SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) { 7900b6b1c28SKonstantin Belousov sleepq_release(&lock->head); 7910b6b1c28SKonstantin Belousov return; 7920b6b1c28SKonstantin Belousov } 7930b6b1c28SKonstantin Belousov } 7940b6b1c28SKonstantin Belousov } 7950b6b1c28SKonstantin Belousov 7963155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT 7973155f2f0SKyle Evans void 7983155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 7993155f2f0SKyle Evans { 8003155f2f0SKyle Evans } 8013155f2f0SKyle Evans #endif /* INVARIANT_SUPPORT */ 802c3d8a931SKonstantin Belousov 803c3d8a931SKonstantin Belousov #include "opt_ddb.h" 804c3d8a931SKonstantin Belousov #ifdef DDB 805c3d8a931SKonstantin Belousov #include <ddb/ddb.h> 806c3d8a931SKonstantin Belousov 807c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock) 808c3d8a931SKonstantin Belousov { 809c3d8a931SKonstantin Belousov struct rangelock *lock; 810c3d8a931SKonstantin Belousov struct rl_q_entry *e, *x; 8119ef425e5SKonstantin Belousov uintptr_t v; 812c3d8a931SKonstantin Belousov 813c3d8a931SKonstantin Belousov if (!have_addr) { 814c3d8a931SKonstantin Belousov db_printf("show rangelock addr\n"); 815c3d8a931SKonstantin Belousov return; 816c3d8a931SKonstantin Belousov } 817c3d8a931SKonstantin Belousov 818c3d8a931SKonstantin Belousov lock = (struct rangelock *)addr; 819c3d8a931SKonstantin Belousov db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers); 8209ef425e5SKonstantin Belousov v = lock->head; 8219ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) != 0) { 8229ef425e5SKonstantin Belousov db_printf(" cheating head %#jx\n", (uintmax_t)v); 8239ef425e5SKonstantin Belousov return; 8249ef425e5SKonstantin Belousov } 8259ef425e5SKonstantin Belousov for (e = (struct rl_q_entry *)(lock->head);;) { 826c3d8a931SKonstantin Belousov x = rl_e_is_marked(e) ?
rl_e_unmark(e) : e; 827c3d8a931SKonstantin Belousov if (x == NULL) 828c3d8a931SKonstantin Belousov break; 829c3d8a931SKonstantin Belousov db_printf(" entry %p marked %d %d start %#jx end %#jx " 830c3d8a931SKonstantin Belousov "flags %x next %p", 831c3d8a931SKonstantin Belousov e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next), 832c3d8a931SKonstantin Belousov x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next); 833c3d8a931SKonstantin Belousov #ifdef INVARIANTS 834c3d8a931SKonstantin Belousov db_printf(" owner %p (%d)", x->rl_q_owner, 835c3d8a931SKonstantin Belousov x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1); 836c3d8a931SKonstantin Belousov #endif 837c3d8a931SKonstantin Belousov db_printf("\n"); 838c3d8a931SKonstantin Belousov e = x->rl_q_next; 839c3d8a931SKonstantin Belousov } 840c3d8a931SKonstantin Belousov } 841c3d8a931SKonstantin Belousov 842c3d8a931SKonstantin Belousov #endif /* DDB */ 843
/*
 * NOTE(review): the span above contains:
 *  - the tail of rangelock_may_recurse(): the "cheating and unlocked"
 *    arm clears the whole head word to 0 (dropping RL_CHEAT_CHEATING)
 *    via fcmpset and releases the sleepq; a lost fcmpset re-reads 'v'
 *    and loops;
 *  - _rangelock_cookie_assert(): an empty INVARIANT_SUPPORT stub — the
 *    rangelock cookie assertions are intentionally no-ops here;
 *  - the DDB "show rangelock" command: without an address it prints
 *    usage; in cheat mode it prints the raw head word; otherwise it
 *    walks the entry list, printing each entry's marked state (both of
 *    the entry pointer itself and of its next link), range, flags and
 *    next pointer, plus the owning thread under INVARIANTS.  It
 *    follows x->rl_q_next without unmarking, so the loop tolerates
 *    marked (removed) entries as the debugger sees them.
 */