18f0e9130SKonstantin Belousov /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 48f0e9130SKonstantin Belousov * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 5d8a16b6aSKonstantin Belousov * Copyright (c) 2023 The FreeBSD Foundation 6d8a16b6aSKonstantin Belousov * 7d8a16b6aSKonstantin Belousov * Portions of this software were developed by 8d8a16b6aSKonstantin Belousov * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 9d8a16b6aSKonstantin Belousov * the FreeBSD Foundation. 108f0e9130SKonstantin Belousov * 118f0e9130SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 128f0e9130SKonstantin Belousov * modification, are permitted provided that the following conditions 138f0e9130SKonstantin Belousov * are met: 148f0e9130SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 158f0e9130SKonstantin Belousov * notice unmodified, this list of conditions, and the following 168f0e9130SKonstantin Belousov * disclaimer. 178f0e9130SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 188f0e9130SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 198f0e9130SKonstantin Belousov * documentation and/or other materials provided with the distribution. 208f0e9130SKonstantin Belousov * 218f0e9130SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 228f0e9130SKonstantin Belousov * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 238f0e9130SKonstantin Belousov * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
248f0e9130SKonstantin Belousov * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 258f0e9130SKonstantin Belousov * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 268f0e9130SKonstantin Belousov * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 278f0e9130SKonstantin Belousov * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 288f0e9130SKonstantin Belousov * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 298f0e9130SKonstantin Belousov * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 308f0e9130SKonstantin Belousov * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 318f0e9130SKonstantin Belousov */ 328f0e9130SKonstantin Belousov 338f0e9130SKonstantin Belousov #include <sys/param.h> 34c3d8a931SKonstantin Belousov #include <sys/kassert.h> 358f0e9130SKonstantin Belousov #include <sys/kernel.h> 368f0e9130SKonstantin Belousov #include <sys/lock.h> 378f0e9130SKonstantin Belousov #include <sys/mutex.h> 388f0e9130SKonstantin Belousov #include <sys/proc.h> 398f0e9130SKonstantin Belousov #include <sys/rangelock.h> 40c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h> 41c3d8a931SKonstantin Belousov #include <sys/smr.h> 429ef425e5SKonstantin Belousov #include <sys/sysctl.h> 438f0e9130SKonstantin Belousov 448f0e9130SKonstantin Belousov #include <vm/uma.h> 458f0e9130SKonstantin Belousov 46c3d8a931SKonstantin Belousov /* 479ef425e5SKonstantin Belousov * Immediately after initialization (subject to 'rangelock_cheat' 489ef425e5SKonstantin Belousov * below), and until a request comes that conflicts with granted ones 499ef425e5SKonstantin Belousov * based on type, rangelocks serve requests in the "cheating" mode. 509ef425e5SKonstantin Belousov * In this mode, a rangelock behaves like a sxlock, as if each request 519ef425e5SKonstantin Belousov * covered the whole range of the protected object. 
On receiving a
 * conflicting request (any request while a write request is
 * effective, or any write request while some read ones are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to effectively keeping track of the
 * precise range of each new request.
 *
 * The normal sx(9) implementation is not used, to avoid bloating the
 * structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded, and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks are the same as normal locks but do not drain.
679ef425e5SKonstantin Belousov */ 689ef425e5SKonstantin Belousov 699ef425e5SKonstantin Belousov static int rangelock_cheat = 1; 709ef425e5SKonstantin Belousov SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN, 719ef425e5SKonstantin Belousov &rangelock_cheat, 0, 729ef425e5SKonstantin Belousov ""); 739ef425e5SKonstantin Belousov 749ef425e5SKonstantin Belousov #define RL_CHEAT_MASK 0x7 759ef425e5SKonstantin Belousov #define RL_CHEAT_CHEATING 0x1 769ef425e5SKonstantin Belousov /* #define RL_CHEAT_RLOCKED 0x0 */ 779ef425e5SKonstantin Belousov #define RL_CHEAT_WLOCKED 0x2 789ef425e5SKonstantin Belousov #define RL_CHEAT_DRAINING 0x4 799ef425e5SKonstantin Belousov 809ef425e5SKonstantin Belousov #define RL_CHEAT_READER 0x8 819ef425e5SKonstantin Belousov 829ef425e5SKonstantin Belousov #define RL_RET_CHEAT_RLOCKED 0x1100 839ef425e5SKonstantin Belousov #define RL_RET_CHEAT_WLOCKED 0x2200 849ef425e5SKonstantin Belousov 859ef425e5SKonstantin Belousov static bool 869ef425e5SKonstantin Belousov rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock, 879ef425e5SKonstantin Belousov void **cookie) 889ef425e5SKonstantin Belousov { 899ef425e5SKonstantin Belousov uintptr_t v, x; 909ef425e5SKonstantin Belousov 919ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 929ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 939ef425e5SKonstantin Belousov return (false); 949ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 959ef425e5SKonstantin Belousov drain: 969ef425e5SKonstantin Belousov if (trylock) { 979ef425e5SKonstantin Belousov *cookie = NULL; 989ef425e5SKonstantin Belousov return (true); 999ef425e5SKonstantin Belousov } 1009ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1019ef425e5SKonstantin Belousov drain1: 1029ef425e5SKonstantin Belousov DROP_GIANT(); 1039ef425e5SKonstantin Belousov for (;;) { 1049ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 1059ef425e5SKonstantin Belousov if 
((v & RL_CHEAT_DRAINING) == 0) 1069ef425e5SKonstantin Belousov break; 1079ef425e5SKonstantin Belousov sleepq_add(&lock->head, NULL, "ranged1", 0, 0); 1089ef425e5SKonstantin Belousov sleepq_wait(&lock->head, PRI_USER); 1099ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1109ef425e5SKonstantin Belousov } 1119ef425e5SKonstantin Belousov sleepq_release(&lock->head); 1129ef425e5SKonstantin Belousov PICKUP_GIANT(); 1139ef425e5SKonstantin Belousov return (false); 1149ef425e5SKonstantin Belousov } 1159ef425e5SKonstantin Belousov 1169ef425e5SKonstantin Belousov switch (locktype) { 1179ef425e5SKonstantin Belousov case RL_LOCK_READ: 1189ef425e5SKonstantin Belousov for (;;) { 1199ef425e5SKonstantin Belousov if ((v & RL_CHEAT_WLOCKED) != 0) { 1209ef425e5SKonstantin Belousov if (trylock) { 1219ef425e5SKonstantin Belousov *cookie = NULL; 1229ef425e5SKonstantin Belousov return (true); 1239ef425e5SKonstantin Belousov } 1249ef425e5SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 1259ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1269ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 1279ef425e5SKonstantin Belousov x) != 0) 1289ef425e5SKonstantin Belousov goto drain1; 1299ef425e5SKonstantin Belousov sleepq_release(&lock->head); 1309ef425e5SKonstantin Belousov /* Possibly forgive passed conflict */ 13157cc80e6SKonstantin Belousov } else { 1329ef425e5SKonstantin Belousov x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER; 1339ef425e5SKonstantin Belousov x |= RL_CHEAT_CHEATING; 13457cc80e6SKonstantin Belousov if (atomic_fcmpset_acq_ptr(&lock->head, &v, 13557cc80e6SKonstantin Belousov x) != 0) 1369ef425e5SKonstantin Belousov break; 13757cc80e6SKonstantin Belousov } 1389ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1399ef425e5SKonstantin Belousov return (false); 1409ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) 1419ef425e5SKonstantin Belousov goto drain; 1429ef425e5SKonstantin Belousov } 1439ef425e5SKonstantin Belousov *(uintptr_t 
*)cookie = RL_RET_CHEAT_RLOCKED; 1449ef425e5SKonstantin Belousov break; 1459ef425e5SKonstantin Belousov case RL_LOCK_WRITE: 1469ef425e5SKonstantin Belousov for (;;) { 1479ef425e5SKonstantin Belousov if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER || 1489ef425e5SKonstantin Belousov (v & RL_CHEAT_WLOCKED) != 0) { 1499ef425e5SKonstantin Belousov if (trylock) { 1509ef425e5SKonstantin Belousov *cookie = NULL; 1519ef425e5SKonstantin Belousov return (true); 1529ef425e5SKonstantin Belousov } 1539ef425e5SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 1549ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 1559ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 1569ef425e5SKonstantin Belousov x) != 0) 1579ef425e5SKonstantin Belousov goto drain1; 1589ef425e5SKonstantin Belousov sleepq_release(&lock->head); 1599ef425e5SKonstantin Belousov /* Possibly forgive passed conflict */ 16057cc80e6SKonstantin Belousov } else { 1619ef425e5SKonstantin Belousov x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING; 16257cc80e6SKonstantin Belousov if (atomic_fcmpset_acq_ptr(&lock->head, &v, 16357cc80e6SKonstantin Belousov x) != 0) 1649ef425e5SKonstantin Belousov break; 16557cc80e6SKonstantin Belousov } 1669ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1679ef425e5SKonstantin Belousov return (false); 1689ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) 1699ef425e5SKonstantin Belousov goto drain; 1709ef425e5SKonstantin Belousov } 1719ef425e5SKonstantin Belousov *(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED; 1729ef425e5SKonstantin Belousov break; 1739ef425e5SKonstantin Belousov default: 1749ef425e5SKonstantin Belousov __assert_unreachable(); 1759ef425e5SKonstantin Belousov break; 1769ef425e5SKonstantin Belousov } 1779ef425e5SKonstantin Belousov return (true); 1789ef425e5SKonstantin Belousov } 1799ef425e5SKonstantin Belousov 1809ef425e5SKonstantin Belousov static bool 1819ef425e5SKonstantin Belousov rangelock_cheat_unlock(struct rangelock *lock, void *cookie) 
1829ef425e5SKonstantin Belousov { 1839ef425e5SKonstantin Belousov uintptr_t v, x; 1849ef425e5SKonstantin Belousov 1859ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 1869ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 1879ef425e5SKonstantin Belousov return (false); 1889ef425e5SKonstantin Belousov 1899ef425e5SKonstantin Belousov MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED || 1909ef425e5SKonstantin Belousov (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED); 1919ef425e5SKonstantin Belousov 1929ef425e5SKonstantin Belousov switch ((uintptr_t)cookie) { 1939ef425e5SKonstantin Belousov case RL_RET_CHEAT_RLOCKED: 1949ef425e5SKonstantin Belousov for (;;) { 1959ef425e5SKonstantin Belousov MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER); 1969ef425e5SKonstantin Belousov MPASS((v & RL_CHEAT_WLOCKED) == 0); 1979ef425e5SKonstantin Belousov x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER; 1989ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 1999ef425e5SKonstantin Belousov if (x != 0) { 2009ef425e5SKonstantin Belousov x |= RL_CHEAT_DRAINING | 2019ef425e5SKonstantin Belousov RL_CHEAT_CHEATING; 2029ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, 2039ef425e5SKonstantin Belousov &v, x) != 0) 2049ef425e5SKonstantin Belousov break; 2059ef425e5SKonstantin Belousov } else { 2069ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 2079ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, 2089ef425e5SKonstantin Belousov &v, x) != 0) { 2099ef425e5SKonstantin Belousov sleepq_broadcast( 2109ef425e5SKonstantin Belousov &lock->head, 2119ef425e5SKonstantin Belousov SLEEPQ_SLEEP, 0, 0); 2129ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2139ef425e5SKonstantin Belousov break; 2149ef425e5SKonstantin Belousov } 2159ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2169ef425e5SKonstantin Belousov } 2179ef425e5SKonstantin Belousov } else { 2189ef425e5SKonstantin Belousov x |= RL_CHEAT_CHEATING; 
2199ef425e5SKonstantin Belousov if (atomic_fcmpset_rel_ptr(&lock->head, &v, 2209ef425e5SKonstantin Belousov x) != 0) 2219ef425e5SKonstantin Belousov break; 2229ef425e5SKonstantin Belousov } 2239ef425e5SKonstantin Belousov } 2249ef425e5SKonstantin Belousov break; 2259ef425e5SKonstantin Belousov case RL_RET_CHEAT_WLOCKED: 2269ef425e5SKonstantin Belousov for (;;) { 2279ef425e5SKonstantin Belousov MPASS((v & RL_CHEAT_WLOCKED) != 0); 2289ef425e5SKonstantin Belousov if ((v & RL_CHEAT_DRAINING) != 0) { 2299ef425e5SKonstantin Belousov sleepq_lock(&lock->head); 2309ef425e5SKonstantin Belousov atomic_store_ptr(&lock->head, 0); 2319ef425e5SKonstantin Belousov sleepq_broadcast(&lock->head, 2329ef425e5SKonstantin Belousov SLEEPQ_SLEEP, 0, 0); 2339ef425e5SKonstantin Belousov sleepq_release(&lock->head); 2349ef425e5SKonstantin Belousov break; 2359ef425e5SKonstantin Belousov } else { 2369ef425e5SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, 2379ef425e5SKonstantin Belousov RL_CHEAT_CHEATING) != 0) 2389ef425e5SKonstantin Belousov break; 2399ef425e5SKonstantin Belousov } 2409ef425e5SKonstantin Belousov } 2419ef425e5SKonstantin Belousov break; 2429ef425e5SKonstantin Belousov default: 2439ef425e5SKonstantin Belousov __assert_unreachable(); 2449ef425e5SKonstantin Belousov break; 2459ef425e5SKonstantin Belousov } 2469ef425e5SKonstantin Belousov return (true); 2479ef425e5SKonstantin Belousov } 2489ef425e5SKonstantin Belousov 2499ef425e5SKonstantin Belousov static bool 2509ef425e5SKonstantin Belousov rangelock_cheat_destroy(struct rangelock *lock) 2519ef425e5SKonstantin Belousov { 2529ef425e5SKonstantin Belousov uintptr_t v; 2539ef425e5SKonstantin Belousov 2549ef425e5SKonstantin Belousov v = (uintptr_t)atomic_load_ptr(&lock->head); 2559ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 2569ef425e5SKonstantin Belousov return (false); 2579ef425e5SKonstantin Belousov MPASS(v == RL_CHEAT_CHEATING); 2589ef425e5SKonstantin Belousov return (true); 2599ef425e5SKonstantin 
Belousov } 2609ef425e5SKonstantin Belousov 2619ef425e5SKonstantin Belousov /* 262c3d8a931SKonstantin Belousov * Implementation of range locks based on the paper 263c3d8a931SKonstantin Belousov * https://doi.org/10.1145/3342195.3387533 264c3d8a931SKonstantin Belousov * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020 265c3d8a931SKonstantin Belousov * Scalable Range Locks for Scalable Address Spaces and Beyond 266c3d8a931SKonstantin Belousov * by Alex Kogan, Dave Dice, and Shady Issa 267c3d8a931SKonstantin Belousov */ 268c3d8a931SKonstantin Belousov 269c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e); 270c3d8a931SKonstantin Belousov 271c3d8a931SKonstantin Belousov /* 272c3d8a931SKonstantin Belousov * rl_q_next links all granted ranges in the lock. We cannot free an 273c3d8a931SKonstantin Belousov * rl_q_entry while in the smr section, and cannot reuse rl_q_next 274c3d8a931SKonstantin Belousov * linkage since other threads might follow it even after CAS removed 275c3d8a931SKonstantin Belousov * the range. Use rl_q_free for local list of ranges to remove after 276c3d8a931SKonstantin Belousov * the smr section is dropped. 
277c3d8a931SKonstantin Belousov */ 2788f0e9130SKonstantin Belousov struct rl_q_entry { 279c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_next; 280c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_free; 2818f0e9130SKonstantin Belousov off_t rl_q_start, rl_q_end; 2828f0e9130SKonstantin Belousov int rl_q_flags; 283c3d8a931SKonstantin Belousov #ifdef INVARIANTS 284c3d8a931SKonstantin Belousov struct thread *rl_q_owner; 285c3d8a931SKonstantin Belousov #endif 2868f0e9130SKonstantin Belousov }; 2878f0e9130SKonstantin Belousov 2888f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone; 289c3d8a931SKonstantin Belousov static smr_t rl_smr; 2908f0e9130SKonstantin Belousov 291a3f10d08SKonstantin Belousov static void rangelock_free_free(struct rl_q_entry *free); 2928a5b2db3SKonstantin Belousov static void rangelock_noncheating_destroy(struct rangelock *lock); 293a3f10d08SKonstantin Belousov 2948f0e9130SKonstantin Belousov static void 2958f0e9130SKonstantin Belousov rangelock_sys_init(void) 2968f0e9130SKonstantin Belousov { 2978f0e9130SKonstantin Belousov rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), 298c3d8a931SKonstantin Belousov NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), 299c3d8a931SKonstantin Belousov UMA_ZONE_SMR); 300c3d8a931SKonstantin Belousov rl_smr = uma_zone_get_smr(rl_entry_zone); 3018f0e9130SKonstantin Belousov } 302c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL); 3038f0e9130SKonstantin Belousov 3048f0e9130SKonstantin Belousov static struct rl_q_entry * 305c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags) 3068f0e9130SKonstantin Belousov { 307c3d8a931SKonstantin Belousov struct rl_q_entry *e; 308ff1ae3b3SKonstantin Belousov struct thread *td; 3098f0e9130SKonstantin Belousov 310ff1ae3b3SKonstantin Belousov td = curthread; 311ff1ae3b3SKonstantin Belousov if (td->td_rlqe != NULL) { 312ff1ae3b3SKonstantin Belousov e = td->td_rlqe; 
313ff1ae3b3SKonstantin Belousov td->td_rlqe = NULL; 314ff1ae3b3SKonstantin Belousov } else { 315c3d8a931SKonstantin Belousov e = uma_zalloc_smr(rl_entry_zone, M_WAITOK); 316ff1ae3b3SKonstantin Belousov } 317c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 318c3d8a931SKonstantin Belousov e->rl_q_free = NULL; 319c3d8a931SKonstantin Belousov e->rl_q_start = start; 320c3d8a931SKonstantin Belousov e->rl_q_end = end; 321c3d8a931SKonstantin Belousov e->rl_q_flags = flags; 322c3d8a931SKonstantin Belousov #ifdef INVARIANTS 323c3d8a931SKonstantin Belousov e->rl_q_owner = curthread; 324c3d8a931SKonstantin Belousov #endif 325c3d8a931SKonstantin Belousov return (e); 3268f0e9130SKonstantin Belousov } 3278f0e9130SKonstantin Belousov 3288f0e9130SKonstantin Belousov void 329ff1ae3b3SKonstantin Belousov rangelock_entry_free(struct rl_q_entry *e) 330ff1ae3b3SKonstantin Belousov { 331ff1ae3b3SKonstantin Belousov uma_zfree_smr(rl_entry_zone, e); 332ff1ae3b3SKonstantin Belousov } 333ff1ae3b3SKonstantin Belousov 334ff1ae3b3SKonstantin Belousov void 3358f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock) 3368f0e9130SKonstantin Belousov { 337c3d8a931SKonstantin Belousov lock->sleepers = false; 3389ef425e5SKonstantin Belousov atomic_store_ptr(&lock->head, rangelock_cheat ? 
RL_CHEAT_CHEATING : 0); 3398f0e9130SKonstantin Belousov } 3408f0e9130SKonstantin Belousov 3418f0e9130SKonstantin Belousov void 3428f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock) 3438f0e9130SKonstantin Belousov { 344c3d8a931SKonstantin Belousov MPASS(!lock->sleepers); 3458a5b2db3SKonstantin Belousov if (!rangelock_cheat_destroy(lock)) 3468a5b2db3SKonstantin Belousov rangelock_noncheating_destroy(lock); 347e228961dSKonstantin Belousov DEBUG_POISON_POINTER(*(void **)&lock->head); 3488f0e9130SKonstantin Belousov } 3498f0e9130SKonstantin Belousov 350c3d8a931SKonstantin Belousov static bool 351c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e) 3528f0e9130SKonstantin Belousov { 353c3d8a931SKonstantin Belousov return (((uintptr_t)e & 1) != 0); 3548f0e9130SKonstantin Belousov } 3558f0e9130SKonstantin Belousov 356c3d8a931SKonstantin Belousov static struct rl_q_entry * 3575badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e) 3585badbeeaSKonstantin Belousov { 3595badbeeaSKonstantin Belousov return ((struct rl_q_entry *)((uintptr_t)e & ~1)); 3605badbeeaSKonstantin Belousov } 3615badbeeaSKonstantin Belousov 3625badbeeaSKonstantin Belousov static struct rl_q_entry * 363c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e) 3648f0e9130SKonstantin Belousov { 365c3d8a931SKonstantin Belousov MPASS(rl_e_is_marked(e)); 3665badbeeaSKonstantin Belousov return (rl_e_unmark_unchecked(e)); 3675badbeeaSKonstantin Belousov } 3685badbeeaSKonstantin Belousov 3695badbeeaSKonstantin Belousov static void 3705badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e) 3715badbeeaSKonstantin Belousov { 3725badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__) 3735badbeeaSKonstantin Belousov int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); 3745badbeeaSKonstantin Belousov MPASS(r == 0); 3755badbeeaSKonstantin Belousov #else 3765badbeeaSKonstantin Belousov atomic_set_ptr((uintptr_t 
*)&e->rl_q_next, 1); 3775badbeeaSKonstantin Belousov #endif 3782bb93f2dSColin Percival } 3792bb93f2dSColin Percival 380c3d8a931SKonstantin Belousov static struct rl_q_entry * 381c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p) 3828f0e9130SKonstantin Belousov { 383c3d8a931SKonstantin Belousov return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p)); 3848f0e9130SKonstantin Belousov } 3858f0e9130SKonstantin Belousov 3866c32d89eSKonstantin Belousov static bool 3876c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e) 3886c32d89eSKonstantin Belousov { 3896c32d89eSKonstantin Belousov return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); 3906c32d89eSKonstantin Belousov } 3916c32d89eSKonstantin Belousov 3925badbeeaSKonstantin Belousov static void 393a3f10d08SKonstantin Belousov rangelock_free_free(struct rl_q_entry *free) 394a3f10d08SKonstantin Belousov { 395a3f10d08SKonstantin Belousov struct rl_q_entry *x, *xp; 396a3f10d08SKonstantin Belousov struct thread *td; 397a3f10d08SKonstantin Belousov 398a3f10d08SKonstantin Belousov td = curthread; 399a3f10d08SKonstantin Belousov for (x = free; x != NULL; x = xp) { 400a3f10d08SKonstantin Belousov MPASS(!rl_e_is_marked(x)); 401a3f10d08SKonstantin Belousov xp = x->rl_q_free; 402a3f10d08SKonstantin Belousov MPASS(!rl_e_is_marked(xp)); 403a3f10d08SKonstantin Belousov if (td->td_rlqe == NULL) { 404a3f10d08SKonstantin Belousov smr_synchronize(rl_smr); 405a3f10d08SKonstantin Belousov td->td_rlqe = x; 406a3f10d08SKonstantin Belousov } else { 407a3f10d08SKonstantin Belousov uma_zfree_smr(rl_entry_zone, x); 408a3f10d08SKonstantin Belousov } 409a3f10d08SKonstantin Belousov } 410a3f10d08SKonstantin Belousov } 411a3f10d08SKonstantin Belousov 412a3f10d08SKonstantin Belousov static void 4135badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) 4148f0e9130SKonstantin Belousov { 415c3158008SKonstantin Belousov bool sleepers; 416c3158008SKonstantin Belousov 
417c3d8a931SKonstantin Belousov MPASS(lock != NULL && e != NULL); 418c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next))); 419c3d8a931SKonstantin Belousov MPASS(e->rl_q_owner == curthread); 4208f0e9130SKonstantin Belousov 4215badbeeaSKonstantin Belousov rl_e_mark(e); 422c3158008SKonstantin Belousov sleepers = lock->sleepers; 423c3d8a931SKonstantin Belousov lock->sleepers = false; 424c3158008SKonstantin Belousov if (sleepers) 425c3d8a931SKonstantin Belousov sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0); 4265badbeeaSKonstantin Belousov } 4275badbeeaSKonstantin Belousov 4285badbeeaSKonstantin Belousov void 4295badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie) 4305badbeeaSKonstantin Belousov { 4319ef425e5SKonstantin Belousov if (rangelock_cheat_unlock(lock, cookie)) 4329ef425e5SKonstantin Belousov return; 4339ef425e5SKonstantin Belousov 4345badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 4355badbeeaSKonstantin Belousov rangelock_unlock_int(lock, cookie); 436c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 4378f0e9130SKonstantin Belousov } 4388f0e9130SKonstantin Belousov 4398f0e9130SKonstantin Belousov /* 4405badbeeaSKonstantin Belousov * result: -1 if e1 before e2, or both locks are readers and e1 4415badbeeaSKonstantin Belousov * starts before or at e2 4425badbeeaSKonstantin Belousov * 1 if e1 after e2, or both locks are readers and e1 4435badbeeaSKonstantin Belousov * starts after e2 4445badbeeaSKonstantin Belousov * 0 if e1 and e2 overlap and at least one lock is writer 4458f0e9130SKonstantin Belousov */ 446c3d8a931SKonstantin Belousov static int 447c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2) 4488f0e9130SKonstantin Belousov { 4495badbeeaSKonstantin Belousov bool rds; 4505badbeeaSKonstantin Belousov 451c3d8a931SKonstantin Belousov if (e1 == NULL) 452c3d8a931SKonstantin Belousov return (1); 453c3d8a931SKonstantin Belousov if 
(e2->rl_q_start >= e1->rl_q_end) 454c3d8a931SKonstantin Belousov return (-1); 4555badbeeaSKonstantin Belousov rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2); 4565badbeeaSKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_start && rds) 4575badbeeaSKonstantin Belousov return (-1); 4585badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_end) 4595badbeeaSKonstantin Belousov return (1); 4605badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_start && rds) 4615badbeeaSKonstantin Belousov return (1); 462c3d8a931SKonstantin Belousov return (0); 4638f0e9130SKonstantin Belousov } 4648f0e9130SKonstantin Belousov 465c3d8a931SKonstantin Belousov static void 466c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock) 4678f0e9130SKonstantin Belousov { 468c3d8a931SKonstantin Belousov smr_exit(rl_smr); 469c3d8a931SKonstantin Belousov DROP_GIANT(); 470c3d8a931SKonstantin Belousov lock->sleepers = true; 471c3d8a931SKonstantin Belousov sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0); 472c3d8a931SKonstantin Belousov sleepq_wait(&lock->sleepers, PRI_USER); 473c3d8a931SKonstantin Belousov PICKUP_GIANT(); 474c3d8a931SKonstantin Belousov smr_enter(rl_smr); 475c3d8a931SKonstantin Belousov } 4768f0e9130SKonstantin Belousov 477c3d8a931SKonstantin Belousov static bool 478c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, 479c3d8a931SKonstantin Belousov struct rl_q_entry *new) 480c3d8a931SKonstantin Belousov { 481*9467c1a6SKonstantin Belousov MPASS(!rl_e_is_marked(old)); 482c3d8a931SKonstantin Belousov return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old, 483c3d8a931SKonstantin Belousov (uintptr_t)new) != 0); 484c3d8a931SKonstantin Belousov } 4858f0e9130SKonstantin Belousov 4868a5b2db3SKonstantin Belousov static void 4878a5b2db3SKonstantin Belousov rangelock_noncheating_destroy(struct rangelock *lock) 4888a5b2db3SKonstantin Belousov { 4898a5b2db3SKonstantin Belousov struct rl_q_entry *cur, *free, *next, **prev; 
4908a5b2db3SKonstantin Belousov 4918a5b2db3SKonstantin Belousov free = NULL; 4928a5b2db3SKonstantin Belousov again: 4938a5b2db3SKonstantin Belousov smr_enter(rl_smr); 4948a5b2db3SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 4958a5b2db3SKonstantin Belousov cur = rl_q_load(prev); 4968a5b2db3SKonstantin Belousov MPASS(!rl_e_is_marked(cur)); 4978a5b2db3SKonstantin Belousov 4988a5b2db3SKonstantin Belousov for (;;) { 4998a5b2db3SKonstantin Belousov if (cur == NULL) 5008a5b2db3SKonstantin Belousov break; 5018a5b2db3SKonstantin Belousov if (rl_e_is_marked(cur)) 5028a5b2db3SKonstantin Belousov goto again; 5038a5b2db3SKonstantin Belousov 5048a5b2db3SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 5058a5b2db3SKonstantin Belousov if (rl_e_is_marked(next)) { 5068a5b2db3SKonstantin Belousov next = rl_e_unmark(next); 5078a5b2db3SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 5088a5b2db3SKonstantin Belousov #ifdef INVARIANTS 5098a5b2db3SKonstantin Belousov cur->rl_q_owner = NULL; 5108a5b2db3SKonstantin Belousov #endif 5118a5b2db3SKonstantin Belousov cur->rl_q_free = free; 5128a5b2db3SKonstantin Belousov free = cur; 5138a5b2db3SKonstantin Belousov cur = next; 5148a5b2db3SKonstantin Belousov continue; 5158a5b2db3SKonstantin Belousov } 5168a5b2db3SKonstantin Belousov smr_exit(rl_smr); 5178a5b2db3SKonstantin Belousov goto again; 5188a5b2db3SKonstantin Belousov } 5198a5b2db3SKonstantin Belousov 5208a5b2db3SKonstantin Belousov sleepq_lock(&lock->sleepers); 5218a5b2db3SKonstantin Belousov if (!rl_e_is_marked(cur)) { 5228a5b2db3SKonstantin Belousov rl_insert_sleep(lock); 5238a5b2db3SKonstantin Belousov goto again; 5248a5b2db3SKonstantin Belousov } 5258a5b2db3SKonstantin Belousov } 5268a5b2db3SKonstantin Belousov smr_exit(rl_smr); 5278a5b2db3SKonstantin Belousov rangelock_free_free(free); 5288a5b2db3SKonstantin Belousov } 5298a5b2db3SKonstantin Belousov 5305badbeeaSKonstantin Belousov enum RL_INSERT_RES { 5315badbeeaSKonstantin Belousov RL_TRYLOCK_FAILED, 
5325badbeeaSKonstantin Belousov RL_LOCK_SUCCESS, 5335badbeeaSKonstantin Belousov RL_LOCK_RETRY, 5345badbeeaSKonstantin Belousov }; 5355badbeeaSKonstantin Belousov 5365badbeeaSKonstantin Belousov static enum RL_INSERT_RES 5375badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 5385badbeeaSKonstantin Belousov struct rl_q_entry **free) 5395badbeeaSKonstantin Belousov { 5405badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 5415badbeeaSKonstantin Belousov 5425badbeeaSKonstantin Belousov prev = &e->rl_q_next; 5435badbeeaSKonstantin Belousov cur = rl_q_load(prev); 5445badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ 5455badbeeaSKonstantin Belousov for (;;) { 5465badbeeaSKonstantin Belousov if (cur == NULL || cur->rl_q_start > e->rl_q_end) 5475badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 5485badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 5495badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 5505badbeeaSKonstantin Belousov next = rl_e_unmark(next); 5515badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 5525badbeeaSKonstantin Belousov cur->rl_q_free = *free; 5535badbeeaSKonstantin Belousov *free = cur; 5545badbeeaSKonstantin Belousov } 5555badbeeaSKonstantin Belousov cur = next; 5565badbeeaSKonstantin Belousov continue; 5575badbeeaSKonstantin Belousov } 5585badbeeaSKonstantin Belousov if (rl_e_is_rlock(cur)) { 5595badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 5605badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 5615badbeeaSKonstantin Belousov continue; 5625badbeeaSKonstantin Belousov } 5635badbeeaSKonstantin Belousov if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 5645badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 5655badbeeaSKonstantin Belousov if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 5665badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 5675badbeeaSKonstantin 
Belousov continue; 5685badbeeaSKonstantin Belousov } 5695badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 5705badbeeaSKonstantin Belousov if (trylock) { 5715badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 5725badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 5735badbeeaSKonstantin Belousov } 5745badbeeaSKonstantin Belousov rl_insert_sleep(lock); 5755badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 5765badbeeaSKonstantin Belousov } 5775badbeeaSKonstantin Belousov } 5785badbeeaSKonstantin Belousov } 5795badbeeaSKonstantin Belousov 5805badbeeaSKonstantin Belousov static enum RL_INSERT_RES 5815badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, 5825badbeeaSKonstantin Belousov bool trylock, struct rl_q_entry **free) 5835badbeeaSKonstantin Belousov { 5845badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 5855badbeeaSKonstantin Belousov 5869ef425e5SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 5875badbeeaSKonstantin Belousov cur = rl_q_load(prev); 5885badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* head is not marked */ 5895badbeeaSKonstantin Belousov for (;;) { 5905badbeeaSKonstantin Belousov if (cur == e) 5915badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 5925badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 5935badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 5945badbeeaSKonstantin Belousov next = rl_e_unmark(next); 5955badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 5965badbeeaSKonstantin Belousov cur->rl_q_next = *free; 5975badbeeaSKonstantin Belousov *free = cur; 5985badbeeaSKonstantin Belousov } 5995badbeeaSKonstantin Belousov cur = next; 6005badbeeaSKonstantin Belousov continue; 6015badbeeaSKonstantin Belousov } 6025badbeeaSKonstantin Belousov if (cur->rl_q_end <= e->rl_q_start) { 6035badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 6045badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 
6055badbeeaSKonstantin Belousov continue; 6065badbeeaSKonstantin Belousov } 6075badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 6085badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 6095badbeeaSKonstantin Belousov if (trylock) { 6105badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 6115badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 6125badbeeaSKonstantin Belousov } 6135badbeeaSKonstantin Belousov rl_insert_sleep(lock); 6145badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 6155badbeeaSKonstantin Belousov } 6165badbeeaSKonstantin Belousov } 6175badbeeaSKonstantin Belousov 6185badbeeaSKonstantin Belousov static enum RL_INSERT_RES 619c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 620c3d8a931SKonstantin Belousov struct rl_q_entry **free) 621c3d8a931SKonstantin Belousov { 622c3d8a931SKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 623c3d8a931SKonstantin Belousov int r; 6248f0e9130SKonstantin Belousov 625c3d8a931SKonstantin Belousov again: 6269ef425e5SKonstantin Belousov prev = (struct rl_q_entry **)&lock->head; 6275badbeeaSKonstantin Belousov cur = rl_q_load(prev); 6285badbeeaSKonstantin Belousov if (cur == NULL && rl_q_cas(prev, NULL, e)) 6295badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 6308f0e9130SKonstantin Belousov 6315badbeeaSKonstantin Belousov for (;;) { 6325badbeeaSKonstantin Belousov if (cur != NULL) { 633c3d8a931SKonstantin Belousov if (rl_e_is_marked(cur)) 634c3d8a931SKonstantin Belousov goto again; 635c3d8a931SKonstantin Belousov 636c3d8a931SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 637c3d8a931SKonstantin Belousov if (rl_e_is_marked(next)) { 638c3d8a931SKonstantin Belousov next = rl_e_unmark(next); 639c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 640c3d8a931SKonstantin Belousov #ifdef INVARIANTS 641c3d8a931SKonstantin Belousov cur->rl_q_owner = NULL; 642c3d8a931SKonstantin Belousov #endif 643c3d8a931SKonstantin Belousov 
cur->rl_q_free = *free; 644c3d8a931SKonstantin Belousov *free = cur; 645c3d8a931SKonstantin Belousov } 646c3d8a931SKonstantin Belousov cur = next; 647c3d8a931SKonstantin Belousov continue; 648c3d8a931SKonstantin Belousov } 649c3d8a931SKonstantin Belousov } 650c3d8a931SKonstantin Belousov 651*9467c1a6SKonstantin Belousov MPASS(!rl_e_is_marked(cur)); 652c3d8a931SKonstantin Belousov r = rl_e_compare(cur, e); 653c3d8a931SKonstantin Belousov if (r == -1) { 654c3d8a931SKonstantin Belousov prev = &cur->rl_q_next; 655c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 656c3d8a931SKonstantin Belousov } else if (r == 0) { 657c3d8a931SKonstantin Belousov sleepq_lock(&lock->sleepers); 658c3d8a931SKonstantin Belousov if (__predict_false(rl_e_is_marked(rl_q_load( 659c3d8a931SKonstantin Belousov &cur->rl_q_next)))) { 660c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 661c3d8a931SKonstantin Belousov continue; 662c3d8a931SKonstantin Belousov } 663e3680954SRick Macklem if (trylock) { 664c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 6655badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 666e3680954SRick Macklem } 667c3d8a931SKonstantin Belousov rl_insert_sleep(lock); 668c3d8a931SKonstantin Belousov /* e is still valid */ 669c3d8a931SKonstantin Belousov goto again; 670c3d8a931SKonstantin Belousov } else /* r == 1 */ { 671c3d8a931SKonstantin Belousov e->rl_q_next = cur; 672c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, e)) { 673c3d8a931SKonstantin Belousov atomic_thread_fence_acq(); 6745badbeeaSKonstantin Belousov return (rl_e_is_rlock(e) ? 6755badbeeaSKonstantin Belousov rl_r_validate(lock, e, trylock, free) : 6765badbeeaSKonstantin Belousov rl_w_validate(lock, e, trylock, free)); 677e3680954SRick Macklem } 678c3d8a931SKonstantin Belousov /* Reset rl_q_next in case we hit fast path. 
*/ 679c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 680c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 681c3d8a931SKonstantin Belousov } 682c3d8a931SKonstantin Belousov } 683c3d8a931SKonstantin Belousov } 684c3d8a931SKonstantin Belousov 685c3d8a931SKonstantin Belousov static struct rl_q_entry * 6865badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, 6875badbeeaSKonstantin Belousov vm_ooffset_t end, int locktype) 688c3d8a931SKonstantin Belousov { 689a3f10d08SKonstantin Belousov struct rl_q_entry *e, *free; 6909ef425e5SKonstantin Belousov void *cookie; 6915badbeeaSKonstantin Belousov enum RL_INSERT_RES res; 692c3d8a931SKonstantin Belousov 6939ef425e5SKonstantin Belousov if (rangelock_cheat_lock(lock, locktype, trylock, &cookie)) 6949ef425e5SKonstantin Belousov return (cookie); 6955badbeeaSKonstantin Belousov for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { 696c3d8a931SKonstantin Belousov free = NULL; 6975badbeeaSKonstantin Belousov e = rlqentry_alloc(start, end, locktype); 698c3d8a931SKonstantin Belousov smr_enter(rl_smr); 699c3d8a931SKonstantin Belousov res = rl_insert(lock, e, trylock, &free); 700c3d8a931SKonstantin Belousov smr_exit(rl_smr); 7015badbeeaSKonstantin Belousov if (res == RL_TRYLOCK_FAILED) { 7025badbeeaSKonstantin Belousov MPASS(trylock); 703c3d8a931SKonstantin Belousov e->rl_q_free = free; 704c3d8a931SKonstantin Belousov free = e; 705c3d8a931SKonstantin Belousov e = NULL; 706c3d8a931SKonstantin Belousov } 707a3f10d08SKonstantin Belousov rangelock_free_free(free); 708ff1ae3b3SKonstantin Belousov } 709c3d8a931SKonstantin Belousov return (e); 7108f0e9130SKonstantin Belousov } 7118f0e9130SKonstantin Belousov 7128f0e9130SKonstantin Belousov void * 713c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7148f0e9130SKonstantin Belousov { 7155badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); 
716e3680954SRick Macklem } 717e3680954SRick Macklem 718e3680954SRick Macklem void * 719c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 720e3680954SRick Macklem { 7215badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); 7228f0e9130SKonstantin Belousov } 7238f0e9130SKonstantin Belousov 7248f0e9130SKonstantin Belousov void * 725c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7268f0e9130SKonstantin Belousov { 7279ef425e5SKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE)); 728e3680954SRick Macklem } 729e3680954SRick Macklem 730e3680954SRick Macklem void * 731c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 732e3680954SRick Macklem { 7335badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 7348f0e9130SKonstantin Belousov } 7353155f2f0SKyle Evans 7363155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT 7373155f2f0SKyle Evans void 7383155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 7393155f2f0SKyle Evans { 7403155f2f0SKyle Evans } 7413155f2f0SKyle Evans #endif /* INVARIANT_SUPPORT */ 742c3d8a931SKonstantin Belousov 743c3d8a931SKonstantin Belousov #include "opt_ddb.h" 744c3d8a931SKonstantin Belousov #ifdef DDB 745c3d8a931SKonstantin Belousov #include <ddb/ddb.h> 746c3d8a931SKonstantin Belousov 747c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock) 748c3d8a931SKonstantin Belousov { 749c3d8a931SKonstantin Belousov struct rangelock *lock; 750c3d8a931SKonstantin Belousov struct rl_q_entry *e, *x; 7519ef425e5SKonstantin Belousov uintptr_t v; 752c3d8a931SKonstantin Belousov 753c3d8a931SKonstantin Belousov if (!have_addr) { 754c3d8a931SKonstantin Belousov db_printf("show rangelock addr\n"); 755c3d8a931SKonstantin Belousov 
return; 756c3d8a931SKonstantin Belousov } 757c3d8a931SKonstantin Belousov 758c3d8a931SKonstantin Belousov lock = (struct rangelock *)addr; 759c3d8a931SKonstantin Belousov db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers); 7609ef425e5SKonstantin Belousov v = lock->head; 7619ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) != 0) { 7629ef425e5SKonstantin Belousov db_printf(" cheating head %#jx\n", (uintmax_t)v); 7639ef425e5SKonstantin Belousov return; 7649ef425e5SKonstantin Belousov } 7659ef425e5SKonstantin Belousov for (e = (struct rl_q_entry *)(lock->head);;) { 766c3d8a931SKonstantin Belousov x = rl_e_is_marked(e) ? rl_e_unmark(e) : e; 767c3d8a931SKonstantin Belousov if (x == NULL) 768c3d8a931SKonstantin Belousov break; 769c3d8a931SKonstantin Belousov db_printf(" entry %p marked %d %d start %#jx end %#jx " 770c3d8a931SKonstantin Belousov "flags %x next %p", 771c3d8a931SKonstantin Belousov e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next), 772c3d8a931SKonstantin Belousov x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next); 773c3d8a931SKonstantin Belousov #ifdef INVARIANTS 774c3d8a931SKonstantin Belousov db_printf(" owner %p (%d)", x->rl_q_owner, 775c3d8a931SKonstantin Belousov x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1); 776c3d8a931SKonstantin Belousov #endif 777c3d8a931SKonstantin Belousov db_printf("\n"); 778c3d8a931SKonstantin Belousov e = x->rl_q_next; 779c3d8a931SKonstantin Belousov } 780c3d8a931SKonstantin Belousov } 781c3d8a931SKonstantin Belousov 782c3d8a931SKonstantin Belousov #endif /* DDB */ 783