/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request comes that conflicts with granted ones
 * based on type, rangelocks serve requests in the "cheating" mode.
 * In this mode, a rangelock behaves like a sxlock, as if each request
 * covered the whole range of the protected object.  On receiving a
 * conflicting request (any request while a write request is
 * effective, or any write request while some read ones are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to effectively keeping track of the
 * precise range of each new request.
 *
 * The normal sx implementation is not used, to avoid bloating the
 * structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave the same as normal locks but do not drain.
 */

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200

static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		DROP_GIANT();
		for (;;) {
			v = (uintptr_t)atomic_load_ptr(&lock->head);
			if ((v & RL_CHEAT_DRAINING) == 0)
				break;
			sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
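			/*
			 * Sleep until the unlock that finishes the drain
			 * broadcasts on &lock->head.
			 */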
			sleepq_wait(&lock->head, PRI_USER);
			sleepq_lock(&lock->head);
		}
		sleepq_release(&lock->head);
		PICKUP_GIANT();
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after CAS removed
 * the range.  Use rl_q_free for a local list of ranges to remove
 * after the smr section is dropped.
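 *
 * The low bit of rl_q_next doubles as a removal mark: rl_e_mark() sets
 * it when the corresponding range is unlocked, and a scanner that
 * loads a marked rl_q_next knows that the entry holding it is
 * logically deleted and may be unlinked.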
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;
	struct thread *td;

	td = curthread;
	if (td->td_rlqe != NULL) {
		e = td->td_rlqe;
		td->td_rlqe = NULL;
	} else {
		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	}
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_entry_free(struct rl_q_entry *e)
{
	uma_zfree_smr(rl_entry_zone, e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING :
	    0);
}
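
/*
 * Illustrative sketch only: a hypothetical consumer embedding a
 * rangelock ('obj' and its 'o_rl' member are not part of this file)
 * initializes the lock once and brackets byte-range updates with a
 * write lock, where the end offset is exclusive:
 *
 *	rangelock_init(&obj->o_rl);
 *	...
 *	void *cookie = rangelock_wlock(&obj->o_rl, off, off + len);
 *	... modify bytes [off, off + len) ...
 *	rangelock_unlock(&obj->o_rl, cookie);
 */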

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;
	struct thread *td;

	td = curthread;
	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		if (td->td_rlqe == NULL) {
			smr_synchronize(rl_smr);
			td->td_rlqe = x;
		} else {
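			/*
			 * The per-thread cache slot is already occupied;
			 * return the entry to the SMR-managed zone instead.
			 */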
			uma_zfree_smr(rl_entry_zone, x);
		}
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * Result: -1 if e1 is entirely before e2, or both locks are readers
 *	       and e1 starts before or at e2;
 *	    1 if e1 is entirely after e2, or both locks are readers
 *	       and e1 starts after e2;
 *	    0 if e1 and e2 overlap and at least one of them is a writer.
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
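	/* Awakened by the sleepq_broadcast() in rangelock_unlock_int(). */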
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
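	/*
	 * e is a read request; walk the overlapping entries inserted
	 * after it, and only yield to a write request that is still
	 * granted.
	 */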
	for (;;) {
		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
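		/*
		 * cur overlaps our new write request and has not been
		 * unlocked yet: back the request out and either fail the
		 * trylock or sleep until the conflict goes away.
		 */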
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}
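
/*
 * Illustrative only: the try variants return NULL instead of sleeping
 * when the range cannot be locked immediately, so a hypothetical
 * caller that must not block while holding other locks can fall back
 * explicitly ('obj' and its 'o_rl' member are not from this file):
 *
 *	void *cookie = rangelock_tryrlock(&obj->o_rl, off, off + len);
 *	if (cookie == NULL) {
 *		... drop the locks that must not be held while sleeping ...
 *		cookie = rangelock_rlock(&obj->o_rl, off, off + len);
 *	}
 *	... read bytes [off, off + len) ...
 *	rangelock_unlock(&obj->o_rl, cookie);
 */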

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf("  cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */