18f0e9130SKonstantin Belousov /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 48f0e9130SKonstantin Belousov * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 58f0e9130SKonstantin Belousov * All rights reserved. 68f0e9130SKonstantin Belousov * 78f0e9130SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 88f0e9130SKonstantin Belousov * modification, are permitted provided that the following conditions 98f0e9130SKonstantin Belousov * are met: 108f0e9130SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 118f0e9130SKonstantin Belousov * notice unmodified, this list of conditions, and the following 128f0e9130SKonstantin Belousov * disclaimer. 138f0e9130SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 148f0e9130SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 158f0e9130SKonstantin Belousov * documentation and/or other materials provided with the distribution. 168f0e9130SKonstantin Belousov * 178f0e9130SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 188f0e9130SKonstantin Belousov * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 198f0e9130SKonstantin Belousov * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 208f0e9130SKonstantin Belousov * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 218f0e9130SKonstantin Belousov * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 228f0e9130SKonstantin Belousov * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 238f0e9130SKonstantin Belousov * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 248f0e9130SKonstantin Belousov * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 258f0e9130SKonstantin Belousov * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 268f0e9130SKonstantin Belousov * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 278f0e9130SKonstantin Belousov */ 288f0e9130SKonstantin Belousov 298f0e9130SKonstantin Belousov #include <sys/param.h> 30c3d8a931SKonstantin Belousov #include <sys/kassert.h> 318f0e9130SKonstantin Belousov #include <sys/kernel.h> 328f0e9130SKonstantin Belousov #include <sys/lock.h> 338f0e9130SKonstantin Belousov #include <sys/mutex.h> 348f0e9130SKonstantin Belousov #include <sys/proc.h> 358f0e9130SKonstantin Belousov #include <sys/rangelock.h> 36c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h> 37c3d8a931SKonstantin Belousov #include <sys/smr.h> 388f0e9130SKonstantin Belousov 398f0e9130SKonstantin Belousov #include <vm/uma.h> 408f0e9130SKonstantin Belousov 41c3d8a931SKonstantin Belousov /* 42c3d8a931SKonstantin Belousov * Implementation of range locks based on the paper 43c3d8a931SKonstantin Belousov * https://doi.org/10.1145/3342195.3387533 44c3d8a931SKonstantin Belousov * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020 45c3d8a931SKonstantin Belousov * Scalable Range Locks for Scalable Address Spaces and Beyond 46c3d8a931SKonstantin Belousov * by Alex Kogan, Dave Dice, and Shady Issa 47c3d8a931SKonstantin Belousov */ 48c3d8a931SKonstantin Belousov 49c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e); 50c3d8a931SKonstantin Belousov 51c3d8a931SKonstantin Belousov /* 52c3d8a931SKonstantin Belousov * rl_q_next links all granted ranges in the lock. We cannot free an 53c3d8a931SKonstantin Belousov * rl_q_entry while in the smr section, and cannot reuse rl_q_next 54c3d8a931SKonstantin Belousov * linkage since other threads might follow it even after CAS removed 55c3d8a931SKonstantin Belousov * the range. Use rl_q_free for local list of ranges to remove after 56c3d8a931SKonstantin Belousov * the smr section is dropped. 57c3d8a931SKonstantin Belousov */ 588f0e9130SKonstantin Belousov struct rl_q_entry { 59c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_next; 60c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_free; 618f0e9130SKonstantin Belousov off_t rl_q_start, rl_q_end; 628f0e9130SKonstantin Belousov int rl_q_flags; 63c3d8a931SKonstantin Belousov #ifdef INVARIANTS 64c3d8a931SKonstantin Belousov struct thread *rl_q_owner; 65c3d8a931SKonstantin Belousov #endif 668f0e9130SKonstantin Belousov }; 678f0e9130SKonstantin Belousov 688f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone; 69c3d8a931SKonstantin Belousov static smr_t rl_smr; 708f0e9130SKonstantin Belousov 718f0e9130SKonstantin Belousov static void 728f0e9130SKonstantin Belousov rangelock_sys_init(void) 738f0e9130SKonstantin Belousov { 748f0e9130SKonstantin Belousov rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), 75c3d8a931SKonstantin Belousov NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), 76c3d8a931SKonstantin Belousov UMA_ZONE_SMR); 77c3d8a931SKonstantin Belousov rl_smr = uma_zone_get_smr(rl_entry_zone); 788f0e9130SKonstantin Belousov } 79c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL); 808f0e9130SKonstantin Belousov 818f0e9130SKonstantin Belousov static struct rl_q_entry * 82c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags) 838f0e9130SKonstantin Belousov { 84c3d8a931SKonstantin Belousov struct rl_q_entry *e; 858f0e9130SKonstantin Belousov 86c3d8a931SKonstantin Belousov e = uma_zalloc_smr(rl_entry_zone, M_WAITOK); 87c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 88c3d8a931SKonstantin Belousov e->rl_q_free = NULL; 89c3d8a931SKonstantin Belousov e->rl_q_start = start; 90c3d8a931SKonstantin Belousov e->rl_q_end = end; 91c3d8a931SKonstantin Belousov e->rl_q_flags = flags; 92c3d8a931SKonstantin Belousov #ifdef INVARIANTS 93c3d8a931SKonstantin Belousov e->rl_q_owner = curthread; 94c3d8a931SKonstantin Belousov #endif 95c3d8a931SKonstantin Belousov return (e); 968f0e9130SKonstantin Belousov } 978f0e9130SKonstantin Belousov 988f0e9130SKonstantin Belousov void 998f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock) 1008f0e9130SKonstantin Belousov { 101c3d8a931SKonstantin Belousov lock->sleepers = false; 102c3d8a931SKonstantin Belousov atomic_store_ptr(&lock->head, NULL); 1038f0e9130SKonstantin Belousov } 1048f0e9130SKonstantin Belousov 1058f0e9130SKonstantin Belousov void 1068f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock) 1078f0e9130SKonstantin Belousov { 108c3d8a931SKonstantin Belousov struct rl_q_entry *e, *ep; 1098f0e9130SKonstantin Belousov 110c3d8a931SKonstantin Belousov MPASS(!lock->sleepers); 111c3d8a931SKonstantin Belousov for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head); 112c3d8a931SKonstantin Belousov e != NULL; e = rl_e_unmark(ep)) { 113c3d8a931SKonstantin Belousov ep = atomic_load_ptr(&e->rl_q_next); 114c3d8a931SKonstantin Belousov uma_zfree_smr(rl_entry_zone, e); 115c3d8a931SKonstantin Belousov } 1168f0e9130SKonstantin Belousov } 1178f0e9130SKonstantin Belousov 118c3d8a931SKonstantin Belousov static bool 119c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e) 1208f0e9130SKonstantin Belousov { 121c3d8a931SKonstantin Belousov return (((uintptr_t)e & 1) != 0); 1228f0e9130SKonstantin Belousov } 1238f0e9130SKonstantin Belousov 124c3d8a931SKonstantin Belousov static struct rl_q_entry * 1255badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e) 1265badbeeaSKonstantin Belousov { 1275badbeeaSKonstantin Belousov return ((struct rl_q_entry *)((uintptr_t)e & ~1)); 1285badbeeaSKonstantin Belousov } 1295badbeeaSKonstantin Belousov 1305badbeeaSKonstantin Belousov static struct rl_q_entry * 131c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e) 1328f0e9130SKonstantin Belousov { 133c3d8a931SKonstantin Belousov MPASS(rl_e_is_marked(e)); 1345badbeeaSKonstantin Belousov return (rl_e_unmark_unchecked(e)); 1355badbeeaSKonstantin Belousov } 1365badbeeaSKonstantin Belousov 1375badbeeaSKonstantin Belousov static void 1385badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e) 1395badbeeaSKonstantin Belousov { 1405badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__) 1415badbeeaSKonstantin Belousov int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); 1425badbeeaSKonstantin Belousov MPASS(r == 0); 1435badbeeaSKonstantin Belousov #else 1445badbeeaSKonstantin Belousov atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1); 1455badbeeaSKonstantin Belousov #endif 1462bb93f2dSColin Percival } 1472bb93f2dSColin Percival 148c3d8a931SKonstantin Belousov static struct rl_q_entry * 149c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p) 1508f0e9130SKonstantin Belousov { 151c3d8a931SKonstantin Belousov return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p)); 1528f0e9130SKonstantin Belousov } 1538f0e9130SKonstantin Belousov 1546c32d89eSKonstantin Belousov static bool 1556c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e) 1566c32d89eSKonstantin Belousov { 1576c32d89eSKonstantin Belousov return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); 1586c32d89eSKonstantin Belousov } 1596c32d89eSKonstantin Belousov 1605badbeeaSKonstantin Belousov static void 1615badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) 1628f0e9130SKonstantin Belousov { 163*c3158008SKonstantin Belousov bool sleepers; 164*c3158008SKonstantin Belousov 165c3d8a931SKonstantin Belousov MPASS(lock != NULL && e != NULL); 166c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next))); 167c3d8a931SKonstantin Belousov MPASS(e->rl_q_owner == curthread); 1688f0e9130SKonstantin Belousov 1695badbeeaSKonstantin Belousov rl_e_mark(e); 170*c3158008SKonstantin Belousov sleepers = lock->sleepers; 171c3d8a931SKonstantin Belousov lock->sleepers = false; 172*c3158008SKonstantin Belousov if (sleepers) 173c3d8a931SKonstantin Belousov sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0); 1745badbeeaSKonstantin Belousov } 1755badbeeaSKonstantin Belousov 1765badbeeaSKonstantin Belousov void 1775badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie) 1785badbeeaSKonstantin Belousov { 1795badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 1805badbeeaSKonstantin Belousov rangelock_unlock_int(lock, cookie); 181c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 1828f0e9130SKonstantin Belousov } 1838f0e9130SKonstantin Belousov 1848f0e9130SKonstantin Belousov /* 1855badbeeaSKonstantin Belousov * result: -1 if e1 before e2, or both locks are readers and e1 1865badbeeaSKonstantin Belousov * starts before or at e2 1875badbeeaSKonstantin Belousov * 1 if e1 after e2, or both locks are readers and e1 1885badbeeaSKonstantin Belousov * starts after e2 1895badbeeaSKonstantin Belousov * 0 if e1 and e2 overlap and at least one lock is writer 1908f0e9130SKonstantin Belousov */ 191c3d8a931SKonstantin Belousov static int 192c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2) 1938f0e9130SKonstantin Belousov { 1945badbeeaSKonstantin Belousov bool rds; 1955badbeeaSKonstantin Belousov 196c3d8a931SKonstantin Belousov if (e1 == NULL) 197c3d8a931SKonstantin Belousov return (1); 198c3d8a931SKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_end) 199c3d8a931SKonstantin Belousov return (-1); 2005badbeeaSKonstantin Belousov rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2); 2015badbeeaSKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_start && rds) 2025badbeeaSKonstantin Belousov return (-1); 2035badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_end) 2045badbeeaSKonstantin Belousov return (1); 2055badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_start && rds) 2065badbeeaSKonstantin Belousov return (1); 207c3d8a931SKonstantin Belousov return (0); 2088f0e9130SKonstantin Belousov } 2098f0e9130SKonstantin Belousov 210c3d8a931SKonstantin Belousov static void 211c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock) 2128f0e9130SKonstantin Belousov { 213c3d8a931SKonstantin Belousov smr_exit(rl_smr); 214c3d8a931SKonstantin Belousov DROP_GIANT(); 215c3d8a931SKonstantin Belousov lock->sleepers = true; 216c3d8a931SKonstantin Belousov sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0); 217c3d8a931SKonstantin Belousov sleepq_wait(&lock->sleepers, PRI_USER); 218c3d8a931SKonstantin Belousov PICKUP_GIANT(); 219c3d8a931SKonstantin Belousov smr_enter(rl_smr); 220c3d8a931SKonstantin Belousov } 2218f0e9130SKonstantin Belousov 222c3d8a931SKonstantin Belousov static bool 223c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, 224c3d8a931SKonstantin Belousov struct rl_q_entry *new) 225c3d8a931SKonstantin Belousov { 226c3d8a931SKonstantin Belousov return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old, 227c3d8a931SKonstantin Belousov (uintptr_t)new) != 0); 228c3d8a931SKonstantin Belousov } 2298f0e9130SKonstantin Belousov 2305badbeeaSKonstantin Belousov enum RL_INSERT_RES { 2315badbeeaSKonstantin Belousov RL_TRYLOCK_FAILED, 2325badbeeaSKonstantin Belousov RL_LOCK_SUCCESS, 2335badbeeaSKonstantin Belousov RL_LOCK_RETRY, 2345badbeeaSKonstantin Belousov }; 2355badbeeaSKonstantin Belousov 2365badbeeaSKonstantin Belousov static enum RL_INSERT_RES 2375badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 2385badbeeaSKonstantin Belousov struct rl_q_entry **free) 2395badbeeaSKonstantin Belousov { 2405badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 2415badbeeaSKonstantin Belousov 2425badbeeaSKonstantin Belousov prev = &e->rl_q_next; 2435badbeeaSKonstantin Belousov cur = rl_q_load(prev); 2445badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ 2455badbeeaSKonstantin Belousov for (;;) { 2465badbeeaSKonstantin Belousov if (cur == NULL || cur->rl_q_start > e->rl_q_end) 2475badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 2485badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 2495badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 2505badbeeaSKonstantin Belousov next = rl_e_unmark(next); 2515badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 2525badbeeaSKonstantin Belousov cur->rl_q_free = *free; 2535badbeeaSKonstantin Belousov *free = cur; 2545badbeeaSKonstantin Belousov } 2555badbeeaSKonstantin Belousov cur = next; 2565badbeeaSKonstantin Belousov continue; 2575badbeeaSKonstantin Belousov } 2585badbeeaSKonstantin Belousov if (rl_e_is_rlock(cur)) { 2595badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 2605badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 2615badbeeaSKonstantin Belousov continue; 2625badbeeaSKonstantin Belousov } 2635badbeeaSKonstantin Belousov if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 2645badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 2655badbeeaSKonstantin Belousov if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 2665badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 2675badbeeaSKonstantin Belousov continue; 2685badbeeaSKonstantin Belousov } 2695badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 2705badbeeaSKonstantin Belousov if (trylock) { 2715badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 2725badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 2735badbeeaSKonstantin Belousov } 2745badbeeaSKonstantin Belousov rl_insert_sleep(lock); 2755badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 2765badbeeaSKonstantin Belousov } 2775badbeeaSKonstantin Belousov } 2785badbeeaSKonstantin Belousov } 2795badbeeaSKonstantin Belousov 2805badbeeaSKonstantin Belousov static enum RL_INSERT_RES 2815badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, 2825badbeeaSKonstantin Belousov bool trylock, struct rl_q_entry **free) 2835badbeeaSKonstantin Belousov { 2845badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 2855badbeeaSKonstantin Belousov 2865badbeeaSKonstantin Belousov prev = &lock->head; 2875badbeeaSKonstantin Belousov cur = rl_q_load(prev); 2885badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* head is not marked */ 2895badbeeaSKonstantin Belousov for (;;) { 2905badbeeaSKonstantin Belousov if (cur == e) 2915badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 2925badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 2935badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 2945badbeeaSKonstantin Belousov next = rl_e_unmark(next); 2955badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 2965badbeeaSKonstantin Belousov cur->rl_q_next = *free; 2975badbeeaSKonstantin Belousov *free = cur; 2985badbeeaSKonstantin Belousov } 2995badbeeaSKonstantin Belousov cur = next; 3005badbeeaSKonstantin Belousov continue; 3015badbeeaSKonstantin Belousov } 3025badbeeaSKonstantin Belousov if (cur->rl_q_end <= e->rl_q_start) { 3035badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 3045badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 3055badbeeaSKonstantin Belousov continue; 3065badbeeaSKonstantin Belousov } 3075badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 3085badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 3095badbeeaSKonstantin Belousov if (trylock) { 3105badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 3115badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 3125badbeeaSKonstantin Belousov } 3135badbeeaSKonstantin Belousov rl_insert_sleep(lock); 3145badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 3155badbeeaSKonstantin Belousov } 3165badbeeaSKonstantin Belousov } 3175badbeeaSKonstantin Belousov 3185badbeeaSKonstantin Belousov static enum RL_INSERT_RES 319c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 320c3d8a931SKonstantin Belousov struct rl_q_entry **free) 321c3d8a931SKonstantin Belousov { 322c3d8a931SKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 323c3d8a931SKonstantin Belousov int r; 3248f0e9130SKonstantin Belousov 325c3d8a931SKonstantin Belousov again: 326c3d8a931SKonstantin Belousov prev = &lock->head; 3275badbeeaSKonstantin Belousov cur = rl_q_load(prev); 3285badbeeaSKonstantin Belousov if (cur == NULL && rl_q_cas(prev, NULL, e)) 3295badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 3308f0e9130SKonstantin Belousov 3315badbeeaSKonstantin Belousov for (;;) { 3325badbeeaSKonstantin Belousov if (cur != NULL) { 333c3d8a931SKonstantin Belousov if (rl_e_is_marked(cur)) 334c3d8a931SKonstantin Belousov goto again; 335c3d8a931SKonstantin Belousov 336c3d8a931SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 337c3d8a931SKonstantin Belousov if (rl_e_is_marked(next)) { 338c3d8a931SKonstantin Belousov next = rl_e_unmark(next); 339c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 340c3d8a931SKonstantin Belousov #ifdef INVARIANTS 341c3d8a931SKonstantin Belousov cur->rl_q_owner = NULL; 342c3d8a931SKonstantin Belousov #endif 343c3d8a931SKonstantin Belousov cur->rl_q_free = *free; 344c3d8a931SKonstantin Belousov *free = cur; 345c3d8a931SKonstantin Belousov } 346c3d8a931SKonstantin Belousov cur = next; 347c3d8a931SKonstantin Belousov continue; 348c3d8a931SKonstantin Belousov } 349c3d8a931SKonstantin Belousov } 350c3d8a931SKonstantin Belousov 351c3d8a931SKonstantin Belousov r = rl_e_compare(cur, e); 352c3d8a931SKonstantin Belousov if (r == -1) { 353c3d8a931SKonstantin Belousov prev = &cur->rl_q_next; 354c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 355c3d8a931SKonstantin Belousov } else if (r == 0) { 356c3d8a931SKonstantin Belousov sleepq_lock(&lock->sleepers); 357c3d8a931SKonstantin Belousov if (__predict_false(rl_e_is_marked(rl_q_load( 358c3d8a931SKonstantin Belousov &cur->rl_q_next)))) { 359c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 360c3d8a931SKonstantin Belousov continue; 361c3d8a931SKonstantin Belousov } 362e3680954SRick Macklem if (trylock) { 363c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 3645badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 365e3680954SRick Macklem } 366c3d8a931SKonstantin Belousov rl_insert_sleep(lock); 367c3d8a931SKonstantin Belousov /* e is still valid */ 368c3d8a931SKonstantin Belousov goto again; 369c3d8a931SKonstantin Belousov } else /* r == 1 */ { 370c3d8a931SKonstantin Belousov e->rl_q_next = cur; 371c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, e)) { 372c3d8a931SKonstantin Belousov atomic_thread_fence_acq(); 3735badbeeaSKonstantin Belousov return (rl_e_is_rlock(e) ? 3745badbeeaSKonstantin Belousov rl_r_validate(lock, e, trylock, free) : 3755badbeeaSKonstantin Belousov rl_w_validate(lock, e, trylock, free)); 376e3680954SRick Macklem } 377c3d8a931SKonstantin Belousov /* Reset rl_q_next in case we hit fast path. */ 378c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 379c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 380c3d8a931SKonstantin Belousov } 381c3d8a931SKonstantin Belousov } 382c3d8a931SKonstantin Belousov } 383c3d8a931SKonstantin Belousov 384c3d8a931SKonstantin Belousov static struct rl_q_entry * 3855badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, 3865badbeeaSKonstantin Belousov vm_ooffset_t end, int locktype) 387c3d8a931SKonstantin Belousov { 3885badbeeaSKonstantin Belousov struct rl_q_entry *e, *free, *x, *xp; 3895badbeeaSKonstantin Belousov enum RL_INSERT_RES res; 390c3d8a931SKonstantin Belousov 3915badbeeaSKonstantin Belousov for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { 392c3d8a931SKonstantin Belousov free = NULL; 3935badbeeaSKonstantin Belousov e = rlqentry_alloc(start, end, locktype); 394c3d8a931SKonstantin Belousov smr_enter(rl_smr); 395c3d8a931SKonstantin Belousov res = rl_insert(lock, e, trylock, &free); 396c3d8a931SKonstantin Belousov smr_exit(rl_smr); 3975badbeeaSKonstantin Belousov if (res == RL_TRYLOCK_FAILED) { 3985badbeeaSKonstantin Belousov MPASS(trylock); 399c3d8a931SKonstantin Belousov e->rl_q_free = free; 400c3d8a931SKonstantin Belousov free = e; 401c3d8a931SKonstantin Belousov e = NULL; 402c3d8a931SKonstantin Belousov } 403c3d8a931SKonstantin Belousov for (x = free; x != NULL; x = xp) { 404c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(x)); 405c3d8a931SKonstantin Belousov xp = x->rl_q_free; 406c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(xp)); 407c3d8a931SKonstantin Belousov uma_zfree_smr(rl_entry_zone, x); 408c3d8a931SKonstantin Belousov } 4095badbeeaSKonstantin Belousov } 410c3d8a931SKonstantin Belousov return (e); 4118f0e9130SKonstantin Belousov } 4128f0e9130SKonstantin Belousov 4138f0e9130SKonstantin Belousov void * 414c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 4158f0e9130SKonstantin Belousov { 4165badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); 417e3680954SRick Macklem } 418e3680954SRick Macklem 419e3680954SRick Macklem void * 420c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 421e3680954SRick Macklem { 4225badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); 4238f0e9130SKonstantin Belousov } 4248f0e9130SKonstantin Belousov 4258f0e9130SKonstantin Belousov void * 426c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 4278f0e9130SKonstantin Belousov { 4285badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 429e3680954SRick Macklem } 430e3680954SRick Macklem 431e3680954SRick Macklem void * 432c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 433e3680954SRick Macklem { 4345badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 4358f0e9130SKonstantin Belousov } 4363155f2f0SKyle Evans 4373155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT 4383155f2f0SKyle Evans void 4393155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 4403155f2f0SKyle Evans { 4413155f2f0SKyle Evans } 4423155f2f0SKyle Evans #endif /* INVARIANT_SUPPORT */ 443c3d8a931SKonstantin Belousov 444c3d8a931SKonstantin Belousov #include "opt_ddb.h" 445c3d8a931SKonstantin Belousov #ifdef DDB 446c3d8a931SKonstantin Belousov #include <ddb/ddb.h> 447c3d8a931SKonstantin Belousov 448c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock) 449c3d8a931SKonstantin Belousov { 450c3d8a931SKonstantin Belousov struct rangelock *lock; 451c3d8a931SKonstantin Belousov struct rl_q_entry *e, *x; 452c3d8a931SKonstantin Belousov 453c3d8a931SKonstantin Belousov if (!have_addr) { 454c3d8a931SKonstantin Belousov db_printf("show rangelock addr\n"); 455c3d8a931SKonstantin Belousov return; 456c3d8a931SKonstantin Belousov } 457c3d8a931SKonstantin Belousov 458c3d8a931SKonstantin Belousov lock = (struct rangelock *)addr; 459c3d8a931SKonstantin Belousov db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers); 460c3d8a931SKonstantin Belousov for (e = lock->head;;) { 461c3d8a931SKonstantin Belousov x = rl_e_is_marked(e) ? rl_e_unmark(e) : e; 462c3d8a931SKonstantin Belousov if (x == NULL) 463c3d8a931SKonstantin Belousov break; 464c3d8a931SKonstantin Belousov db_printf(" entry %p marked %d %d start %#jx end %#jx " 465c3d8a931SKonstantin Belousov "flags %x next %p", 466c3d8a931SKonstantin Belousov e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next), 467c3d8a931SKonstantin Belousov x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next); 468c3d8a931SKonstantin Belousov #ifdef INVARIANTS 469c3d8a931SKonstantin Belousov db_printf(" owner %p (%d)", x->rl_q_owner, 470c3d8a931SKonstantin Belousov x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1); 471c3d8a931SKonstantin Belousov #endif 472c3d8a931SKonstantin Belousov db_printf("\n"); 473c3d8a931SKonstantin Belousov e = x->rl_q_next; 474c3d8a931SKonstantin Belousov } 475c3d8a931SKonstantin Belousov } 476c3d8a931SKonstantin Belousov 477c3d8a931SKonstantin Belousov #endif /* DDB */ 478