18f0e9130SKonstantin Belousov /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 48f0e9130SKonstantin Belousov * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 58f0e9130SKonstantin Belousov * All rights reserved. 68f0e9130SKonstantin Belousov * 78f0e9130SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 88f0e9130SKonstantin Belousov * modification, are permitted provided that the following conditions 98f0e9130SKonstantin Belousov * are met: 108f0e9130SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 118f0e9130SKonstantin Belousov * notice unmodified, this list of conditions, and the following 128f0e9130SKonstantin Belousov * disclaimer. 138f0e9130SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 148f0e9130SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 158f0e9130SKonstantin Belousov * documentation and/or other materials provided with the distribution. 168f0e9130SKonstantin Belousov * 178f0e9130SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 188f0e9130SKonstantin Belousov * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 198f0e9130SKonstantin Belousov * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 208f0e9130SKonstantin Belousov * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 218f0e9130SKonstantin Belousov * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 228f0e9130SKonstantin Belousov * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 238f0e9130SKonstantin Belousov * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 248f0e9130SKonstantin Belousov * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 258f0e9130SKonstantin Belousov * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 268f0e9130SKonstantin Belousov * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 278f0e9130SKonstantin Belousov */ 288f0e9130SKonstantin Belousov 298f0e9130SKonstantin Belousov #include <sys/param.h> 30c3d8a931SKonstantin Belousov #include <sys/kassert.h> 318f0e9130SKonstantin Belousov #include <sys/kernel.h> 328f0e9130SKonstantin Belousov #include <sys/lock.h> 338f0e9130SKonstantin Belousov #include <sys/mutex.h> 348f0e9130SKonstantin Belousov #include <sys/proc.h> 358f0e9130SKonstantin Belousov #include <sys/rangelock.h> 36c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h> 37c3d8a931SKonstantin Belousov #include <sys/smr.h> 388f0e9130SKonstantin Belousov 398f0e9130SKonstantin Belousov #include <vm/uma.h> 408f0e9130SKonstantin Belousov 41c3d8a931SKonstantin Belousov /* 42c3d8a931SKonstantin Belousov * Implementation of range locks based on the paper 43c3d8a931SKonstantin Belousov * https://doi.org/10.1145/3342195.3387533 44c3d8a931SKonstantin Belousov * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020 45c3d8a931SKonstantin Belousov * Scalable Range Locks for Scalable Address Spaces and Beyond 46c3d8a931SKonstantin Belousov * by Alex Kogan, Dave Dice, and Shady Issa 47c3d8a931SKonstantin Belousov */ 48c3d8a931SKonstantin Belousov 49c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e); 50c3d8a931SKonstantin Belousov 51c3d8a931SKonstantin Belousov /* 52c3d8a931SKonstantin Belousov * rl_q_next links all granted ranges in the lock. We cannot free an 53c3d8a931SKonstantin Belousov * rl_q_entry while in the smr section, and cannot reuse rl_q_next 54c3d8a931SKonstantin Belousov * linkage since other threads might follow it even after CAS removed 55c3d8a931SKonstantin Belousov * the range. Use rl_q_free for local list of ranges to remove after 56c3d8a931SKonstantin Belousov * the smr section is dropped. 57c3d8a931SKonstantin Belousov */ 588f0e9130SKonstantin Belousov struct rl_q_entry { 59c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_next; 60c3d8a931SKonstantin Belousov struct rl_q_entry *rl_q_free; 618f0e9130SKonstantin Belousov off_t rl_q_start, rl_q_end; 628f0e9130SKonstantin Belousov int rl_q_flags; 63c3d8a931SKonstantin Belousov #ifdef INVARIANTS 64c3d8a931SKonstantin Belousov struct thread *rl_q_owner; 65c3d8a931SKonstantin Belousov #endif 668f0e9130SKonstantin Belousov }; 678f0e9130SKonstantin Belousov 688f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone; 69c3d8a931SKonstantin Belousov static smr_t rl_smr; 708f0e9130SKonstantin Belousov 718f0e9130SKonstantin Belousov static void 728f0e9130SKonstantin Belousov rangelock_sys_init(void) 738f0e9130SKonstantin Belousov { 748f0e9130SKonstantin Belousov rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), 75c3d8a931SKonstantin Belousov NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), 76c3d8a931SKonstantin Belousov UMA_ZONE_SMR); 77c3d8a931SKonstantin Belousov rl_smr = uma_zone_get_smr(rl_entry_zone); 788f0e9130SKonstantin Belousov } 79c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL); 808f0e9130SKonstantin Belousov 818f0e9130SKonstantin Belousov static struct rl_q_entry * 82c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags) 838f0e9130SKonstantin Belousov { 84c3d8a931SKonstantin Belousov struct rl_q_entry *e; 85*ff1ae3b3SKonstantin Belousov struct thread *td; 868f0e9130SKonstantin Belousov 87*ff1ae3b3SKonstantin Belousov td = curthread; 88*ff1ae3b3SKonstantin Belousov if (td->td_rlqe != NULL) { 89*ff1ae3b3SKonstantin Belousov e = td->td_rlqe; 90*ff1ae3b3SKonstantin Belousov td->td_rlqe = NULL; 91*ff1ae3b3SKonstantin Belousov } else { 92c3d8a931SKonstantin Belousov e = uma_zalloc_smr(rl_entry_zone, M_WAITOK); 93*ff1ae3b3SKonstantin Belousov } 94c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 95c3d8a931SKonstantin Belousov e->rl_q_free = NULL; 96c3d8a931SKonstantin Belousov e->rl_q_start = start; 97c3d8a931SKonstantin Belousov e->rl_q_end = end; 98c3d8a931SKonstantin Belousov e->rl_q_flags = flags; 99c3d8a931SKonstantin Belousov #ifdef INVARIANTS 100c3d8a931SKonstantin Belousov e->rl_q_owner = curthread; 101c3d8a931SKonstantin Belousov #endif 102c3d8a931SKonstantin Belousov return (e); 1038f0e9130SKonstantin Belousov } 1048f0e9130SKonstantin Belousov 1058f0e9130SKonstantin Belousov void 106*ff1ae3b3SKonstantin Belousov rangelock_entry_free(struct rl_q_entry *e) 107*ff1ae3b3SKonstantin Belousov { 108*ff1ae3b3SKonstantin Belousov uma_zfree_smr(rl_entry_zone, e); 109*ff1ae3b3SKonstantin Belousov } 110*ff1ae3b3SKonstantin Belousov 111*ff1ae3b3SKonstantin Belousov void 1128f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock) 1138f0e9130SKonstantin Belousov { 114c3d8a931SKonstantin Belousov lock->sleepers = false; 115c3d8a931SKonstantin Belousov atomic_store_ptr(&lock->head, NULL); 1168f0e9130SKonstantin Belousov } 1178f0e9130SKonstantin Belousov 1188f0e9130SKonstantin Belousov void 1198f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock) 1208f0e9130SKonstantin Belousov { 121c3d8a931SKonstantin Belousov struct rl_q_entry *e, *ep; 122*ff1ae3b3SKonstantin Belousov struct thread *td; 1238f0e9130SKonstantin Belousov 124c3d8a931SKonstantin Belousov MPASS(!lock->sleepers); 125c3d8a931SKonstantin Belousov for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head); 126c3d8a931SKonstantin Belousov e != NULL; e = rl_e_unmark(ep)) { 127c3d8a931SKonstantin Belousov ep = atomic_load_ptr(&e->rl_q_next); 128c3d8a931SKonstantin Belousov uma_zfree_smr(rl_entry_zone, e); 129c3d8a931SKonstantin Belousov } 1308f0e9130SKonstantin Belousov } 1318f0e9130SKonstantin Belousov 132c3d8a931SKonstantin Belousov static bool 133c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e) 1348f0e9130SKonstantin Belousov { 135c3d8a931SKonstantin Belousov return (((uintptr_t)e & 1) != 0); 1368f0e9130SKonstantin Belousov } 1378f0e9130SKonstantin Belousov 138c3d8a931SKonstantin Belousov static struct rl_q_entry * 1395badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e) 1405badbeeaSKonstantin Belousov { 1415badbeeaSKonstantin Belousov return ((struct rl_q_entry *)((uintptr_t)e & ~1)); 1425badbeeaSKonstantin Belousov } 1435badbeeaSKonstantin Belousov 1445badbeeaSKonstantin Belousov static struct rl_q_entry * 145c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e) 1468f0e9130SKonstantin Belousov { 147c3d8a931SKonstantin Belousov MPASS(rl_e_is_marked(e)); 1485badbeeaSKonstantin Belousov return (rl_e_unmark_unchecked(e)); 1495badbeeaSKonstantin Belousov } 1505badbeeaSKonstantin Belousov 1515badbeeaSKonstantin Belousov static void 1525badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e) 1535badbeeaSKonstantin Belousov { 1545badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__) 1555badbeeaSKonstantin Belousov int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); 1565badbeeaSKonstantin Belousov MPASS(r == 0); 1575badbeeaSKonstantin Belousov #else 1585badbeeaSKonstantin Belousov atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1); 1595badbeeaSKonstantin Belousov #endif 1602bb93f2dSColin Percival } 1612bb93f2dSColin Percival 162c3d8a931SKonstantin Belousov static struct rl_q_entry * 163c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p) 1648f0e9130SKonstantin Belousov { 165c3d8a931SKonstantin Belousov return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p)); 1668f0e9130SKonstantin Belousov } 1678f0e9130SKonstantin Belousov 1686c32d89eSKonstantin Belousov static bool 1696c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e) 1706c32d89eSKonstantin Belousov { 1716c32d89eSKonstantin Belousov return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); 1726c32d89eSKonstantin Belousov } 1736c32d89eSKonstantin Belousov 1745badbeeaSKonstantin Belousov static void 1755badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) 1768f0e9130SKonstantin Belousov { 177c3158008SKonstantin Belousov bool sleepers; 178c3158008SKonstantin Belousov 179c3d8a931SKonstantin Belousov MPASS(lock != NULL && e != NULL); 180c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next))); 181c3d8a931SKonstantin Belousov MPASS(e->rl_q_owner == curthread); 1828f0e9130SKonstantin Belousov 1835badbeeaSKonstantin Belousov rl_e_mark(e); 184c3158008SKonstantin Belousov sleepers = lock->sleepers; 185c3d8a931SKonstantin Belousov lock->sleepers = false; 186c3158008SKonstantin Belousov if (sleepers) 187c3d8a931SKonstantin Belousov sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0); 1885badbeeaSKonstantin Belousov } 1895badbeeaSKonstantin Belousov 1905badbeeaSKonstantin Belousov void 1915badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie) 1925badbeeaSKonstantin Belousov { 1935badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 1945badbeeaSKonstantin Belousov rangelock_unlock_int(lock, cookie); 195c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 1968f0e9130SKonstantin Belousov } 1978f0e9130SKonstantin Belousov 1988f0e9130SKonstantin Belousov /* 1995badbeeaSKonstantin Belousov * result: -1 if e1 before e2, or both locks are readers and e1 2005badbeeaSKonstantin Belousov * starts before or at e2 2015badbeeaSKonstantin Belousov * 1 if e1 after e2, or both locks are readers and e1 2025badbeeaSKonstantin Belousov * starts after e2 2035badbeeaSKonstantin Belousov * 0 if e1 and e2 overlap and at least one lock is writer 2048f0e9130SKonstantin Belousov */ 205c3d8a931SKonstantin Belousov static int 206c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2) 2078f0e9130SKonstantin Belousov { 2085badbeeaSKonstantin Belousov bool rds; 2095badbeeaSKonstantin Belousov 210c3d8a931SKonstantin Belousov if (e1 == NULL) 211c3d8a931SKonstantin Belousov return (1); 212c3d8a931SKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_end) 213c3d8a931SKonstantin Belousov return (-1); 2145badbeeaSKonstantin Belousov rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2); 2155badbeeaSKonstantin Belousov if (e2->rl_q_start >= e1->rl_q_start && rds) 2165badbeeaSKonstantin Belousov return (-1); 2175badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_end) 2185badbeeaSKonstantin Belousov return (1); 2195badbeeaSKonstantin Belousov if (e1->rl_q_start >= e2->rl_q_start && rds) 2205badbeeaSKonstantin Belousov return (1); 221c3d8a931SKonstantin Belousov return (0); 2228f0e9130SKonstantin Belousov } 2238f0e9130SKonstantin Belousov 224c3d8a931SKonstantin Belousov static void 225c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock) 2268f0e9130SKonstantin Belousov { 227c3d8a931SKonstantin Belousov smr_exit(rl_smr); 228c3d8a931SKonstantin Belousov DROP_GIANT(); 229c3d8a931SKonstantin Belousov lock->sleepers = true; 230c3d8a931SKonstantin Belousov sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0); 231c3d8a931SKonstantin Belousov sleepq_wait(&lock->sleepers, PRI_USER); 232c3d8a931SKonstantin Belousov PICKUP_GIANT(); 233c3d8a931SKonstantin Belousov smr_enter(rl_smr); 234c3d8a931SKonstantin Belousov } 2358f0e9130SKonstantin Belousov 236c3d8a931SKonstantin Belousov static bool 237c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, 238c3d8a931SKonstantin Belousov struct rl_q_entry *new) 239c3d8a931SKonstantin Belousov { 240c3d8a931SKonstantin Belousov return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old, 241c3d8a931SKonstantin Belousov (uintptr_t)new) != 0); 242c3d8a931SKonstantin Belousov } 2438f0e9130SKonstantin Belousov 2445badbeeaSKonstantin Belousov enum RL_INSERT_RES { 2455badbeeaSKonstantin Belousov RL_TRYLOCK_FAILED, 2465badbeeaSKonstantin Belousov RL_LOCK_SUCCESS, 2475badbeeaSKonstantin Belousov RL_LOCK_RETRY, 2485badbeeaSKonstantin Belousov }; 2495badbeeaSKonstantin Belousov 2505badbeeaSKonstantin Belousov static enum RL_INSERT_RES 2515badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 2525badbeeaSKonstantin Belousov struct rl_q_entry **free) 2535badbeeaSKonstantin Belousov { 2545badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 2555badbeeaSKonstantin Belousov 2565badbeeaSKonstantin Belousov prev = &e->rl_q_next; 2575badbeeaSKonstantin Belousov cur = rl_q_load(prev); 2585badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ 2595badbeeaSKonstantin Belousov for (;;) { 2605badbeeaSKonstantin Belousov if (cur == NULL || cur->rl_q_start > e->rl_q_end) 2615badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 2625badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 2635badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 2645badbeeaSKonstantin Belousov next = rl_e_unmark(next); 2655badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 2665badbeeaSKonstantin Belousov cur->rl_q_free = *free; 2675badbeeaSKonstantin Belousov *free = cur; 2685badbeeaSKonstantin Belousov } 2695badbeeaSKonstantin Belousov cur = next; 2705badbeeaSKonstantin Belousov continue; 2715badbeeaSKonstantin Belousov } 2725badbeeaSKonstantin Belousov if (rl_e_is_rlock(cur)) { 2735badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 2745badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 2755badbeeaSKonstantin Belousov continue; 2765badbeeaSKonstantin Belousov } 2775badbeeaSKonstantin Belousov if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 2785badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 2795badbeeaSKonstantin Belousov if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { 2805badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 2815badbeeaSKonstantin Belousov continue; 2825badbeeaSKonstantin Belousov } 2835badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 2845badbeeaSKonstantin Belousov if (trylock) { 2855badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 2865badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 2875badbeeaSKonstantin Belousov } 2885badbeeaSKonstantin Belousov rl_insert_sleep(lock); 2895badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 2905badbeeaSKonstantin Belousov } 2915badbeeaSKonstantin Belousov } 2925badbeeaSKonstantin Belousov } 2935badbeeaSKonstantin Belousov 2945badbeeaSKonstantin Belousov static enum RL_INSERT_RES 2955badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, 2965badbeeaSKonstantin Belousov bool trylock, struct rl_q_entry **free) 2975badbeeaSKonstantin Belousov { 2985badbeeaSKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 2995badbeeaSKonstantin Belousov 3005badbeeaSKonstantin Belousov prev = &lock->head; 3015badbeeaSKonstantin Belousov cur = rl_q_load(prev); 3025badbeeaSKonstantin Belousov MPASS(!rl_e_is_marked(cur)); /* head is not marked */ 3035badbeeaSKonstantin Belousov for (;;) { 3045badbeeaSKonstantin Belousov if (cur == e) 3055badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 3065badbeeaSKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 3075badbeeaSKonstantin Belousov if (rl_e_is_marked(next)) { 3085badbeeaSKonstantin Belousov next = rl_e_unmark(next); 3095badbeeaSKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 3105badbeeaSKonstantin Belousov cur->rl_q_next = *free; 3115badbeeaSKonstantin Belousov *free = cur; 3125badbeeaSKonstantin Belousov } 3135badbeeaSKonstantin Belousov cur = next; 3145badbeeaSKonstantin Belousov continue; 3155badbeeaSKonstantin Belousov } 3165badbeeaSKonstantin Belousov if (cur->rl_q_end <= e->rl_q_start) { 3175badbeeaSKonstantin Belousov prev = &cur->rl_q_next; 3185badbeeaSKonstantin Belousov cur = rl_e_unmark_unchecked(rl_q_load(prev)); 3195badbeeaSKonstantin Belousov continue; 3205badbeeaSKonstantin Belousov } 3215badbeeaSKonstantin Belousov sleepq_lock(&lock->sleepers); 3225badbeeaSKonstantin Belousov rangelock_unlock_int(lock, e); 3235badbeeaSKonstantin Belousov if (trylock) { 3245badbeeaSKonstantin Belousov sleepq_release(&lock->sleepers); 3255badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 3265badbeeaSKonstantin Belousov } 3275badbeeaSKonstantin Belousov rl_insert_sleep(lock); 3285badbeeaSKonstantin Belousov return (RL_LOCK_RETRY); 3295badbeeaSKonstantin Belousov } 3305badbeeaSKonstantin Belousov } 3315badbeeaSKonstantin Belousov 3325badbeeaSKonstantin Belousov static enum RL_INSERT_RES 333c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, 334c3d8a931SKonstantin Belousov struct rl_q_entry **free) 335c3d8a931SKonstantin Belousov { 336c3d8a931SKonstantin Belousov struct rl_q_entry *cur, *next, **prev; 337c3d8a931SKonstantin Belousov int r; 3388f0e9130SKonstantin Belousov 339c3d8a931SKonstantin Belousov again: 340c3d8a931SKonstantin Belousov prev = &lock->head; 3415badbeeaSKonstantin Belousov cur = rl_q_load(prev); 3425badbeeaSKonstantin Belousov if (cur == NULL && rl_q_cas(prev, NULL, e)) 3435badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 3448f0e9130SKonstantin Belousov 3455badbeeaSKonstantin Belousov for (;;) { 3465badbeeaSKonstantin Belousov if (cur != NULL) { 347c3d8a931SKonstantin Belousov if (rl_e_is_marked(cur)) 348c3d8a931SKonstantin Belousov goto again; 349c3d8a931SKonstantin Belousov 350c3d8a931SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 351c3d8a931SKonstantin Belousov if (rl_e_is_marked(next)) { 352c3d8a931SKonstantin Belousov next = rl_e_unmark(next); 353c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 354c3d8a931SKonstantin Belousov #ifdef INVARIANTS 355c3d8a931SKonstantin Belousov cur->rl_q_owner = NULL; 356c3d8a931SKonstantin Belousov #endif 357c3d8a931SKonstantin Belousov cur->rl_q_free = *free; 358c3d8a931SKonstantin Belousov *free = cur; 359c3d8a931SKonstantin Belousov } 360c3d8a931SKonstantin Belousov cur = next; 361c3d8a931SKonstantin Belousov continue; 362c3d8a931SKonstantin Belousov } 363c3d8a931SKonstantin Belousov } 364c3d8a931SKonstantin Belousov 365c3d8a931SKonstantin Belousov r = rl_e_compare(cur, e); 366c3d8a931SKonstantin Belousov if (r == -1) { 367c3d8a931SKonstantin Belousov prev = &cur->rl_q_next; 368c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 369c3d8a931SKonstantin Belousov } else if (r == 0) { 370c3d8a931SKonstantin Belousov sleepq_lock(&lock->sleepers); 371c3d8a931SKonstantin Belousov if (__predict_false(rl_e_is_marked(rl_q_load( 372c3d8a931SKonstantin Belousov &cur->rl_q_next)))) { 373c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 374c3d8a931SKonstantin Belousov continue; 375c3d8a931SKonstantin Belousov } 376e3680954SRick Macklem if (trylock) { 377c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 3785badbeeaSKonstantin Belousov return (RL_TRYLOCK_FAILED); 379e3680954SRick Macklem } 380c3d8a931SKonstantin Belousov rl_insert_sleep(lock); 381c3d8a931SKonstantin Belousov /* e is still valid */ 382c3d8a931SKonstantin Belousov goto again; 383c3d8a931SKonstantin Belousov } else /* r == 1 */ { 384c3d8a931SKonstantin Belousov e->rl_q_next = cur; 385c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, e)) { 386c3d8a931SKonstantin Belousov atomic_thread_fence_acq(); 3875badbeeaSKonstantin Belousov return (rl_e_is_rlock(e) ? 3885badbeeaSKonstantin Belousov rl_r_validate(lock, e, trylock, free) : 3895badbeeaSKonstantin Belousov rl_w_validate(lock, e, trylock, free)); 390e3680954SRick Macklem } 391c3d8a931SKonstantin Belousov /* Reset rl_q_next in case we hit fast path. */ 392c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 393c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 394c3d8a931SKonstantin Belousov } 395c3d8a931SKonstantin Belousov } 396c3d8a931SKonstantin Belousov } 397c3d8a931SKonstantin Belousov 398c3d8a931SKonstantin Belousov static struct rl_q_entry * 3995badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, 4005badbeeaSKonstantin Belousov vm_ooffset_t end, int locktype) 401c3d8a931SKonstantin Belousov { 4025badbeeaSKonstantin Belousov struct rl_q_entry *e, *free, *x, *xp; 403*ff1ae3b3SKonstantin Belousov struct thread *td; 4045badbeeaSKonstantin Belousov enum RL_INSERT_RES res; 405c3d8a931SKonstantin Belousov 406*ff1ae3b3SKonstantin Belousov td = curthread; 4075badbeeaSKonstantin Belousov for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { 408c3d8a931SKonstantin Belousov free = NULL; 4095badbeeaSKonstantin Belousov e = rlqentry_alloc(start, end, locktype); 410c3d8a931SKonstantin Belousov smr_enter(rl_smr); 411c3d8a931SKonstantin Belousov res = rl_insert(lock, e, trylock, &free); 412c3d8a931SKonstantin Belousov smr_exit(rl_smr); 4135badbeeaSKonstantin Belousov if (res == RL_TRYLOCK_FAILED) { 4145badbeeaSKonstantin Belousov MPASS(trylock); 415c3d8a931SKonstantin Belousov e->rl_q_free = free; 416c3d8a931SKonstantin Belousov free = e; 417c3d8a931SKonstantin Belousov e = NULL; 418c3d8a931SKonstantin Belousov } 419c3d8a931SKonstantin Belousov for (x = free; x != NULL; x = xp) { 420c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(x)); 421c3d8a931SKonstantin Belousov xp = x->rl_q_free; 422c3d8a931SKonstantin Belousov MPASS(!rl_e_is_marked(xp)); 423*ff1ae3b3SKonstantin Belousov if (td->td_rlqe == NULL) { 424*ff1ae3b3SKonstantin Belousov smr_synchronize(rl_smr); 425*ff1ae3b3SKonstantin Belousov td->td_rlqe = x; 426*ff1ae3b3SKonstantin Belousov } else { 427c3d8a931SKonstantin Belousov uma_zfree_smr(rl_entry_zone, x); 428c3d8a931SKonstantin Belousov } 4295badbeeaSKonstantin Belousov } 430*ff1ae3b3SKonstantin Belousov } 431c3d8a931SKonstantin Belousov return (e); 4328f0e9130SKonstantin Belousov } 4338f0e9130SKonstantin Belousov 4348f0e9130SKonstantin Belousov void * 435c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 4368f0e9130SKonstantin Belousov { 4375badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); 438e3680954SRick Macklem } 439e3680954SRick Macklem 440e3680954SRick Macklem void * 441c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 442e3680954SRick Macklem { 4435badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); 4448f0e9130SKonstantin Belousov } 4458f0e9130SKonstantin Belousov 4468f0e9130SKonstantin Belousov void * 447c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 4488f0e9130SKonstantin Belousov { 4495badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 450e3680954SRick Macklem } 451e3680954SRick Macklem 452e3680954SRick Macklem void * 453c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 454e3680954SRick Macklem { 4555badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 4568f0e9130SKonstantin Belousov } 4573155f2f0SKyle Evans 4583155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT 4593155f2f0SKyle Evans void 4603155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 4613155f2f0SKyle Evans { 4623155f2f0SKyle Evans } 4633155f2f0SKyle Evans #endif /* INVARIANT_SUPPORT */ 464c3d8a931SKonstantin Belousov 465c3d8a931SKonstantin Belousov #include "opt_ddb.h" 466c3d8a931SKonstantin Belousov #ifdef DDB 467c3d8a931SKonstantin Belousov #include <ddb/ddb.h> 468c3d8a931SKonstantin Belousov 469c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock) 470c3d8a931SKonstantin Belousov { 471c3d8a931SKonstantin Belousov struct rangelock *lock; 472c3d8a931SKonstantin Belousov struct rl_q_entry *e, *x; 473c3d8a931SKonstantin Belousov 474c3d8a931SKonstantin Belousov if (!have_addr) { 475c3d8a931SKonstantin Belousov db_printf("show rangelock addr\n"); 476c3d8a931SKonstantin Belousov return; 477c3d8a931SKonstantin Belousov } 478c3d8a931SKonstantin Belousov 479c3d8a931SKonstantin Belousov lock = (struct rangelock *)addr; 480c3d8a931SKonstantin Belousov db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers); 481c3d8a931SKonstantin Belousov for (e = lock->head;;) { 482c3d8a931SKonstantin Belousov x = rl_e_is_marked(e) ? rl_e_unmark(e) : e; 483c3d8a931SKonstantin Belousov if (x == NULL) 484c3d8a931SKonstantin Belousov break; 485c3d8a931SKonstantin Belousov db_printf(" entry %p marked %d %d start %#jx end %#jx " 486c3d8a931SKonstantin Belousov "flags %x next %p", 487c3d8a931SKonstantin Belousov e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next), 488c3d8a931SKonstantin Belousov x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next); 489c3d8a931SKonstantin Belousov #ifdef INVARIANTS 490c3d8a931SKonstantin Belousov db_printf(" owner %p (%d)", x->rl_q_owner, 491c3d8a931SKonstantin Belousov x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1); 492c3d8a931SKonstantin Belousov #endif 493c3d8a931SKonstantin Belousov db_printf("\n"); 494c3d8a931SKonstantin Belousov e = x->rl_q_next; 495c3d8a931SKonstantin Belousov } 496c3d8a931SKonstantin Belousov } 497c3d8a931SKonstantin Belousov 498c3d8a931SKonstantin Belousov #endif /* DDB */ 499