/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>

#include <vm/uma.h>

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after CAS removed
 * the range.  Use rl_q_free for local list of ranges to remove after
 * the smr section is dropped.
 */
struct rl_q_entry {
        struct rl_q_entry *rl_q_next;
        struct rl_q_entry *rl_q_free;
        off_t           rl_q_start, rl_q_end;
        int             rl_q_flags;
#ifdef INVARIANTS
        struct thread   *rl_q_owner;
#endif
};
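
/*
 * Illustrative sketch of the marking scheme described above; this
 * block is not compiled and the helper name is hypothetical.  A
 * pointer loaded from an rl_q_next field with its low bit set means
 * that the range owning that field has been released.  Encoding the
 * flag in bit 0 is possible because entries are allocated with at
 * least UMA_ALIGNOF(struct rl_q_entry) alignment, so the bit is always
 * clear in a real entry address and stripping it recovers the pointer.
 */
#if 0
static struct rl_q_entry *
rl_q_mark_sketch(struct rl_q_entry *e)
{
        struct rl_q_entry *marked;

        /* Setting bit 0 "logically deletes" the range. */
        marked = (struct rl_q_entry *)((uintptr_t)e | 1);
        /* Clearing bit 0 recovers the original entry address. */
        MPASS((struct rl_q_entry *)((uintptr_t)marked & ~(uintptr_t)1) == e);
        return (marked);
}
#endif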

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void
rangelock_sys_init(void)
{
        rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
            NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
            UMA_ZONE_SMR);
        rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
        struct rl_q_entry *e;

        e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
        e->rl_q_next = NULL;
        e->rl_q_free = NULL;
        e->rl_q_start = start;
        e->rl_q_end = end;
        e->rl_q_flags = flags;
#ifdef INVARIANTS
        e->rl_q_owner = curthread;
#endif
        return (e);
}

void
rangelock_init(struct rangelock *lock)
{
        lock->sleepers = false;
        atomic_store_ptr(&lock->head, NULL);
}

void
rangelock_destroy(struct rangelock *lock)
{
        struct rl_q_entry *e, *ep;

        MPASS(!lock->sleepers);
        for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
            e != NULL; e = rl_e_unmark(ep)) {
                ep = atomic_load_ptr(&e->rl_q_next);
                uma_zfree_smr(rl_entry_zone, e);
        }
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
        return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
        return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
        MPASS(rl_e_is_marked(e));
        return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
        int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
        MPASS(r == 0);
#else
        atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
        return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
        return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
        MPASS(lock != NULL && e != NULL);
        MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
        MPASS(e->rl_q_owner == curthread);

        rl_e_mark(e);
        lock->sleepers = false;
        sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
        sleepq_lock(&lock->sleepers);
        rangelock_unlock_int(lock, cookie);
        sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *		starts before or at e2
 *	    1 if e1 after e2, or both locks are readers and e1
 *		starts after e2
 *	    0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
        bool rds;

        if (e1 == NULL)
                return (1);
        if (e2->rl_q_start >= e1->rl_q_end)
                return (-1);
        rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
        if (e2->rl_q_start >= e1->rl_q_start && rds)
                return (-1);
        if (e1->rl_q_start >= e2->rl_q_end)
                return (1);
        if (e1->rl_q_start >= e2->rl_q_start && rds)
                return (1);
        return (0);
}
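
/*
 * Worked example of the ordering above, as a sketch that is not
 * compiled; the ranges and the helper are hypothetical.  Overlapping
 * read locks are compatible and are simply ordered by their start
 * offsets, while any overlap involving a write lock reports a
 * conflict.
 */
#if 0
static void
rl_e_compare_sketch(void)
{
        struct rl_q_entry r1 = { .rl_q_start = 0, .rl_q_end = 8192,
            .rl_q_flags = RL_LOCK_READ };
        struct rl_q_entry r2 = { .rl_q_start = 4096, .rl_q_end = 8192,
            .rl_q_flags = RL_LOCK_READ };
        struct rl_q_entry w1 = { .rl_q_start = 0, .rl_q_end = 8192,
            .rl_q_flags = RL_LOCK_WRITE };

        /* Both readers: r1 starts at or before r2, so r1 orders first. */
        MPASS(rl_e_compare(&r1, &r2) == -1);
        /* The mirrored query orders r2 after r1. */
        MPASS(rl_e_compare(&r2, &r1) == 1);
        /* A writer overlapping the same bytes is a conflict. */
        MPASS(rl_e_compare(&w1, &r2) == 0);
}
#endif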

static void
rl_insert_sleep(struct rangelock *lock)
{
        smr_exit(rl_smr);
        DROP_GIANT();
        lock->sleepers = true;
        sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
        sleepq_wait(&lock->sleepers, PRI_USER);
        PICKUP_GIANT();
        smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
        return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
            (uintptr_t)new) != 0);
}

enum RL_INSERT_RES {
        RL_TRYLOCK_FAILED,
        RL_LOCK_SUCCESS,
        RL_LOCK_RETRY,
};

static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
        struct rl_q_entry *cur, *next, **prev;

        prev = &e->rl_q_next;
        cur = rl_q_load(prev);
        MPASS(!rl_e_is_marked(cur));    /* nobody can unlock e yet */
        for (;;) {
                if (cur == NULL || cur->rl_q_start > e->rl_q_end)
                        return (RL_LOCK_SUCCESS);
                next = rl_q_load(&cur->rl_q_next);
                if (rl_e_is_marked(next)) {
                        next = rl_e_unmark(next);
                        if (rl_q_cas(prev, cur, next)) {
                                cur->rl_q_free = *free;
                                *free = cur;
                        }
                        cur = next;
                        continue;
                }
                if (rl_e_is_rlock(cur)) {
                        prev = &cur->rl_q_next;
                        cur = rl_e_unmark_unchecked(rl_q_load(prev));
                        continue;
                }
                if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
                        sleepq_lock(&lock->sleepers);
                        if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
                                sleepq_release(&lock->sleepers);
                                continue;
                        }
                        rangelock_unlock_int(lock, e);
                        if (trylock) {
                                sleepq_release(&lock->sleepers);
                                return (RL_TRYLOCK_FAILED);
                        }
                        rl_insert_sleep(lock);
                        return (RL_LOCK_RETRY);
                }
        }
}

static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
        struct rl_q_entry *cur, *next, **prev;

        prev = &lock->head;
        cur = rl_q_load(prev);
        MPASS(!rl_e_is_marked(cur));    /* head is not marked */
        for (;;) {
                if (cur == e)
                        return (RL_LOCK_SUCCESS);
                next = rl_q_load(&cur->rl_q_next);
                if (rl_e_is_marked(next)) {
                        next = rl_e_unmark(next);
                        if (rl_q_cas(prev, cur, next)) {
                                cur->rl_q_free = *free;
                                *free = cur;
                        }
                        cur = next;
                        continue;
                }
                if (cur->rl_q_end <= e->rl_q_start) {
                        prev = &cur->rl_q_next;
                        cur = rl_e_unmark_unchecked(rl_q_load(prev));
                        continue;
                }
                sleepq_lock(&lock->sleepers);
                rangelock_unlock_int(lock, e);
                if (trylock) {
                        sleepq_release(&lock->sleepers);
                        return (RL_TRYLOCK_FAILED);
                }
                rl_insert_sleep(lock);
                return (RL_LOCK_RETRY);
        }
}

static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
        struct rl_q_entry *cur, *next, **prev;
        int r;

again:
        prev = &lock->head;
        cur = rl_q_load(prev);
        if (cur == NULL && rl_q_cas(prev, NULL, e))
                return (RL_LOCK_SUCCESS);

        for (;;) {
                if (cur != NULL) {
                        if (rl_e_is_marked(cur))
                                goto again;

                        next = rl_q_load(&cur->rl_q_next);
                        if (rl_e_is_marked(next)) {
                                next = rl_e_unmark(next);
                                if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
                                        cur->rl_q_owner = NULL;
#endif
                                        cur->rl_q_free = *free;
                                        *free = cur;
                                }
                                cur = next;
                                continue;
                        }
                }

                r = rl_e_compare(cur, e);
                if (r == -1) {
                        prev = &cur->rl_q_next;
                        cur = rl_q_load(prev);
                } else if (r == 0) {
                        sleepq_lock(&lock->sleepers);
                        if (__predict_false(rl_e_is_marked(rl_q_load(
                            &cur->rl_q_next)))) {
                                sleepq_release(&lock->sleepers);
                                continue;
                        }
                        if (trylock) {
                                sleepq_release(&lock->sleepers);
                                return (RL_TRYLOCK_FAILED);
                        }
                        rl_insert_sleep(lock);
                        /* e is still valid */
                        goto again;
                } else /* r == 1 */ {
                        e->rl_q_next = cur;
                        if (rl_q_cas(prev, cur, e)) {
                                atomic_thread_fence_acq();
                                return (rl_e_is_rlock(e) ?
                                    rl_r_validate(lock, e, trylock, free) :
                                    rl_w_validate(lock, e, trylock, free));
                        }
                        /* Reset rl_q_next in case we hit fast path. */
                        e->rl_q_next = NULL;
                        cur = rl_q_load(prev);
                }
        }
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
        struct rl_q_entry *e, *free, *x, *xp;
        enum RL_INSERT_RES res;

        for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
                free = NULL;
                e = rlqentry_alloc(start, end, locktype);
                smr_enter(rl_smr);
                res = rl_insert(lock, e, trylock, &free);
                smr_exit(rl_smr);
                if (res == RL_TRYLOCK_FAILED) {
                        MPASS(trylock);
                        e->rl_q_free = free;
                        free = e;
                        e = NULL;
                }
                for (x = free; x != NULL; x = xp) {
                        MPASS(!rl_e_is_marked(x));
                        xp = x->rl_q_free;
                        MPASS(!rl_e_is_marked(xp));
                        uma_zfree_smr(rl_entry_zone, x);
                }
        }
        return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
        return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
        return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
        return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
        return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */
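
/*
 * Usage sketch for the KPI above (hypothetical caller, not compiled):
 * the blocking variants return an opaque cookie that is later handed
 * back to rangelock_unlock(), while the trylock variants return NULL
 * when the range cannot be granted immediately.  The "off" and "len"
 * parameters below are illustrative.
 */
#if 0
static void
rangelock_usage_sketch(struct rangelock *rl, vm_ooffset_t off, vm_ooffset_t len)
{
        void *cookie;

        /* Exclusive access to the byte range [off, off + len). */
        cookie = rangelock_wlock(rl, off, off + len);
        /* ... modify the range ... */
        rangelock_unlock(rl, cookie);

        /* Opportunistic shared access; NULL means the range is busy. */
        cookie = rangelock_tryrlock(rl, off, off + len);
        if (cookie != NULL) {
                /* ... read the range ... */
                rangelock_unlock(rl, cookie);
        }
}
#endif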

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
        struct rangelock *lock;
        struct rl_q_entry *e, *x;

        if (!have_addr) {
                db_printf("show rangelock addr\n");
                return;
        }

        lock = (struct rangelock *)addr;
        db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
        for (e = lock->head;;) {
                x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
                if (x == NULL)
                        break;
                db_printf("  entry %p marked %d %d start %#jx end %#jx "
                    "flags %x next %p",
                    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
                    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
                db_printf(" owner %p (%d)", x->rl_q_owner,
                    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
                db_printf("\n");
                e = x->rl_q_next;
        }
}

#endif	/* DDB */