/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request comes that conflicts with granted ones
 * based on type, rangelocks serve requests in the "cheating" mode.
 * In this mode, a rangelock behaves like an sxlock, as if each
 * request covered the whole range of the protected object.  On
 * receiving a conflicting request (any request while a write request
 * is effective, or any write request while some read requests are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to effectively keeping track of the
 * precise range of each new request.
 *
 * The normal sx implementation is not used, in order to avoid
 * bloating the structures (most importantly, vnodes) which embed
 * rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * A lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave like normal locks but do not drain.
 */
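
/*
 * Illustrative usage sketch (comment only, not part of the KPI
 * definition; 'obj', 'off' and 'len' are hypothetical): a typical
 * consumer embeds a struct rangelock into the object whose byte range
 * it protects and brackets ranged I/O with a lock/unlock pair, e.g.
 *
 *	void *cookie;
 *
 *	rangelock_init(&obj->rl);
 *	...
 *	cookie = rangelock_wlock(&obj->rl, off, off + len);
 *	... modify bytes [off, off + len) ...
 *	rangelock_unlock(&obj->rl, cookie);
 *
 * As long as the granted requests never conflict (only readers, or a
 * single writer at a time), such a consumer keeps the lock in cheat
 * mode and no rl_q_entry is ever allocated for it.
 */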

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200

static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}
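
/*
 * Illustrative example of the encoding above (sketch, not normative):
 * the head word of a cheating lock evolves as follows.
 *
 *	rangelock_init()		head == 0x1	(CHEATING)
 *	rangelock_wlock()		head == 0x3	(CHEATING | WLOCKED)
 *	rangelock_unlock()		head == 0x1
 *	rangelock_rlock(), 1st reader	head == 0x9	(CHEATING + 1 * READER)
 *	rangelock_rlock(), 2nd reader	head == 0x11	(CHEATING + 2 * READER)
 *
 * A conflicting request sets RL_CHEAT_DRAINING and sleeps; once the
 * last cheat-mode owner unlocks, head becomes 0 and the lock leaves
 * cheat mode.
 */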

static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after the CAS
 * removed the range.  Use rl_q_free for the local list of ranges to
 * remove after the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t rl_q_start, rl_q_end;
	int rl_q_flags;
#ifdef INVARIANTS
	struct thread *rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;
	struct thread *td;

	td = curthread;
	if (td->td_rlqe != NULL) {
		e = td->td_rlqe;
		td->td_rlqe = NULL;
	} else {
		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	}
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_entry_free(struct rl_q_entry *e)
{
	uma_zfree_smr(rl_entry_zone, e);
}
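
/*
 * Illustrative sketch of the entry life cycle described above struct
 * rl_q_entry (comment only, not compiled): unlock merely marks an
 * entry by setting the low bit of its rl_q_next; a later traversal
 * that finds the mark unlinks the entry and queues it on a local list
 * through rl_q_free, e.g.
 *
 *	next = rl_q_load(&cur->rl_q_next);
 *	if (rl_e_is_marked(next) &&
 *	    rl_q_cas(prev, cur, rl_e_unmark(next))) {
 *		cur->rl_q_free = free;
 *		free = cur;
 *	}
 *	...
 *	smr_exit(rl_smr);
 *	rangelock_free_free(free);
 *
 * rangelock_free_free() caches one entry in td_rlqe (after an
 * smr_synchronize()) when the per-thread slot is empty, and returns
 * the rest to the SMR-protected zone.
 */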

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ?
	    RL_CHEAT_CHEATING : 0);
}

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;
	struct thread *td;

	td = curthread;
	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		if (td->td_rlqe == NULL) {
			smr_synchronize(rl_smr);
			td->td_rlqe = x;
		} else {
			uma_zfree_smr(rl_entry_zone, x);
		}
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *		starts before or at e2
 *	    1 if e1 after e2, or both locks are readers and e1
 *		starts after e2
 *	    0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}
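
/*
 * Worked examples for the comparison above (illustrative only): for
 * two read requests e1 = [0, 10) and e2 = [5, 15) the ranges overlap,
 * but both are readers and e1 starts first, so the result is -1 and
 * the two locks may coexist.  If e1 were instead a write lock over
 * [0, 10), the result would be 0 and the requests would conflict.
 * Disjoint ranges order purely by position: e1 = [20, 30) against
 * e2 = [0, 10) yields 1, regardless of the lock types.
 */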

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}
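
/*
 * Illustrative trylock pattern (sketch only; 'obj' is hypothetical):
 * the try variants return NULL instead of sleeping when the range
 * cannot be granted immediately, so a caller holding other locks can
 * back off and retry, e.g.
 *
 *	cookie = rangelock_trywlock(&obj->rl, off, off + len);
 *	if (cookie == NULL) {
 *		... drop locks that must not be held while sleeping ...
 *		cookie = rangelock_wlock(&obj->rl, off, off + len);
 *	}
 */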

/*
 * If the caller asserts that it can obtain multiple range locks on
 * the same lock simultaneously, switch to the non-cheating mode.
 * Cheat mode cannot handle this, hanging in drain or trylock retries.
 */
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf("  cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */