/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request comes that conflicts with granted ones
 * based on type, rangelocks serve requests in the "cheating" mode.
 * In this mode, a rangelock behaves like an sx lock, as if each
 * request covered the whole range of the protected object.  On
 * receiving a conflicting request (any request while a write request
 * is effective, or any write request while some read ones are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to effectively keeping track of the
 * precise range of each new request.
 *
 * The normal sx implementation is not used, to avoid bloating the
 * structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave like normal locks but do not drain.
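 *
 * In cheat mode, lock->head encodes the whole lock state: the low
 * bits hold the RL_CHEAT_* flags, while the bits above RL_CHEAT_MASK
 * count the granted readers in units of RL_CHEAT_READER.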
 */

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200

static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
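					/*
					 * Last reader leaving while a
					 * drain is requested: clear the
					 * cheat state and wake up the
					 * waiters under the sleepqueue
					 * lock.
					 */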
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after CAS removed
 * the range.  Use rl_q_free for a local list of ranges to remove
 * after the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ?
	    RL_CHEAT_CHEATING : 0);
}
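
/*
 * A minimal usage sketch; the containing object 'obj' and its embedded
 * 'rl' member are illustrative only and not part of this file:
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&obj->rl, off, off + len);
 *	... access the byte range [off, off + len) exclusively ...
 *	rangelock_unlock(&obj->rl, cookie);
 */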

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS)
	int r = atomic_testandset_ptr((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *		starts before or at e2
 *	    1 if e1 after e2, or both locks are readers and e1
 *		starts after e2
 *	    0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

/*
 * Try to insert an entry into the queue.  Return true if successful,
 * otherwise false.
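 *
 * The CAS is performed with release semantics, so the fields of a newly
 * linked entry are visible to any thread that subsequently observes the
 * link.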
 */
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_TRYLOCK_FAILED_MARKED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

/*
 * Handle a possible lock conflict between cur and e.  "inserted" is true
 * if e is already inserted into the queue.
 */
static enum RL_INSERT_RES
rl_conflict(struct rangelock *lock, struct rl_q_entry *cur, struct rl_q_entry *e,
    bool trylock, bool inserted)
{
	sleepq_lock(&lock->sleepers);
	if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
		sleepq_release(&lock->sleepers);
		return (RL_LOCK_SUCCESS);	/* no conflict after all */
	}

	/*
	 * Make sure we're not conflicting with one of our own locks.  This
	 * scenario could conceivably happen for one of two reasons: a bug in
	 * the rangelock consumer (if "inserted" is true), or a bug in the
	 * rangelock implementation itself (if "inserted" is false).
	 */
	KASSERT(cur->rl_q_owner != curthread,
	    ("%s: conflicting range is locked by the current thread",
	    __func__));

	if (inserted)
		rangelock_unlock_int(lock, e);
	if (trylock) {
		sleepq_release(&lock->sleepers);

		/*
		 * If the new entry e has been enqueued and is thus visible to
		 * other threads, it cannot be safely freed.
		 */
		return (inserted ? RL_TRYLOCK_FAILED_MARKED :
		    RL_TRYLOCK_FAILED);
	}
	rl_insert_sleep(lock);
	return (RL_LOCK_RETRY);
}

/*
 * Having inserted entry e, verify that no conflicting write locks are
 * present; clean up dead entries that we encounter along the way.
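 *
 * A reader only needs to examine entries that follow e in the queue and
 * whose ranges start before e's end; the scan stops at the first entry
 * past that point.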
 */
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	enum RL_INSERT_RES res;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}

		res = rl_conflict(lock, cur, e, trylock, true);
		if (res != RL_LOCK_SUCCESS)
			return (res);
	}
}

/*
 * Having inserted entry e, verify that no conflicting locks are present;
 * clean up dead entries that we encounter along the way.
 */
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	enum RL_INSERT_RES res;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}

		res = rl_conflict(lock, cur, e, trylock, true);
		if (res != RL_LOCK_SUCCESS)
			return (res);
	}
}

static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			enum RL_INSERT_RES res;

			res = rl_conflict(lock, cur, e, trylock, false);
			switch (res) {
			case RL_LOCK_SUCCESS:
				/* cur does not conflict after all. */
				break;
			case RL_LOCK_RETRY:
				/* e is still valid. */
				goto again;
			default:
				return (res);
			}
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/*
			 * Reset rl_q_next in case we hit fast path.
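			 * Otherwise a later retry through the empty-lock
			 * fast path at the start of the function could
			 * link e with a stale next pointer.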
			 */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED || res == RL_TRYLOCK_FAILED_MARKED) {
			MPASS(trylock);
			if (res == RL_TRYLOCK_FAILED) {
				e->rl_q_free = free;
				free = e;
			}
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

/*
 * If the caller asserts that it may obtain several range locks on the
 * same lock simultaneously, switch to non-cheat mode.  Cheat mode
 * cannot handle this and would hang in drain or spin in trylock
 * retries.
 */
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf(" cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
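		/*
		 * Walk the queue, stripping the mark bit so that entries
		 * which have already been unlocked (marked) are printed
		 * as well.
		 */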
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf(" entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */