/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request comes that conflicts with granted ones
 * based on type, rangelocks serve requests in the "cheating" mode.
 * In this mode, a rangelock behaves like an sxlock, as if each request
 * covered the whole range of the protected object.  On receiving a
 * conflicting request (any request while a write request is
 * effective, or any write request while some read ones are
 * effective), all requests granted in "cheating" mode are drained,
 * and the rangelock then switches to effectively keeping track of
 * the precise range of each new request.
 *
 * The normal sx implementation is not used so as not to bloat
 * structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave the same as normal locks but do not drain.
 */

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200
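
/*
 * For illustration, the cheat-mode encoding of lock->head: the low
 * three bits (RL_CHEAT_MASK) hold state flags, and the remaining bits
 * count granted readers in units of RL_CHEAT_READER.  Some example
 * values:
 *
 *	0x1	cheating, no requests granted
 *	0x9	cheating, one reader	(RL_CHEAT_READER | RL_CHEAT_CHEATING)
 *	0x11	cheating, two readers	(2 * RL_CHEAT_READER | RL_CHEAT_CHEATING)
 *	0x3	cheating, write-locked	(RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING)
 *	0x7	write-locked and draining after a conflicting request
 *	0x0	cheat mode left; head now points into the entry list
 */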

static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}
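
/*
 * A rough walkthrough of the hand-off out of cheat mode: suppose
 * thread A holds the cheat write lock (head == 0x3) and thread B
 * requests a read lock.  B sees the conflict, sets RL_CHEAT_DRAINING
 * and sleeps on &lock->head in rangelock_cheat_drain().  When A calls
 * rangelock_cheat_unlock(), it observes RL_CHEAT_DRAINING, stores 0
 * into lock->head (leaving cheat mode) and wakes the sleepers; B then
 * retries and is served by the non-cheating code below.
 */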

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse the rl_q_next
 * linkage since other threads might follow it even after the CAS
 * removed the range.  Use rl_q_free for a local list of ranges to
 * remove after the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}
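
/*
 * A minimal usage sketch of the lifecycle, with a hypothetical
 * structure 'struct foo' standing in for a real consumer (vnodes are
 * the most important one): the rangelock is embedded in the object
 * whose byte ranges it protects and is set up and torn down with it.
 *
 *	struct foo {
 *		struct rangelock f_rl;
 *		...
 *	};
 *
 *	foo_create(struct foo *fp)	{ rangelock_init(&fp->f_rl); }
 *	foo_destroy(struct foo *fp)	{ rangelock_destroy(&fp->f_rl); }
 */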

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *		starts before or at e2
 *	     1 if e1 after e2, or both locks are readers and e1
 *		starts after e2
 *	     0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}
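
/*
 * For example, given a granted read lock e2 = [0, 100) and a new read
 * request e1 = [50, 150), both are readers, so e1 orders after e2
 * (result 1) even though the ranges overlap.  If either entry were a
 * write lock, the same pair would yield 0 and the new request would
 * have to wait.  Disjoint ranges order purely by position, e.g.
 * e1 = [0, 10) against e2 = [20, 30) yields -1.
 */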

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}
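
/*
 * Insertion of a new request proceeds in two phases, following the
 * list design from the paper cited above.  rl_insert() links the new
 * entry into the list kept sorted by starting offset, unlinking
 * marked (already unlocked) entries it encounters.  The inserted
 * request is then validated: a reader (rl_r_validate) re-checks that
 * no overlapping writer follows it in the list, while a writer
 * (rl_w_validate) re-checks that no overlapping entry precedes it.
 * On a conflict the new entry is unlocked again and the thread either
 * fails (trylock) or sleeps and retries.
 */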

static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}
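
/*
 * A minimal usage sketch of the public interface, reusing the
 * hypothetical 'struct foo' from above (error handling omitted).  The
 * returned cookie identifies the granted range and must be passed
 * back to rangelock_unlock():
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&fp->f_rl, off, off + len);
 *	... modify bytes in [off, off + len) ...
 *	rangelock_unlock(&fp->f_rl, cookie);
 *
 * The try-variants return NULL instead of sleeping when the range
 * cannot be granted immediately.
 */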

/*
 * If the caller asserts that it can obtain multiple range locks on
 * the same lock simultaneously, switch to non-cheat mode.  Cheat mode
 * cannot handle such recursion and would hang in the drain or in
 * trylock retries.
 */
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf("  cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */