/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 */

/*
 * This file contains the code to implement file range locking in
 * ZFS, although there isn't much specific to ZFS (all that comes to mind is
 * support for growing the blocksize).
 *
 * Interface
 * ---------
 * Defined in zfs_rlock.h but essentially:
 *	lr = rangelock_enter(zp, off, len, lock_type);
 *	rangelock_reduce(lr, off, len); // optional
 *	rangelock_exit(lr);
 *
 * Range locking rules
 * --------------------
 * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
 *    file range needs to be locked as RL_WRITER. Only then can the pages be
 *    freed etc. and zp_size reset. zp_size must be set within the range lock.
 * 2. For writes and punching holes (zfs_write & zfs_space) just the range
 *    being written or freed needs to be locked as RL_WRITER.
 *    Multiple writes at the end of the file must coordinate zp_size updates
 *    to ensure data isn't lost. A compare and swap loop is currently used
 *    to ensure the file size is at least the offset last written.
 * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
 *    read needs to be locked as RL_READER. A check against zp_size can then
 *    be made for reading beyond end of file.
 *
 * AVL tree
 * --------
 * An AVL tree is used to maintain the state of the existing ranges
 * that are locked for exclusive (writer) or shared (reader) use.
 * The starting range offset is used for searching and sorting the tree.
 *
 * Common case
 * -----------
 * The (hopefully) usual case is of no overlaps or contention for locks. On
 * entry to rangelock_enter(), a locked_range_t is allocated; the tree is
 * searched and no overlap is found, so *this* locked_range_t is placed in
 * the tree.
 *
 * Overlaps/Reference counting/Proxy locks
 * ---------------------------------------
 * The AVL code only allows one node at a particular offset. Also, it's very
 * inefficient to search through all previous entries looking for overlaps
 * (because the very first entry in the ordered list might be at offset 0
 * but cover the whole file).
 * So this implementation uses reference counts and proxy range locks.
 * Firstly, only reader locks use reference counts and proxy locks,
 * because writer locks are exclusive.
 * When a reader lock overlaps with another then a proxy lock is created
 * for that range and replaces the original lock. If the overlap
 * is exact then the reference count of the proxy is simply incremented.
 * Otherwise, the proxy lock is split into smaller lock ranges and
 * new proxy locks created for the non-overlapping ranges.
 * The reference counts are adjusted accordingly.
 * Meanwhile, the original lock is kept around (this is the caller's handle)
 * and its offset and length are used when releasing the lock.
 *
 * Thread coordination
 * -------------------
 * In order to make wakeups efficient and to ensure multiple continuous
 * readers on a range don't starve a writer for the same range lock,
 * two condition variables are allocated in each zfs_locked_range_t.
 * If a writer (or reader) can't get a range it initialises the writer
 * (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
 * and waits on that cv. When a thread unlocks that range it wakes up all
 * writers then all readers before destroying the lock.
 *
 * Append mode writes
 * ------------------
 * Append mode writes need to lock a range at the end of a file.
 * The offset of the end of the file is determined under the
 * range locking mutex, the lock type is converted from RL_APPEND to
 * RL_WRITER, and the range is locked.
 *
 * Grow block handling
 * -------------------
 * ZFS supports multiple block sizes, up to 16MB. The smallest
 * block size is used for the file, which is grown as needed. During this
 * growth all other writers and readers must be excluded.
 * So if the block size needs to be grown then the whole file is
 * exclusively locked, then later the caller will reduce the lock
 * range to just the range to be written using rangelock_reduce().
 */

#include <sys/zfs_context.h>
#include <sys/zfs_rlock.h>
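
/*
 * A minimal sketch of the locking pattern from the "Interface" section of
 * the comment above. The example_locked_io() wrapper and the do_io()
 * callback are hypothetical; only the zfs_rangelock_*() calls and types
 * come from this file.
 */
#if 0
static int
example_locked_io(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    boolean_t writing, int (*do_io)(uint64_t, uint64_t))
{
	zfs_locked_range_t *lr;
	int error;

	/* Shared for reads, exclusive for writes over [off, off + len). */
	lr = zfs_rangelock_enter(rl, off, len,
	    writing ? RL_WRITER : RL_READER);
	error = do_io(off, len);
	zfs_rangelock_exit(lr);

	return (error);
}
#endif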

/*
 * AVL comparison function used to order range locks.
 * Locks are ordered on the start offset of the range.
 */
static int
zfs_rangelock_compare(const void *arg1, const void *arg2)
{
	const zfs_locked_range_t *rl1 = (const zfs_locked_range_t *)arg1;
	const zfs_locked_range_t *rl2 = (const zfs_locked_range_t *)arg2;

	return (TREE_CMP(rl1->lr_offset, rl2->lr_offset));
}

/*
 * The callback is invoked when acquiring a RL_WRITER or RL_APPEND lock.
 * It must convert RL_APPEND to RL_WRITER (starting at the end of the file),
 * and may increase the range that's locked for RL_WRITER.
 */
void
zfs_rangelock_init(zfs_rangelock_t *rl, zfs_rangelock_cb_t *cb, void *arg)
{
	mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&rl->rl_tree, zfs_rangelock_compare,
	    sizeof (zfs_locked_range_t), offsetof(zfs_locked_range_t, lr_node));
	rl->rl_cb = cb;
	rl->rl_arg = arg;
}

void
zfs_rangelock_fini(zfs_rangelock_t *rl)
{
	mutex_destroy(&rl->rl_lock);
	avl_destroy(&rl->rl_tree);
}
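
/*
 * A minimal sketch of the callback shape expected by zfs_rangelock_init(),
 * assuming "arg" lets the consumer find its current end-of-file offset; the
 * example_file_size() helper is hypothetical. A real callback (e.g. the
 * ZPL's) may also widen an RL_WRITER range when the blocksize must grow.
 */
#if 0
static void
example_rangelock_cb(zfs_locked_range_t *new, void *arg)
{
	if (new->lr_type == RL_APPEND) {
		/* Convert the append lock into a writer lock at EOF. */
		new->lr_offset = example_file_size(arg); /* hypothetical */
		new->lr_type = RL_WRITER;
	}
}
#endif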

/*
 * Check if a write lock can be grabbed. If not, fail immediately or sleep and
 * recheck until available, depending on the value of the "nonblock" parameter.
 */
static boolean_t
zfs_rangelock_enter_writer(zfs_rangelock_t *rl, zfs_locked_range_t *new,
    boolean_t nonblock)
{
	avl_tree_t *tree = &rl->rl_tree;
	zfs_locked_range_t *lr;
	avl_index_t where;
	uint64_t orig_off = new->lr_offset;
	uint64_t orig_len = new->lr_length;
	zfs_rangelock_type_t orig_type = new->lr_type;

	for (;;) {
		/*
		 * Call the callback, which can modify new->lr_offset,
		 * new->lr_length, and new->lr_type.
		 * Note, the callback is used by the ZPL to handle appending
		 * and changing blocksizes. It isn't needed for zvols.
		 */
		if (rl->rl_cb != NULL) {
			rl->rl_cb(new, rl->rl_arg);
		}

		/*
		 * If the type was APPEND, the callback must convert it to
		 * WRITER.
		 */
		ASSERT3U(new->lr_type, ==, RL_WRITER);

		/*
		 * First check for the usual case of no locks.
		 */
		if (avl_numnodes(tree) == 0) {
			avl_add(tree, new);
			return (B_TRUE);
		}

		/*
		 * Look for any locks in the range.
		 */
		lr = avl_find(tree, new, &where);
		if (lr != NULL)
			goto wait; /* already locked at same offset */

		lr = avl_nearest(tree, where, AVL_AFTER);
		if (lr != NULL &&
		    lr->lr_offset < new->lr_offset + new->lr_length)
			goto wait;

		lr = avl_nearest(tree, where, AVL_BEFORE);
		if (lr != NULL &&
		    lr->lr_offset + lr->lr_length > new->lr_offset)
			goto wait;

		avl_insert(tree, new, where);
		return (B_TRUE);
wait:
		if (nonblock)
			return (B_FALSE);
		if (!lr->lr_write_wanted) {
			cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
			lr->lr_write_wanted = B_TRUE;
		}
		cv_wait(&lr->lr_write_cv, &rl->rl_lock);

		/* reset to original */
		new->lr_offset = orig_off;
		new->lr_length = orig_len;
		new->lr_type = orig_type;
	}
}

/*
 * If this is an original (non-proxy) lock then replace it with
 * a proxy and return the proxy.
 */
static zfs_locked_range_t *
zfs_rangelock_proxify(avl_tree_t *tree, zfs_locked_range_t *lr)
{
	zfs_locked_range_t *proxy;

	if (lr->lr_proxy)
		return (lr); /* already a proxy */

	ASSERT3U(lr->lr_count, ==, 1);
	ASSERT(lr->lr_write_wanted == B_FALSE);
	ASSERT(lr->lr_read_wanted == B_FALSE);
	avl_remove(tree, lr);
	lr->lr_count = 0;

	/* create a proxy range lock */
	proxy = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	proxy->lr_offset = lr->lr_offset;
	proxy->lr_length = lr->lr_length;
	proxy->lr_count = 1;
	proxy->lr_type = RL_READER;
	proxy->lr_proxy = B_TRUE;
	proxy->lr_write_wanted = B_FALSE;
	proxy->lr_read_wanted = B_FALSE;
	avl_add(tree, proxy);

	return (proxy);
}

/*
 * Split the range lock at the supplied offset,
 * returning the *front* proxy.
 */
static zfs_locked_range_t *
zfs_rangelock_split(avl_tree_t *tree, zfs_locked_range_t *lr, uint64_t off)
{
	zfs_locked_range_t *rear;

	ASSERT3U(lr->lr_length, >, 1);
	ASSERT3U(off, >, lr->lr_offset);
	ASSERT3U(off, <, lr->lr_offset + lr->lr_length);
	ASSERT(lr->lr_write_wanted == B_FALSE);
	ASSERT(lr->lr_read_wanted == B_FALSE);

	/* create the rear proxy range lock */
	rear = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	rear->lr_offset = off;
	rear->lr_length = lr->lr_offset + lr->lr_length - off;
	rear->lr_count = lr->lr_count;
	rear->lr_type = RL_READER;
	rear->lr_proxy = B_TRUE;
	rear->lr_write_wanted = B_FALSE;
	rear->lr_read_wanted = B_FALSE;

	zfs_locked_range_t *front = zfs_rangelock_proxify(tree, lr);
	front->lr_length = off - lr->lr_offset;

	avl_insert_here(tree, rear, front, AVL_AFTER);
	return (front);
}

/*
 * Create and add a new proxy range lock for the supplied range.
 */
static void
zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
{
	zfs_locked_range_t *lr;

	ASSERT(len != 0);
	lr = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	lr->lr_offset = off;
	lr->lr_length = len;
	lr->lr_count = 1;
	lr->lr_type = RL_READER;
	lr->lr_proxy = B_TRUE;
	lr->lr_write_wanted = B_FALSE;
	lr->lr_read_wanted = B_FALSE;
	avl_add(tree, lr);
}

static void
zfs_rangelock_add_reader(avl_tree_t *tree, zfs_locked_range_t *new,
    zfs_locked_range_t *prev, avl_index_t where)
{
	zfs_locked_range_t *next;
	uint64_t off = new->lr_offset;
	uint64_t len = new->lr_length;

	/*
	 * prev arrives either:
	 * - pointing to an entry at the same offset
	 * - pointing to the entry with the closest previous offset whose
	 *   range may overlap with the new range
	 * - null, if there were no ranges starting before the new one
	 */
	if (prev != NULL) {
		if (prev->lr_offset + prev->lr_length <= off) {
			prev = NULL;
		} else if (prev->lr_offset != off) {
			/*
			 * convert to proxy if needed then
			 * split this entry and bump ref count
			 */
			prev = zfs_rangelock_split(tree, prev, off);
			prev = AVL_NEXT(tree, prev); /* move to rear range */
		}
	}
	ASSERT((prev == NULL) || (prev->lr_offset == off));

	if (prev != NULL)
		next = prev;
	else
		next = avl_nearest(tree, where, AVL_AFTER);

	if (next == NULL || off + len <= next->lr_offset) {
		/* no overlaps, use the original new locked_range_t in tree */
		avl_insert(tree, new, where);
		return;
	}

	if (off < next->lr_offset) {
		/* Add a proxy for the initial range before the overlap */
		zfs_rangelock_new_proxy(tree, off, next->lr_offset - off);
	}

	new->lr_count = 0; /* will use proxies in tree */
	/*
	 * We now search forward through the ranges, until we go past the end
	 * of the new range. For each entry we make it a proxy if it
	 * isn't already, then bump its reference count. If there are any
	 * gaps between the ranges then we create a new proxy range.
	 */
	for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
		if (off + len <= next->lr_offset)
			break;
		if (prev != NULL && prev->lr_offset + prev->lr_length <
		    next->lr_offset) {
			/* there's a gap */
			ASSERT3U(next->lr_offset, >,
			    prev->lr_offset + prev->lr_length);
			zfs_rangelock_new_proxy(tree,
			    prev->lr_offset + prev->lr_length,
			    next->lr_offset -
			    (prev->lr_offset + prev->lr_length));
		}
		if (off + len == next->lr_offset + next->lr_length) {
			/* exact overlap with end */
			next = zfs_rangelock_proxify(tree, next);
			next->lr_count++;
			return;
		}
		if (off + len < next->lr_offset + next->lr_length) {
			/* new range ends in the middle of this block */
			next = zfs_rangelock_split(tree, next, off + len);
			next->lr_count++;
			return;
		}
		ASSERT3U(off + len, >, next->lr_offset + next->lr_length);
		next = zfs_rangelock_proxify(tree, next);
		next->lr_count++;
	}

	/* Add the remaining end range. */
	zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length,
	    (off + len) - (prev->lr_offset + prev->lr_length));
}
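
/*
 * Worked example of the proxy scheme implemented above: if one reader holds
 * [0, 100) and a second reader then asks for [50, 150), the tree ends up
 * holding three proxies, [0, 50) with a count of 1, [50, 100) with a count
 * of 2, and [100, 150) with a count of 1, while both original
 * locked_range_t structures stay outside the tree as the callers' handles.
 */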

/*
 * Check if a reader lock can be grabbed. If not, fail immediately or sleep and
 * recheck until available, depending on the value of the "nonblock" parameter.
 */
static boolean_t
zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new,
    boolean_t nonblock)
{
	avl_tree_t *tree = &rl->rl_tree;
	zfs_locked_range_t *prev, *next;
	avl_index_t where;
	uint64_t off = new->lr_offset;
	uint64_t len = new->lr_length;

	/*
	 * Look for any writer locks in the range.
	 */
retry:
	prev = avl_find(tree, new, &where);
	if (prev == NULL)
		prev = avl_nearest(tree, where, AVL_BEFORE);

	/*
	 * Check the previous range for a writer lock overlap.
	 */
	if (prev && (off < prev->lr_offset + prev->lr_length)) {
		if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
			if (nonblock)
				return (B_FALSE);
			if (!prev->lr_read_wanted) {
				cv_init(&prev->lr_read_cv,
				    NULL, CV_DEFAULT, NULL);
				prev->lr_read_wanted = B_TRUE;
			}
			cv_wait(&prev->lr_read_cv, &rl->rl_lock);
			goto retry;
		}
		if (off + len < prev->lr_offset + prev->lr_length)
			goto got_lock;
	}

	/*
	 * Search through the following ranges to see if there's
	 * any write lock overlap.
	 */
	if (prev != NULL)
		next = AVL_NEXT(tree, prev);
	else
		next = avl_nearest(tree, where, AVL_AFTER);
	for (; next != NULL; next = AVL_NEXT(tree, next)) {
		if (off + len <= next->lr_offset)
			goto got_lock;
		if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
			if (nonblock)
				return (B_FALSE);
			if (!next->lr_read_wanted) {
				cv_init(&next->lr_read_cv,
				    NULL, CV_DEFAULT, NULL);
				next->lr_read_wanted = B_TRUE;
			}
			cv_wait(&next->lr_read_cv, &rl->rl_lock);
			goto retry;
		}
		if (off + len <= next->lr_offset + next->lr_length)
			goto got_lock;
	}

got_lock:
	/*
	 * Add the read lock, which may involve splitting existing
	 * locks and bumping ref counts (lr_count).
	 */
	zfs_rangelock_add_reader(tree, new, prev, where);
	return (B_TRUE);
}

/*
 * Lock a range (offset, length) as either shared (RL_READER) or exclusive
 * (RL_WRITER or RL_APPEND). If RL_APPEND is specified, rl_cb() will convert
 * it to a RL_WRITER lock (with the offset at the end of the file). Returns
 * the range lock structure for later unlocking (or for reducing the range
 * if the entire file is locked as RL_WRITER), or NULL if nonblock is true
 * and the lock could not be acquired immediately.
 */
static zfs_locked_range_t *
zfs_rangelock_enter_impl(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type, boolean_t nonblock)
{
	zfs_locked_range_t *new;

	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

	new = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	new->lr_rangelock = rl;
	new->lr_offset = off;
	if (len + off < off)	/* overflow */
		len = UINT64_MAX - off;
	new->lr_length = len;
	new->lr_count = 1; /* assume it's going to be in the tree */
	new->lr_type = type;
	new->lr_proxy = B_FALSE;
	new->lr_write_wanted = B_FALSE;
	new->lr_read_wanted = B_FALSE;

	mutex_enter(&rl->rl_lock);
	if (type == RL_READER) {
		/*
		 * First check for the usual case of no locks.
		 */
		if (avl_numnodes(&rl->rl_tree) == 0) {
			avl_add(&rl->rl_tree, new);
		} else if (!zfs_rangelock_enter_reader(rl, new, nonblock)) {
			kmem_free(new, sizeof (*new));
			new = NULL;
		}
	} else if (!zfs_rangelock_enter_writer(rl, new, nonblock)) {
		kmem_free(new, sizeof (*new));
		new = NULL;
	}
	mutex_exit(&rl->rl_lock);
	return (new);
}

zfs_locked_range_t *
zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type)
{
	return (zfs_rangelock_enter_impl(rl, off, len, type, B_FALSE));
}

zfs_locked_range_t *
zfs_rangelock_tryenter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type)
{
	return (zfs_rangelock_enter_impl(rl, off, len, type, B_TRUE));
}
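
/*
 * A minimal sketch of non-blocking use, assuming the caller has other state
 * it can usefully drop or redo before blocking; the example_lock_range()
 * name is hypothetical. A NULL return from zfs_rangelock_tryenter() only
 * means the range was contended at that instant.
 */
#if 0
static zfs_locked_range_t *
example_lock_range(zfs_rangelock_t *rl, uint64_t off, uint64_t len)
{
	zfs_locked_range_t *lr;

	lr = zfs_rangelock_tryenter(rl, off, len, RL_WRITER);
	if (lr == NULL) {
		/* ... release other locks or state here, then block ... */
		lr = zfs_rangelock_enter(rl, off, len, RL_WRITER);
	}
	return (lr);
}
#endif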

/*
 * Safely free the zfs_locked_range_t.
 */
static void
zfs_rangelock_free(zfs_locked_range_t *lr)
{
	if (lr->lr_write_wanted)
		cv_destroy(&lr->lr_write_cv);

	if (lr->lr_read_wanted)
		cv_destroy(&lr->lr_read_cv);

	kmem_free(lr, sizeof (zfs_locked_range_t));
}

/*
 * Unlock a reader lock.
 */
static void
zfs_rangelock_exit_reader(zfs_rangelock_t *rl, zfs_locked_range_t *remove,
    list_t *free_list)
{
	avl_tree_t *tree = &rl->rl_tree;
	uint64_t len;

	/*
	 * The common case is when the remove entry is in the tree
	 * (lr_count == 1), meaning there have been no other reader locks
	 * overlapping with this one. Otherwise the remove entry will have
	 * been removed from the tree and replaced by proxies (one or
	 * more ranges mapping to the entire range).
	 */
	if (remove->lr_count == 1) {
		avl_remove(tree, remove);
		if (remove->lr_write_wanted)
			cv_broadcast(&remove->lr_write_cv);
		if (remove->lr_read_wanted)
			cv_broadcast(&remove->lr_read_cv);
		list_insert_tail(free_list, remove);
	} else {
		ASSERT0(remove->lr_count);
		ASSERT0(remove->lr_write_wanted);
		ASSERT0(remove->lr_read_wanted);
		/*
		 * Find the start proxy representing this reader lock,
		 * then decrement the ref count on all proxies
		 * that make up this range, freeing them as needed.
		 */
		zfs_locked_range_t *lr = avl_find(tree, remove, NULL);
		ASSERT3P(lr, !=, NULL);
		ASSERT3U(lr->lr_count, !=, 0);
		ASSERT3U(lr->lr_type, ==, RL_READER);
		zfs_locked_range_t *next = NULL;
		for (len = remove->lr_length; len != 0; lr = next) {
			len -= lr->lr_length;
			if (len != 0) {
				next = AVL_NEXT(tree, lr);
				ASSERT3P(next, !=, NULL);
				ASSERT3U(lr->lr_offset + lr->lr_length, ==,
				    next->lr_offset);
				ASSERT3U(next->lr_count, !=, 0);
				ASSERT3U(next->lr_type, ==, RL_READER);
			}
			lr->lr_count--;
			if (lr->lr_count == 0) {
				avl_remove(tree, lr);
				if (lr->lr_write_wanted)
					cv_broadcast(&lr->lr_write_cv);
				if (lr->lr_read_wanted)
					cv_broadcast(&lr->lr_read_cv);
				list_insert_tail(free_list, lr);
			}
		}
		kmem_free(remove, sizeof (zfs_locked_range_t));
	}
}

/*
 * Unlock range and destroy range lock structure.
 */
void
zfs_rangelock_exit(zfs_locked_range_t *lr)
{
	zfs_rangelock_t *rl = lr->lr_rangelock;
	list_t free_list;
	zfs_locked_range_t *free_lr;

	ASSERT(lr->lr_type == RL_WRITER || lr->lr_type == RL_READER);
	ASSERT(lr->lr_count == 1 || lr->lr_count == 0);
	ASSERT(!lr->lr_proxy);

	/*
	 * The free list is used to defer the cv_destroy() and
	 * subsequent kmem_free() until after the mutex is dropped.
	 */
	list_create(&free_list, sizeof (zfs_locked_range_t),
	    offsetof(zfs_locked_range_t, lr_node));

	mutex_enter(&rl->rl_lock);
	if (lr->lr_type == RL_WRITER) {
		/* writer locks can't be shared or split */
		avl_remove(&rl->rl_tree, lr);
		if (lr->lr_write_wanted)
			cv_broadcast(&lr->lr_write_cv);
		if (lr->lr_read_wanted)
			cv_broadcast(&lr->lr_read_cv);
		list_insert_tail(&free_list, lr);
	} else {
		/*
		 * The lock may be shared, so let zfs_rangelock_exit_reader()
		 * release the lock and free the zfs_locked_range_t.
		 */
		zfs_rangelock_exit_reader(rl, lr, &free_list);
	}
	mutex_exit(&rl->rl_lock);

	while ((free_lr = list_remove_head(&free_list)) != NULL)
		zfs_rangelock_free(free_lr);

	list_destroy(&free_list);
}

/*
 * Reduce a range locked as RL_WRITER from the whole file to the specified
 * range. Asserts the whole file is exclusively locked and so there's only
 * one entry in the tree.
 */
void
zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len)
{
	zfs_rangelock_t *rl = lr->lr_rangelock;

	/* Ensure there are no other locks */
	ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1);
	ASSERT3U(lr->lr_offset, ==, 0);
	ASSERT3U(lr->lr_type, ==, RL_WRITER);
	ASSERT(!lr->lr_proxy);
	ASSERT3U(lr->lr_length, ==, UINT64_MAX);
	ASSERT3U(lr->lr_count, ==, 1);

	mutex_enter(&rl->rl_lock);
	lr->lr_offset = off;
	lr->lr_length = len;
	mutex_exit(&rl->rl_lock);
	if (lr->lr_write_wanted)
		cv_broadcast(&lr->lr_write_cv);
	if (lr->lr_read_wanted)
		cv_broadcast(&lr->lr_read_cv);
}
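
/*
 * A minimal sketch of the "Grow block handling" pattern described at the top
 * of this file, assuming a callback registered with zfs_rangelock_init() has
 * widened the RL_WRITER lock to the whole file (offset 0, length UINT64_MAX)
 * because the blocksize must grow; grow_blocksize() and write_data() are
 * hypothetical helpers.
 */
#if 0
static void
example_grow_and_write(zfs_rangelock_t *rl, uint64_t off, uint64_t len)
{
	zfs_locked_range_t *lr;

	lr = zfs_rangelock_enter(rl, off, len, RL_WRITER);
	if (lr->lr_length == UINT64_MAX) {
		/* Whole file is locked; safe to change the blocksize. */
		grow_blocksize();	/* hypothetical */
		/* Shrink back so other ranges can be locked concurrently. */
		zfs_rangelock_reduce(lr, off, len);
	}
	write_data(off, len);		/* hypothetical */
	zfs_rangelock_exit(lr);
}
#endif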

#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_rangelock_init);
EXPORT_SYMBOL(zfs_rangelock_fini);
EXPORT_SYMBOL(zfs_rangelock_enter);
EXPORT_SYMBOL(zfs_rangelock_tryenter);
EXPORT_SYMBOL(zfs_rangelock_exit);
EXPORT_SYMBOL(zfs_rangelock_reduce);
#endif