// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 */

/*
 * This file contains the code to implement file range locking in
 * ZFS, although there isn't much specific to ZFS (all that comes to mind is
 * support for growing the blocksize).
 *
 * Interface
 * ---------
 * Defined in zfs_rlock.h but essentially:
 *	lr = rangelock_enter(zp, off, len, lock_type);
 *	rangelock_reduce(lr, off, len); // optional
 *	rangelock_exit(lr);
 *
 * Range locking rules
 * -------------------
 * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
 *    file range needs to be locked as RL_WRITER. Only then can the pages be
 *    freed etc and zp_size reset. zp_size must be set within range lock.
 * 2. For writes and punching holes (zfs_write & zfs_space) just the range
 *    being written or freed needs to be locked as RL_WRITER.
 *    Multiple writes at the end of the file must coordinate zp_size updates
 *    to ensure data isn't lost. A compare and swap loop is currently used
 *    to ensure the file size is at least the offset last written.
 * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
 *    read needs to be locked as RL_READER. A check against zp_size can then
 *    be made for reading beyond end of file.
 *
 * AVL tree
 * --------
 * An AVL tree is used to maintain the state of the existing ranges
 * that are locked for exclusive (writer) or shared (reader) use.
 * The starting range offset is used for searching and sorting the tree.
 *
 * Common case
 * -----------
 * The (hopefully) usual case is of no overlaps or contention for locks. On
 * entry to rangelock_enter(), a locked_range_t is allocated; the tree is
 * searched, no overlap is found, and *this* locked_range_t is placed in the
 * tree.
 *
 * Overlaps/Reference counting/Proxy locks
 * ---------------------------------------
 * The avl code only allows one node at a particular offset. Also it's very
 * inefficient to search through all previous entries looking for overlaps
 * (because the very 1st in the ordered list might be at offset 0 but
 * cover the whole file).
 * So this implementation uses reference counts and proxy range locks.
 * Firstly, only reader locks use reference counts and proxy locks,
 * because writer locks are exclusive.
 * When a reader lock overlaps with another, a proxy lock is created
 * for that range and replaces the original lock. If the overlap
 * is exact then the reference count of the proxy is simply incremented.
 * Otherwise, the proxy lock is split into smaller lock ranges and
 * new proxy locks created for non-overlapping ranges.
 * The reference counts are adjusted accordingly.
 * Meanwhile, the original lock is kept around (this is the caller's handle)
 * and its offset and length are used when releasing the lock.
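 *
 * As a worked illustration (hypothetical offsets, not tied to any particular
 * caller): reader R1 locks (off 0, len 100) on an empty tree, so R1 itself
 * is inserted with lr_count == 1. A second reader R2 then locks
 * (off 50, len 100). R1 is replaced by proxies and the tree ends up holding:
 *
 *	(off 0,   len 50)   proxy, lr_count == 1   (R1 only)
 *	(off 50,  len 50)   proxy, lr_count == 2   (R1 and R2)
 *	(off 100, len 50)   proxy, lr_count == 1   (R2 only)
 *
 * while the R1 and R2 handles themselves sit outside the tree with
 * lr_count == 0; their offsets and lengths are used at unlock time to find
 * and decrement these proxies.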
 *
 * Thread coordination
 * -------------------
 * In order to make wakeups efficient and to ensure multiple continuous
 * readers on a range don't starve a writer for the same range lock,
 * two condition variables are allocated in each zfs_locked_range_t.
 * If a writer (or reader) can't get a range it initialises the writer
 * (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
 * and waits on that cv. When a thread unlocks that range it wakes up all
 * writers then all readers before destroying the lock.
 *
 * Append mode writes
 * ------------------
 * Append mode writes need to lock a range at the end of a file.
 * The offset of the end of the file is determined under the range locking
 * mutex, the lock type is converted from RL_APPEND to RL_WRITER, and the
 * range is locked.
 *
 * Grow block handling
 * -------------------
 * ZFS supports multiple block sizes, up to 16MB. The smallest block size is
 * used for the file, and it is grown as needed. During this growth all other
 * writers and readers must be excluded.
 * So if the block size needs to be grown, the whole file is exclusively
 * locked; later the caller reduces the lock range to just the range to be
 * written using rangelock_reduce().
 */

#include <sys/zfs_context.h>
#include <sys/zfs_rlock.h>


/*
 * AVL comparison function used to order range locks.
 * Locks are ordered on the start offset of the range.
 */
static int
zfs_rangelock_compare(const void *arg1, const void *arg2)
{
	const zfs_locked_range_t *rl1 = (const zfs_locked_range_t *)arg1;
	const zfs_locked_range_t *rl2 = (const zfs_locked_range_t *)arg2;

	return (TREE_CMP(rl1->lr_offset, rl2->lr_offset));
}

/*
 * The callback is invoked when acquiring a RL_WRITER or RL_APPEND lock.
 * It must convert RL_APPEND to RL_WRITER (starting at the end of the file),
 * and may increase the range that's locked for RL_WRITER.
 */
void
zfs_rangelock_init(zfs_rangelock_t *rl, zfs_rangelock_cb_t *cb, void *arg)
{
	mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&rl->rl_tree, zfs_rangelock_compare,
	    sizeof (zfs_locked_range_t), offsetof(zfs_locked_range_t, lr_node));
	rl->rl_cb = cb;
	rl->rl_arg = arg;
}

void
zfs_rangelock_fini(zfs_rangelock_t *rl)
{
	mutex_destroy(&rl->rl_lock);
	avl_destroy(&rl->rl_tree);
}
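
/*
 * Illustrative lifecycle sketch (hypothetical caller; no callback is
 * registered, which the comment in zfs_rangelock_enter_writer() notes is
 * how zvols use it, so RL_APPEND is not used here):
 *
 *	zfs_rangelock_t rl;
 *	zfs_locked_range_t *lr;
 *
 *	zfs_rangelock_init(&rl, NULL, NULL);
 *	lr = zfs_rangelock_enter(&rl, off, len, RL_WRITER);
 *	... modify the byte range [off, off + len) ...
 *	zfs_rangelock_exit(lr);
 *	zfs_rangelock_fini(&rl);
 *
 * A ZPL-style caller instead passes a callback that converts RL_APPEND to
 * RL_WRITER and may widen the range when the blocksize has to grow.
 */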

/*
 * Check if a write lock can be grabbed. If not, fail immediately or sleep and
 * recheck until available, depending on the value of the "nonblock" parameter.
 */
static boolean_t
zfs_rangelock_enter_writer(zfs_rangelock_t *rl, zfs_locked_range_t *new,
    boolean_t nonblock)
{
	avl_tree_t *tree = &rl->rl_tree;
	zfs_locked_range_t *lr;
	avl_index_t where;
	uint64_t orig_off = new->lr_offset;
	uint64_t orig_len = new->lr_length;
	zfs_rangelock_type_t orig_type = new->lr_type;

	for (;;) {
		/*
		 * Call the callback, which can modify new->lr_offset,
		 * lr_length and lr_type.
		 * Note, the callback is used by the ZPL to handle appending
		 * and changing blocksizes. It isn't needed for zvols.
		 */
		if (rl->rl_cb != NULL) {
			rl->rl_cb(new, rl->rl_arg);
		}

		/*
		 * If the type was APPEND, the callback must convert it to
		 * WRITER.
		 */
		ASSERT3U(new->lr_type, ==, RL_WRITER);

		/*
		 * First check for the usual case of no locks
		 */
		if (avl_numnodes(tree) == 0) {
			avl_add(tree, new);
			return (B_TRUE);
		}

		/*
		 * Look for any locks in the range.
		 */
		lr = avl_find(tree, new, &where);
		if (lr != NULL)
			goto wait; /* already locked at same offset */

		lr = avl_nearest(tree, where, AVL_AFTER);
		if (lr != NULL &&
		    lr->lr_offset < new->lr_offset + new->lr_length)
			goto wait;

		lr = avl_nearest(tree, where, AVL_BEFORE);
		if (lr != NULL &&
		    lr->lr_offset + lr->lr_length > new->lr_offset)
			goto wait;

		avl_insert(tree, new, where);
		return (B_TRUE);
wait:
		if (nonblock)
			return (B_FALSE);
		if (!lr->lr_write_wanted) {
			cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
			lr->lr_write_wanted = B_TRUE;
		}
		cv_wait(&lr->lr_write_cv, &rl->rl_lock);

		/* reset to original */
		new->lr_offset = orig_off;
		new->lr_length = orig_len;
		new->lr_type = orig_type;
	}
}

/*
 * If this is an original (non-proxy) lock then replace it by
 * a proxy and return the proxy.
 */
static zfs_locked_range_t *
zfs_rangelock_proxify(avl_tree_t *tree, zfs_locked_range_t *lr)
{
	zfs_locked_range_t *proxy;

	if (lr->lr_proxy)
		return (lr); /* already a proxy */

	ASSERT3U(lr->lr_count, ==, 1);
	ASSERT(lr->lr_write_wanted == B_FALSE);
	ASSERT(lr->lr_read_wanted == B_FALSE);
	avl_remove(tree, lr);
	lr->lr_count = 0;

	/* create a proxy range lock */
	proxy = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	proxy->lr_offset = lr->lr_offset;
	proxy->lr_length = lr->lr_length;
	proxy->lr_count = 1;
	proxy->lr_type = RL_READER;
	proxy->lr_proxy = B_TRUE;
	proxy->lr_write_wanted = B_FALSE;
	proxy->lr_read_wanted = B_FALSE;
	avl_add(tree, proxy);

	return (proxy);
}

/*
 * Split the range lock at the supplied offset,
 * returning the *front* proxy.
 */
static zfs_locked_range_t *
zfs_rangelock_split(avl_tree_t *tree, zfs_locked_range_t *lr, uint64_t off)
{
	zfs_locked_range_t *rear;

	ASSERT3U(lr->lr_length, >, 1);
	ASSERT3U(off, >, lr->lr_offset);
	ASSERT3U(off, <, lr->lr_offset + lr->lr_length);
	ASSERT(lr->lr_write_wanted == B_FALSE);
	ASSERT(lr->lr_read_wanted == B_FALSE);

	/* create the rear proxy range lock */
	rear = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	rear->lr_offset = off;
	rear->lr_length = lr->lr_offset + lr->lr_length - off;
	rear->lr_count = lr->lr_count;
	rear->lr_type = RL_READER;
	rear->lr_proxy = B_TRUE;
	rear->lr_write_wanted = B_FALSE;
	rear->lr_read_wanted = B_FALSE;

	zfs_locked_range_t *front = zfs_rangelock_proxify(tree, lr);
	front->lr_length = off - lr->lr_offset;

	avl_insert_here(tree, rear, front, AVL_AFTER);
	return (front);
}
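
/*
 * Worked example (hypothetical values): splitting a proxy covering
 * (off 4096, len 8192) with lr_count == 2 at off == 8192 leaves a front
 * proxy (off 4096, len 4096) and a rear proxy (off 8192, len 4096), both
 * with lr_count == 2. If the entry being split is still an original reader
 * lock, zfs_rangelock_proxify() first removes it from the tree (its
 * lr_count drops to 0 but it remains the caller's handle) and substitutes
 * a proxy in its place.
 */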

/*
 * Create and add a new proxy range lock for the supplied range.
 */
static void
zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
{
	zfs_locked_range_t *lr;

	ASSERT(len != 0);
	lr = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	lr->lr_offset = off;
	lr->lr_length = len;
	lr->lr_count = 1;
	lr->lr_type = RL_READER;
	lr->lr_proxy = B_TRUE;
	lr->lr_write_wanted = B_FALSE;
	lr->lr_read_wanted = B_FALSE;
	avl_add(tree, lr);
}

static void
zfs_rangelock_add_reader(avl_tree_t *tree, zfs_locked_range_t *new,
    zfs_locked_range_t *prev, avl_index_t where)
{
	zfs_locked_range_t *next;
	uint64_t off = new->lr_offset;
	uint64_t len = new->lr_length;

	/*
	 * prev arrives either:
	 * - pointing to an entry at the same offset
	 * - pointing to the entry with the closest previous offset whose
	 *   range may overlap with the new range
	 * - null, if there were no ranges starting before the new one
	 */
	if (prev != NULL) {
		if (prev->lr_offset + prev->lr_length <= off) {
			prev = NULL;
		} else if (prev->lr_offset != off) {
			/*
			 * convert to proxy if needed then
			 * split this entry and bump ref count
			 */
			prev = zfs_rangelock_split(tree, prev, off);
			prev = AVL_NEXT(tree, prev); /* move to rear range */
		}
	}
	ASSERT((prev == NULL) || (prev->lr_offset == off));

	if (prev != NULL)
		next = prev;
	else
		next = avl_nearest(tree, where, AVL_AFTER);

	if (next == NULL || off + len <= next->lr_offset) {
		/* no overlaps, use the original new zfs_locked_range_t */
		avl_insert(tree, new, where);
		return;
	}

	if (off < next->lr_offset) {
		/* Add a proxy for initial range before the overlap */
		zfs_rangelock_new_proxy(tree, off, next->lr_offset - off);
	}

	new->lr_count = 0; /* will use proxies in tree */
	/*
	 * We now search forward through the ranges, until we go past the end
	 * of the new range. For each entry we make it a proxy if it
	 * isn't already, then bump its reference count. If there are any
	 * gaps between the ranges then we create a new proxy range.
	 */
	for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
		if (off + len <= next->lr_offset)
			break;
		if (prev != NULL && prev->lr_offset + prev->lr_length <
		    next->lr_offset) {
			/* there's a gap */
			ASSERT3U(next->lr_offset, >,
			    prev->lr_offset + prev->lr_length);
			zfs_rangelock_new_proxy(tree,
			    prev->lr_offset + prev->lr_length,
			    next->lr_offset -
			    (prev->lr_offset + prev->lr_length));
		}
		if (off + len == next->lr_offset + next->lr_length) {
			/* exact overlap with end */
			next = zfs_rangelock_proxify(tree, next);
			next->lr_count++;
			return;
		}
		if (off + len < next->lr_offset + next->lr_length) {
			/* new range ends in the middle of this block */
			next = zfs_rangelock_split(tree, next, off + len);
			next->lr_count++;
			return;
		}
		ASSERT3U(off + len, >, next->lr_offset + next->lr_length);
		next = zfs_rangelock_proxify(tree, next);
		next->lr_count++;
	}

	/* Add the remaining end range. */
	zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length,
	    (off + len) - (prev->lr_offset + prev->lr_length));
}
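
/*
 * Illustration (hypothetical offsets): with existing reader locks at
 * (off 0, len 10) and (off 90, len 10), adding a reader for
 * (off 0, len 100) proxifies both existing entries and fills the gap,
 * leaving proxies (off 0, len 10) lr_count 2, (off 10, len 80) lr_count 1
 * and (off 90, len 10) lr_count 2 in the tree.
 */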

/*
 * Check if a reader lock can be grabbed. If not, fail immediately or sleep and
 * recheck until available, depending on the value of the "nonblock" parameter.
 */
static boolean_t
zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new,
    boolean_t nonblock)
{
	avl_tree_t *tree = &rl->rl_tree;
	zfs_locked_range_t *prev, *next;
	avl_index_t where;
	uint64_t off = new->lr_offset;
	uint64_t len = new->lr_length;

	/*
	 * Look for any writer locks in the range.
	 */
retry:
	prev = avl_find(tree, new, &where);
	if (prev == NULL)
		prev = avl_nearest(tree, where, AVL_BEFORE);

	/*
	 * Check the previous range for a writer lock overlap.
	 */
	if (prev && (off < prev->lr_offset + prev->lr_length)) {
		if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
			if (nonblock)
				return (B_FALSE);
			if (!prev->lr_read_wanted) {
				cv_init(&prev->lr_read_cv,
				    NULL, CV_DEFAULT, NULL);
				prev->lr_read_wanted = B_TRUE;
			}
			cv_wait(&prev->lr_read_cv, &rl->rl_lock);
			goto retry;
		}
		if (off + len < prev->lr_offset + prev->lr_length)
			goto got_lock;
	}

	/*
	 * Search through the following ranges to see if there's any
	 * write lock overlap.
	 */
	if (prev != NULL)
		next = AVL_NEXT(tree, prev);
	else
		next = avl_nearest(tree, where, AVL_AFTER);
	for (; next != NULL; next = AVL_NEXT(tree, next)) {
		if (off + len <= next->lr_offset)
			goto got_lock;
		if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
			if (nonblock)
				return (B_FALSE);
			if (!next->lr_read_wanted) {
				cv_init(&next->lr_read_cv,
				    NULL, CV_DEFAULT, NULL);
				next->lr_read_wanted = B_TRUE;
			}
			cv_wait(&next->lr_read_cv, &rl->rl_lock);
			goto retry;
		}
		if (off + len <= next->lr_offset + next->lr_length)
			goto got_lock;
	}

got_lock:
	/*
	 * Add the read lock, which may involve splitting existing
	 * locks and bumping ref counts (lr_count).
	 */
	zfs_rangelock_add_reader(tree, new, prev, where);
	return (B_TRUE);
}
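
/*
 * Usage sketch for the non-blocking variant (illustrative only; "rl", "off"
 * and "len" stand for whatever state the caller holds):
 *
 *	lr = zfs_rangelock_tryenter(rl, off, len, RL_WRITER);
 *	if (lr == NULL) {
 *		// a conflicting lock exists; back off, or block instead:
 *		lr = zfs_rangelock_enter(rl, off, len, RL_WRITER);
 *	}
 *	...
 *	zfs_rangelock_exit(lr);
 */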

/*
 * Lock a range (offset, length) as either shared (RL_READER) or exclusive
 * (RL_WRITER or RL_APPEND). If RL_APPEND is specified, rl_cb() will convert
 * it to a RL_WRITER lock (with the offset at the end of the file). Returns
 * the range lock structure for later unlocking (or reduce range if the
 * entire file is locked as RL_WRITER), or NULL if nonblock is true and the
 * lock could not be acquired immediately.
 */
static zfs_locked_range_t *
zfs_rangelock_enter_impl(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type, boolean_t nonblock)
{
	zfs_locked_range_t *new;

	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

	new = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP);
	new->lr_rangelock = rl;
	new->lr_offset = off;
	if (len + off < off)	/* overflow */
		len = UINT64_MAX - off;
	new->lr_length = len;
	new->lr_count = 1; /* assume it's going to be in the tree */
	new->lr_type = type;
	new->lr_proxy = B_FALSE;
	new->lr_write_wanted = B_FALSE;
	new->lr_read_wanted = B_FALSE;

	mutex_enter(&rl->rl_lock);
	if (type == RL_READER) {
		/*
		 * First check for the usual case of no locks
		 */
		if (avl_numnodes(&rl->rl_tree) == 0) {
			avl_add(&rl->rl_tree, new);
		} else if (!zfs_rangelock_enter_reader(rl, new, nonblock)) {
			kmem_free(new, sizeof (*new));
			new = NULL;
		}
	} else if (!zfs_rangelock_enter_writer(rl, new, nonblock)) {
		kmem_free(new, sizeof (*new));
		new = NULL;
	}
	mutex_exit(&rl->rl_lock);
	return (new);
}

zfs_locked_range_t *
zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type)
{
	return (zfs_rangelock_enter_impl(rl, off, len, type, B_FALSE));
}

zfs_locked_range_t *
zfs_rangelock_tryenter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
    zfs_rangelock_type_t type)
{
	return (zfs_rangelock_enter_impl(rl, off, len, type, B_TRUE));
}

/*
 * Safely free the zfs_locked_range_t.
 */
static void
zfs_rangelock_free(zfs_locked_range_t *lr)
{
	if (lr->lr_write_wanted)
		cv_destroy(&lr->lr_write_cv);

	if (lr->lr_read_wanted)
		cv_destroy(&lr->lr_read_cv);

	kmem_free(lr, sizeof (zfs_locked_range_t));
}
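
/*
 * Continuing the reader example from the big comment above (hypothetical
 * offsets): when R1 (off 0, len 100) exits, the proxy at (off 0, len 50)
 * drops to lr_count 0 and is freed, while (off 50, len 50) drops to
 * lr_count 1. When R2 (off 50, len 100) exits, the remaining proxies drop
 * to 0 and the tree is empty again.
 */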

/*
 * Unlock a reader lock
 */
static void
zfs_rangelock_exit_reader(zfs_rangelock_t *rl, zfs_locked_range_t *remove,
    list_t *free_list)
{
	avl_tree_t *tree = &rl->rl_tree;
	uint64_t len;

	/*
	 * The common case is when the remove entry is in the tree
	 * (lr_count == 1), meaning there have been no other reader locks
	 * overlapping with this one. Otherwise the remove entry will have
	 * been removed from the tree and replaced by proxies (one or
	 * more ranges mapping to the entire range).
	 */
	if (remove->lr_count == 1) {
		avl_remove(tree, remove);
		if (remove->lr_write_wanted)
			cv_broadcast(&remove->lr_write_cv);
		if (remove->lr_read_wanted)
			cv_broadcast(&remove->lr_read_cv);
		list_insert_tail(free_list, remove);
	} else {
		ASSERT0(remove->lr_count);
		ASSERT0(remove->lr_write_wanted);
		ASSERT0(remove->lr_read_wanted);
		/*
		 * Find start proxy representing this reader lock,
		 * then decrement ref count on all proxies
		 * that make up this range, freeing them as needed.
		 */
		zfs_locked_range_t *lr = avl_find(tree, remove, NULL);
		ASSERT3P(lr, !=, NULL);
		ASSERT3U(lr->lr_count, !=, 0);
		ASSERT3U(lr->lr_type, ==, RL_READER);
		zfs_locked_range_t *next = NULL;
		for (len = remove->lr_length; len != 0; lr = next) {
			len -= lr->lr_length;
			if (len != 0) {
				next = AVL_NEXT(tree, lr);
				ASSERT3P(next, !=, NULL);
				ASSERT3U(lr->lr_offset + lr->lr_length, ==,
				    next->lr_offset);
				ASSERT3U(next->lr_count, !=, 0);
				ASSERT3U(next->lr_type, ==, RL_READER);
			}
			lr->lr_count--;
			if (lr->lr_count == 0) {
				avl_remove(tree, lr);
				if (lr->lr_write_wanted)
					cv_broadcast(&lr->lr_write_cv);
				if (lr->lr_read_wanted)
					cv_broadcast(&lr->lr_read_cv);
				list_insert_tail(free_list, lr);
			}
		}
		kmem_free(remove, sizeof (zfs_locked_range_t));
	}
}

/*
 * Unlock range and destroy range lock structure.
 */
void
zfs_rangelock_exit(zfs_locked_range_t *lr)
{
	zfs_rangelock_t *rl = lr->lr_rangelock;
	list_t free_list;
	zfs_locked_range_t *free_lr;

	ASSERT(lr->lr_type == RL_WRITER || lr->lr_type == RL_READER);
	ASSERT(lr->lr_count == 1 || lr->lr_count == 0);
	ASSERT(!lr->lr_proxy);

	/*
	 * The free list is used to defer the cv_destroy() and
	 * subsequent kmem_free until after the mutex is dropped.
	 */
	list_create(&free_list, sizeof (zfs_locked_range_t),
	    offsetof(zfs_locked_range_t, lr_node));

	mutex_enter(&rl->rl_lock);
	if (lr->lr_type == RL_WRITER) {
		/* writer locks can't be shared or split */
		avl_remove(&rl->rl_tree, lr);
		if (lr->lr_write_wanted)
			cv_broadcast(&lr->lr_write_cv);
		if (lr->lr_read_wanted)
			cv_broadcast(&lr->lr_read_cv);
		list_insert_tail(&free_list, lr);
	} else {
		/*
		 * lock may be shared, let rangelock_exit_reader()
		 * release the lock and free the zfs_locked_range_t.
		 */
		zfs_rangelock_exit_reader(rl, lr, &free_list);
	}
	mutex_exit(&rl->rl_lock);

	while ((free_lr = list_remove_head(&free_list)) != NULL)
		zfs_rangelock_free(free_lr);

	list_destroy(&free_list);
}

/*
 * Reduce range locked as RL_WRITER from whole file to specified range.
 * Asserts the whole file is exclusively locked and so there's only one
 * entry in the tree.
 */
void
zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len)
{
	zfs_rangelock_t *rl = lr->lr_rangelock;

	/* Ensure there are no other locks */
	ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1);
	ASSERT3U(lr->lr_offset, ==, 0);
	ASSERT3U(lr->lr_type, ==, RL_WRITER);
	ASSERT(!lr->lr_proxy);
	ASSERT3U(lr->lr_length, ==, UINT64_MAX);
	ASSERT3U(lr->lr_count, ==, 1);

	mutex_enter(&rl->rl_lock);
	lr->lr_offset = off;
	lr->lr_length = len;
	mutex_exit(&rl->rl_lock);
	if (lr->lr_write_wanted)
		cv_broadcast(&lr->lr_write_cv);
	if (lr->lr_read_wanted)
		cv_broadcast(&lr->lr_read_cv);
}

#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_rangelock_init);
EXPORT_SYMBOL(zfs_rangelock_fini);
EXPORT_SYMBOL(zfs_rangelock_enter);
EXPORT_SYMBOL(zfs_rangelock_tryenter);
EXPORT_SYMBOL(zfs_rangelock_exit);
EXPORT_SYMBOL(zfs_rangelock_reduce);
#endif