// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free cluster extents.
 * The trees are sorted by start of extent and by length of extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in the trees.
 * In the extreme case, the code reads the on-disk bitmap to find free clusters.
 *
 */

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/kernel.h>

#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * Maximum number of extents in tree.
 */
#define NTFS_MAX_WND_EXTENTS (32u * 1024u)

struct rb_node_key {
	struct rb_node node;
	size_t key;
};

struct e_node {
	struct rb_node_key start; /* Tree sorted by start. */
	struct rb_node_key count; /* Tree sorted by len. */
};

static int wnd_rescan(struct wnd_bitmap *wnd);
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);

static struct kmem_cache *ntfs_enode_cachep;

int __init ntfs3_init_bitmap(void)
{
	ntfs_enode_cachep =
		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	return ntfs_enode_cachep ? 0 : -ENOMEM;
}

void ntfs3_exit_bitmap(void)
{
	kmem_cache_destroy(ntfs_enode_cachep);
}

static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
{
	return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
}

/*
 * wnd_scan
 *
 * Scan the bitmap window @buf in the range [wpos, wend).
 * @b_pos + @b_len track the biggest free fragment seen so far.
 *
 * Return: -1 (MINUS_ONE_T) if not found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
		       size_t *b_len)
{
	while (wpos < wend) {
		size_t free_len;
		u32 free_bits, end;
		u32 used = find_next_zero_bit(buf, wend, wpos);

		if (used >= wend) {
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
			return -1;
		}

		if (used > wpos) {
			wpos = used;
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
		}

		/*
		 * Now we have a fragment [wpos, wend) starting with 0.
		 */
		end = wpos + to_alloc - *prev_tail;
		free_bits = find_next_bit(buf, min(end, wend), wpos);

		free_len = *prev_tail + free_bits - wpos;

		if (*b_len < free_len) {
			*b_pos = wbit + wpos - *prev_tail;
			*b_len = free_len;
		}

		if (free_len >= to_alloc)
			return wbit + wpos - *prev_tail;

		if (free_bits >= wend) {
			*prev_tail += free_bits - wpos;
			return -1;
		}

		wpos = free_bits + 1;

		*prev_tail = 0;
	}

	return -1;
}
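
/*
 * Note on @prev_tail (illustrative, not a statement about every caller):
 * callers scan the bitmap one window (one disk block) at a time, so a free
 * run may start in one window and continue into the next.  @prev_tail is the
 * number of trailing free bits carried over from the previous window, e.g.
 * (hypothetical bit patterns, 0 = free):
 *
 *	window N   : ...11110000   -> prev_tail = 4
 *	window N+1 : 00111111...   -> total run of 4 + 2 = 6 free bits,
 *	                              starting at wbit - prev_tail
 *
 * which is why the "found" positions above are computed as
 * 'wbit + wpos - *prev_tail'.
 */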

/*
 * wnd_close - Frees all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
	struct rb_node *node, *next;

	kfree(wnd->free_bits);
	run_close(&wnd->run);

	node = rb_first(&wnd->start_tree);

	while (node) {
		next = rb_next(node);
		rb_erase(node, &wnd->start_tree);
		kmem_cache_free(ntfs_enode_cachep,
				rb_entry(node, struct e_node, start.node));
		node = next;
	}
}

/*
 * rb_lookup - Find the node with the largest key that is <= @v.
 *
 * Return: NULL if all keys in the tree are greater than @v.
 */
static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *r = NULL;

	while (*p) {
		struct rb_node_key *k;

		k = rb_entry(*p, struct rb_node_key, node);
		if (v < k->key) {
			p = &(*p)->rb_left;
		} else if (v > k->key) {
			r = &k->node;
			p = &(*p)->rb_right;
		} else {
			return &k->node;
		}
	}

	return r;
}

/*
 * rb_insert_count - Helper function to insert an element into the 'count' tree.
 */
static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_ckey = e->count.key;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k =
			rb_entry(parent = *p, struct e_node, count.node);

		if (e_ckey > k->count.key) {
			p = &(*p)->rb_left;
		} else if (e_ckey < k->count.key) {
			p = &(*p)->rb_right;
		} else if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->count.node, parent, p);
	rb_insert_color(&e->count.node, root);
	return true;
}

/*
 * rb_insert_start - Helper function to insert an element into the 'start' tree.
 */
static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k;

		parent = *p;

		k = rb_entry(parent, struct e_node, start.node);
		if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->start.node, parent, p);
	rb_insert_color(&e->start.node, root);
	return true;
}
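
/*
 * Layout of the two trees (a sketch, derived from the insert helpers above):
 * every struct e_node describes one run of free clusters and is linked into
 * both trees at once.
 *
 *	start_tree: ordered by e->start.key ascending, so rb_lookup() can find
 *	            the extent covering or preceding a given bit.
 *	count_tree: ordered by e->count.key with larger extents to the left,
 *	            ties broken by start.key; rb_first() therefore yields the
 *	            biggest free extent and rb_last() the smallest one.
 */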

/*
 * wnd_add_free_ext - Adds a new extent of free space.
 * @build: 1 when building tree.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
			     bool build)
{
	struct e_node *e, *e0 = NULL;
	size_t ib, end_in = bit + len;
	struct rb_node *n;

	if (build) {
		/* Use extent_min to filter too short extents. */
		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
		    len <= wnd->extent_min) {
			wnd->uptodated = -1;
			return;
		}
	} else {
		/* Try to find extent before 'bit'. */
		n = rb_lookup(&wnd->start_tree, bit);

		if (!n) {
			n = rb_first(&wnd->start_tree);
		} else {
			e = rb_entry(n, struct e_node, start.node);
			n = rb_next(n);
			if (e->start.key + e->count.key == bit) {
				/* Remove left. */
				bit = e->start.key;
				len += e->count.key;
				rb_erase(&e->start.node, &wnd->start_tree);
				rb_erase(&e->count.node, &wnd->count_tree);
				wnd->count -= 1;
				e0 = e;
			}
		}

		while (n) {
			size_t next_end;

			e = rb_entry(n, struct e_node, start.node);
			next_end = e->start.key + e->count.key;
			if (e->start.key > end_in)
				break;

			/* Remove right. */
			n = rb_next(n);
			len += next_end - end_in;
			end_in = next_end;
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;

			if (!e0)
				e0 = e;
			else
				kmem_cache_free(ntfs_enode_cachep, e);
		}

		if (wnd->uptodated != 1) {
			/* Check bits before 'bit'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     bit < wnd->zone_end ? 0 : wnd->zone_end;

			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
				bit -= 1;
				len += 1;
			}

			/* Check bits after 'end_in'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     end_in > wnd->zone_bit ? wnd->nbits : wnd->zone_bit;

			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
				end_in += 1;
				len += 1;
			}
		}
	}
	/* Insert new fragment. */
	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		if (e0)
			kmem_cache_free(ntfs_enode_cachep, e0);

		wnd->uptodated = -1;

		/* Compare with smallest fragment. */
		n = rb_last(&wnd->count_tree);
		e = rb_entry(n, struct e_node, count.node);
		if (len <= e->count.key)
			goto out; /* Do not insert small fragments. */

		if (build) {
			struct e_node *e2;

			n = rb_prev(n);
			e2 = rb_entry(n, struct e_node, count.node);
			/* Smallest fragment will be 'e2->count.key'. */
			wnd->extent_min = e2->count.key;
		}

		/* Replace smallest fragment by new one. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e) {
			wnd->uptodated = -1;
			goto out;
		}

		if (build && len <= wnd->extent_min)
			wnd->extent_min = len;
	}
	e->start.key = bit;
	e->count.key = len;
	if (len > wnd->extent_max)
		wnd->extent_max = len;

	rb_insert_start(&wnd->start_tree, e);
	rb_insert_count(&wnd->count_tree, e);
	wnd->count += 1;

out:;
}
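
/*
 * Behaviour summary for wnd_add_free_ext() (informal, as read from the code
 * above): the new range is first merged with the cached extent ending at
 * 'bit' and with any cached extents it touches on the right, then, if the
 * tree is not known to be complete, grown bit by bit using the on-disk
 * bitmap.  When the tree already holds NTFS_MAX_WND_EXTENTS entries, the
 * smallest cached extent is evicted (or the new one is dropped) and
 * wnd->uptodated is set to -1 so the tree is treated as approximate.
 */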

/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
	struct rb_node *n, *n3;
	struct e_node *e, *e3;
	size_t end_in = bit + len;
	size_t end3, end, new_key, new_len, max_new_len;

	/* Try to find extent before 'bit'. */
	n = rb_lookup(&wnd->start_tree, bit);

	if (!n)
		return;

	e = rb_entry(n, struct e_node, start.node);
	end = e->start.key + e->count.key;

	new_key = new_len = 0;
	len = e->count.key;

	/* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
	if (e->start.key > bit)
		;
	else if (end_in <= end) {
		/* Range [bit,end_in) inside 'e'. */
		new_key = end_in;
		new_len = end - end_in;
		len = bit - e->start.key;
	} else if (bit > end) {
		bool bmax = false;

		n3 = rb_next(n);

		while (n3) {
			e3 = rb_entry(n3, struct e_node, start.node);
			if (e3->start.key >= end_in)
				break;

			if (e3->count.key == wnd->extent_max)
				bmax = true;

			end3 = e3->start.key + e3->count.key;
			if (end3 > end_in) {
				e3->start.key = end_in;
				rb_erase(&e3->count.node, &wnd->count_tree);
				e3->count.key = end3 - end_in;
				rb_insert_count(&wnd->count_tree, e3);
				break;
			}

			n3 = rb_next(n3);
			rb_erase(&e3->start.node, &wnd->start_tree);
			rb_erase(&e3->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e3);
		}
		if (!bmax)
			return;
		n3 = rb_first(&wnd->count_tree);
		wnd->extent_max =
			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
			   : 0;
		return;
	}

	if (e->count.key != wnd->extent_max) {
		;
	} else if (rb_prev(&e->count.node)) {
		;
	} else {
		n3 = rb_next(&e->count.node);
		max_new_len = max(len, new_len);
		if (!n3) {
			wnd->extent_max = max_new_len;
		} else {
			e3 = rb_entry(n3, struct e_node, count.node);
			wnd->extent_max = max(e3->count.key, max_new_len);
		}
	}

	if (!len) {
		if (new_len) {
			e->start.key = new_key;
			rb_erase(&e->count.node, &wnd->count_tree);
			e->count.key = new_len;
			rb_insert_count(&wnd->count_tree, e);
		} else {
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e);
		}
		goto out;
	}
	rb_erase(&e->count.node, &wnd->count_tree);
	e->count.key = len;
	rb_insert_count(&wnd->count_tree, e);

	if (!new_len)
		goto out;

	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		wnd->uptodated = -1;

		/* Get minimal extent. */
		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
			     count.node);
		if (e->count.key > new_len)
			goto out;

		/* Replace minimum. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e)
			wnd->uptodated = -1;
	}

	if (e) {
		e->start.key = new_key;
		e->count.key = new_len;
		rb_insert_start(&wnd->start_tree, e);
		rb_insert_count(&wnd->count_tree, e);
		wnd->count += 1;
	}

out:
	if (!wnd->count && 1 != wnd->uptodated)
		wnd_rescan(wnd);
}
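
/*
 * Example for wnd_remove_free_ext() (informal, hypothetical numbers): if the
 * tree caches a free extent [100, 200) and clusters [120, 130) become used,
 * the cached extent is split into [100, 120) (the shortened 'e') and
 * [130, 200) (inserted as a new node, or dropped if the tree is full).
 * If removal empties the tree while the cache is not marked up to date,
 * wnd_rescan() rebuilds it from the on-disk bitmap.
 */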

/*
 * wnd_rescan - Scan the entire bitmap. Used during initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
	int err = 0;
	size_t prev_tail = 0;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u64 lbo, len = 0;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 wbits = 8 * sb->s_blocksize;
	u32 used, frb;
	const ulong *buf;
	size_t wpos, wbit, iw, vbo;
	struct buffer_head *bh = NULL;
	CLST lcn, clen;

	wnd->uptodated = 0;
	wnd->extent_max = 0;
	wnd->extent_min = MINUS_ONE_T;
	wnd->total_zeroes = 0;

	vbo = 0;

	for (iw = 0; iw < wnd->nwnd; iw++) {
		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (wnd->inited) {
			if (!wnd->free_bits[iw]) {
				/* All ones. */
				if (prev_tail) {
					wnd_add_free_ext(wnd,
							 vbo * 8 - prev_tail,
							 prev_tail, true);
					prev_tail = 0;
				}
				goto next_wnd;
			}
			if (wbits == wnd->free_bits[iw]) {
				/* All zeroes. */
				prev_tail += wbits;
				wnd->total_zeroes += wbits;
				goto next_wnd;
			}
		}

		if (!len) {
			u32 off = vbo & sbi->cluster_mask;

			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
					      &lcn, &clen, NULL)) {
				err = -ENOENT;
				goto out;
			}

			lbo = ((u64)lcn << cluster_bits) + off;
			len = ((u64)clen << cluster_bits) - off;
		}

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh) {
			err = -EIO;
			goto out;
		}

		buf = (ulong *)bh->b_data;

		used = __bitmap_weight(buf, wbits);
		if (used < wbits) {
			frb = wbits - used;
			wnd->free_bits[iw] = frb;
			wnd->total_zeroes += frb;
		}

		wpos = 0;
		wbit = vbo * 8;

		if (wbit + wbits > wnd->nbits)
			wbits = wnd->nbits - wbit;

		do {
			used = find_next_zero_bit(buf, wbits, wpos);

			if (used > wpos && prev_tail) {
				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
						 prev_tail, true);
				prev_tail = 0;
			}

			wpos = used;

			if (wpos >= wbits) {
				/* No free blocks. */
				prev_tail = 0;
				break;
			}

			frb = find_next_bit(buf, wbits, wpos);
			if (frb >= wbits) {
				/* Keep last free block. */
				prev_tail += frb - wpos;
				break;
			}

			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
					 frb + prev_tail - wpos, true);

			/* Skip free block and first '1'. */
			wpos = frb + 1;
			/* Reset previous tail. */
			prev_tail = 0;
		} while (wpos < wbits);

next_wnd:

		if (bh)
			put_bh(bh);
		bh = NULL;

		vbo += blocksize;
		if (len) {
			len -= blocksize;
			lbo += blocksize;
		}
	}

	/* Add last block. */
	if (prev_tail)
		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

	/*
	 * Before the init cycle wnd->uptodated was 0.
	 * If any errors or limits occur during initialization then
	 * wnd->uptodated will be -1.
	 * If 'uptodated' is still 0 then the tree is fully up to date.
	 */
	if (!wnd->uptodated)
		wnd->uptodated = 1;

	if (wnd->zone_bit != wnd->zone_end) {
		size_t zlen = wnd->zone_end - wnd->zone_bit;

		wnd->zone_end = wnd->zone_bit;
		wnd_zone_set(wnd, wnd->zone_bit, zlen);
	}

out:
	return err;
}

int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
{
	int err;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;

	init_rwsem(&wnd->rw_lock);

	wnd->sb = sb;
	wnd->nbits = nbits;
	wnd->total_zeroes = nbits;
	wnd->extent_max = MINUS_ONE_T;
	wnd->zone_bit = wnd->zone_end = 0;
	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
	wnd->bits_last = nbits & (wbits - 1);
	if (!wnd->bits_last)
		wnd->bits_last = wbits;

	wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
	if (!wnd->free_bits)
		return -ENOMEM;

	err = wnd_rescan(wnd);
	if (err)
		return err;

	wnd->inited = true;

	return 0;
}
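
/*
 * Sizing example for wnd_init() (hypothetical numbers): with a 4096-byte
 * block size each window covers wbits = 4096 * 8 = 32768 bits (clusters).
 * For nbits = 100000 clusters the bitmap needs nwnd = 4 windows, and only
 * bits_last = 100000 - 3 * 32768 = 1696 bits of the last window are valid.
 * wnd->free_bits[] keeps a per-window count of zero (free) bits.
 */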

/*
 * wnd_map - Call ntfs_bread for the requested window.
 */
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
{
	size_t vbo;
	CLST lcn, clen;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi;
	struct buffer_head *bh;
	u64 lbo;

	sbi = sb->s_fs_info;
	vbo = (u64)iw << sb->s_blocksize_bits;

	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
			      NULL)) {
		return ERR_PTR(-ENOENT);
	}

	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);

	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
	if (!bh)
		return ERR_PTR(-EIO);

	return bh;
}

/*
 * wnd_set_free - Mark the range [bit, bit + bits) as free.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	u32 wbits = 8 * sb->s_blocksize;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_clear(buf, wbit, op);

		wnd->free_bits[iw] += op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes += op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	wnd_add_free_ext(wnd, bit, bits0, false);

	return err;
}

/*
 * wnd_set_used - Mark the range [bit, bit + bits) as used.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}
		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_set(buf, wbit, op);
		wnd->free_bits[iw] -= op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes -= op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	if (!RB_EMPTY_ROOT(&wnd->start_tree))
		wnd_remove_free_ext(wnd, bit, bits0);

	return err;
}
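
/*
 * wnd_set_free()/wnd_set_used() keep three views consistent: the on-disk
 * bitmap blocks (updated under lock_buffer()), the per-window free counters
 * in wnd->free_bits[], and the cached extent trees.  A minimal usage sketch
 * (the caller and locking context are assumptions, not code from this file):
 *
 *	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
 *	err = wnd_set_used(wnd, lcn, clen);	// allocate clusters
 *	...
 *	err = wnd_set_free(wnd, lcn, clen);	// release them again
 *	up_write(&wnd->rw_lock);
 */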

/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wbits != wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				return false;

			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

			put_bh(bh);
			if (!ret)
				return false;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}

	return true;
}

/*
 * wnd_is_free
 *
 * Return: True if all clusters [bit, bit+bits) are free.
 */
bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret;
	struct rb_node *n;
	size_t end;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	n = rb_lookup(&wnd->start_tree, bit);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);

	end = e->start.key + e->count.key;

	if (bit < end && bit + bits <= end)
		return true;

use_wnd:
	ret = wnd_is_free_hlp(wnd, bit, bits);

	return ret;
}

/*
 * wnd_is_used
 *
 * Return: True if all clusters [bit, bit+bits) are used.
 */
bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret = false;
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	size_t end;
	struct rb_node *n;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	end = bit + bits;
	n = rb_lookup(&wnd->start_tree, end - 1);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);
	if (e->start.key + e->count.key > bit)
		return false;

use_wnd:
	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				goto out;

			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
			put_bh(bh);
			if (!ret)
				goto out;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}
	ret = true;

out:
	return ret;
}
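
/*
 * Typical use of wnd_find() below (a sketch; the caller shown here is an
 * assumption, not code from this file): ask for 'want' clusters near 'hint'
 * and mark them used in the same call.
 *
 *	size_t lcn, got;
 *
 *	got = wnd_find(wnd, want, hint,
 *		       BITMAP_FIND_MARK_AS_USED, &lcn);
 *	if (!got)
 *		return -ENOSPC;	// nothing suitable found
 *	// 'got' clusters starting at 'lcn' are now marked as used
 *
 * Without BITMAP_FIND_FULL the function may return fewer clusters than
 * requested; with it, only a full-sized run is accepted.
 */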

/*
 * wnd_find - Look for free space.
 *
 * @flags: BITMAP_FIND_XXX flags.
 *
 * Return: 0 if not found.
 */
size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
		size_t flags, size_t *allocated)
{
	struct super_block *sb;
	u32 wbits, wpos, wzbit, wzend;
	size_t fnd, max_alloc, b_len, b_pos;
	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
	size_t to_alloc0 = to_alloc;
	const ulong *buf;
	const struct e_node *e;
	const struct rb_node *pr, *cr;
	u8 log2_bits;
	bool fbits_valid;
	struct buffer_head *bh;

	/* Fast checking for available free space. */
	if (flags & BITMAP_FIND_FULL) {
		size_t zeroes = wnd_zeroes(wnd);

		zeroes -= wnd->zone_end - wnd->zone_bit;
		if (zeroes < to_alloc0)
			goto no_space;

		if (to_alloc0 > wnd->extent_max)
			goto no_space;
	} else {
		if (to_alloc > wnd->extent_max)
			to_alloc = wnd->extent_max;
	}

	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
		hint = wnd->zone_end;

	max_alloc = wnd->nbits;
	b_len = b_pos = 0;

	if (hint >= max_alloc)
		hint = 0;

	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
		if (wnd->uptodated == 1) {
			/* Extents tree is updated -> No free space. */
			goto no_space;
		}
		goto scan_bitmap;
	}

	e = NULL;
	if (!hint)
		goto allocate_biggest;

	/* Use hint: Enumerate extents by start >= hint. */
	pr = NULL;
	cr = wnd->start_tree.rb_node;

	for (;;) {
		e = rb_entry(cr, struct e_node, start.node);

		if (e->start.key == hint)
			break;

		if (e->start.key < hint) {
			pr = cr;
			cr = cr->rb_right;
			if (!cr)
				break;
			continue;
		}

		cr = cr->rb_left;
		if (!cr) {
			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
			break;
		}
	}

	if (!e)
		goto allocate_biggest;

	if (e->start.key + e->count.key > hint) {
		/* We have found an extent with 'hint' inside. */
		size_t len = e->start.key + e->count.key - hint;

		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
			fnd = hint;
			goto found;
		}

		if (!(flags & BITMAP_FIND_FULL)) {
			if (len > to_alloc)
				len = to_alloc;

			if (hint + len <= max_alloc) {
				fnd = hint;
				to_alloc = len;
				goto found;
			}
		}
	}

allocate_biggest:
	/* Allocate from biggest free extent. */
	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
	if (e->count.key != wnd->extent_max)
		wnd->extent_max = e->count.key;

	if (e->count.key < max_alloc) {
		if (e->count.key >= to_alloc) {
			;
		} else if (flags & BITMAP_FIND_FULL) {
			if (e->count.key < to_alloc0) {
				/* Biggest free block is less than requested. */
				goto no_space;
			}
			to_alloc = e->count.key;
		} else if (-1 != wnd->uptodated) {
			to_alloc = e->count.key;
		} else {
			/* Check if we can use more bits. */
			size_t op, max_check;
			struct rb_root start_tree;

			memcpy(&start_tree, &wnd->start_tree,
			       sizeof(struct rb_root));
			memset(&wnd->start_tree, 0, sizeof(struct rb_root));

			max_check = e->start.key + to_alloc;
			if (max_check > max_alloc)
				max_check = max_alloc;
			for (op = e->start.key + e->count.key; op < max_check;
			     op++) {
				if (!wnd_is_free(wnd, op, 1))
					break;
			}
			memcpy(&wnd->start_tree, &start_tree,
			       sizeof(struct rb_root));
			to_alloc = op - e->start.key;
		}

		/* Prepare to return. */
		fnd = e->start.key;
		if (e->start.key + to_alloc > max_alloc)
			to_alloc = max_alloc - e->start.key;
		goto found;
	}

	if (wnd->uptodated == 1) {
		/* Extents tree is updated -> no free space. */
		goto no_space;
	}

	b_len = e->count.key;
	b_pos = e->start.key;

scan_bitmap:
	sb = wnd->sb;
	log2_bits = sb->s_blocksize_bits + 3;

	/* At most two ranges [hint, max_alloc) + [0, hint). */
Again:

	/* TODO: Optimize request for case nbits > wbits. */
	iw = hint >> log2_bits;
	wbits = sb->s_blocksize * 8;
	wpos = hint & (wbits - 1);
	prev_tail = 0;
	fbits_valid = true;

	if (max_alloc == wnd->nbits) {
		nwnd = wnd->nwnd;
	} else {
		size_t t = max_alloc + wbits - 1;

		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
	}

	/* Enumerate all windows. */
	for (; iw < nwnd; iw++) {
		wbit = iw << log2_bits;

		if (!wnd->free_bits[iw]) {
			if (prev_tail > b_len) {
				b_pos = wbit - prev_tail;
				b_len = prev_tail;
			}

			/* Skip fully used window. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		if (unlikely(iw + 1 == nwnd)) {
			if (max_alloc == wnd->nbits) {
				wbits = wnd->bits_last;
			} else {
				size_t t = max_alloc & (wbits - 1);

				if (t) {
					wbits = t;
					fbits_valid = false;
				}
			}
		}

		if (wnd->zone_end > wnd->zone_bit) {
			ebit = wbit + wbits;
			zbit = max(wnd->zone_bit, wbit);
			zend = min(wnd->zone_end, ebit);

			/* Here we have a window [wbit, ebit) and zone [zbit, zend). */
			if (zend <= zbit) {
				/* Zone does not overlap window. */
			} else {
				wzbit = zbit - wbit;
				wzend = zend - wbit;

				/* Zone overlaps window. */
				if (wnd->free_bits[iw] == wzend - wzbit) {
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				/* Scan two ranges window: [wbit, zbit) and [zend, ebit). */
				bh = wnd_map(wnd, iw);

				if (IS_ERR(bh)) {
					/* TODO: Error. */
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				buf = (ulong *)bh->b_data;

				/* Scan range [wbit, zbit). */
				if (wpos < wzbit) {
					/* Scan range [wpos, zbit). */
					fnd = wnd_scan(buf, wbit, wpos, wzbit,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				prev_tail = 0;

				/* Scan range [zend, ebit). */
				if (wzend < wbits) {
					fnd = wnd_scan(buf, wbit,
						       max(wzend, wpos), wbits,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				wpos = 0;
				put_bh(bh);
				continue;
			}
		}

		/* Current window does not overlap zone. */
		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
			/* Window is empty. */
			if (prev_tail + wbits >= to_alloc) {
				fnd = wbit + wpos - prev_tail;
				goto found;
			}

			/* Increase 'prev_tail' and process next window. */
			prev_tail += wbits;
			wpos = 0;
			continue;
		}

		/* Read window. */
		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			/* TODO: Error. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		buf = (ulong *)bh->b_data;

		/* Scan range [wpos, wbits). */
		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
			       &b_pos, &b_len);
		put_bh(bh);
		if (fnd != MINUS_ONE_T)
			goto found;
	}

	if (b_len < prev_tail) {
		/* The last fragment. */
		b_len = prev_tail;
		b_pos = max_alloc - prev_tail;
	}

	if (hint) {
		/*
		 * We have scanned the range [hint, max_alloc).
		 * Prepare to scan the range [0, hint + to_alloc).
		 */
		size_t nextmax = hint + to_alloc;

		if (likely(nextmax >= hint) && nextmax < max_alloc)
			max_alloc = nextmax;
		hint = 0;
		goto Again;
	}

	if (!b_len)
		goto no_space;

	wnd->extent_max = b_len;

	if (flags & BITMAP_FIND_FULL)
		goto no_space;

	fnd = b_pos;
	to_alloc = b_len;

found:
	if (flags & BITMAP_FIND_MARK_AS_USED) {
		/* TODO: Optimize remove extent (pass 'e'?). */
		if (wnd_set_used(wnd, fnd, to_alloc))
			goto no_space;
	} else if (wnd->extent_max != MINUS_ONE_T &&
		   to_alloc > wnd->extent_max) {
		wnd->extent_max = to_alloc;
	}

	*allocated = fnd;
	return to_alloc;

no_space:
	return 0;
}

/*
 * wnd_extend - Extend bitmap ($MFT bitmap).
 */
int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
{
	int err;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;
	u32 b0, new_last;
	size_t bits, iw, new_wnd;
	size_t old_bits = wnd->nbits;
	u16 *new_free;

	if (new_bits <= old_bits)
		return -EINVAL;

	/* Align to 8 byte boundary. */
	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
	new_last = new_bits & (wbits - 1);
	if (!new_last)
		new_last = wbits;

	if (new_wnd != wnd->nwnd) {
		new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
		if (!new_free)
			return -ENOMEM;

		if (new_free != wnd->free_bits)
			memcpy(new_free, wnd->free_bits,
			       wnd->nwnd * sizeof(short));
		memset(new_free + wnd->nwnd, 0,
		       (new_wnd - wnd->nwnd) * sizeof(short));
		kfree(wnd->free_bits);
		wnd->free_bits = new_free;
	}

	/* Zero bits [old_bits,new_bits). */
	bits = new_bits - old_bits;
	b0 = old_bits & (wbits - 1);

	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
		u32 op;
		size_t frb;
		u64 vbo, lbo, bytes;
		struct buffer_head *bh;
		ulong *buf;

		if (iw + 1 == new_wnd)
			wbits = new_last;

		op = b0 + bits > wbits ? wbits - b0 : bits;
		vbo = (u64)iw * blocksize;

		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
		if (err)
			break;

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh)
			return -EIO;

		lock_buffer(bh);
		buf = (ulong *)bh->b_data;

		__bitmap_clear(buf, b0, blocksize * 8 - b0);
		frb = wbits - __bitmap_weight(buf, wbits);
		wnd->total_zeroes += frb - wnd->free_bits[iw];
		wnd->free_bits[iw] = frb;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		/* err = sync_dirty_buffer(bh); */

		b0 = 0;
		bits -= op;
	}

	wnd->nbits = new_bits;
	wnd->nwnd = new_wnd;
	wnd->bits_last = new_last;

	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);

	return 0;
}

void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
	size_t zlen;

	zlen = wnd->zone_end - wnd->zone_bit;
	if (zlen)
		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);

	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
		wnd_remove_free_ext(wnd, lcn, len);

	wnd->zone_bit = lcn;
	wnd->zone_end = lcn + len;
}

int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	u32 wbits = 8 * sb->s_blocksize;
	CLST len = 0, lcn = 0, done = 0;
	CLST minlen = bytes_to_cluster(sbi, range->minlen);
	CLST lcn_from = bytes_to_cluster(sbi, range->start);
	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
	u32 wbit = lcn_from & (wbits - 1);
	const ulong *buf;
	CLST lcn_to;

	if (!minlen)
		minlen = 1;

	if (range->len == (u64)-1)
		lcn_to = wnd->nbits;
	else
		lcn_to = bytes_to_cluster(sbi, range->start + range->len);

	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);

	for (; iw < wnd->nwnd; iw++, wbit = 0) {
		CLST lcn_wnd = iw * wbits;
		struct buffer_head *bh;

		if (lcn_wnd > lcn_to)
			break;

		if (!wnd->free_bits[iw])
			continue;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (lcn_wnd + wbits > lcn_to)
			wbits = lcn_to - lcn_wnd;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		for (; wbit < wbits; wbit++) {
			if (!test_bit(wbit, buf)) {
				if (!len)
					lcn = lcn_wnd + wbit;
				len += 1;
				continue;
			}
			if (len >= minlen) {
				err = ntfs_discard(sbi, lcn, len);
				if (err) {
					put_bh(bh);
					goto out;
				}
				done += len;
			}
			len = 0;
		}
		put_bh(bh);
	}

	/* Process the last fragment. */
	if (len >= minlen) {
		err = ntfs_discard(sbi, lcn, len);
		if (err)
			goto out;
		done += len;
	}

out:
	range->len = (u64)done << sbi->cluster_bits;

	up_read(&wnd->rw_lock);

	return err;
}