1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * 4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. 5 * 6 * This code builds two trees of free clusters extents. 7 * Trees are sorted by start of extent and by length of extent. 8 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees. 9 * In extreme case code reads on-disk bitmap to find free clusters 10 * 11 */ 12 13 #include <linux/blkdev.h> 14 #include <linux/buffer_head.h> 15 #include <linux/fs.h> 16 #include <linux/nls.h> 17 18 #include "debug.h" 19 #include "ntfs.h" 20 #include "ntfs_fs.h" 21 22 /* 23 * Maximum number of extents in tree. 24 */ 25 #define NTFS_MAX_WND_EXTENTS (32u * 1024u) 26 27 struct rb_node_key { 28 struct rb_node node; 29 size_t key; 30 }; 31 32 /* 33 * Tree is sorted by start (key) 34 */ 35 struct e_node { 36 struct rb_node_key start; /* Tree sorted by start */ 37 struct rb_node_key count; /* Tree sorted by len*/ 38 }; 39 40 static int wnd_rescan(struct wnd_bitmap *wnd); 41 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw); 42 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits); 43 44 static struct kmem_cache *ntfs_enode_cachep; 45 46 int __init ntfs3_init_bitmap(void) 47 { 48 ntfs_enode_cachep = 49 kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0, 50 SLAB_RECLAIM_ACCOUNT, NULL); 51 return ntfs_enode_cachep ? 0 : -ENOMEM; 52 } 53 54 void ntfs3_exit_bitmap(void) 55 { 56 kmem_cache_destroy(ntfs_enode_cachep); 57 } 58 59 static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i) 60 { 61 return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8; 62 } 63 64 /* 65 * b_pos + b_len - biggest fragment 66 * Scan range [wpos wbits) window 'buf' 67 * Returns -1 if not found 68 */ 69 static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend, 70 size_t to_alloc, size_t *prev_tail, size_t *b_pos, 71 size_t *b_len) 72 { 73 while (wpos < wend) { 74 size_t free_len; 75 u32 free_bits, end; 76 u32 used = find_next_zero_bit(buf, wend, wpos); 77 78 if (used >= wend) { 79 if (*b_len < *prev_tail) { 80 *b_pos = wbit - *prev_tail; 81 *b_len = *prev_tail; 82 } 83 84 *prev_tail = 0; 85 return -1; 86 } 87 88 if (used > wpos) { 89 wpos = used; 90 if (*b_len < *prev_tail) { 91 *b_pos = wbit - *prev_tail; 92 *b_len = *prev_tail; 93 } 94 95 *prev_tail = 0; 96 } 97 98 /* 99 * Now we have a fragment [wpos, wend) staring with 0 100 */ 101 end = wpos + to_alloc - *prev_tail; 102 free_bits = find_next_bit(buf, min(end, wend), wpos); 103 104 free_len = *prev_tail + free_bits - wpos; 105 106 if (*b_len < free_len) { 107 *b_pos = wbit + wpos - *prev_tail; 108 *b_len = free_len; 109 } 110 111 if (free_len >= to_alloc) 112 return wbit + wpos - *prev_tail; 113 114 if (free_bits >= wend) { 115 *prev_tail += free_bits - wpos; 116 return -1; 117 } 118 119 wpos = free_bits + 1; 120 121 *prev_tail = 0; 122 } 123 124 return -1; 125 } 126 127 /* 128 * wnd_close 129 * 130 * Frees all resources 131 */ 132 void wnd_close(struct wnd_bitmap *wnd) 133 { 134 struct rb_node *node, *next; 135 136 ntfs_free(wnd->free_bits); 137 run_close(&wnd->run); 138 139 node = rb_first(&wnd->start_tree); 140 141 while (node) { 142 next = rb_next(node); 143 rb_erase(node, &wnd->start_tree); 144 kmem_cache_free(ntfs_enode_cachep, 145 rb_entry(node, struct e_node, start.node)); 146 node = next; 147 } 148 } 149 150 static struct rb_node *rb_lookup(struct rb_root *root, size_t v) 151 { 152 struct rb_node **p = &root->rb_node; 153 struct rb_node *r = NULL; 154 155 while (*p) { 156 struct rb_node_key *k; 157 158 k = rb_entry(*p, struct rb_node_key, node); 159 if (v < k->key) { 160 p = &(*p)->rb_left; 161 } else if (v > k->key) { 162 r = &k->node; 163 p = &(*p)->rb_right; 164 } else { 165 return &k->node; 166 } 167 } 168 169 return r; 170 } 171 172 /* 173 * rb_insert_count 174 * 175 * Helper function to insert special kind of 'count' tree 176 */ 177 static inline bool rb_insert_count(struct rb_root *root, struct e_node *e) 178 { 179 struct rb_node **p = &root->rb_node; 180 struct rb_node *parent = NULL; 181 size_t e_ckey = e->count.key; 182 size_t e_skey = e->start.key; 183 184 while (*p) { 185 struct e_node *k = 186 rb_entry(parent = *p, struct e_node, count.node); 187 188 if (e_ckey > k->count.key) { 189 p = &(*p)->rb_left; 190 } else if (e_ckey < k->count.key) { 191 p = &(*p)->rb_right; 192 } else if (e_skey < k->start.key) { 193 p = &(*p)->rb_left; 194 } else if (e_skey > k->start.key) { 195 p = &(*p)->rb_right; 196 } else { 197 WARN_ON(1); 198 return false; 199 } 200 } 201 202 rb_link_node(&e->count.node, parent, p); 203 rb_insert_color(&e->count.node, root); 204 return true; 205 } 206 207 /* 208 * inline bool rb_insert_start 209 * 210 * Helper function to insert special kind of 'start' tree 211 */ 212 static inline bool rb_insert_start(struct rb_root *root, struct e_node *e) 213 { 214 struct rb_node **p = &root->rb_node; 215 struct rb_node *parent = NULL; 216 size_t e_skey = e->start.key; 217 218 while (*p) { 219 struct e_node *k; 220 221 parent = *p; 222 223 k = rb_entry(parent, struct e_node, start.node); 224 if (e_skey < k->start.key) { 225 p = &(*p)->rb_left; 226 } else if (e_skey > k->start.key) { 227 p = &(*p)->rb_right; 228 } else { 229 WARN_ON(1); 230 return false; 231 } 232 } 233 234 rb_link_node(&e->start.node, parent, p); 235 rb_insert_color(&e->start.node, root); 236 return true; 237 } 238 239 /* 240 * wnd_add_free_ext 241 * 242 * adds a new extent of free space 243 * build = 1 when building tree 244 */ 245 static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, 246 bool build) 247 { 248 struct e_node *e, *e0 = NULL; 249 size_t ib, end_in = bit + len; 250 struct rb_node *n; 251 252 if (build) { 253 /* Use extent_min to filter too short extents */ 254 if (wnd->count >= NTFS_MAX_WND_EXTENTS && 255 len <= wnd->extent_min) { 256 wnd->uptodated = -1; 257 return; 258 } 259 } else { 260 /* Try to find extent before 'bit' */ 261 n = rb_lookup(&wnd->start_tree, bit); 262 263 if (!n) { 264 n = rb_first(&wnd->start_tree); 265 } else { 266 e = rb_entry(n, struct e_node, start.node); 267 n = rb_next(n); 268 if (e->start.key + e->count.key == bit) { 269 /* Remove left */ 270 bit = e->start.key; 271 len += e->count.key; 272 rb_erase(&e->start.node, &wnd->start_tree); 273 rb_erase(&e->count.node, &wnd->count_tree); 274 wnd->count -= 1; 275 e0 = e; 276 } 277 } 278 279 while (n) { 280 size_t next_end; 281 282 e = rb_entry(n, struct e_node, start.node); 283 next_end = e->start.key + e->count.key; 284 if (e->start.key > end_in) 285 break; 286 287 /* Remove right */ 288 n = rb_next(n); 289 len += next_end - end_in; 290 end_in = next_end; 291 rb_erase(&e->start.node, &wnd->start_tree); 292 rb_erase(&e->count.node, &wnd->count_tree); 293 wnd->count -= 1; 294 295 if (!e0) 296 e0 = e; 297 else 298 kmem_cache_free(ntfs_enode_cachep, e); 299 } 300 301 if (wnd->uptodated != 1) { 302 /* Check bits before 'bit' */ 303 ib = wnd->zone_bit == wnd->zone_end || 304 bit < wnd->zone_end 305 ? 0 306 : wnd->zone_end; 307 308 while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { 309 bit -= 1; 310 len += 1; 311 } 312 313 /* Check bits after 'end_in' */ 314 ib = wnd->zone_bit == wnd->zone_end || 315 end_in > wnd->zone_bit 316 ? wnd->nbits 317 : wnd->zone_bit; 318 319 while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { 320 end_in += 1; 321 len += 1; 322 } 323 } 324 } 325 /* Insert new fragment */ 326 if (wnd->count >= NTFS_MAX_WND_EXTENTS) { 327 if (e0) 328 kmem_cache_free(ntfs_enode_cachep, e0); 329 330 wnd->uptodated = -1; 331 332 /* Compare with smallest fragment */ 333 n = rb_last(&wnd->count_tree); 334 e = rb_entry(n, struct e_node, count.node); 335 if (len <= e->count.key) 336 goto out; /* Do not insert small fragments */ 337 338 if (build) { 339 struct e_node *e2; 340 341 n = rb_prev(n); 342 e2 = rb_entry(n, struct e_node, count.node); 343 /* smallest fragment will be 'e2->count.key' */ 344 wnd->extent_min = e2->count.key; 345 } 346 347 /* Replace smallest fragment by new one */ 348 rb_erase(&e->start.node, &wnd->start_tree); 349 rb_erase(&e->count.node, &wnd->count_tree); 350 wnd->count -= 1; 351 } else { 352 e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); 353 if (!e) { 354 wnd->uptodated = -1; 355 goto out; 356 } 357 358 if (build && len <= wnd->extent_min) 359 wnd->extent_min = len; 360 } 361 e->start.key = bit; 362 e->count.key = len; 363 if (len > wnd->extent_max) 364 wnd->extent_max = len; 365 366 rb_insert_start(&wnd->start_tree, e); 367 rb_insert_count(&wnd->count_tree, e); 368 wnd->count += 1; 369 370 out:; 371 } 372 373 /* 374 * wnd_remove_free_ext 375 * 376 * removes a run from the cached free space 377 */ 378 static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) 379 { 380 struct rb_node *n, *n3; 381 struct e_node *e, *e3; 382 size_t end_in = bit + len; 383 size_t end3, end, new_key, new_len, max_new_len; 384 385 /* Try to find extent before 'bit' */ 386 n = rb_lookup(&wnd->start_tree, bit); 387 388 if (!n) 389 return; 390 391 e = rb_entry(n, struct e_node, start.node); 392 end = e->start.key + e->count.key; 393 394 new_key = new_len = 0; 395 len = e->count.key; 396 397 /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n' */ 398 if (e->start.key > bit) 399 ; 400 else if (end_in <= end) { 401 /* Range [bit,end_in) inside 'e' */ 402 new_key = end_in; 403 new_len = end - end_in; 404 len = bit - e->start.key; 405 } else if (bit > end) { 406 bool bmax = false; 407 408 n3 = rb_next(n); 409 410 while (n3) { 411 e3 = rb_entry(n3, struct e_node, start.node); 412 if (e3->start.key >= end_in) 413 break; 414 415 if (e3->count.key == wnd->extent_max) 416 bmax = true; 417 418 end3 = e3->start.key + e3->count.key; 419 if (end3 > end_in) { 420 e3->start.key = end_in; 421 rb_erase(&e3->count.node, &wnd->count_tree); 422 e3->count.key = end3 - end_in; 423 rb_insert_count(&wnd->count_tree, e3); 424 break; 425 } 426 427 n3 = rb_next(n3); 428 rb_erase(&e3->start.node, &wnd->start_tree); 429 rb_erase(&e3->count.node, &wnd->count_tree); 430 wnd->count -= 1; 431 kmem_cache_free(ntfs_enode_cachep, e3); 432 } 433 if (!bmax) 434 return; 435 n3 = rb_first(&wnd->count_tree); 436 wnd->extent_max = 437 n3 ? rb_entry(n3, struct e_node, count.node)->count.key 438 : 0; 439 return; 440 } 441 442 if (e->count.key != wnd->extent_max) { 443 ; 444 } else if (rb_prev(&e->count.node)) { 445 ; 446 } else { 447 n3 = rb_next(&e->count.node); 448 max_new_len = len > new_len ? len : new_len; 449 if (!n3) { 450 wnd->extent_max = max_new_len; 451 } else { 452 e3 = rb_entry(n3, struct e_node, count.node); 453 wnd->extent_max = max(e3->count.key, max_new_len); 454 } 455 } 456 457 if (!len) { 458 if (new_len) { 459 e->start.key = new_key; 460 rb_erase(&e->count.node, &wnd->count_tree); 461 e->count.key = new_len; 462 rb_insert_count(&wnd->count_tree, e); 463 } else { 464 rb_erase(&e->start.node, &wnd->start_tree); 465 rb_erase(&e->count.node, &wnd->count_tree); 466 wnd->count -= 1; 467 kmem_cache_free(ntfs_enode_cachep, e); 468 } 469 goto out; 470 } 471 rb_erase(&e->count.node, &wnd->count_tree); 472 e->count.key = len; 473 rb_insert_count(&wnd->count_tree, e); 474 475 if (!new_len) 476 goto out; 477 478 if (wnd->count >= NTFS_MAX_WND_EXTENTS) { 479 wnd->uptodated = -1; 480 481 /* Get minimal extent */ 482 e = rb_entry(rb_last(&wnd->count_tree), struct e_node, 483 count.node); 484 if (e->count.key > new_len) 485 goto out; 486 487 /* Replace minimum */ 488 rb_erase(&e->start.node, &wnd->start_tree); 489 rb_erase(&e->count.node, &wnd->count_tree); 490 wnd->count -= 1; 491 } else { 492 e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); 493 if (!e) 494 wnd->uptodated = -1; 495 } 496 497 if (e) { 498 e->start.key = new_key; 499 e->count.key = new_len; 500 rb_insert_start(&wnd->start_tree, e); 501 rb_insert_count(&wnd->count_tree, e); 502 wnd->count += 1; 503 } 504 505 out: 506 if (!wnd->count && 1 != wnd->uptodated) 507 wnd_rescan(wnd); 508 } 509 510 /* 511 * wnd_rescan 512 * 513 * Scan all bitmap. used while initialization. 514 */ 515 static int wnd_rescan(struct wnd_bitmap *wnd) 516 { 517 int err = 0; 518 size_t prev_tail = 0; 519 struct super_block *sb = wnd->sb; 520 struct ntfs_sb_info *sbi = sb->s_fs_info; 521 u64 lbo, len = 0; 522 u32 blocksize = sb->s_blocksize; 523 u8 cluster_bits = sbi->cluster_bits; 524 u32 wbits = 8 * sb->s_blocksize; 525 u32 used, frb; 526 const ulong *buf; 527 size_t wpos, wbit, iw, vbo; 528 struct buffer_head *bh = NULL; 529 CLST lcn, clen; 530 531 wnd->uptodated = 0; 532 wnd->extent_max = 0; 533 wnd->extent_min = MINUS_ONE_T; 534 wnd->total_zeroes = 0; 535 536 vbo = 0; 537 538 for (iw = 0; iw < wnd->nwnd; iw++) { 539 if (iw + 1 == wnd->nwnd) 540 wbits = wnd->bits_last; 541 542 if (wnd->inited) { 543 if (!wnd->free_bits[iw]) { 544 /* all ones */ 545 if (prev_tail) { 546 wnd_add_free_ext(wnd, 547 vbo * 8 - prev_tail, 548 prev_tail, true); 549 prev_tail = 0; 550 } 551 goto next_wnd; 552 } 553 if (wbits == wnd->free_bits[iw]) { 554 /* all zeroes */ 555 prev_tail += wbits; 556 wnd->total_zeroes += wbits; 557 goto next_wnd; 558 } 559 } 560 561 if (!len) { 562 u32 off = vbo & sbi->cluster_mask; 563 564 if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits, 565 &lcn, &clen, NULL)) { 566 err = -ENOENT; 567 goto out; 568 } 569 570 lbo = ((u64)lcn << cluster_bits) + off; 571 len = ((u64)clen << cluster_bits) - off; 572 } 573 574 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); 575 if (!bh) { 576 err = -EIO; 577 goto out; 578 } 579 580 buf = (ulong *)bh->b_data; 581 582 used = __bitmap_weight(buf, wbits); 583 if (used < wbits) { 584 frb = wbits - used; 585 wnd->free_bits[iw] = frb; 586 wnd->total_zeroes += frb; 587 } 588 589 wpos = 0; 590 wbit = vbo * 8; 591 592 if (wbit + wbits > wnd->nbits) 593 wbits = wnd->nbits - wbit; 594 595 do { 596 used = find_next_zero_bit(buf, wbits, wpos); 597 598 if (used > wpos && prev_tail) { 599 wnd_add_free_ext(wnd, wbit + wpos - prev_tail, 600 prev_tail, true); 601 prev_tail = 0; 602 } 603 604 wpos = used; 605 606 if (wpos >= wbits) { 607 /* No free blocks */ 608 prev_tail = 0; 609 break; 610 } 611 612 frb = find_next_bit(buf, wbits, wpos); 613 if (frb >= wbits) { 614 /* keep last free block */ 615 prev_tail += frb - wpos; 616 break; 617 } 618 619 wnd_add_free_ext(wnd, wbit + wpos - prev_tail, 620 frb + prev_tail - wpos, true); 621 622 /* Skip free block and first '1' */ 623 wpos = frb + 1; 624 /* Reset previous tail */ 625 prev_tail = 0; 626 } while (wpos < wbits); 627 628 next_wnd: 629 630 if (bh) 631 put_bh(bh); 632 bh = NULL; 633 634 vbo += blocksize; 635 if (len) { 636 len -= blocksize; 637 lbo += blocksize; 638 } 639 } 640 641 /* Add last block */ 642 if (prev_tail) 643 wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true); 644 645 /* 646 * Before init cycle wnd->uptodated was 0 647 * If any errors or limits occurs while initialization then 648 * wnd->uptodated will be -1 649 * If 'uptodated' is still 0 then Tree is really updated 650 */ 651 if (!wnd->uptodated) 652 wnd->uptodated = 1; 653 654 if (wnd->zone_bit != wnd->zone_end) { 655 size_t zlen = wnd->zone_end - wnd->zone_bit; 656 657 wnd->zone_end = wnd->zone_bit; 658 wnd_zone_set(wnd, wnd->zone_bit, zlen); 659 } 660 661 out: 662 return err; 663 } 664 665 /* 666 * wnd_init 667 */ 668 int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) 669 { 670 int err; 671 u32 blocksize = sb->s_blocksize; 672 u32 wbits = blocksize * 8; 673 674 init_rwsem(&wnd->rw_lock); 675 676 wnd->sb = sb; 677 wnd->nbits = nbits; 678 wnd->total_zeroes = nbits; 679 wnd->extent_max = MINUS_ONE_T; 680 wnd->zone_bit = wnd->zone_end = 0; 681 wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits)); 682 wnd->bits_last = nbits & (wbits - 1); 683 if (!wnd->bits_last) 684 wnd->bits_last = wbits; 685 686 wnd->free_bits = ntfs_zalloc(wnd->nwnd * sizeof(u16)); 687 if (!wnd->free_bits) 688 return -ENOMEM; 689 690 err = wnd_rescan(wnd); 691 if (err) 692 return err; 693 694 wnd->inited = true; 695 696 return 0; 697 } 698 699 /* 700 * wnd_map 701 * 702 * call sb_bread for requested window 703 */ 704 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw) 705 { 706 size_t vbo; 707 CLST lcn, clen; 708 struct super_block *sb = wnd->sb; 709 struct ntfs_sb_info *sbi; 710 struct buffer_head *bh; 711 u64 lbo; 712 713 sbi = sb->s_fs_info; 714 vbo = (u64)iw << sb->s_blocksize_bits; 715 716 if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen, 717 NULL)) { 718 return ERR_PTR(-ENOENT); 719 } 720 721 lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask); 722 723 bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits); 724 if (!bh) 725 return ERR_PTR(-EIO); 726 727 return bh; 728 } 729 730 /* 731 * wnd_set_free 732 * 733 * Marks the bits range from bit to bit + bits as free 734 */ 735 int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) 736 { 737 int err = 0; 738 struct super_block *sb = wnd->sb; 739 size_t bits0 = bits; 740 u32 wbits = 8 * sb->s_blocksize; 741 size_t iw = bit >> (sb->s_blocksize_bits + 3); 742 u32 wbit = bit & (wbits - 1); 743 struct buffer_head *bh; 744 745 while (iw < wnd->nwnd && bits) { 746 u32 tail, op; 747 ulong *buf; 748 749 if (iw + 1 == wnd->nwnd) 750 wbits = wnd->bits_last; 751 752 tail = wbits - wbit; 753 op = tail < bits ? tail : bits; 754 755 bh = wnd_map(wnd, iw); 756 if (IS_ERR(bh)) { 757 err = PTR_ERR(bh); 758 break; 759 } 760 761 buf = (ulong *)bh->b_data; 762 763 lock_buffer(bh); 764 765 __bitmap_clear(buf, wbit, op); 766 767 wnd->free_bits[iw] += op; 768 769 set_buffer_uptodate(bh); 770 mark_buffer_dirty(bh); 771 unlock_buffer(bh); 772 put_bh(bh); 773 774 wnd->total_zeroes += op; 775 bits -= op; 776 wbit = 0; 777 iw += 1; 778 } 779 780 wnd_add_free_ext(wnd, bit, bits0, false); 781 782 return err; 783 } 784 785 /* 786 * wnd_set_used 787 * 788 * Marks the bits range from bit to bit + bits as used 789 */ 790 int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) 791 { 792 int err = 0; 793 struct super_block *sb = wnd->sb; 794 size_t bits0 = bits; 795 size_t iw = bit >> (sb->s_blocksize_bits + 3); 796 u32 wbits = 8 * sb->s_blocksize; 797 u32 wbit = bit & (wbits - 1); 798 struct buffer_head *bh; 799 800 while (iw < wnd->nwnd && bits) { 801 u32 tail, op; 802 ulong *buf; 803 804 if (unlikely(iw + 1 == wnd->nwnd)) 805 wbits = wnd->bits_last; 806 807 tail = wbits - wbit; 808 op = tail < bits ? tail : bits; 809 810 bh = wnd_map(wnd, iw); 811 if (IS_ERR(bh)) { 812 err = PTR_ERR(bh); 813 break; 814 } 815 buf = (ulong *)bh->b_data; 816 817 lock_buffer(bh); 818 819 __bitmap_set(buf, wbit, op); 820 wnd->free_bits[iw] -= op; 821 822 set_buffer_uptodate(bh); 823 mark_buffer_dirty(bh); 824 unlock_buffer(bh); 825 put_bh(bh); 826 827 wnd->total_zeroes -= op; 828 bits -= op; 829 wbit = 0; 830 iw += 1; 831 } 832 833 if (!RB_EMPTY_ROOT(&wnd->start_tree)) 834 wnd_remove_free_ext(wnd, bit, bits0); 835 836 return err; 837 } 838 839 /* 840 * wnd_is_free_hlp 841 * 842 * Returns true if all clusters [bit, bit+bits) are free (bitmap only) 843 */ 844 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) 845 { 846 struct super_block *sb = wnd->sb; 847 size_t iw = bit >> (sb->s_blocksize_bits + 3); 848 u32 wbits = 8 * sb->s_blocksize; 849 u32 wbit = bit & (wbits - 1); 850 851 while (iw < wnd->nwnd && bits) { 852 u32 tail, op; 853 854 if (unlikely(iw + 1 == wnd->nwnd)) 855 wbits = wnd->bits_last; 856 857 tail = wbits - wbit; 858 op = tail < bits ? tail : bits; 859 860 if (wbits != wnd->free_bits[iw]) { 861 bool ret; 862 struct buffer_head *bh = wnd_map(wnd, iw); 863 864 if (IS_ERR(bh)) 865 return false; 866 867 ret = are_bits_clear((ulong *)bh->b_data, wbit, op); 868 869 put_bh(bh); 870 if (!ret) 871 return false; 872 } 873 874 bits -= op; 875 wbit = 0; 876 iw += 1; 877 } 878 879 return true; 880 } 881 882 /* 883 * wnd_is_free 884 * 885 * Returns true if all clusters [bit, bit+bits) are free 886 */ 887 bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) 888 { 889 bool ret; 890 struct rb_node *n; 891 size_t end; 892 struct e_node *e; 893 894 if (RB_EMPTY_ROOT(&wnd->start_tree)) 895 goto use_wnd; 896 897 n = rb_lookup(&wnd->start_tree, bit); 898 if (!n) 899 goto use_wnd; 900 901 e = rb_entry(n, struct e_node, start.node); 902 903 end = e->start.key + e->count.key; 904 905 if (bit < end && bit + bits <= end) 906 return true; 907 908 use_wnd: 909 ret = wnd_is_free_hlp(wnd, bit, bits); 910 911 return ret; 912 } 913 914 /* 915 * wnd_is_used 916 * 917 * Returns true if all clusters [bit, bit+bits) are used 918 */ 919 bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) 920 { 921 bool ret = false; 922 struct super_block *sb = wnd->sb; 923 size_t iw = bit >> (sb->s_blocksize_bits + 3); 924 u32 wbits = 8 * sb->s_blocksize; 925 u32 wbit = bit & (wbits - 1); 926 size_t end; 927 struct rb_node *n; 928 struct e_node *e; 929 930 if (RB_EMPTY_ROOT(&wnd->start_tree)) 931 goto use_wnd; 932 933 end = bit + bits; 934 n = rb_lookup(&wnd->start_tree, end - 1); 935 if (!n) 936 goto use_wnd; 937 938 e = rb_entry(n, struct e_node, start.node); 939 if (e->start.key + e->count.key > bit) 940 return false; 941 942 use_wnd: 943 while (iw < wnd->nwnd && bits) { 944 u32 tail, op; 945 946 if (unlikely(iw + 1 == wnd->nwnd)) 947 wbits = wnd->bits_last; 948 949 tail = wbits - wbit; 950 op = tail < bits ? tail : bits; 951 952 if (wnd->free_bits[iw]) { 953 bool ret; 954 struct buffer_head *bh = wnd_map(wnd, iw); 955 956 if (IS_ERR(bh)) 957 goto out; 958 959 ret = are_bits_set((ulong *)bh->b_data, wbit, op); 960 put_bh(bh); 961 if (!ret) 962 goto out; 963 } 964 965 bits -= op; 966 wbit = 0; 967 iw += 1; 968 } 969 ret = true; 970 971 out: 972 return ret; 973 } 974 975 /* 976 * wnd_find 977 * - flags - BITMAP_FIND_XXX flags 978 * 979 * looks for free space 980 * Returns 0 if not found 981 */ 982 size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, 983 size_t flags, size_t *allocated) 984 { 985 struct super_block *sb; 986 u32 wbits, wpos, wzbit, wzend; 987 size_t fnd, max_alloc, b_len, b_pos; 988 size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; 989 size_t to_alloc0 = to_alloc; 990 const ulong *buf; 991 const struct e_node *e; 992 const struct rb_node *pr, *cr; 993 u8 log2_bits; 994 bool fbits_valid; 995 struct buffer_head *bh; 996 997 /* fast checking for available free space */ 998 if (flags & BITMAP_FIND_FULL) { 999 size_t zeroes = wnd_zeroes(wnd); 1000 1001 zeroes -= wnd->zone_end - wnd->zone_bit; 1002 if (zeroes < to_alloc0) 1003 goto no_space; 1004 1005 if (to_alloc0 > wnd->extent_max) 1006 goto no_space; 1007 } else { 1008 if (to_alloc > wnd->extent_max) 1009 to_alloc = wnd->extent_max; 1010 } 1011 1012 if (wnd->zone_bit <= hint && hint < wnd->zone_end) 1013 hint = wnd->zone_end; 1014 1015 max_alloc = wnd->nbits; 1016 b_len = b_pos = 0; 1017 1018 if (hint >= max_alloc) 1019 hint = 0; 1020 1021 if (RB_EMPTY_ROOT(&wnd->start_tree)) { 1022 if (wnd->uptodated == 1) { 1023 /* extents tree is updated -> no free space */ 1024 goto no_space; 1025 } 1026 goto scan_bitmap; 1027 } 1028 1029 e = NULL; 1030 if (!hint) 1031 goto allocate_biggest; 1032 1033 /* Use hint: enumerate extents by start >= hint */ 1034 pr = NULL; 1035 cr = wnd->start_tree.rb_node; 1036 1037 for (;;) { 1038 e = rb_entry(cr, struct e_node, start.node); 1039 1040 if (e->start.key == hint) 1041 break; 1042 1043 if (e->start.key < hint) { 1044 pr = cr; 1045 cr = cr->rb_right; 1046 if (!cr) 1047 break; 1048 continue; 1049 } 1050 1051 cr = cr->rb_left; 1052 if (!cr) { 1053 e = pr ? rb_entry(pr, struct e_node, start.node) : NULL; 1054 break; 1055 } 1056 } 1057 1058 if (!e) 1059 goto allocate_biggest; 1060 1061 if (e->start.key + e->count.key > hint) { 1062 /* We have found extension with 'hint' inside */ 1063 size_t len = e->start.key + e->count.key - hint; 1064 1065 if (len >= to_alloc && hint + to_alloc <= max_alloc) { 1066 fnd = hint; 1067 goto found; 1068 } 1069 1070 if (!(flags & BITMAP_FIND_FULL)) { 1071 if (len > to_alloc) 1072 len = to_alloc; 1073 1074 if (hint + len <= max_alloc) { 1075 fnd = hint; 1076 to_alloc = len; 1077 goto found; 1078 } 1079 } 1080 } 1081 1082 allocate_biggest: 1083 /* Allocate from biggest free extent */ 1084 e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node); 1085 if (e->count.key != wnd->extent_max) 1086 wnd->extent_max = e->count.key; 1087 1088 if (e->count.key < max_alloc) { 1089 if (e->count.key >= to_alloc) { 1090 ; 1091 } else if (flags & BITMAP_FIND_FULL) { 1092 if (e->count.key < to_alloc0) { 1093 /* Biggest free block is less then requested */ 1094 goto no_space; 1095 } 1096 to_alloc = e->count.key; 1097 } else if (-1 != wnd->uptodated) { 1098 to_alloc = e->count.key; 1099 } else { 1100 /* Check if we can use more bits */ 1101 size_t op, max_check; 1102 struct rb_root start_tree; 1103 1104 memcpy(&start_tree, &wnd->start_tree, 1105 sizeof(struct rb_root)); 1106 memset(&wnd->start_tree, 0, sizeof(struct rb_root)); 1107 1108 max_check = e->start.key + to_alloc; 1109 if (max_check > max_alloc) 1110 max_check = max_alloc; 1111 for (op = e->start.key + e->count.key; op < max_check; 1112 op++) { 1113 if (!wnd_is_free(wnd, op, 1)) 1114 break; 1115 } 1116 memcpy(&wnd->start_tree, &start_tree, 1117 sizeof(struct rb_root)); 1118 to_alloc = op - e->start.key; 1119 } 1120 1121 /* Prepare to return */ 1122 fnd = e->start.key; 1123 if (e->start.key + to_alloc > max_alloc) 1124 to_alloc = max_alloc - e->start.key; 1125 goto found; 1126 } 1127 1128 if (wnd->uptodated == 1) { 1129 /* extents tree is updated -> no free space */ 1130 goto no_space; 1131 } 1132 1133 b_len = e->count.key; 1134 b_pos = e->start.key; 1135 1136 scan_bitmap: 1137 sb = wnd->sb; 1138 log2_bits = sb->s_blocksize_bits + 3; 1139 1140 /* At most two ranges [hint, max_alloc) + [0, hint) */ 1141 Again: 1142 1143 /* TODO: optimize request for case nbits > wbits */ 1144 iw = hint >> log2_bits; 1145 wbits = sb->s_blocksize * 8; 1146 wpos = hint & (wbits - 1); 1147 prev_tail = 0; 1148 fbits_valid = true; 1149 1150 if (max_alloc == wnd->nbits) { 1151 nwnd = wnd->nwnd; 1152 } else { 1153 size_t t = max_alloc + wbits - 1; 1154 1155 nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd; 1156 } 1157 1158 /* Enumerate all windows */ 1159 for (; iw < nwnd; iw++) { 1160 wbit = iw << log2_bits; 1161 1162 if (!wnd->free_bits[iw]) { 1163 if (prev_tail > b_len) { 1164 b_pos = wbit - prev_tail; 1165 b_len = prev_tail; 1166 } 1167 1168 /* Skip full used window */ 1169 prev_tail = 0; 1170 wpos = 0; 1171 continue; 1172 } 1173 1174 if (unlikely(iw + 1 == nwnd)) { 1175 if (max_alloc == wnd->nbits) { 1176 wbits = wnd->bits_last; 1177 } else { 1178 size_t t = max_alloc & (wbits - 1); 1179 1180 if (t) { 1181 wbits = t; 1182 fbits_valid = false; 1183 } 1184 } 1185 } 1186 1187 if (wnd->zone_end > wnd->zone_bit) { 1188 ebit = wbit + wbits; 1189 zbit = max(wnd->zone_bit, wbit); 1190 zend = min(wnd->zone_end, ebit); 1191 1192 /* Here we have a window [wbit, ebit) and zone [zbit, zend) */ 1193 if (zend <= zbit) { 1194 /* Zone does not overlap window */ 1195 } else { 1196 wzbit = zbit - wbit; 1197 wzend = zend - wbit; 1198 1199 /* Zone overlaps window */ 1200 if (wnd->free_bits[iw] == wzend - wzbit) { 1201 prev_tail = 0; 1202 wpos = 0; 1203 continue; 1204 } 1205 1206 /* Scan two ranges window: [wbit, zbit) and [zend, ebit) */ 1207 bh = wnd_map(wnd, iw); 1208 1209 if (IS_ERR(bh)) { 1210 /* TODO: error */ 1211 prev_tail = 0; 1212 wpos = 0; 1213 continue; 1214 } 1215 1216 buf = (ulong *)bh->b_data; 1217 1218 /* Scan range [wbit, zbit) */ 1219 if (wpos < wzbit) { 1220 /* Scan range [wpos, zbit) */ 1221 fnd = wnd_scan(buf, wbit, wpos, wzbit, 1222 to_alloc, &prev_tail, 1223 &b_pos, &b_len); 1224 if (fnd != MINUS_ONE_T) { 1225 put_bh(bh); 1226 goto found; 1227 } 1228 } 1229 1230 prev_tail = 0; 1231 1232 /* Scan range [zend, ebit) */ 1233 if (wzend < wbits) { 1234 fnd = wnd_scan(buf, wbit, 1235 max(wzend, wpos), wbits, 1236 to_alloc, &prev_tail, 1237 &b_pos, &b_len); 1238 if (fnd != MINUS_ONE_T) { 1239 put_bh(bh); 1240 goto found; 1241 } 1242 } 1243 1244 wpos = 0; 1245 put_bh(bh); 1246 continue; 1247 } 1248 } 1249 1250 /* Current window does not overlap zone */ 1251 if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) { 1252 /* window is empty */ 1253 if (prev_tail + wbits >= to_alloc) { 1254 fnd = wbit + wpos - prev_tail; 1255 goto found; 1256 } 1257 1258 /* Increase 'prev_tail' and process next window */ 1259 prev_tail += wbits; 1260 wpos = 0; 1261 continue; 1262 } 1263 1264 /* read window */ 1265 bh = wnd_map(wnd, iw); 1266 if (IS_ERR(bh)) { 1267 // TODO: error 1268 prev_tail = 0; 1269 wpos = 0; 1270 continue; 1271 } 1272 1273 buf = (ulong *)bh->b_data; 1274 1275 /* Scan range [wpos, eBits) */ 1276 fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail, 1277 &b_pos, &b_len); 1278 put_bh(bh); 1279 if (fnd != MINUS_ONE_T) 1280 goto found; 1281 } 1282 1283 if (b_len < prev_tail) { 1284 /* The last fragment */ 1285 b_len = prev_tail; 1286 b_pos = max_alloc - prev_tail; 1287 } 1288 1289 if (hint) { 1290 /* 1291 * We have scanned range [hint max_alloc) 1292 * Prepare to scan range [0 hint + to_alloc) 1293 */ 1294 size_t nextmax = hint + to_alloc; 1295 1296 if (likely(nextmax >= hint) && nextmax < max_alloc) 1297 max_alloc = nextmax; 1298 hint = 0; 1299 goto Again; 1300 } 1301 1302 if (!b_len) 1303 goto no_space; 1304 1305 wnd->extent_max = b_len; 1306 1307 if (flags & BITMAP_FIND_FULL) 1308 goto no_space; 1309 1310 fnd = b_pos; 1311 to_alloc = b_len; 1312 1313 found: 1314 if (flags & BITMAP_FIND_MARK_AS_USED) { 1315 /* TODO optimize remove extent (pass 'e'?) */ 1316 if (wnd_set_used(wnd, fnd, to_alloc)) 1317 goto no_space; 1318 } else if (wnd->extent_max != MINUS_ONE_T && 1319 to_alloc > wnd->extent_max) { 1320 wnd->extent_max = to_alloc; 1321 } 1322 1323 *allocated = fnd; 1324 return to_alloc; 1325 1326 no_space: 1327 return 0; 1328 } 1329 1330 /* 1331 * wnd_extend 1332 * 1333 * Extend bitmap ($MFT bitmap) 1334 */ 1335 int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) 1336 { 1337 int err; 1338 struct super_block *sb = wnd->sb; 1339 struct ntfs_sb_info *sbi = sb->s_fs_info; 1340 u32 blocksize = sb->s_blocksize; 1341 u32 wbits = blocksize * 8; 1342 u32 b0, new_last; 1343 size_t bits, iw, new_wnd; 1344 size_t old_bits = wnd->nbits; 1345 u16 *new_free; 1346 1347 if (new_bits <= old_bits) 1348 return -EINVAL; 1349 1350 /* align to 8 byte boundary */ 1351 new_wnd = bytes_to_block(sb, bitmap_size(new_bits)); 1352 new_last = new_bits & (wbits - 1); 1353 if (!new_last) 1354 new_last = wbits; 1355 1356 if (new_wnd != wnd->nwnd) { 1357 new_free = ntfs_malloc(new_wnd * sizeof(u16)); 1358 if (!new_free) 1359 return -ENOMEM; 1360 1361 if (new_free != wnd->free_bits) 1362 memcpy(new_free, wnd->free_bits, 1363 wnd->nwnd * sizeof(short)); 1364 memset(new_free + wnd->nwnd, 0, 1365 (new_wnd - wnd->nwnd) * sizeof(short)); 1366 ntfs_free(wnd->free_bits); 1367 wnd->free_bits = new_free; 1368 } 1369 1370 /* Zero bits [old_bits,new_bits) */ 1371 bits = new_bits - old_bits; 1372 b0 = old_bits & (wbits - 1); 1373 1374 for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) { 1375 u32 op; 1376 size_t frb; 1377 u64 vbo, lbo, bytes; 1378 struct buffer_head *bh; 1379 ulong *buf; 1380 1381 if (iw + 1 == new_wnd) 1382 wbits = new_last; 1383 1384 op = b0 + bits > wbits ? wbits - b0 : bits; 1385 vbo = (u64)iw * blocksize; 1386 1387 err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); 1388 if (err) 1389 break; 1390 1391 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); 1392 if (!bh) 1393 return -EIO; 1394 1395 lock_buffer(bh); 1396 buf = (ulong *)bh->b_data; 1397 1398 __bitmap_clear(buf, b0, blocksize * 8 - b0); 1399 frb = wbits - __bitmap_weight(buf, wbits); 1400 wnd->total_zeroes += frb - wnd->free_bits[iw]; 1401 wnd->free_bits[iw] = frb; 1402 1403 set_buffer_uptodate(bh); 1404 mark_buffer_dirty(bh); 1405 unlock_buffer(bh); 1406 /*err = sync_dirty_buffer(bh);*/ 1407 1408 b0 = 0; 1409 bits -= op; 1410 } 1411 1412 wnd->nbits = new_bits; 1413 wnd->nwnd = new_wnd; 1414 wnd->bits_last = new_last; 1415 1416 wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false); 1417 1418 return 0; 1419 } 1420 1421 /* 1422 * wnd_zone_set 1423 */ 1424 void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) 1425 { 1426 size_t zlen; 1427 1428 zlen = wnd->zone_end - wnd->zone_bit; 1429 if (zlen) 1430 wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); 1431 1432 if (!RB_EMPTY_ROOT(&wnd->start_tree) && len) 1433 wnd_remove_free_ext(wnd, lcn, len); 1434 1435 wnd->zone_bit = lcn; 1436 wnd->zone_end = lcn + len; 1437 } 1438 1439 int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) 1440 { 1441 int err = 0; 1442 struct super_block *sb = sbi->sb; 1443 struct wnd_bitmap *wnd = &sbi->used.bitmap; 1444 u32 wbits = 8 * sb->s_blocksize; 1445 CLST len = 0, lcn = 0, done = 0; 1446 CLST minlen = bytes_to_cluster(sbi, range->minlen); 1447 CLST lcn_from = bytes_to_cluster(sbi, range->start); 1448 size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); 1449 u32 wbit = lcn_from & (wbits - 1); 1450 const ulong *buf; 1451 CLST lcn_to; 1452 1453 if (!minlen) 1454 minlen = 1; 1455 1456 if (range->len == (u64)-1) 1457 lcn_to = wnd->nbits; 1458 else 1459 lcn_to = bytes_to_cluster(sbi, range->start + range->len); 1460 1461 down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); 1462 1463 for (; iw < wnd->nbits; iw++, wbit = 0) { 1464 CLST lcn_wnd = iw * wbits; 1465 struct buffer_head *bh; 1466 1467 if (lcn_wnd > lcn_to) 1468 break; 1469 1470 if (!wnd->free_bits[iw]) 1471 continue; 1472 1473 if (iw + 1 == wnd->nwnd) 1474 wbits = wnd->bits_last; 1475 1476 if (lcn_wnd + wbits > lcn_to) 1477 wbits = lcn_to - lcn_wnd; 1478 1479 bh = wnd_map(wnd, iw); 1480 if (IS_ERR(bh)) { 1481 err = PTR_ERR(bh); 1482 break; 1483 } 1484 1485 buf = (ulong *)bh->b_data; 1486 1487 for (; wbit < wbits; wbit++) { 1488 if (!test_bit(wbit, buf)) { 1489 if (!len) 1490 lcn = lcn_wnd + wbit; 1491 len += 1; 1492 continue; 1493 } 1494 if (len >= minlen) { 1495 err = ntfs_discard(sbi, lcn, len); 1496 if (err) 1497 goto out; 1498 done += len; 1499 } 1500 len = 0; 1501 } 1502 put_bh(bh); 1503 } 1504 1505 /* Process the last fragment */ 1506 if (len >= minlen) { 1507 err = ntfs_discard(sbi, lcn, len); 1508 if (err) 1509 goto out; 1510 done += len; 1511 } 1512 1513 out: 1514 range->len = (u64)done << sbi->cluster_bits; 1515 1516 up_read(&wnd->rw_lock); 1517 1518 return err; 1519 } 1520