// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cluster (de)allocation code.
 *
 * Copyright (c) 2004-2005 Anton Altaparmakov
 * Copyright (c) 2025 LG Electronics Co., Ltd.
 *
 * Part of this file is based on code from the NTFS-3G project
 * and is copyrighted by the respective authors below:
 * Copyright (c) 2002-2004 Anton Altaparmakov
 * Copyright (c) 2004 Yura Pakhuchiy
 * Copyright (c) 2004-2008 Szabolcs Szakacsits
 * Copyright (c) 2008-2009 Jean-Pierre Andre
 */

#include <linux/blkdev.h>

#include "lcnalloc.h"
#include "bitmap.h"
#include "ntfs.h"

/*
 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
 * @vol:	mounted ntfs volume on which to free the clusters
 * @rl:		runlist describing the clusters to free
 *
 * Free all the clusters described by the runlist @rl on the volume @vol.  In
 * the case of an error being returned, at least some of the clusters were not
 * freed.
 *
 * Return 0 on success and -errno on error.
 *
 * Locking: - The volume lcn bitmap must be locked for writing on entry and is
 *	      left locked on return.
35 */ 36 int ntfs_cluster_free_from_rl_nolock(struct ntfs_volume *vol, 37 const struct runlist_element *rl) 38 { 39 struct inode *lcnbmp_vi = vol->lcnbmp_ino; 40 int ret = 0; 41 s64 nr_freed = 0; 42 43 ntfs_debug("Entering."); 44 if (!rl) 45 return 0; 46 47 if (!NVolFreeClusterKnown(vol)) 48 wait_event(vol->free_waitq, NVolFreeClusterKnown(vol)); 49 50 for (; rl->length; rl++) { 51 int err; 52 53 if (rl->lcn < 0) 54 continue; 55 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); 56 if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err)) 57 ret = err; 58 else 59 nr_freed += rl->length; 60 } 61 ntfs_inc_free_clusters(vol, nr_freed); 62 ntfs_debug("Done."); 63 return ret; 64 } 65 66 static s64 max_empty_bit_range(unsigned char *buf, int size) 67 { 68 int i, j, run = 0; 69 int max_range = 0; 70 s64 start_pos = -1; 71 72 ntfs_debug("Entering\n"); 73 74 i = 0; 75 while (i < size) { 76 switch (*buf) { 77 case 0: 78 do { 79 buf++; 80 run += 8; 81 i++; 82 } while ((i < size) && !*buf); 83 break; 84 case 255: 85 if (run > max_range) { 86 max_range = run; 87 start_pos = (s64)i * 8 - run; 88 } 89 run = 0; 90 do { 91 buf++; 92 i++; 93 } while ((i < size) && (*buf == 255)); 94 break; 95 default: 96 for (j = 0; j < 8; j++) { 97 int bit = *buf & (1 << j); 98 99 if (bit) { 100 if (run > max_range) { 101 max_range = run; 102 start_pos = (s64)i * 8 + (j - run); 103 } 104 run = 0; 105 } else 106 run++; 107 } 108 i++; 109 buf++; 110 } 111 } 112 113 if (run > max_range) 114 start_pos = (s64)i * 8 - run; 115 116 return start_pos; 117 } 118 119 /* 120 * ntfs_cluster_alloc - allocate clusters on an ntfs volume 121 * @vol: mounted ntfs volume on which to allocate clusters 122 * @start_vcn: vcn of the first allocated cluster 123 * @count: number of clusters to allocate 124 * @start_lcn: starting lcn at which to allocate the clusters or -1 if none 125 * @zone: zone from which to allocate (MFT_ZONE or DATA_ZONE) 126 * @is_extension: if true, the caller is extending an attribute 
127 * @is_contig: if true, require contiguous allocation 128 * @is_dealloc: if true, the allocation is for deallocation purposes 129 * 130 * Allocate @count clusters preferably starting at cluster @start_lcn or at the 131 * current allocator position if @start_lcn is -1, on the mounted ntfs volume 132 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or 133 * MFT_ZONE for allocation of clusters for the master file table, i.e. the 134 * $MFT/$DATA attribute. 135 * 136 * @start_vcn specifies the vcn of the first allocated cluster. This makes 137 * merging the resulting runlist with the old runlist easier. 138 * 139 * If @is_extension is 'true', the caller is allocating clusters to extend an 140 * attribute and if it is 'false', the caller is allocating clusters to fill a 141 * hole in an attribute. Practically the difference is that if @is_extension 142 * is 'true' the returned runlist will be terminated with LCN_ENOENT and if 143 * @is_extension is 'false' the runlist will be terminated with 144 * LCN_RL_NOT_MAPPED. 145 * 146 * You need to check the return value with IS_ERR(). If this is false, the 147 * function was successful and the return value is a runlist describing the 148 * allocated cluster(s). If IS_ERR() is true, the function failed and 149 * PTR_ERR() gives you the error code. 150 * 151 * Notes on the allocation algorithm 152 * ================================= 153 * 154 * There are two data zones. First is the area between the end of the mft zone 155 * and the end of the volume, and second is the area between the start of the 156 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x 157 * volumes, the second data zone does not exist due to the mft zone being 158 * expanded to cover the start of the volume in order to reserve space for the 159 * mft bitmap attribute. 
160 * 161 * This is not the prettiest function but the complexity stems from the need of 162 * implementing the mft vs data zoned approach and from the fact that we have 163 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we 164 * need to cope with crossing over boundaries of two buffers. Further, the 165 * fact that the allocator allows for caller supplied hints as to the location 166 * of where allocation should begin and the fact that the allocator keeps track 167 * of where in the data zones the next natural allocation should occur, 168 * contribute to the complexity of the function. But it should all be 169 * worthwhile, because this allocator should: 1) be a full implementation of 170 * the MFT zone approach used by Windows NT, 2) cause reduction in 171 * fragmentation, and 3) be speedy in allocations (the code is not optimized 172 * for speed, but the algorithm is, so further speed improvements are probably 173 * possible). 174 * 175 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked 176 * on return. 177 * - This function takes the volume lcn bitmap lock for writing and 178 * modifies the bitmap contents. 179 * 180 * Return: Runlist describing the allocated cluster(s) on success, error pointer 181 * on failure. 
182 */ 183 struct runlist_element *ntfs_cluster_alloc(struct ntfs_volume *vol, const s64 start_vcn, 184 const s64 count, const s64 start_lcn, 185 const int zone, 186 const bool is_extension, 187 const bool is_contig, 188 const bool is_dealloc) 189 { 190 s64 zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 191 s64 prev_lcn = 0, prev_run_len = 0, mft_zone_size; 192 s64 clusters, free_clusters; 193 loff_t i_size; 194 struct inode *lcnbmp_vi; 195 struct runlist_element *rl = NULL; 196 struct address_space *mapping; 197 struct folio *folio = NULL; 198 u8 *buf = NULL, *byte; 199 int err = 0, rlpos, rlsize, buf_size, pg_off; 200 u8 pass, done_zones, search_zone, need_writeback = 0, bit; 201 unsigned int memalloc_flags; 202 u8 has_guess, used_zone_pos; 203 pgoff_t index; 204 205 ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn 0x%llx, zone %s_ZONE.", 206 start_vcn, count, start_lcn, 207 zone == MFT_ZONE ? "MFT" : "DATA"); 208 209 lcnbmp_vi = vol->lcnbmp_ino; 210 if (start_vcn < 0 || start_lcn < LCN_HOLE || 211 zone < FIRST_ZONE || zone > LAST_ZONE) 212 return ERR_PTR(-EINVAL); 213 214 /* Return NULL if @count is zero. */ 215 if (count < 0 || !count) 216 return ERR_PTR(-EINVAL); 217 218 memalloc_flags = memalloc_nofs_save(); 219 220 if (!NVolFreeClusterKnown(vol)) 221 wait_event(vol->free_waitq, NVolFreeClusterKnown(vol)); 222 free_clusters = atomic64_read(&vol->free_clusters); 223 224 /* Take the lcnbmp lock for writing. */ 225 down_write(&vol->lcnbmp_lock); 226 if (is_dealloc == false) 227 free_clusters -= atomic64_read(&vol->dirty_clusters); 228 229 if (free_clusters < count) { 230 err = -ENOSPC; 231 goto out_restore; 232 } 233 234 /* 235 * If no specific @start_lcn was requested, use the current data zone 236 * position, otherwise use the requested @start_lcn but make sure it 237 * lies outside the mft zone. 
Also set done_zones to 0 (no zones done) 238 * and pass depending on whether we are starting inside a zone (1) or 239 * at the beginning of a zone (2). If requesting from the MFT_ZONE, 240 * we either start at the current position within the mft zone or at 241 * the specified position. If the latter is out of bounds then we start 242 * at the beginning of the MFT_ZONE. 243 */ 244 done_zones = 0; 245 pass = 1; 246 /* 247 * zone_start and zone_end are the current search range. search_zone 248 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of 249 * volume) and 4 for data zone 2 (start of volume till start of mft 250 * zone). 251 */ 252 has_guess = 1; 253 zone_start = start_lcn; 254 255 if (zone_start < 0) { 256 if (zone == DATA_ZONE) 257 zone_start = vol->data1_zone_pos; 258 else 259 zone_start = vol->mft_zone_pos; 260 if (!zone_start) { 261 /* 262 * Zone starts at beginning of volume which means a 263 * single pass is sufficient. 264 */ 265 pass = 2; 266 } 267 has_guess = 0; 268 } 269 270 used_zone_pos = has_guess ? 0 : 1; 271 272 if (!zone_start || zone_start == vol->mft_zone_start || 273 zone_start == vol->mft_zone_end) 274 pass = 2; 275 276 if (zone_start < vol->mft_zone_start) { 277 zone_end = vol->mft_zone_start; 278 search_zone = 4; 279 /* Skip searching the mft zone. */ 280 done_zones |= 1; 281 } else if (zone_start < vol->mft_zone_end) { 282 zone_end = vol->mft_zone_end; 283 search_zone = 1; 284 } else { 285 zone_end = vol->nr_clusters; 286 search_zone = 2; 287 /* Skip searching the mft zone. */ 288 done_zones |= 1; 289 } 290 291 /* 292 * bmp_pos is the current bit position inside the bitmap. We use 293 * bmp_initial_pos to determine whether or not to do a zone switch. 294 */ 295 bmp_pos = bmp_initial_pos = zone_start; 296 297 /* Loop until all clusters are allocated, i.e. clusters == 0. 
*/ 298 clusters = count; 299 rlpos = rlsize = 0; 300 mapping = lcnbmp_vi->i_mapping; 301 i_size = i_size_read(lcnbmp_vi); 302 while (1) { 303 ntfs_debug("Start of outer while loop: done_zones 0x%x, search_zone %i, pass %i, zone_start 0x%llx, zone_end 0x%llx, bmp_initial_pos 0x%llx, bmp_pos 0x%llx, rlpos %i, rlsize %i.", 304 done_zones, search_zone, pass, 305 zone_start, zone_end, bmp_initial_pos, 306 bmp_pos, rlpos, rlsize); 307 /* Loop until we run out of free clusters. */ 308 last_read_pos = bmp_pos >> 3; 309 ntfs_debug("last_read_pos 0x%llx.", last_read_pos); 310 if (last_read_pos >= i_size) { 311 ntfs_debug("End of attribute reached. Skipping to zone_pass_done."); 312 goto zone_pass_done; 313 } 314 if (likely(folio)) { 315 if (need_writeback) { 316 ntfs_debug("Marking page dirty."); 317 folio_mark_dirty(folio); 318 need_writeback = 0; 319 } 320 folio_unlock(folio); 321 kunmap_local(buf); 322 folio_put(folio); 323 folio = NULL; 324 } 325 326 index = last_read_pos >> PAGE_SHIFT; 327 pg_off = last_read_pos & ~PAGE_MASK; 328 buf_size = PAGE_SIZE - pg_off; 329 if (unlikely(last_read_pos + buf_size > i_size)) 330 buf_size = i_size - last_read_pos; 331 buf_size <<= 3; 332 lcn = bmp_pos & 7; 333 bmp_pos &= ~(s64)7; 334 335 if (vol->lcn_empty_bits_per_page[index] == 0) 336 goto next_bmp_pos; 337 338 folio = read_mapping_folio(mapping, index, NULL); 339 if (IS_ERR(folio)) { 340 err = PTR_ERR(folio); 341 ntfs_error(vol->sb, "Failed to map page."); 342 goto out; 343 } 344 345 folio_lock(folio); 346 buf = kmap_local_folio(folio, 0) + pg_off; 347 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.", 348 buf_size, lcn, bmp_pos, need_writeback); 349 while (lcn < buf_size && lcn + bmp_pos < zone_end) { 350 byte = buf + (lcn >> 3); 351 ntfs_debug("In inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i, byte ofs 0x%x, *byte 0x%x.", 352 buf_size, lcn, bmp_pos, need_writeback, 353 (unsigned int)(lcn >> 3), 354 
(unsigned int)*byte); 355 bit = 1 << (lcn & 7); 356 ntfs_debug("bit 0x%x.", bit); 357 358 if (has_guess) { 359 if (*byte & bit) { 360 if (is_contig == true && prev_run_len > 0) 361 goto done; 362 363 has_guess = 0; 364 break; 365 } 366 } else { 367 lcn = max_empty_bit_range(buf, buf_size >> 3); 368 if (lcn < 0) 369 break; 370 has_guess = 1; 371 continue; 372 } 373 /* 374 * Allocate more memory if needed, including space for 375 * the terminator element. 376 * kvzalloc() operates on whole pages only. 377 */ 378 if ((rlpos + 2) * sizeof(*rl) > rlsize) { 379 struct runlist_element *rl2; 380 381 ntfs_debug("Reallocating memory."); 382 if (!rl) 383 ntfs_debug("First free bit is at s64 0x%llx.", 384 lcn + bmp_pos); 385 rl2 = kvzalloc(rlsize + PAGE_SIZE, GFP_NOFS); 386 if (unlikely(!rl2)) { 387 err = -ENOMEM; 388 ntfs_error(vol->sb, "Failed to allocate memory."); 389 goto out; 390 } 391 memcpy(rl2, rl, rlsize); 392 kvfree(rl); 393 rl = rl2; 394 rlsize += PAGE_SIZE; 395 ntfs_debug("Reallocated memory, rlsize 0x%x.", 396 rlsize); 397 } 398 /* Allocate the bitmap bit. */ 399 *byte |= bit; 400 /* We need to write this bitmap page to disk. */ 401 need_writeback = 1; 402 ntfs_debug("*byte 0x%x, need_writeback is set.", 403 (unsigned int)*byte); 404 ntfs_dec_free_clusters(vol, 1); 405 ntfs_set_lcn_empty_bits(vol, index, 1, 1); 406 407 /* 408 * Coalesce with previous run if adjacent LCNs. 409 * Otherwise, append a new run. 
410 */ 411 ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), prev_lcn 0x%llx, lcn 0x%llx, bmp_pos 0x%llx, prev_run_len 0x%llx, rlpos %i.", 412 lcn + bmp_pos, 1ULL, prev_lcn, 413 lcn, bmp_pos, prev_run_len, rlpos); 414 if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) { 415 ntfs_debug("Coalescing to run (lcn 0x%llx, len 0x%llx).", 416 rl[rlpos - 1].lcn, 417 rl[rlpos - 1].length); 418 rl[rlpos - 1].length = ++prev_run_len; 419 ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), prev_run_len 0x%llx.", 420 rl[rlpos - 1].lcn, 421 rl[rlpos - 1].length, 422 prev_run_len); 423 } else { 424 if (likely(rlpos)) { 425 ntfs_debug("Adding new run, (previous run lcn 0x%llx, len 0x%llx).", 426 rl[rlpos - 1].lcn, rl[rlpos - 1].length); 427 rl[rlpos].vcn = rl[rlpos - 1].vcn + 428 prev_run_len; 429 } else { 430 ntfs_debug("Adding new run, is first run."); 431 rl[rlpos].vcn = start_vcn; 432 } 433 rl[rlpos].lcn = prev_lcn = lcn + bmp_pos; 434 rl[rlpos].length = prev_run_len = 1; 435 rlpos++; 436 } 437 /* Done? */ 438 if (!--clusters) { 439 s64 tc; 440 done: 441 if (!used_zone_pos) 442 goto out; 443 /* 444 * Update the current zone position. Positions 445 * of already scanned zones have been updated 446 * during the respective zone switches. 447 */ 448 tc = lcn + bmp_pos + 1; 449 ntfs_debug("Done. 
Updating current zone position, tc 0x%llx, search_zone %i.", 450 tc, search_zone); 451 switch (search_zone) { 452 case 1: 453 ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.", 454 vol->mft_zone_pos); 455 if (tc >= vol->mft_zone_end) { 456 vol->mft_zone_pos = 457 vol->mft_lcn; 458 if (!vol->mft_zone_end) 459 vol->mft_zone_pos = 0; 460 } else if ((bmp_initial_pos >= 461 vol->mft_zone_pos || 462 tc > vol->mft_zone_pos) 463 && tc >= vol->mft_lcn) 464 vol->mft_zone_pos = tc; 465 ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.", 466 vol->mft_zone_pos); 467 break; 468 case 2: 469 ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.", 470 vol->data1_zone_pos); 471 if (tc >= vol->nr_clusters) 472 vol->data1_zone_pos = 473 vol->mft_zone_end; 474 else if ((bmp_initial_pos >= 475 vol->data1_zone_pos || 476 tc > vol->data1_zone_pos) 477 && tc >= vol->mft_zone_end) 478 vol->data1_zone_pos = tc; 479 ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.", 480 vol->data1_zone_pos); 481 break; 482 case 4: 483 ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.", 484 vol->data2_zone_pos); 485 if (tc >= vol->mft_zone_start) 486 vol->data2_zone_pos = 0; 487 else if (bmp_initial_pos >= 488 vol->data2_zone_pos || 489 tc > vol->data2_zone_pos) 490 vol->data2_zone_pos = tc; 491 ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.", 492 vol->data2_zone_pos); 493 break; 494 default: 495 WARN_ON(1); 496 } 497 ntfs_debug("Finished. 
Going to out."); 498 goto out; 499 } 500 lcn++; 501 } 502 503 if (!used_zone_pos) { 504 used_zone_pos = 1; 505 if (search_zone == 1) 506 zone_start = vol->mft_zone_pos; 507 else if (search_zone == 2) 508 zone_start = vol->data1_zone_pos; 509 else 510 zone_start = vol->data2_zone_pos; 511 512 if (!zone_start || zone_start == vol->mft_zone_start || 513 zone_start == vol->mft_zone_end) 514 pass = 2; 515 bmp_pos = zone_start; 516 } else { 517 next_bmp_pos: 518 bmp_pos += buf_size; 519 } 520 521 ntfs_debug("After inner while loop: buf_size 0x%x, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.", 522 buf_size, lcn, bmp_pos, need_writeback); 523 if (bmp_pos < zone_end) { 524 ntfs_debug("Continuing outer while loop, bmp_pos 0x%llx, zone_end 0x%llx.", 525 bmp_pos, zone_end); 526 continue; 527 } 528 zone_pass_done: /* Finished with the current zone pass. */ 529 ntfs_debug("At zone_pass_done, pass %i.", pass); 530 if (pass == 1) { 531 /* 532 * Now do pass 2, scanning the first part of the zone 533 * we omitted in pass 1. 534 */ 535 pass = 2; 536 zone_end = zone_start; 537 switch (search_zone) { 538 case 1: /* mft_zone */ 539 zone_start = vol->mft_zone_start; 540 break; 541 case 2: /* data1_zone */ 542 zone_start = vol->mft_zone_end; 543 break; 544 case 4: /* data2_zone */ 545 zone_start = 0; 546 break; 547 default: 548 WARN_ON(1); 549 } 550 /* Sanity check. */ 551 if (zone_end < zone_start) 552 zone_end = zone_start; 553 bmp_pos = zone_start; 554 ntfs_debug("Continuing outer while loop, pass 2, zone_start 0x%llx, zone_end 0x%llx, bmp_pos 0x%llx.", 555 zone_start, zone_end, bmp_pos); 556 continue; 557 } /* pass == 2 */ 558 done_zones_check: 559 ntfs_debug("At done_zones_check, search_zone %i, done_zones before 0x%x, done_zones after 0x%x.", 560 search_zone, done_zones, 561 done_zones | search_zone); 562 done_zones |= search_zone; 563 if (done_zones < 7) { 564 ntfs_debug("Switching zone."); 565 /* Now switch to the next zone we haven't done yet. 
*/ 566 pass = 1; 567 switch (search_zone) { 568 case 1: 569 ntfs_debug("Switching from mft zone to data1 zone."); 570 /* Update mft zone position. */ 571 if (rlpos && used_zone_pos) { 572 s64 tc; 573 574 ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.", 575 vol->mft_zone_pos); 576 tc = rl[rlpos - 1].lcn + 577 rl[rlpos - 1].length; 578 if (tc >= vol->mft_zone_end) { 579 vol->mft_zone_pos = 580 vol->mft_lcn; 581 if (!vol->mft_zone_end) 582 vol->mft_zone_pos = 0; 583 } else if ((bmp_initial_pos >= 584 vol->mft_zone_pos || 585 tc > vol->mft_zone_pos) 586 && tc >= vol->mft_lcn) 587 vol->mft_zone_pos = tc; 588 ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.", 589 vol->mft_zone_pos); 590 } 591 /* Switch from mft zone to data1 zone. */ 592 switch_to_data1_zone: search_zone = 2; 593 zone_start = bmp_initial_pos = 594 vol->data1_zone_pos; 595 zone_end = vol->nr_clusters; 596 if (zone_start == vol->mft_zone_end) 597 pass = 2; 598 if (zone_start >= zone_end) { 599 vol->data1_zone_pos = zone_start = 600 vol->mft_zone_end; 601 pass = 2; 602 } 603 break; 604 case 2: 605 ntfs_debug("Switching from data1 zone to data2 zone."); 606 /* Update data1 zone position. */ 607 if (rlpos && used_zone_pos) { 608 s64 tc; 609 610 ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.", 611 vol->data1_zone_pos); 612 tc = rl[rlpos - 1].lcn + 613 rl[rlpos - 1].length; 614 if (tc >= vol->nr_clusters) 615 vol->data1_zone_pos = 616 vol->mft_zone_end; 617 else if ((bmp_initial_pos >= 618 vol->data1_zone_pos || 619 tc > vol->data1_zone_pos) 620 && tc >= vol->mft_zone_end) 621 vol->data1_zone_pos = tc; 622 ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.", 623 vol->data1_zone_pos); 624 } 625 /* Switch from data1 zone to data2 zone. 
*/ 626 search_zone = 4; 627 zone_start = bmp_initial_pos = 628 vol->data2_zone_pos; 629 zone_end = vol->mft_zone_start; 630 if (!zone_start) 631 pass = 2; 632 if (zone_start >= zone_end) { 633 vol->data2_zone_pos = zone_start = 634 bmp_initial_pos = 0; 635 pass = 2; 636 } 637 break; 638 case 4: 639 ntfs_debug("Switching from data2 zone to data1 zone."); 640 /* Update data2 zone position. */ 641 if (rlpos && used_zone_pos) { 642 s64 tc; 643 644 ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.", 645 vol->data2_zone_pos); 646 tc = rl[rlpos - 1].lcn + 647 rl[rlpos - 1].length; 648 if (tc >= vol->mft_zone_start) 649 vol->data2_zone_pos = 0; 650 else if (bmp_initial_pos >= 651 vol->data2_zone_pos || 652 tc > vol->data2_zone_pos) 653 vol->data2_zone_pos = tc; 654 ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.", 655 vol->data2_zone_pos); 656 } 657 /* Switch from data2 zone to data1 zone. */ 658 goto switch_to_data1_zone; 659 default: 660 WARN_ON(1); 661 } 662 ntfs_debug("After zone switch, search_zone %i, pass %i, bmp_initial_pos 0x%llx, zone_start 0x%llx, zone_end 0x%llx.", 663 search_zone, pass, 664 bmp_initial_pos, 665 zone_start, 666 zone_end); 667 bmp_pos = zone_start; 668 if (zone_start == zone_end) { 669 ntfs_debug("Empty zone, going to done_zones_check."); 670 /* Empty zone. Don't bother searching it. */ 671 goto done_zones_check; 672 } 673 ntfs_debug("Continuing outer while loop."); 674 continue; 675 } /* done_zones == 7 */ 676 ntfs_debug("All zones are finished."); 677 /* 678 * All zones are finished! If DATA_ZONE, shrink mft zone. If 679 * MFT_ZONE, we have really run out of space. 
680 */ 681 mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; 682 ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, mft_zone_size 0x%llx.", 683 vol->mft_zone_start, vol->mft_zone_end, 684 mft_zone_size); 685 if (zone == MFT_ZONE || mft_zone_size <= 0) { 686 ntfs_debug("No free clusters left, going to out."); 687 /* Really no more space left on device. */ 688 err = -ENOSPC; 689 goto out; 690 } /* zone == DATA_ZONE && mft_zone_size > 0 */ 691 ntfs_debug("Shrinking mft zone."); 692 zone_end = vol->mft_zone_end; 693 mft_zone_size >>= 1; 694 if (mft_zone_size > 0) 695 vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; 696 else /* mft zone and data2 zone no longer exist. */ 697 vol->data2_zone_pos = vol->mft_zone_start = 698 vol->mft_zone_end = 0; 699 if (vol->mft_zone_pos >= vol->mft_zone_end) { 700 vol->mft_zone_pos = vol->mft_lcn; 701 if (!vol->mft_zone_end) 702 vol->mft_zone_pos = 0; 703 } 704 bmp_pos = zone_start = bmp_initial_pos = 705 vol->data1_zone_pos = vol->mft_zone_end; 706 search_zone = 2; 707 pass = 2; 708 done_zones &= ~2; 709 ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, vol->mft_zone_pos 0x%llx, search_zone 2, pass 2, dones_zones 0x%x, zone_start 0x%llx, zone_end 0x%llx, vol->data1_zone_pos 0x%llx, continuing outer while loop.", 710 mft_zone_size, vol->mft_zone_start, 711 vol->mft_zone_end, vol->mft_zone_pos, 712 done_zones, zone_start, zone_end, 713 vol->data1_zone_pos); 714 } 715 ntfs_debug("After outer while loop."); 716 out: 717 ntfs_debug("At out."); 718 /* Add runlist terminator element. */ 719 if (likely(rl)) { 720 rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; 721 rl[rlpos].lcn = is_extension ? 
LCN_ENOENT : LCN_RL_NOT_MAPPED; 722 rl[rlpos].length = 0; 723 } 724 if (!IS_ERR_OR_NULL(folio)) { 725 if (need_writeback) { 726 ntfs_debug("Marking page dirty."); 727 folio_mark_dirty(folio); 728 need_writeback = 0; 729 } 730 folio_unlock(folio); 731 kunmap_local(buf); 732 folio_put(folio); 733 } 734 if (likely(!err)) { 735 if (!rl) { 736 err = -EIO; 737 goto out_restore; 738 } 739 if (is_dealloc == true) 740 ntfs_release_dirty_clusters(vol, rl->length); 741 ntfs_debug("Done."); 742 goto out_restore; 743 } 744 if (err != -ENOSPC) 745 ntfs_error(vol->sb, 746 "Failed to allocate clusters, aborting (error %i).", 747 err); 748 if (rl) { 749 int err2; 750 751 if (err == -ENOSPC) 752 ntfs_debug("Not enough space to complete allocation, err -ENOSPC, first free lcn 0x%llx, could allocate up to 0x%llx clusters.", 753 rl[0].lcn, count - clusters); 754 /* Deallocate all allocated clusters. */ 755 ntfs_debug("Attempting rollback..."); 756 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); 757 if (err2) { 758 ntfs_error(vol->sb, 759 "Failed to rollback (error %i). Leaving inconsistent metadata! Unmount and run chkdsk.", 760 err2); 761 NVolSetErrors(vol); 762 } 763 /* Free the runlist. */ 764 kvfree(rl); 765 } else if (err == -ENOSPC) 766 ntfs_debug("No space left at all, err = -ENOSPC, first free lcn = 0x%llx.", 767 vol->data1_zone_pos); 768 atomic64_set(&vol->dirty_clusters, 0); 769 770 out_restore: 771 up_write(&vol->lcnbmp_lock); 772 memalloc_nofs_restore(memalloc_flags); 773 774 return err < 0 ? 
ERR_PTR(err) : rl; 775 } 776 777 /* 778 * __ntfs_cluster_free - free clusters on an ntfs volume 779 * @ni: ntfs inode whose runlist describes the clusters to free 780 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters 781 * @count: number of clusters to free or -1 for all clusters 782 * @ctx: active attribute search context if present or NULL if not 783 * @is_rollback: true if this is a rollback operation 784 * 785 * Free @count clusters starting at the cluster @start_vcn in the runlist 786 * described by the vfs inode @ni. 787 * 788 * If @count is -1, all clusters from @start_vcn to the end of the runlist are 789 * deallocated. Thus, to completely free all clusters in a runlist, use 790 * @start_vcn = 0 and @count = -1. 791 * 792 * If @ctx is specified, it is an active search context of @ni and its base mft 793 * record. This is needed when __ntfs_cluster_free() encounters unmapped 794 * runlist fragments and allows their mapping. If you do not have the mft 795 * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will 796 * perform the necessary mapping and unmapping. 797 * 798 * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it 799 * before returning. Thus, @ctx will be left pointing to the same attribute on 800 * return as on entry. However, the actual pointers in @ctx may point to 801 * different memory locations on return, so you must remember to reset any 802 * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(), 803 * you will probably want to do: 804 * m = ctx->mrec; 805 * a = ctx->attr; 806 * Assuming you cache ctx->attr in a variable @a of type attr_record * and that 807 * you cache ctx->mrec in a variable @m of type struct mft_record *. 808 * 809 * @is_rollback should always be 'false', it is for internal use to rollback 810 * errors. You probably want to use ntfs_cluster_free() instead. 
811 * 812 * Note, __ntfs_cluster_free() does not modify the runlist, so you have to 813 * remove from the runlist or mark sparse the freed runs later. 814 * 815 * Return the number of deallocated clusters (not counting sparse ones) on 816 * success and -errno on error. 817 * 818 * WARNING: If @ctx is supplied, regardless of whether success or failure is 819 * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx 820 * is no longer valid, i.e. you need to either call 821 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it. 822 * In that case PTR_ERR(@ctx->mrec) will give you the error code for 823 * why the mapping of the old inode failed. 824 * 825 * Locking: - The runlist described by @ni must be locked for writing on entry 826 * and is locked on return. Note the runlist may be modified when 827 * needed runlist fragments need to be mapped. 828 * - The volume lcn bitmap must be unlocked on entry and is unlocked 829 * on return. 830 * - This function takes the volume lcn bitmap lock for writing and 831 * modifies the bitmap contents. 832 * - If @ctx is NULL, the base mft record of @ni must not be mapped on 833 * entry and it will be left unmapped on return. 834 * - If @ctx is not NULL, the base mft record must be mapped on entry 835 * and it will be left mapped on return. 836 */ 837 s64 __ntfs_cluster_free(struct ntfs_inode *ni, const s64 start_vcn, s64 count, 838 struct ntfs_attr_search_ctx *ctx, const bool is_rollback) 839 { 840 s64 delta, to_free, total_freed, real_freed; 841 struct ntfs_volume *vol; 842 struct inode *lcnbmp_vi; 843 struct runlist_element *rl; 844 int err; 845 unsigned int memalloc_flags; 846 847 ntfs_debug("Entering for i_ino 0x%llx, start_vcn 0x%llx, count 0x%llx.%s", 848 ni->mft_no, start_vcn, count, 849 is_rollback ? 
" (rollback)" : ""); 850 vol = ni->vol; 851 lcnbmp_vi = vol->lcnbmp_ino; 852 if (start_vcn < 0 || count < -1) 853 return -EINVAL; 854 855 if (!NVolFreeClusterKnown(vol)) 856 wait_event(vol->free_waitq, NVolFreeClusterKnown(vol)); 857 858 /* 859 * Lock the lcn bitmap for writing but only if not rolling back. We 860 * must hold the lock all the way including through rollback otherwise 861 * rollback is not possible because once we have cleared a bit and 862 * dropped the lock, anyone could have set the bit again, thus 863 * allocating the cluster for another use. 864 */ 865 if (likely(!is_rollback)) { 866 memalloc_flags = memalloc_nofs_save(); 867 down_write(&vol->lcnbmp_lock); 868 } 869 870 total_freed = real_freed = 0; 871 872 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx); 873 if (IS_ERR(rl)) { 874 err = PTR_ERR(rl); 875 if (err == -ENOENT) { 876 if (likely(!is_rollback)) { 877 up_write(&vol->lcnbmp_lock); 878 memalloc_nofs_restore(memalloc_flags); 879 } 880 return 0; 881 } 882 883 if (!is_rollback) 884 ntfs_error(vol->sb, 885 "Failed to find first runlist element (error %d), aborting.", 886 err); 887 goto err_out; 888 } 889 if (unlikely(rl->lcn < LCN_HOLE)) { 890 if (!is_rollback) 891 ntfs_error(vol->sb, "First runlist element has invalid lcn, aborting."); 892 err = -EIO; 893 goto err_out; 894 } 895 /* Find the starting cluster inside the run that needs freeing. */ 896 delta = start_vcn - rl->vcn; 897 898 /* The number of clusters in this run that need freeing. */ 899 to_free = rl->length - delta; 900 if (count >= 0 && to_free > count) 901 to_free = count; 902 903 if (likely(rl->lcn >= 0)) { 904 /* Do the actual freeing of the clusters in this run. */ 905 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta, 906 to_free, likely(!is_rollback) ? 
0 : 1); 907 if (unlikely(err)) { 908 if (!is_rollback) 909 ntfs_error(vol->sb, 910 "Failed to clear first run (error %i), aborting.", 911 err); 912 goto err_out; 913 } 914 /* We have freed @to_free real clusters. */ 915 real_freed = to_free; 916 } 917 /* Go to the next run and adjust the number of clusters left to free. */ 918 ++rl; 919 if (count >= 0) 920 count -= to_free; 921 922 /* Keep track of the total "freed" clusters, including sparse ones. */ 923 total_freed = to_free; 924 /* 925 * Loop over the remaining runs, using @count as a capping value, and 926 * free them. 927 */ 928 for (; rl->length && count != 0; ++rl) { 929 if (unlikely(rl->lcn < LCN_HOLE)) { 930 s64 vcn; 931 932 /* Attempt to map runlist. */ 933 vcn = rl->vcn; 934 rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx); 935 if (IS_ERR(rl)) { 936 err = PTR_ERR(rl); 937 if (!is_rollback) 938 ntfs_error(vol->sb, 939 "Failed to map runlist fragment or failed to find subsequent runlist element."); 940 goto err_out; 941 } 942 if (unlikely(rl->lcn < LCN_HOLE)) { 943 if (!is_rollback) 944 ntfs_error(vol->sb, 945 "Runlist element has invalid lcn (0x%llx).", 946 rl->lcn); 947 err = -EIO; 948 goto err_out; 949 } 950 } 951 /* The number of clusters in this run that need freeing. */ 952 to_free = rl->length; 953 if (count >= 0 && to_free > count) 954 to_free = count; 955 956 if (likely(rl->lcn >= 0)) { 957 /* Do the actual freeing of the clusters in the run. */ 958 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn, 959 to_free, likely(!is_rollback) ? 0 : 1); 960 if (unlikely(err)) { 961 if (!is_rollback) 962 ntfs_error(vol->sb, "Failed to clear subsequent run."); 963 goto err_out; 964 } 965 /* We have freed @to_free real clusters. */ 966 real_freed += to_free; 967 } 968 /* Adjust the number of clusters left to free. */ 969 if (count >= 0) 970 count -= to_free; 971 972 /* Update the total done clusters. 
*/ 973 total_freed += to_free; 974 } 975 ntfs_inc_free_clusters(vol, real_freed); 976 if (likely(!is_rollback)) { 977 up_write(&vol->lcnbmp_lock); 978 memalloc_nofs_restore(memalloc_flags); 979 } 980 981 WARN_ON(count > 0); 982 983 if (NVolDiscard(vol) && !is_rollback) { 984 s64 total_discarded = 0, rl_off; 985 u32 gran = bdev_discard_granularity(vol->sb->s_bdev); 986 987 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx); 988 if (IS_ERR(rl)) 989 return real_freed; 990 rl_off = start_vcn - rl->vcn; 991 while (rl->length && total_discarded < total_freed) { 992 s64 to_discard = rl->length - rl_off; 993 994 if (to_discard + total_discarded > total_freed) 995 to_discard = total_freed - total_discarded; 996 if (rl->lcn >= 0) { 997 sector_t start_sector, end_sector; 998 int ret; 999 1000 start_sector = ALIGN(NTFS_CLU_TO_B(vol, rl->lcn + rl_off), 1001 gran) >> SECTOR_SHIFT; 1002 end_sector = ALIGN_DOWN(NTFS_CLU_TO_B(vol, 1003 rl->lcn + rl_off + to_discard), 1004 gran) >> SECTOR_SHIFT; 1005 if (start_sector < end_sector) { 1006 ret = blkdev_issue_discard(vol->sb->s_bdev, start_sector, 1007 end_sector - start_sector, 1008 GFP_NOFS); 1009 if (ret) 1010 break; 1011 } 1012 } 1013 1014 total_discarded += to_discard; 1015 ++rl; 1016 rl_off = 0; 1017 } 1018 } 1019 1020 /* We are done. Return the number of actually freed clusters. */ 1021 ntfs_debug("Done."); 1022 return real_freed; 1023 err_out: 1024 if (is_rollback) 1025 return err; 1026 /* If no real clusters were freed, no need to rollback. */ 1027 if (!real_freed) { 1028 up_write(&vol->lcnbmp_lock); 1029 memalloc_nofs_restore(memalloc_flags); 1030 return err; 1031 } 1032 /* 1033 * Attempt to rollback and if that succeeds just return the error code. 1034 * If rollback fails, set the volume errors flag, emit an error 1035 * message, and return the error code. 1036 */ 1037 delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true); 1038 if (delta < 0) { 1039 ntfs_error(vol->sb, 1040 "Failed to rollback (error %i). 
Leaving inconsistent metadata! Unmount and run chkdsk.", 1041 (int)delta); 1042 NVolSetErrors(vol); 1043 } 1044 ntfs_dec_free_clusters(vol, delta); 1045 up_write(&vol->lcnbmp_lock); 1046 memalloc_nofs_restore(memalloc_flags); 1047 ntfs_error(vol->sb, "Aborting (error %i).", err); 1048 return err; 1049 } 1050