1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NTFS kernel mft record operations. 4 * Part of this file is based on code from the NTFS-3G. 5 * 6 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. 7 * Copyright (c) 2002 Richard Russon 8 * Copyright (c) 2025 LG Electronics Co., Ltd. 9 */ 10 11 #include <linux/writeback.h> 12 #include <linux/bio.h> 13 #include <linux/iomap.h> 14 15 #include "bitmap.h" 16 #include "lcnalloc.h" 17 #include "mft.h" 18 #include "ntfs.h" 19 20 /* 21 * ntfs_mft_record_check - Check the consistency of an MFT record 22 * 23 * Make sure its general fields are safe, then examine all its 24 * attributes and apply generic checks to them. 25 * 26 * Returns 0 if the checks are successful. If not, return -EIO. 27 */ 28 int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m, 29 u64 mft_no) 30 { 31 struct attr_record *a; 32 struct super_block *sb = vol->sb; 33 34 if (!ntfs_is_file_record(m->magic)) { 35 ntfs_error(sb, "Record %llu has no FILE magic (0x%x)\n", 36 mft_no, le32_to_cpu(*(__le32 *)m)); 37 goto err_out; 38 } 39 40 if (le16_to_cpu(m->usa_ofs) & 0x1 || 41 (vol->mft_record_size >> NTFS_BLOCK_SIZE_BITS) + 1 != le16_to_cpu(m->usa_count) || 42 le16_to_cpu(m->usa_ofs) + le16_to_cpu(m->usa_count) * 2 > vol->mft_record_size) { 43 ntfs_error(sb, "Record %llu has corrupt fix-up values fields\n", 44 mft_no); 45 goto err_out; 46 } 47 48 if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) { 49 ntfs_error(sb, "Record %llu has corrupt allocation size (%u <> %u)\n", 50 mft_no, vol->mft_record_size, 51 le32_to_cpu(m->bytes_allocated)); 52 goto err_out; 53 } 54 55 if (le32_to_cpu(m->bytes_in_use) > vol->mft_record_size) { 56 ntfs_error(sb, "Record %llu has corrupt in-use size (%u > %u)\n", 57 mft_no, le32_to_cpu(m->bytes_in_use), 58 vol->mft_record_size); 59 goto err_out; 60 } 61 62 if (le16_to_cpu(m->attrs_offset) & 7) { 63 ntfs_error(sb, "Attributes badly aligned in record %llu\n", 64 mft_no); 65 goto 
err_out; 66 } 67 68 a = (struct attr_record *)((char *)m + le16_to_cpu(m->attrs_offset)); 69 if ((char *)a < (char *)m || (char *)a > (char *)m + vol->mft_record_size) { 70 ntfs_error(sb, "Record %llu is corrupt\n", mft_no); 71 goto err_out; 72 } 73 74 return 0; 75 76 err_out: 77 return -EIO; 78 } 79 80 /* 81 * map_mft_record_folio - map the folio in which a specific mft record resides 82 * @ni: ntfs inode whose mft record page to map 83 * 84 * This maps the folio in which the mft record of the ntfs inode @ni is 85 * situated. 86 * 87 * This allocates a new buffer (@ni->mrec), copies the MFT record data from 88 * the mapped folio into this buffer, and applies the MST (Multi Sector 89 * Transfer) fixups on the copy. 90 * 91 * The folio is pinned (referenced) in @ni->folio to ensure the data remains 92 * valid in the page cache, but the returned pointer is the allocated copy. 93 * 94 * Return: A pointer to the allocated and fixed-up mft record (@ni->mrec). 95 * The return value needs to be checked with IS_ERR(). If it is true, 96 * PTR_ERR() contains the negative error code. 97 */ 98 static inline struct mft_record *map_mft_record_folio(struct ntfs_inode *ni) 99 { 100 loff_t i_size; 101 struct ntfs_volume *vol = ni->vol; 102 struct inode *mft_vi = vol->mft_ino; 103 struct folio *folio; 104 unsigned long index, end_index; 105 unsigned int ofs; 106 107 WARN_ON(ni->folio); 108 /* 109 * The index into the page cache and the offset within the page cache 110 * page of the wanted mft record. 111 */ 112 index = NTFS_MFT_NR_TO_PIDX(vol, ni->mft_no); 113 ofs = NTFS_MFT_NR_TO_POFS(vol, ni->mft_no); 114 115 i_size = i_size_read(mft_vi); 116 /* The maximum valid index into the page cache for $MFT's data. */ 117 end_index = i_size >> PAGE_SHIFT; 118 119 /* If the wanted index is out of bounds the mft record doesn't exist. 
*/ 120 if (unlikely(index >= end_index)) { 121 if (index > end_index || (i_size & ~PAGE_MASK) < ofs + 122 vol->mft_record_size) { 123 folio = ERR_PTR(-ENOENT); 124 ntfs_error(vol->sb, 125 "Attempt to read mft record 0x%llx, which is beyond the end of the mft. This is probably a bug in the ntfs driver.", 126 ni->mft_no); 127 goto err_out; 128 } 129 } 130 131 /* Read, map, and pin the folio. */ 132 folio = read_mapping_folio(mft_vi->i_mapping, index, NULL); 133 if (!IS_ERR(folio)) { 134 u8 *addr; 135 136 ni->mrec = kmalloc(vol->mft_record_size, GFP_NOFS); 137 if (!ni->mrec) { 138 folio_put(folio); 139 folio = ERR_PTR(-ENOMEM); 140 goto err_out; 141 } 142 143 addr = kmap_local_folio(folio, 0); 144 memcpy(ni->mrec, addr + ofs, vol->mft_record_size); 145 post_read_mst_fixup((struct ntfs_record *)ni->mrec, vol->mft_record_size); 146 147 /* Catch multi sector transfer fixup errors. */ 148 if (!ntfs_mft_record_check(vol, (struct mft_record *)ni->mrec, ni->mft_no)) { 149 kunmap_local(addr); 150 ni->folio = folio; 151 ni->folio_ofs = ofs; 152 return ni->mrec; 153 } 154 kunmap_local(addr); 155 folio_put(folio); 156 kfree(ni->mrec); 157 ni->mrec = NULL; 158 folio = ERR_PTR(-EIO); 159 NVolSetErrors(vol); 160 } 161 err_out: 162 ni->folio = NULL; 163 ni->folio_ofs = 0; 164 return (struct mft_record *)folio; 165 } 166 167 /* 168 * map_mft_record - map and pin an mft record 169 * @ni: ntfs inode whose MFT record to map 170 * 171 * This function ensures the MFT record for the given inode is mapped and 172 * accessible. 173 * 174 * It increments the reference count of the ntfs inode. If the record is 175 * already mapped (@ni->folio is set), it returns the cached record 176 * immediately. 177 * 178 * Otherwise, it calls map_mft_record_folio() to read the folio from disk 179 * (if necessary via read_mapping_folio), allocate a buffer, and copy the 180 * record data. 181 * 182 * Return: A pointer to the mft record. You need to check the returned 183 * pointer with IS_ERR(). 
184 */ 185 struct mft_record *map_mft_record(struct ntfs_inode *ni) 186 { 187 struct mft_record *m; 188 189 if (!ni) 190 return ERR_PTR(-EINVAL); 191 192 ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no); 193 194 /* Make sure the ntfs inode doesn't go away. */ 195 atomic_inc(&ni->count); 196 197 if (ni->folio) 198 return (struct mft_record *)ni->mrec; 199 200 m = map_mft_record_folio(ni); 201 if (!IS_ERR(m)) 202 return m; 203 204 atomic_dec(&ni->count); 205 ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); 206 return m; 207 } 208 209 /* 210 * unmap_mft_record - release a reference to a mapped mft record 211 * @ni: ntfs inode whose MFT record to unmap 212 * 213 * This decrements the reference count of the ntfs inode. 214 * 215 * It releases the caller's hold on the inode. If the reference count indicates 216 * that there are still other users (count > 1), the function returns 217 * immediately, keeping the resources (folio and mrec buffer) pinned for 218 * those users. 219 * 220 * NOTE: If caller has modified the mft record, it is imperative to set the mft 221 * record dirty BEFORE calling unmap_mft_record(). 222 */ 223 void unmap_mft_record(struct ntfs_inode *ni) 224 { 225 struct folio *folio; 226 227 if (!ni) 228 return; 229 230 ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no); 231 232 folio = ni->folio; 233 if (atomic_dec_return(&ni->count) > 1) 234 return; 235 WARN_ON(!folio); 236 } 237 238 /* 239 * map_extent_mft_record - load an extent inode and attach it to its base 240 * @base_ni: base ntfs inode 241 * @mref: mft reference of the extent inode to load 242 * @ntfs_ino: on successful return, pointer to the struct ntfs_inode structure 243 * 244 * Load the extent mft record @mref and attach it to its base inode @base_ni. 245 * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise 246 * PTR_ERR(result) gives the negative error code. 
247 * 248 * On successful return, @ntfs_ino contains a pointer to the ntfs_inode 249 * structure of the mapped extent inode. 250 */ 251 struct mft_record *map_extent_mft_record(struct ntfs_inode *base_ni, u64 mref, 252 struct ntfs_inode **ntfs_ino) 253 { 254 struct mft_record *m; 255 struct ntfs_inode *ni = NULL; 256 struct ntfs_inode **extent_nis = NULL; 257 int i; 258 u64 mft_no = MREF(mref); 259 u16 seq_no = MSEQNO(mref); 260 bool destroy_ni = false; 261 262 ntfs_debug("Mapping extent mft record 0x%llx (base mft record 0x%llx).", 263 mft_no, base_ni->mft_no); 264 /* Make sure the base ntfs inode doesn't go away. */ 265 atomic_inc(&base_ni->count); 266 /* 267 * Check if this extent inode has already been added to the base inode, 268 * in which case just return it. If not found, add it to the base 269 * inode before returning it. 270 */ 271 retry: 272 mutex_lock(&base_ni->extent_lock); 273 if (base_ni->nr_extents > 0) { 274 extent_nis = base_ni->ext.extent_ntfs_inos; 275 for (i = 0; i < base_ni->nr_extents; i++) { 276 if (mft_no != extent_nis[i]->mft_no) 277 continue; 278 ni = extent_nis[i]; 279 /* Make sure the ntfs inode doesn't go away. */ 280 atomic_inc(&ni->count); 281 break; 282 } 283 } 284 if (likely(ni != NULL)) { 285 mutex_unlock(&base_ni->extent_lock); 286 atomic_dec(&base_ni->count); 287 /* We found the record; just have to map and return it. */ 288 m = map_mft_record(ni); 289 /* map_mft_record() has incremented this on success. */ 290 atomic_dec(&ni->count); 291 if (!IS_ERR(m)) { 292 /* Verify the sequence number. */ 293 if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { 294 ntfs_debug("Done 1."); 295 *ntfs_ino = ni; 296 return m; 297 } 298 unmap_mft_record(ni); 299 ntfs_error(base_ni->vol->sb, 300 "Found stale extent mft reference! Corrupt filesystem. 
Run chkdsk."); 301 return ERR_PTR(-EIO); 302 } 303 map_err_out: 304 ntfs_error(base_ni->vol->sb, 305 "Failed to map extent mft record, error code %ld.", 306 -PTR_ERR(m)); 307 return m; 308 } 309 mutex_unlock(&base_ni->extent_lock); 310 311 /* Record wasn't there. Get a new ntfs inode and initialize it. */ 312 ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); 313 if (unlikely(!ni)) { 314 atomic_dec(&base_ni->count); 315 return ERR_PTR(-ENOMEM); 316 } 317 ni->vol = base_ni->vol; 318 ni->seq_no = seq_no; 319 ni->nr_extents = -1; 320 ni->ext.base_ntfs_ino = base_ni; 321 /* Now map the record. */ 322 m = map_mft_record(ni); 323 if (IS_ERR(m)) { 324 atomic_dec(&base_ni->count); 325 ntfs_clear_extent_inode(ni); 326 goto map_err_out; 327 } 328 /* Verify the sequence number if it is present. */ 329 if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 330 ntfs_error(base_ni->vol->sb, 331 "Found stale extent mft reference! Corrupt filesystem. Run chkdsk."); 332 destroy_ni = true; 333 m = ERR_PTR(-EIO); 334 goto unm_nolock_err_out; 335 } 336 337 mutex_lock(&base_ni->extent_lock); 338 for (i = 0; i < base_ni->nr_extents; i++) { 339 if (mft_no == extent_nis[i]->mft_no) { 340 mutex_unlock(&base_ni->extent_lock); 341 ntfs_clear_extent_inode(ni); 342 goto retry; 343 } 344 } 345 /* Attach extent inode to base inode, reallocating memory if needed. 
*/ 346 if (!(base_ni->nr_extents & 3)) { 347 struct ntfs_inode **tmp; 348 int new_size = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *); 349 350 tmp = kvzalloc(new_size, GFP_NOFS); 351 if (unlikely(!tmp)) { 352 ntfs_error(base_ni->vol->sb, "Failed to allocate internal buffer."); 353 destroy_ni = true; 354 m = ERR_PTR(-ENOMEM); 355 goto unm_err_out; 356 } 357 if (base_ni->nr_extents) { 358 WARN_ON(!base_ni->ext.extent_ntfs_inos); 359 memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - 360 4 * sizeof(struct ntfs_inode *)); 361 kvfree(base_ni->ext.extent_ntfs_inos); 362 } 363 base_ni->ext.extent_ntfs_inos = tmp; 364 } 365 base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; 366 mutex_unlock(&base_ni->extent_lock); 367 atomic_dec(&base_ni->count); 368 ntfs_debug("Done 2."); 369 *ntfs_ino = ni; 370 return m; 371 unm_err_out: 372 mutex_unlock(&base_ni->extent_lock); 373 unm_nolock_err_out: 374 unmap_mft_record(ni); 375 atomic_dec(&base_ni->count); 376 /* 377 * If the extent inode was not attached to the base inode we need to 378 * release it or we will leak memory. 379 */ 380 if (destroy_ni) 381 ntfs_clear_extent_inode(ni); 382 return m; 383 } 384 385 /* 386 * __mark_mft_record_dirty - mark the base vfs inode dirty 387 * @ni: ntfs inode describing the mapped mft record 388 * 389 * Internal function. Users should call mark_mft_record_dirty() instead. 390 * 391 * This function determines the base ntfs inode (in case @ni is an extent 392 * inode) and marks the corresponding VFS inode dirty. 393 * 394 * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) 395 * on the base vfs inode, because even though file data may have been modified, 396 * it is dirty in the inode meta data rather than the data page cache of the 397 * inode, and thus there are no data pages that need writing out. Therefore, a 398 * full mark_inode_dirty() is overkill. 
A mark_inode_dirty_sync(), on the 399 * other hand, is not sufficient, because ->write_inode needs to be called even 400 * in case of fdatasync. This needs to happen or the file data would not 401 * necessarily hit the device synchronously, even though the vfs inode has the 402 * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just 403 * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, 404 * which is not what I_DIRTY_SYNC on its own would suggest. 405 */ 406 void __mark_mft_record_dirty(struct ntfs_inode *ni) 407 { 408 struct ntfs_inode *base_ni; 409 410 ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 411 WARN_ON(NInoAttr(ni)); 412 /* Determine the base vfs inode and mark it dirty, too. */ 413 if (likely(ni->nr_extents >= 0)) 414 base_ni = ni; 415 else 416 base_ni = ni->ext.base_ntfs_ino; 417 __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC); 418 } 419 420 /* 421 * ntfs_bio_end_io - bio completion callback for MFT record writes 422 * 423 * Decrements the folio reference count that was incremented before 424 * submit_bio(). This prevents a race condition where umount could 425 * evict the inode and release the folio while I/O is still in flight, 426 * potentially causing data corruption or use-after-free. 427 */ 428 static void ntfs_bio_end_io(struct bio *bio) 429 { 430 if (bio->bi_private) 431 folio_put((struct folio *)bio->bi_private); 432 bio_put(bio); 433 } 434 435 /* 436 * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror 437 * @vol: ntfs volume on which the mft record to synchronize resides 438 * @mft_no: mft record number of mft record to synchronize 439 * @m: mapped, mst protected (extent) mft record to synchronize 440 * 441 * Write the mapped, mst protected (extent) mft record @m with mft record 442 * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol. 443 * 444 * On success return 0. On error return -errno and set the volume errors flag 445 * in the ntfs volume @vol. 
446 * 447 * NOTE: We always perform synchronous i/o. 448 */ 449 int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no, 450 struct mft_record *m) 451 { 452 u8 *kmirr = NULL; 453 struct folio *folio; 454 unsigned int folio_ofs, lcn_folio_off = 0; 455 int err = 0; 456 struct bio *bio; 457 458 ntfs_debug("Entering for inode 0x%llx.", mft_no); 459 460 if (unlikely(!vol->mftmirr_ino)) { 461 /* This could happen during umount... */ 462 err = -EIO; 463 goto err_out; 464 } 465 /* Get the page containing the mirror copy of the mft record @m. */ 466 folio = read_mapping_folio(vol->mftmirr_ino->i_mapping, 467 NTFS_MFT_NR_TO_PIDX(vol, mft_no), NULL); 468 if (IS_ERR(folio)) { 469 ntfs_error(vol->sb, "Failed to map mft mirror page."); 470 err = PTR_ERR(folio); 471 goto err_out; 472 } 473 474 folio_lock(folio); 475 folio_clear_uptodate(folio); 476 /* Offset of the mft mirror record inside the page. */ 477 folio_ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no); 478 /* The address in the page of the mirror copy of the mft record @m. */ 479 kmirr = kmap_local_folio(folio, 0) + folio_ofs; 480 /* Copy the mst protected mft record to the mirror. */ 481 memcpy(kmirr, m, vol->mft_record_size); 482 483 if (vol->cluster_size_bits > PAGE_SHIFT) { 484 lcn_folio_off = folio->index << PAGE_SHIFT; 485 lcn_folio_off &= vol->cluster_size_mask; 486 } 487 488 bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO); 489 bio->bi_iter.bi_sector = 490 NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, vol->mftmirr_lcn) + 491 lcn_folio_off + folio_ofs); 492 493 if (!bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) { 494 err = -EIO; 495 bio_put(bio); 496 goto unlock_folio; 497 } 498 499 bio->bi_end_io = ntfs_bio_end_io; 500 submit_bio(bio); 501 /* Current state: all buffers are clean, unlocked, and uptodate. 
*/ 502 folio_mark_uptodate(folio); 503 504 unlock_folio: 505 folio_unlock(folio); 506 kunmap_local(kmirr); 507 folio_put(folio); 508 if (likely(!err)) { 509 ntfs_debug("Done."); 510 } else { 511 ntfs_error(vol->sb, "I/O error while writing mft mirror record 0x%llx!", mft_no); 512 err_out: 513 ntfs_error(vol->sb, 514 "Failed to synchronize $MFTMirr (error code %i). Volume will be left marked dirty on umount. Run chkdsk on the partition after umounting to correct this.", 515 err); 516 NVolSetErrors(vol); 517 } 518 return err; 519 } 520 521 /* 522 * write_mft_record_nolock - write out a mapped (extent) mft record 523 * @ni: ntfs inode describing the mapped (extent) mft record 524 * @m: mapped (extent) mft record to write 525 * @sync: if true, wait for i/o completion 526 * 527 * Write the mapped (extent) mft record @m described by the (regular or extent) 528 * ntfs inode @ni to backing store. If the mft record @m has a counterpart in 529 * the mft mirror, that is also updated. 530 * 531 * We only write the mft record if the ntfs inode @ni is dirty. 532 * 533 * On success, clean the mft record and return 0. 534 * On error (specifically ENOMEM), we redirty the record so it can be retried. 535 * For other errors, we mark the volume with errors. 536 */ 537 int write_mft_record_nolock(struct ntfs_inode *ni, struct mft_record *m, int sync) 538 { 539 struct ntfs_volume *vol = ni->vol; 540 struct folio *folio = ni->folio; 541 int err = 0, i = 0; 542 u8 *kaddr; 543 struct mft_record *fixup_m; 544 struct bio *bio; 545 unsigned int offset = 0, folio_size; 546 547 ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 548 549 WARN_ON(NInoAttr(ni)); 550 WARN_ON(!folio_test_locked(folio)); 551 552 /* 553 * If the struct ntfs_inode is clean no need to do anything. If it is dirty, 554 * mark it as clean now so that it can be redirtied later on if needed. 555 * There is no danger of races since the caller is holding the locks 556 * for the mft record @m and the page it is in. 
557 */ 558 if (!NInoTestClearDirty(ni)) 559 goto done; 560 561 kaddr = kmap_local_folio(folio, 0); 562 fixup_m = (struct mft_record *)(kaddr + ni->folio_ofs); 563 memcpy(fixup_m, m, vol->mft_record_size); 564 565 /* Apply the mst protection fixups. */ 566 err = pre_write_mst_fixup((struct ntfs_record *)fixup_m, vol->mft_record_size); 567 if (err) { 568 ntfs_error(vol->sb, "Failed to apply mst fixups!"); 569 goto err_out; 570 } 571 572 folio_size = vol->mft_record_size / ni->mft_lcn_count; 573 while (i < ni->mft_lcn_count) { 574 unsigned int clu_off; 575 576 clu_off = (unsigned int)((s64)ni->mft_no * vol->mft_record_size + offset) & 577 vol->cluster_size_mask; 578 579 bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO); 580 bio->bi_iter.bi_sector = 581 NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, ni->mft_lcn[i]) + 582 clu_off); 583 584 if (!bio_add_folio(bio, folio, folio_size, 585 ni->folio_ofs + offset)) { 586 err = -EIO; 587 goto put_bio_out; 588 } 589 590 /* Synchronize the mft mirror now if not @sync. */ 591 if (!sync && ni->mft_no < vol->mftmirr_size) 592 ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); 593 594 folio_get(folio); 595 bio->bi_private = folio; 596 bio->bi_end_io = ntfs_bio_end_io; 597 submit_bio(bio); 598 offset += vol->cluster_size; 599 i++; 600 } 601 602 /* If @sync, now synchronize the mft mirror. */ 603 if (sync && ni->mft_no < vol->mftmirr_size) 604 ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); 605 kunmap_local(kaddr); 606 if (unlikely(err)) { 607 /* I/O error during writing. This is really bad! */ 608 ntfs_error(vol->sb, 609 "I/O error while writing mft record 0x%llx! Marking base inode as bad. You should unmount the volume and run chkdsk.", 610 ni->mft_no); 611 goto err_out; 612 } 613 done: 614 ntfs_debug("Done."); 615 return 0; 616 put_bio_out: 617 bio_put(bio); 618 err_out: 619 /* 620 * Current state: all buffers are clean, unlocked, and uptodate. 621 * The caller should mark the base inode as bad so that no more i/o 622 * happens. 
->drop_inode() will still be invoked so all extent inodes 623 * and other allocated memory will be freed. 624 */ 625 if (err == -ENOMEM) { 626 ntfs_error(vol->sb, 627 "Not enough memory to write mft record. Redirtying so the write is retried later."); 628 mark_mft_record_dirty(ni); 629 err = 0; 630 } else 631 NVolSetErrors(vol); 632 return err; 633 } 634 635 static int ntfs_test_inode_wb(struct inode *vi, u64 ino, void *data) 636 { 637 struct ntfs_attr *na = data; 638 639 if (!ntfs_test_inode(vi, na)) 640 return 0; 641 642 /* 643 * Without this, ntfs_write_mst_block() could call iput_final() 644 * , and ntfs_evict_big_inode() could try to unlink this inode 645 * and the contex could be blocked infinitly in map_mft_record(). 646 */ 647 if (NInoBeingDeleted(NTFS_I(vi))) { 648 na->state = NI_BeingDeleted; 649 return -1; 650 } 651 652 /* 653 * This condition can prevent ntfs_write_mst_block() 654 * from applying/undo fixups while ntfs_create() being 655 * called 656 */ 657 spin_lock(&vi->i_lock); 658 if (inode_state_read_once(vi) & I_CREATING) { 659 spin_unlock(&vi->i_lock); 660 na->state = NI_BeingCreated; 661 return -1; 662 } 663 spin_unlock(&vi->i_lock); 664 665 return igrab(vi) ? 1 : -1; 666 } 667 668 /* 669 * ntfs_may_write_mft_record - check if an mft record may be written out 670 * @vol: [IN] ntfs volume on which the mft record to check resides 671 * @mft_no: [IN] mft record number of the mft record to check 672 * @m: [IN] mapped mft record to check 673 * @locked_ni: [OUT] caller has to unlock this ntfs inode if one is returned 674 * @ref_vi: [OUT] caller has to drop this vfs inode if one is returned 675 * 676 * Check if the mapped (base or extent) mft record @m with mft record number 677 * @mft_no belonging to the ntfs volume @vol may be written out. If necessary 678 * and possible the ntfs inode of the mft record is locked and the base vfs 679 * inode is pinned. The locked ntfs inode is then returned in @locked_ni. 
The 680 * caller is responsible for unlocking the ntfs inode and unpinning the base 681 * vfs inode. 682 * 683 * To avoid deadlock when the caller holds a folio lock, if the function 684 * returns @ref_vi it defers dropping the vfs inode reference by returning 685 * it in @ref_vi instead of calling iput() directly. The caller must call 686 * iput() on @ref_vi after releasing the folio lock. 687 * 688 * Return 'true' if the mft record may be written out and 'false' if not. 689 * 690 * The caller has locked the page and cleared the uptodate flag on it which 691 * means that we can safely write out any dirty mft records that do not have 692 * their inodes in icache as determined by find_inode_nowait(). 693 * 694 * Here is a description of the tests we perform: 695 * 696 * If the inode is found in icache we know the mft record must be a base mft 697 * record. If it is dirty, we do not write it and return 'false' as the vfs 698 * inode write paths will result in the access times being updated which would 699 * cause the base mft record to be redirtied and written out again. 700 * 701 * If the inode is in icache and not dirty, we attempt to lock the mft record 702 * and if we find the lock was already taken, it is not safe to write the mft 703 * record and we return 'false'. 704 * 705 * If we manage to obtain the lock we have exclusive access to the mft record, 706 * which also allows us safe writeout of the mft record. We then set 707 * @locked_ni to the locked ntfs inode and return 'true'. 708 * 709 * Note we cannot just lock the mft record and sleep while waiting for the lock 710 * because this would deadlock due to lock reversal. 711 * 712 * If the inode is not in icache we need to perform further checks. 713 * 714 * If the mft record is not a FILE record or it is a base mft record, we can 715 * safely write it and return 'true'. 716 * 717 * We now know the mft record is an extent mft record. 
We check if the inode 718 * corresponding to its base mft record is in icache. If it is not, we cannot 719 * safely determine the state of the extent inode, so we return 'false'. 720 * 721 * We now have the base inode for the extent mft record. We check if it has an 722 * ntfs inode for the extent mft record attached. If not, it is safe to write 723 * the extent mft record and we return 'true'. 724 * 725 * If the extent inode is attached, we check if it is dirty. If so, we return 726 * 'false' (letting the standard write_inode path handle it). 727 * 728 * If it is not dirty, we attempt to lock the extent mft record. If the lock 729 * was already taken, it is not safe to write and we return 'false'. 730 * 731 * If we manage to obtain the lock we have exclusive access to the extent mft 732 * record. We set @locked_ni to the now locked ntfs inode and return 'true'. 733 */ 734 static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no, 735 const struct mft_record *m, struct ntfs_inode **locked_ni, 736 struct inode **ref_vi) 737 { 738 struct super_block *sb = vol->sb; 739 struct inode *mft_vi = vol->mft_ino; 740 struct inode *vi; 741 struct ntfs_inode *ni, *eni, **extent_nis; 742 int i; 743 struct ntfs_attr na = {0}; 744 745 ntfs_debug("Entering for inode 0x%llx.", mft_no); 746 /* 747 * Normally we do not return a locked inode so set @locked_ni to NULL. 748 */ 749 *locked_ni = NULL; 750 *ref_vi = NULL; 751 752 /* 753 * Check if the inode corresponding to this mft record is in the VFS 754 * inode cache and obtain a reference to it if it is. 755 */ 756 ntfs_debug("Looking for inode 0x%llx in icache.", mft_no); 757 na.mft_no = mft_no; 758 na.type = AT_UNUSED; 759 /* 760 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and 761 * we get here for it rather often. 762 */ 763 if (!mft_no) { 764 /* Balance the below iput(). 
*/ 765 vi = igrab(mft_vi); 766 WARN_ON(vi != mft_vi); 767 } else { 768 /* 769 * Have to use find_inode_nowait() since ilookup5_nowait() 770 * waits for inode with I_FREEING, which causes ntfs to deadlock 771 * when inodes are unlinked concurrently 772 */ 773 vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na); 774 if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated) 775 return false; 776 } 777 if (vi) { 778 ntfs_debug("Base inode 0x%llx is in icache.", mft_no); 779 /* The inode is in icache. */ 780 ni = NTFS_I(vi); 781 /* Take a reference to the ntfs inode. */ 782 atomic_inc(&ni->count); 783 /* If the inode is dirty, do not write this record. */ 784 if (NInoDirty(ni)) { 785 ntfs_debug("Inode 0x%llx is dirty, do not write it.", 786 mft_no); 787 atomic_dec(&ni->count); 788 *ref_vi = vi; 789 return false; 790 } 791 ntfs_debug("Inode 0x%llx is not dirty.", mft_no); 792 /* The inode is not dirty, try to take the mft record lock. */ 793 if (unlikely(!mutex_trylock(&ni->mrec_lock))) { 794 ntfs_debug("Mft record 0x%llx is already locked, do not write it.", mft_no); 795 atomic_dec(&ni->count); 796 *ref_vi = vi; 797 return false; 798 } 799 ntfs_debug("Managed to lock mft record 0x%llx, write it.", 800 mft_no); 801 /* 802 * The write has to occur while we hold the mft record lock so 803 * return the locked ntfs inode. 804 */ 805 *locked_ni = ni; 806 return true; 807 } 808 ntfs_debug("Inode 0x%llx is not in icache.", mft_no); 809 /* The inode is not in icache. */ 810 /* Write the record if it is not a mft record (type "FILE"). */ 811 if (!ntfs_is_mft_record(m->magic)) { 812 ntfs_debug("Mft record 0x%llx is not a FILE record, write it.", 813 mft_no); 814 return true; 815 } 816 /* Write the mft record if it is a base inode. */ 817 if (!m->base_mft_record) { 818 ntfs_debug("Mft record 0x%llx is a base record, write it.", 819 mft_no); 820 return true; 821 } 822 /* 823 * This is an extent mft record. 
Check if the inode corresponding to 824 * its base mft record is in icache and obtain a reference to it if it 825 * is. 826 */ 827 na.mft_no = MREF_LE(m->base_mft_record); 828 na.state = 0; 829 ntfs_debug("Mft record 0x%llx is an extent record. Looking for base inode 0x%llx in icache.", 830 mft_no, na.mft_no); 831 if (!na.mft_no) { 832 /* Balance the below iput(). */ 833 vi = igrab(mft_vi); 834 WARN_ON(vi != mft_vi); 835 } else { 836 vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na); 837 if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated) 838 return false; 839 } 840 841 if (!vi) 842 return false; 843 ntfs_debug("Base inode 0x%llx is in icache.", na.mft_no); 844 /* 845 * The base inode is in icache. Check if it has the extent inode 846 * corresponding to this extent mft record attached. 847 */ 848 ni = NTFS_I(vi); 849 mutex_lock(&ni->extent_lock); 850 if (ni->nr_extents <= 0) { 851 /* 852 * The base inode has no attached extent inodes, write this 853 * extent mft record. 854 */ 855 mutex_unlock(&ni->extent_lock); 856 *ref_vi = vi; 857 ntfs_debug("Base inode 0x%llx has no attached extent inodes, write the extent record.", 858 na.mft_no); 859 return true; 860 } 861 /* Iterate over the attached extent inodes. */ 862 extent_nis = ni->ext.extent_ntfs_inos; 863 for (eni = NULL, i = 0; i < ni->nr_extents; ++i) { 864 if (mft_no == extent_nis[i]->mft_no) { 865 /* 866 * Found the extent inode corresponding to this extent 867 * mft record. 868 */ 869 eni = extent_nis[i]; 870 break; 871 } 872 } 873 /* 874 * If the extent inode was not attached to the base inode, write this 875 * extent mft record. 
876 */ 877 if (!eni) { 878 mutex_unlock(&ni->extent_lock); 879 *ref_vi = vi; 880 ntfs_debug("Extent inode 0x%llx is not attached to its base inode 0x%llx, write the extent record.", 881 mft_no, na.mft_no); 882 return true; 883 } 884 ntfs_debug("Extent inode 0x%llx is attached to its base inode 0x%llx.", 885 mft_no, na.mft_no); 886 /* Take a reference to the extent ntfs inode. */ 887 atomic_inc(&eni->count); 888 mutex_unlock(&ni->extent_lock); 889 890 /* if extent inode is dirty, write_inode will write it */ 891 if (NInoDirty(eni)) { 892 atomic_dec(&eni->count); 893 *ref_vi = vi; 894 return false; 895 } 896 897 /* 898 * Found the extent inode coresponding to this extent mft record. 899 * Try to take the mft record lock. 900 */ 901 if (unlikely(!mutex_trylock(&eni->mrec_lock))) { 902 atomic_dec(&eni->count); 903 *ref_vi = vi; 904 ntfs_debug("Extent mft record 0x%llx is already locked, do not write it.", 905 mft_no); 906 return false; 907 } 908 ntfs_debug("Managed to lock extent mft record 0x%llx, write it.", 909 mft_no); 910 /* 911 * The write has to occur while we hold the mft record lock so return 912 * the locked extent ntfs inode. 913 */ 914 *locked_ni = eni; 915 return true; 916 } 917 918 static const char *es = " Leaving inconsistent metadata. Unmount and run chkdsk."; 919 920 #define RESERVED_MFT_RECORDS 64 921 922 /* 923 * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name 924 * @vol: volume on which to search for a free mft record 925 * @base_ni: open base inode if allocating an extent mft record or NULL 926 * 927 * Search for a free mft record in the mft bitmap attribute on the ntfs volume 928 * @vol. 929 * 930 * If @base_ni is NULL start the search at the default allocator position. 931 * 932 * If @base_ni is not NULL start the search at the mft record after the base 933 * mft record @base_ni. 934 * 935 * Return the free mft record on success and -errno on error. 
 * An error code of
 * -ENOSPC means that there are no free mft records in the currently
 * initialized mft bitmap.
 *
 * Locking: Caller must hold vol->mftbmp_lock for writing.
 */
static s64 ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(struct ntfs_volume *vol,
		struct ntfs_inode *base_ni)
{
	/*
	 * pass_start/pass_end: first and one-past-last bit (mft record number)
	 * of the current pass; data_pos: bit at which the search continues;
	 * ofs: byte offset of data_pos in the bitmap; bit: bit offset relative
	 * to data_pos; ll: scratch value.
	 */
	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
	unsigned long flags;
	struct address_space *mftbmp_mapping;
	u8 *buf = NULL, *byte;
	struct folio *folio;
	/* folio_ofs: byte offset inside the page; size: bits to scan in it. */
	unsigned int folio_ofs, size;
	u8 pass, b;

	ntfs_debug("Searching for free mft record in the currently initialized mft bitmap.");
	mftbmp_mapping = vol->mftbmp_ino->i_mapping;
	/*
	 * Set the end of the pass making sure we do not overflow the mft
	 * bitmap.
	 */
	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
	pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
			vol->mft_record_size_bits;
	read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
	read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	/* Number of bits in the initialized part of the bitmap. */
	ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
	read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	if (pass_end > ll)
		pass_end = ll;
	pass = 1;
	if (!base_ni)
		data_pos = vol->mft_data_pos;
	else
		data_pos = base_ni->mft_no + 1;
	if (data_pos < RESERVED_MFT_RECORDS)
		data_pos = RESERVED_MFT_RECORDS;
	if (data_pos >= pass_end) {
		/* Nothing after the start position: only do the second pass. */
		data_pos = RESERVED_MFT_RECORDS;
		pass = 2;
		/* This happens on a freshly formatted volume. */
		if (data_pos >= pass_end)
			return -ENOSPC;
	}

	/*
	 * When allocating an extent record for $MFT itself, search from bit 0
	 * (i.e. including the reserved records) in a single pass.
	 */
	if (base_ni && base_ni->mft_no == FILE_MFT) {
		data_pos = 0;
		pass = 2;
	}

	pass_start = data_pos;
	ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, pass_end 0x%llx, data_pos 0x%llx.",
			pass, pass_start, pass_end, data_pos);
	/* Loop until a free mft record is found. */
	for (; pass <= 2;) {
		/* Cap size to pass_end. */
		ofs = data_pos >> 3;
		folio_ofs = ofs & ~PAGE_MASK;
		size = PAGE_SIZE - folio_ofs;
		ll = ((pass_end + 7) >> 3) - ofs;
		if (size > ll)
			size = ll;
		/* Convert the byte count into a bit count. */
		size <<= 3;
		/*
		 * If we are still within the active pass, search the next page
		 * for a zero bit.
		 */
		if (size) {
			folio = read_mapping_folio(mftbmp_mapping,
					ofs >> PAGE_SHIFT, NULL);
			if (IS_ERR(folio)) {
				ntfs_error(vol->sb, "Failed to read mft bitmap, aborting.");
				return PTR_ERR(folio);
			}
			folio_lock(folio);
			buf = (u8 *)kmap_local_folio(folio, 0) + folio_ofs;
			/* Split data_pos into a byte aligned base and a bit. */
			bit = data_pos & 7;
			data_pos &= ~7ull;
			ntfs_debug("Before inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			/* Examine one bitmap byte per iteration. */
			for (; bit < size && data_pos + bit < pass_end;
					bit &= ~7ull, bit += 8) {
				/*
				 * If we're extending $MFT and running out of the first
				 * mft record (base record) then give up searching since
				 * no guarantee that the found record will be accessible.
				 */
				if (base_ni && base_ni->mft_no == FILE_MFT && bit > 400) {
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					return -ENOSPC;
				}

				byte = buf + (bit >> 3);
				/* All eight records of this byte are in use. */
				if (*byte == 0xff)
					continue;
				/*
				 * b is the lowest clear bit of the byte. It is
				 * only accepted if it is not below the starting
				 * bit within the byte.
				 */
				b = ffz((unsigned long)*byte);
				if (b < 8 && b >= (bit & 7)) {
					ll = data_pos + (bit & ~7ull) + b;
					/* Refuse record numbers above 2^32. */
					if (unlikely(ll > (1ll << 32))) {
						folio_unlock(folio);
						kunmap_local(buf);
						folio_put(folio);
						return -ENOSPC;
					}
					/* Allocate the record by setting its bit. */
					*byte |= 1 << b;
					folio_mark_dirty(folio);
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					ntfs_debug("Done. (Found and allocated mft record 0x%llx.)",
							ll);
					return ll;
				}
			}
			ntfs_debug("After inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			/* Nothing free in this page, advance past it. */
			data_pos += size;
			folio_unlock(folio);
			kunmap_local(buf);
			folio_put(folio);
			/*
			 * If the end of the pass has not been reached yet,
			 * continue searching the mft bitmap for a zero bit.
			 */
			if (data_pos < pass_end)
				continue;
		}
		/* Do the next pass. */
		if (++pass == 2) {
			/*
			 * Starting the second pass, in which we scan the first
			 * part of the zone which we omitted earlier.
			 */
			pass_end = pass_start;
			data_pos = pass_start = RESERVED_MFT_RECORDS;
			ntfs_debug("pass %i, pass_start 0x%llx, pass_end 0x%llx.",
					pass, pass_start, pass_end);
			if (data_pos >= pass_end)
				break;
		}
	}
	/* No free mft records in currently initialized mft bitmap. */
	ntfs_debug("Done. (No free mft records left in currently initialized mft bitmap.)");
	return -ENOSPC;
}

/*
 * ntfs_mft_attr_extend - update on-disk metadata after extending an attribute
 * @ni:		ntfs inode (base or attribute inode) whose attribute was extended
 *
 * If the base inode of @ni has no attribute list yet, add one with
 * ntfs_inode_add_attrlist() and return -EAGAIN without updating the mapping
 * pairs. Otherwise update the mapping pairs of @ni starting at offset 0.
 *
 * Return 0 on success, -EAGAIN if an attribute list was just added, or the
 * error returned by ntfs_inode_add_attrlist() or
 * ntfs_attr_update_mapping_pairs().
 */
static int ntfs_mft_attr_extend(struct ntfs_inode *ni)
{
	int ret = 0;
	struct ntfs_inode *base_ni;

	/* An attribute inode carries a pointer to its base inode. */
	if (NInoAttr(ni))
		base_ni = ni->ext.base_ntfs_ino;
	else
		base_ni = ni;

	if (!NInoAttrList(base_ni)) {
		ret = ntfs_inode_add_attrlist(base_ni);
		if (ret) {
			pr_err("Can not add attrlist\n");
			goto out;
		} else {
			/* NOTE(review): presumably tells the caller to retry. */
			ret = -EAGAIN;
			goto out;
		}
	}

	ret = ntfs_attr_update_mapping_pairs(ni, 0);
	if (ret)
		pr_err("MP update failed\n");

out:
	return ret;
}

/*
 * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster
 * @vol: volume on which to extend the mft bitmap attribute
 *
 * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
 *
 * Note: Only changes allocated_size, i.e. does not touch initialized_size or
 * data_size.
1122 * 1123 * Return 0 on success and -errno on error. 1124 * 1125 * Locking: - Caller must hold vol->mftbmp_lock for writing. 1126 * - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for 1127 * writing and releases it before returning. 1128 * - This function takes vol->lcnbmp_lock for writing and releases it 1129 * before returning. 1130 */ 1131 static int ntfs_mft_bitmap_extend_allocation_nolock(struct ntfs_volume *vol) 1132 { 1133 s64 lcn; 1134 s64 ll; 1135 unsigned long flags; 1136 struct folio *folio; 1137 struct ntfs_inode *mft_ni, *mftbmp_ni; 1138 struct runlist_element *rl, *rl2 = NULL; 1139 struct ntfs_attr_search_ctx *ctx = NULL; 1140 struct mft_record *mrec; 1141 struct attr_record *a = NULL; 1142 int ret, mp_size; 1143 u32 old_alen = 0; 1144 u8 *b, tb; 1145 struct { 1146 u8 added_cluster:1; 1147 u8 added_run:1; 1148 u8 mp_rebuilt:1; 1149 u8 mp_extended:1; 1150 } status = { 0, 0, 0, 0 }; 1151 size_t new_rl_count; 1152 1153 ntfs_debug("Extending mft bitmap allocation."); 1154 mft_ni = NTFS_I(vol->mft_ino); 1155 mftbmp_ni = NTFS_I(vol->mftbmp_ino); 1156 /* 1157 * Determine the last lcn of the mft bitmap. The allocated size of the 1158 * mft bitmap cannot be zero so we are ok to do this. 
1159 */ 1160 down_write(&mftbmp_ni->runlist.lock); 1161 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 1162 ll = mftbmp_ni->allocated_size; 1163 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1164 rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, 1165 NTFS_B_TO_CLU(vol, ll - 1), NULL); 1166 if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 1167 up_write(&mftbmp_ni->runlist.lock); 1168 ntfs_error(vol->sb, 1169 "Failed to determine last allocated cluster of mft bitmap attribute."); 1170 if (!IS_ERR(rl)) 1171 ret = -EIO; 1172 else 1173 ret = PTR_ERR(rl); 1174 return ret; 1175 } 1176 lcn = rl->lcn + rl->length; 1177 ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.", 1178 (long long)lcn); 1179 /* 1180 * Attempt to get the cluster following the last allocated cluster by 1181 * hand as it may be in the MFT zone so the allocator would not give it 1182 * to us. 1183 */ 1184 ll = lcn >> 3; 1185 folio = read_mapping_folio(vol->lcnbmp_ino->i_mapping, 1186 ll >> PAGE_SHIFT, NULL); 1187 if (IS_ERR(folio)) { 1188 up_write(&mftbmp_ni->runlist.lock); 1189 ntfs_error(vol->sb, "Failed to read from lcn bitmap."); 1190 return PTR_ERR(folio); 1191 } 1192 1193 down_write(&vol->lcnbmp_lock); 1194 folio_lock(folio); 1195 b = (u8 *)kmap_local_folio(folio, 0) + (ll & ~PAGE_MASK); 1196 tb = 1 << (lcn & 7ull); 1197 if (*b != 0xff && !(*b & tb)) { 1198 /* Next cluster is free, allocate it. */ 1199 *b |= tb; 1200 folio_mark_dirty(folio); 1201 folio_unlock(folio); 1202 kunmap_local(b); 1203 folio_put(folio); 1204 up_write(&vol->lcnbmp_lock); 1205 /* Update the mft bitmap runlist. */ 1206 rl->length++; 1207 rl[1].vcn++; 1208 status.added_cluster = 1; 1209 ntfs_debug("Appending one cluster to mft bitmap."); 1210 } else { 1211 folio_unlock(folio); 1212 kunmap_local(b); 1213 folio_put(folio); 1214 up_write(&vol->lcnbmp_lock); 1215 /* Allocate a cluster from the DATA_ZONE. 
*/ 1216 rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE, 1217 true, false, false); 1218 if (IS_ERR(rl2)) { 1219 up_write(&mftbmp_ni->runlist.lock); 1220 ntfs_error(vol->sb, 1221 "Failed to allocate a cluster for the mft bitmap."); 1222 return PTR_ERR(rl2); 1223 } 1224 rl = ntfs_runlists_merge(&mftbmp_ni->runlist, rl2, 0, &new_rl_count); 1225 if (IS_ERR(rl)) { 1226 up_write(&mftbmp_ni->runlist.lock); 1227 ntfs_error(vol->sb, "Failed to merge runlists for mft bitmap."); 1228 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1229 ntfs_error(vol->sb, "Failed to deallocate allocated cluster.%s", 1230 es); 1231 NVolSetErrors(vol); 1232 } 1233 kvfree(rl2); 1234 return PTR_ERR(rl); 1235 } 1236 mftbmp_ni->runlist.rl = rl; 1237 mftbmp_ni->runlist.count = new_rl_count; 1238 status.added_run = 1; 1239 ntfs_debug("Adding one run to mft bitmap."); 1240 /* Find the last run in the new runlist. */ 1241 for (; rl[1].length; rl++) 1242 ; 1243 } 1244 /* 1245 * Update the attribute record as well. Note: @rl is the last 1246 * (non-terminator) runlist element of mft bitmap. 1247 */ 1248 mrec = map_mft_record(mft_ni); 1249 if (IS_ERR(mrec)) { 1250 ntfs_error(vol->sb, "Failed to map mft record."); 1251 ret = PTR_ERR(mrec); 1252 goto undo_alloc; 1253 } 1254 ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 1255 if (unlikely(!ctx)) { 1256 ntfs_error(vol->sb, "Failed to get search context."); 1257 ret = -ENOMEM; 1258 goto undo_alloc; 1259 } 1260 ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1261 mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 1262 0, ctx); 1263 if (unlikely(ret)) { 1264 ntfs_error(vol->sb, 1265 "Failed to find last attribute extent of mft bitmap attribute."); 1266 if (ret == -ENOENT) 1267 ret = -EIO; 1268 goto undo_alloc; 1269 } 1270 a = ctx->attr; 1271 ll = le64_to_cpu(a->data.non_resident.lowest_vcn); 1272 /* Search back for the previous last allocated cluster of mft bitmap. 
*/ 1273 for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) { 1274 if (ll >= rl2->vcn) 1275 break; 1276 } 1277 WARN_ON(ll < rl2->vcn); 1278 WARN_ON(ll >= rl2->vcn + rl2->length); 1279 /* Get the size for the new mapping pairs array for this extent. */ 1280 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1); 1281 if (unlikely(mp_size <= 0)) { 1282 ntfs_error(vol->sb, 1283 "Get size for mapping pairs failed for mft bitmap attribute extent."); 1284 ret = mp_size; 1285 if (!ret) 1286 ret = -EIO; 1287 goto undo_alloc; 1288 } 1289 /* Expand the attribute record if necessary. */ 1290 old_alen = le32_to_cpu(a->length); 1291 ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 1292 le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 1293 if (unlikely(ret)) { 1294 ret = ntfs_mft_attr_extend(mftbmp_ni); 1295 if (!ret) 1296 goto extended_ok; 1297 if (ret != -EAGAIN) 1298 status.mp_extended = 1; 1299 goto undo_alloc; 1300 } 1301 status.mp_rebuilt = 1; 1302 /* Generate the mapping pairs array directly into the attr record. */ 1303 ret = ntfs_mapping_pairs_build(vol, (u8 *)a + 1304 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1305 mp_size, rl2, ll, -1, NULL, NULL, NULL); 1306 if (unlikely(ret)) { 1307 ntfs_error(vol->sb, 1308 "Failed to build mapping pairs array for mft bitmap attribute."); 1309 goto undo_alloc; 1310 } 1311 /* Update the highest_vcn. */ 1312 a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1); 1313 /* 1314 * We now have extended the mft bitmap allocated_size by one cluster. 1315 * Reflect this in the struct ntfs_inode structure and the attribute record. 1316 */ 1317 if (a->data.non_resident.lowest_vcn) { 1318 /* 1319 * We are not in the first attribute extent, switch to it, but 1320 * first ensure the changes will make it to disk later. 
1321 */ 1322 mark_mft_record_dirty(ctx->ntfs_ino); 1323 extended_ok: 1324 ntfs_attr_reinit_search_ctx(ctx); 1325 ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1326 mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 1327 0, ctx); 1328 if (unlikely(ret)) { 1329 ntfs_error(vol->sb, 1330 "Failed to find first attribute extent of mft bitmap attribute."); 1331 goto restore_undo_alloc; 1332 } 1333 a = ctx->attr; 1334 } 1335 1336 write_lock_irqsave(&mftbmp_ni->size_lock, flags); 1337 mftbmp_ni->allocated_size += vol->cluster_size; 1338 a->data.non_resident.allocated_size = 1339 cpu_to_le64(mftbmp_ni->allocated_size); 1340 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1341 /* Ensure the changes make it to disk. */ 1342 mark_mft_record_dirty(ctx->ntfs_ino); 1343 ntfs_attr_put_search_ctx(ctx); 1344 unmap_mft_record(mft_ni); 1345 up_write(&mftbmp_ni->runlist.lock); 1346 ntfs_debug("Done."); 1347 return 0; 1348 1349 restore_undo_alloc: 1350 ntfs_attr_reinit_search_ctx(ctx); 1351 if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1352 mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 1353 0, ctx)) { 1354 ntfs_error(vol->sb, 1355 "Failed to find last attribute extent of mft bitmap attribute.%s", es); 1356 write_lock_irqsave(&mftbmp_ni->size_lock, flags); 1357 mftbmp_ni->allocated_size += vol->cluster_size; 1358 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1359 ntfs_attr_put_search_ctx(ctx); 1360 unmap_mft_record(mft_ni); 1361 up_write(&mftbmp_ni->runlist.lock); 1362 /* 1363 * The only thing that is now wrong is ->allocated_size of the 1364 * base attribute extent which chkdsk should be able to fix. 1365 */ 1366 NVolSetErrors(vol); 1367 return ret; 1368 } 1369 a = ctx->attr; 1370 a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 2); 1371 undo_alloc: 1372 if (status.added_cluster) { 1373 /* Truncate the last run in the runlist by one cluster. 
*/ 1374 rl->length--; 1375 rl[1].vcn--; 1376 } else if (status.added_run) { 1377 lcn = rl->lcn; 1378 /* Remove the last run from the runlist. */ 1379 rl->lcn = rl[1].lcn; 1380 rl->length = 0; 1381 mftbmp_ni->runlist.count--; 1382 } 1383 /* Deallocate the cluster. */ 1384 down_write(&vol->lcnbmp_lock); 1385 if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 1386 ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es); 1387 NVolSetErrors(vol); 1388 } else 1389 ntfs_inc_free_clusters(vol, 1); 1390 up_write(&vol->lcnbmp_lock); 1391 if (status.mp_rebuilt) { 1392 if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu( 1393 a->data.non_resident.mapping_pairs_offset), 1394 old_alen - le16_to_cpu( 1395 a->data.non_resident.mapping_pairs_offset), 1396 rl2, ll, -1, NULL, NULL, NULL)) { 1397 ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es); 1398 NVolSetErrors(vol); 1399 } 1400 if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 1401 ntfs_error(vol->sb, "Failed to restore attribute record.%s", es); 1402 NVolSetErrors(vol); 1403 } 1404 mark_mft_record_dirty(ctx->ntfs_ino); 1405 } else if (status.mp_extended && ntfs_attr_update_mapping_pairs(mftbmp_ni, 0)) { 1406 ntfs_error(vol->sb, "Failed to restore mapping pairs.%s", es); 1407 NVolSetErrors(vol); 1408 } 1409 if (ctx) 1410 ntfs_attr_put_search_ctx(ctx); 1411 if (!IS_ERR(mrec)) 1412 unmap_mft_record(mft_ni); 1413 up_write(&mftbmp_ni->runlist.lock); 1414 return ret; 1415 } 1416 1417 /* 1418 * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data 1419 * @vol: volume on which to extend the mft bitmap attribute 1420 * 1421 * Extend the initialized portion of the mft bitmap attribute on the ntfs 1422 * volume @vol by 8 bytes. 1423 * 1424 * Note: Only changes initialized_size and data_size, i.e. requires that 1425 * allocated_size is big enough to fit the new initialized_size. 1426 * 1427 * Return 0 on success and -error on error. 
1428 * 1429 * Locking: Caller must hold vol->mftbmp_lock for writing. 1430 */ 1431 static int ntfs_mft_bitmap_extend_initialized_nolock(struct ntfs_volume *vol) 1432 { 1433 s64 old_data_size, old_initialized_size; 1434 unsigned long flags; 1435 struct inode *mftbmp_vi; 1436 struct ntfs_inode *mft_ni, *mftbmp_ni; 1437 struct ntfs_attr_search_ctx *ctx; 1438 struct mft_record *mrec; 1439 struct attr_record *a; 1440 int ret; 1441 1442 ntfs_debug("Extending mft bitmap initialized (and data) size."); 1443 mft_ni = NTFS_I(vol->mft_ino); 1444 mftbmp_vi = vol->mftbmp_ino; 1445 mftbmp_ni = NTFS_I(mftbmp_vi); 1446 /* Get the attribute record. */ 1447 mrec = map_mft_record(mft_ni); 1448 if (IS_ERR(mrec)) { 1449 ntfs_error(vol->sb, "Failed to map mft record."); 1450 return PTR_ERR(mrec); 1451 } 1452 ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 1453 if (unlikely(!ctx)) { 1454 ntfs_error(vol->sb, "Failed to get search context."); 1455 ret = -ENOMEM; 1456 goto unm_err_out; 1457 } 1458 ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1459 mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); 1460 if (unlikely(ret)) { 1461 ntfs_error(vol->sb, 1462 "Failed to find first attribute extent of mft bitmap attribute."); 1463 if (ret == -ENOENT) 1464 ret = -EIO; 1465 goto put_err_out; 1466 } 1467 a = ctx->attr; 1468 write_lock_irqsave(&mftbmp_ni->size_lock, flags); 1469 old_data_size = i_size_read(mftbmp_vi); 1470 old_initialized_size = mftbmp_ni->initialized_size; 1471 /* 1472 * We can simply update the initialized_size before filling the space 1473 * with zeroes because the caller is holding the mft bitmap lock for 1474 * writing which ensures that no one else is trying to access the data. 
1475 */ 1476 mftbmp_ni->initialized_size += 8; 1477 a->data.non_resident.initialized_size = 1478 cpu_to_le64(mftbmp_ni->initialized_size); 1479 if (mftbmp_ni->initialized_size > old_data_size) { 1480 i_size_write(mftbmp_vi, mftbmp_ni->initialized_size); 1481 a->data.non_resident.data_size = 1482 cpu_to_le64(mftbmp_ni->initialized_size); 1483 } 1484 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1485 /* Ensure the changes make it to disk. */ 1486 mark_mft_record_dirty(ctx->ntfs_ino); 1487 ntfs_attr_put_search_ctx(ctx); 1488 unmap_mft_record(mft_ni); 1489 /* Initialize the mft bitmap attribute value with zeroes. */ 1490 ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0); 1491 if (likely(!ret)) { 1492 ntfs_debug("Done. (Wrote eight initialized bytes to mft bitmap."); 1493 ntfs_inc_free_mft_records(vol, 8 * 8); 1494 return 0; 1495 } 1496 ntfs_error(vol->sb, "Failed to write to mft bitmap."); 1497 /* Try to recover from the error. */ 1498 mrec = map_mft_record(mft_ni); 1499 if (IS_ERR(mrec)) { 1500 ntfs_error(vol->sb, "Failed to map mft record.%s", es); 1501 NVolSetErrors(vol); 1502 return ret; 1503 } 1504 ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 1505 if (unlikely(!ctx)) { 1506 ntfs_error(vol->sb, "Failed to get search context.%s", es); 1507 NVolSetErrors(vol); 1508 goto unm_err_out; 1509 } 1510 if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1511 mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) { 1512 ntfs_error(vol->sb, 1513 "Failed to find first attribute extent of mft bitmap attribute.%s", es); 1514 NVolSetErrors(vol); 1515 put_err_out: 1516 ntfs_attr_put_search_ctx(ctx); 1517 unm_err_out: 1518 unmap_mft_record(mft_ni); 1519 goto err_out; 1520 } 1521 a = ctx->attr; 1522 write_lock_irqsave(&mftbmp_ni->size_lock, flags); 1523 mftbmp_ni->initialized_size = old_initialized_size; 1524 a->data.non_resident.initialized_size = 1525 cpu_to_le64(old_initialized_size); 1526 if (i_size_read(mftbmp_vi) != old_data_size) { 1527 
i_size_write(mftbmp_vi, old_data_size); 1528 a->data.non_resident.data_size = cpu_to_le64(old_data_size); 1529 } 1530 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1531 mark_mft_record_dirty(ctx->ntfs_ino); 1532 ntfs_attr_put_search_ctx(ctx); 1533 unmap_mft_record(mft_ni); 1534 #ifdef DEBUG 1535 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 1536 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 1537 mftbmp_ni->allocated_size, i_size_read(mftbmp_vi), 1538 mftbmp_ni->initialized_size); 1539 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1540 #endif /* DEBUG */ 1541 err_out: 1542 return ret; 1543 } 1544 1545 /* 1546 * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute 1547 * @vol: volume on which to extend the mft data attribute 1548 * 1549 * Extend the mft data attribute on the ntfs volume @vol by 16 mft records 1550 * worth of clusters or if not enough space for this by one mft record worth 1551 * of clusters. 1552 * 1553 * Note: Only changes allocated_size, i.e. does not touch initialized_size or 1554 * data_size. 1555 * 1556 * Return 0 on success and -errno on error. 1557 * 1558 * Locking: - Caller must hold vol->mftbmp_lock for writing. 1559 * - This function takes NTFS_I(vol->mft_ino)->runlist.lock for 1560 * writing and releases it before returning. 1561 * - This function calls functions which take vol->lcnbmp_lock for 1562 * writing and release it before returning. 
1563 */ 1564 static int ntfs_mft_data_extend_allocation_nolock(struct ntfs_volume *vol) 1565 { 1566 s64 lcn; 1567 s64 old_last_vcn; 1568 s64 min_nr, nr, ll; 1569 unsigned long flags; 1570 struct ntfs_inode *mft_ni; 1571 struct runlist_element *rl, *rl2; 1572 struct ntfs_attr_search_ctx *ctx = NULL; 1573 struct mft_record *mrec; 1574 struct attr_record *a = NULL; 1575 int ret, mp_size; 1576 u32 old_alen = 0; 1577 bool mp_rebuilt = false, mp_extended = false; 1578 size_t new_rl_count; 1579 1580 ntfs_debug("Extending mft data allocation."); 1581 mft_ni = NTFS_I(vol->mft_ino); 1582 /* 1583 * Determine the preferred allocation location, i.e. the last lcn of 1584 * the mft data attribute. The allocated size of the mft data 1585 * attribute cannot be zero so we are ok to do this. 1586 */ 1587 down_write(&mft_ni->runlist.lock); 1588 read_lock_irqsave(&mft_ni->size_lock, flags); 1589 ll = mft_ni->allocated_size; 1590 read_unlock_irqrestore(&mft_ni->size_lock, flags); 1591 rl = ntfs_attr_find_vcn_nolock(mft_ni, 1592 NTFS_B_TO_CLU(vol, ll - 1), NULL); 1593 if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 1594 up_write(&mft_ni->runlist.lock); 1595 ntfs_error(vol->sb, 1596 "Failed to determine last allocated cluster of mft data attribute."); 1597 if (!IS_ERR(rl)) 1598 ret = -EIO; 1599 else 1600 ret = PTR_ERR(rl); 1601 return ret; 1602 } 1603 lcn = rl->lcn + rl->length; 1604 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", lcn); 1605 /* Minimum allocation is one mft record worth of clusters. */ 1606 min_nr = NTFS_B_TO_CLU(vol, vol->mft_record_size); 1607 if (!min_nr) 1608 min_nr = 1; 1609 /* Want to allocate 16 mft records worth of clusters. */ 1610 nr = vol->mft_record_size << 4 >> vol->cluster_size_bits; 1611 if (!nr) 1612 nr = min_nr; 1613 /* Ensure we do not go above 2^32-1 mft records. 
*/ 1614 read_lock_irqsave(&mft_ni->size_lock, flags); 1615 ll = mft_ni->allocated_size; 1616 read_unlock_irqrestore(&mft_ni->size_lock, flags); 1617 if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >> 1618 vol->mft_record_size_bits >= (1ll << 32))) { 1619 nr = min_nr; 1620 if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >> 1621 vol->mft_record_size_bits >= (1ll << 32))) { 1622 ntfs_warning(vol->sb, 1623 "Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached."); 1624 up_write(&mft_ni->runlist.lock); 1625 return -ENOSPC; 1626 } 1627 } 1628 ntfs_debug("Trying mft data allocation with %s cluster count %lli.", 1629 nr > min_nr ? "default" : "minimal", (long long)nr); 1630 old_last_vcn = rl[1].vcn; 1631 /* 1632 * We can release the mft_ni runlist lock, Because this function is 1633 * the only one that expends $MFT data attribute and is called with 1634 * mft_ni->mrec_lock. 1635 * This is required for the lock order, vol->lcnbmp_lock => 1636 * mft_ni->runlist.lock. 1637 */ 1638 up_write(&mft_ni->runlist.lock); 1639 1640 do { 1641 rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE, 1642 true, false, false); 1643 if (!IS_ERR(rl2)) 1644 break; 1645 if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { 1646 ntfs_error(vol->sb, 1647 "Failed to allocate the minimal number of clusters (%lli) for the mft data attribute.", 1648 nr); 1649 return PTR_ERR(rl2); 1650 } 1651 /* 1652 * There is not enough space to do the allocation, but there 1653 * might be enough space to do a minimal allocation so try that 1654 * before failing. 
1655 */ 1656 nr = min_nr; 1657 ntfs_debug("Retrying mft data allocation with minimal cluster count %lli.", nr); 1658 } while (1); 1659 1660 down_write(&mft_ni->runlist.lock); 1661 rl = ntfs_runlists_merge(&mft_ni->runlist, rl2, 0, &new_rl_count); 1662 if (IS_ERR(rl)) { 1663 up_write(&mft_ni->runlist.lock); 1664 ntfs_error(vol->sb, "Failed to merge runlists for mft data attribute."); 1665 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1666 ntfs_error(vol->sb, 1667 "Failed to deallocate clusters from the mft data attribute.%s", es); 1668 NVolSetErrors(vol); 1669 } 1670 kvfree(rl2); 1671 return PTR_ERR(rl); 1672 } 1673 mft_ni->runlist.rl = rl; 1674 mft_ni->runlist.count = new_rl_count; 1675 ntfs_debug("Allocated %lli clusters.", (long long)nr); 1676 /* Find the last run in the new runlist. */ 1677 for (; rl[1].length; rl++) 1678 ; 1679 up_write(&mft_ni->runlist.lock); 1680 1681 /* Update the attribute record as well. */ 1682 mrec = map_mft_record(mft_ni); 1683 if (IS_ERR(mrec)) { 1684 ntfs_error(vol->sb, "Failed to map mft record."); 1685 ret = PTR_ERR(mrec); 1686 down_write(&mft_ni->runlist.lock); 1687 goto undo_alloc; 1688 } 1689 ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 1690 if (unlikely(!ctx)) { 1691 ntfs_error(vol->sb, "Failed to get search context."); 1692 ret = -ENOMEM; 1693 goto undo_alloc; 1694 } 1695 ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 1696 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx); 1697 if (unlikely(ret)) { 1698 ntfs_error(vol->sb, "Failed to find last attribute extent of mft data attribute."); 1699 if (ret == -ENOENT) 1700 ret = -EIO; 1701 goto undo_alloc; 1702 } 1703 a = ctx->attr; 1704 ll = le64_to_cpu(a->data.non_resident.lowest_vcn); 1705 1706 down_write(&mft_ni->runlist.lock); 1707 /* Search back for the previous last allocated cluster of mft bitmap. 
*/ 1708 for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) { 1709 if (ll >= rl2->vcn) 1710 break; 1711 } 1712 WARN_ON(ll < rl2->vcn); 1713 WARN_ON(ll >= rl2->vcn + rl2->length); 1714 /* Get the size for the new mapping pairs array for this extent. */ 1715 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1); 1716 if (unlikely(mp_size <= 0)) { 1717 ntfs_error(vol->sb, 1718 "Get size for mapping pairs failed for mft data attribute extent."); 1719 ret = mp_size; 1720 if (!ret) 1721 ret = -EIO; 1722 up_write(&mft_ni->runlist.lock); 1723 goto undo_alloc; 1724 } 1725 up_write(&mft_ni->runlist.lock); 1726 1727 /* Expand the attribute record if necessary. */ 1728 old_alen = le32_to_cpu(a->length); 1729 ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 1730 le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 1731 if (unlikely(ret)) { 1732 ret = ntfs_mft_attr_extend(mft_ni); 1733 if (!ret) 1734 goto extended_ok; 1735 if (ret != -EAGAIN) 1736 mp_extended = true; 1737 goto undo_alloc; 1738 } 1739 mp_rebuilt = true; 1740 /* Generate the mapping pairs array directly into the attr record. */ 1741 ret = ntfs_mapping_pairs_build(vol, (u8 *)a + 1742 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1743 mp_size, rl2, ll, -1, NULL, NULL, NULL); 1744 if (unlikely(ret)) { 1745 ntfs_error(vol->sb, "Failed to build mapping pairs array of mft data attribute."); 1746 goto undo_alloc; 1747 } 1748 /* Update the highest_vcn. */ 1749 a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1); 1750 /* 1751 * We now have extended the mft data allocated_size by nr clusters. 1752 * Reflect this in the struct ntfs_inode structure and the attribute record. 1753 * @rl is the last (non-terminator) runlist element of mft data 1754 * attribute. 1755 */ 1756 if (a->data.non_resident.lowest_vcn) { 1757 /* 1758 * We are not in the first attribute extent, switch to it, but 1759 * first ensure the changes will make it to disk later. 
1760 */ 1761 mark_mft_record_dirty(ctx->ntfs_ino); 1762 extended_ok: 1763 ntfs_attr_reinit_search_ctx(ctx); 1764 ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, 1765 mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, 1766 ctx); 1767 if (unlikely(ret)) { 1768 ntfs_error(vol->sb, 1769 "Failed to find first attribute extent of mft data attribute."); 1770 goto restore_undo_alloc; 1771 } 1772 a = ctx->attr; 1773 } 1774 1775 write_lock_irqsave(&mft_ni->size_lock, flags); 1776 mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr); 1777 a->data.non_resident.allocated_size = 1778 cpu_to_le64(mft_ni->allocated_size); 1779 write_unlock_irqrestore(&mft_ni->size_lock, flags); 1780 /* Ensure the changes make it to disk. */ 1781 mark_mft_record_dirty(ctx->ntfs_ino); 1782 ntfs_attr_put_search_ctx(ctx); 1783 unmap_mft_record(mft_ni); 1784 ntfs_debug("Done."); 1785 return 0; 1786 restore_undo_alloc: 1787 ntfs_attr_reinit_search_ctx(ctx); 1788 if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 1789 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { 1790 ntfs_error(vol->sb, 1791 "Failed to find last attribute extent of mft data attribute.%s", es); 1792 write_lock_irqsave(&mft_ni->size_lock, flags); 1793 mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr); 1794 write_unlock_irqrestore(&mft_ni->size_lock, flags); 1795 ntfs_attr_put_search_ctx(ctx); 1796 unmap_mft_record(mft_ni); 1797 up_write(&mft_ni->runlist.lock); 1798 /* 1799 * The only thing that is now wrong is ->allocated_size of the 1800 * base attribute extent which chkdsk should be able to fix. 
1801 */ 1802 NVolSetErrors(vol); 1803 return ret; 1804 } 1805 ctx->attr->data.non_resident.highest_vcn = 1806 cpu_to_le64(old_last_vcn - 1); 1807 undo_alloc: 1808 if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) { 1809 ntfs_error(vol->sb, "Failed to free clusters from mft data attribute.%s", es); 1810 NVolSetErrors(vol); 1811 } 1812 1813 if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { 1814 ntfs_error(vol->sb, "Failed to truncate mft data attribute runlist.%s", es); 1815 NVolSetErrors(vol); 1816 } 1817 if (mp_extended && ntfs_attr_update_mapping_pairs(mft_ni, 0)) { 1818 ntfs_error(vol->sb, "Failed to restore mapping pairs.%s", 1819 es); 1820 NVolSetErrors(vol); 1821 } 1822 if (ctx) { 1823 a = ctx->attr; 1824 if (mp_rebuilt && !IS_ERR(ctx->mrec)) { 1825 if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu( 1826 a->data.non_resident.mapping_pairs_offset), 1827 old_alen - le16_to_cpu( 1828 a->data.non_resident.mapping_pairs_offset), 1829 rl2, ll, -1, NULL, NULL, NULL)) { 1830 ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es); 1831 NVolSetErrors(vol); 1832 } 1833 if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 1834 ntfs_error(vol->sb, "Failed to restore attribute record.%s", es); 1835 NVolSetErrors(vol); 1836 } 1837 mark_mft_record_dirty(ctx->ntfs_ino); 1838 } else if (IS_ERR(ctx->mrec)) { 1839 ntfs_error(vol->sb, "Failed to restore attribute search context.%s", es); 1840 NVolSetErrors(vol); 1841 } 1842 ntfs_attr_put_search_ctx(ctx); 1843 } 1844 if (!IS_ERR(mrec)) 1845 unmap_mft_record(mft_ni); 1846 return ret; 1847 } 1848 1849 /* 1850 * ntfs_mft_record_layout - layout an mft record into a memory buffer 1851 * @vol: volume to which the mft record will belong 1852 * @mft_no: mft reference specifying the mft record number 1853 * @m: destination buffer of size >= @vol->mft_record_size bytes 1854 * 1855 * Layout an empty, unused mft record with the mft record number @mft_no into 1856 * the buffer @m. 
The volume @vol is needed because the mft record structure 1857 * was modified in NTFS 3.1 so we need to know which volume version this mft 1858 * record will be used on. 1859 * 1860 * Return 0 on success and -errno on error. 1861 */ 1862 static int ntfs_mft_record_layout(const struct ntfs_volume *vol, const s64 mft_no, 1863 struct mft_record *m) 1864 { 1865 struct attr_record *a; 1866 1867 ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 1868 if (mft_no >= (1ll << 32)) { 1869 ntfs_error(vol->sb, "Mft record number 0x%llx exceeds maximum of 2^32.", 1870 (long long)mft_no); 1871 return -ERANGE; 1872 } 1873 /* Start by clearing the whole mft record to gives us a clean slate. */ 1874 memset(m, 0, vol->mft_record_size); 1875 /* Aligned to 2-byte boundary. */ 1876 if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver)) 1877 m->usa_ofs = cpu_to_le16((sizeof(struct mft_record_old) + 1) & ~1); 1878 else { 1879 m->usa_ofs = cpu_to_le16((sizeof(struct mft_record) + 1) & ~1); 1880 /* 1881 * Set the NTFS 3.1+ specific fields while we know that the 1882 * volume version is 3.1+. 1883 */ 1884 m->reserved = 0; 1885 m->mft_record_number = cpu_to_le32((u32)mft_no); 1886 } 1887 m->magic = magic_FILE; 1888 if (vol->mft_record_size >= NTFS_BLOCK_SIZE) 1889 m->usa_count = cpu_to_le16(vol->mft_record_size / 1890 NTFS_BLOCK_SIZE + 1); 1891 else { 1892 m->usa_count = cpu_to_le16(1); 1893 ntfs_warning(vol->sb, 1894 "Sector size is bigger than mft record size. Setting usa_count to 1. If chkdsk reports this as corruption"); 1895 } 1896 /* Set the update sequence number to 1. */ 1897 *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1); 1898 m->lsn = 0; 1899 m->sequence_number = cpu_to_le16(1); 1900 m->link_count = 0; 1901 /* 1902 * Place the attributes straight after the update sequence array, 1903 * aligned to 8-byte boundary. 
1904 */ 1905 m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + 1906 (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); 1907 m->flags = 0; 1908 /* 1909 * Using attrs_offset plus eight bytes (for the termination attribute). 1910 * attrs_offset is already aligned to 8-byte boundary, so no need to 1911 * align again. 1912 */ 1913 m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8); 1914 m->bytes_allocated = cpu_to_le32(vol->mft_record_size); 1915 m->base_mft_record = 0; 1916 m->next_attr_instance = 0; 1917 /* Add the termination attribute. */ 1918 a = (struct attr_record *)((u8 *)m + le16_to_cpu(m->attrs_offset)); 1919 a->type = AT_END; 1920 a->length = 0; 1921 ntfs_debug("Done."); 1922 return 0; 1923 } 1924 1925 /* 1926 * ntfs_mft_record_format - format an mft record on an ntfs volume 1927 * @vol: volume on which to format the mft record 1928 * @mft_no: mft record number to format 1929 * 1930 * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused 1931 * mft record into the appropriate place of the mft data attribute. This is 1932 * used when extending the mft data attribute. 1933 * 1934 * Return 0 on success and -errno on error. 1935 */ 1936 static int ntfs_mft_record_format(const struct ntfs_volume *vol, const s64 mft_no) 1937 { 1938 loff_t i_size; 1939 struct inode *mft_vi = vol->mft_ino; 1940 struct folio *folio; 1941 struct mft_record *m; 1942 pgoff_t index, end_index; 1943 unsigned int ofs; 1944 int err; 1945 1946 ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 1947 /* 1948 * The index into the page cache and the offset within the page cache 1949 * page of the wanted mft record. 1950 */ 1951 index = NTFS_MFT_NR_TO_PIDX(vol, mft_no); 1952 ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no); 1953 /* The maximum valid index into the page cache for $MFT's data. 
*/ 1954 i_size = i_size_read(mft_vi); 1955 end_index = i_size >> PAGE_SHIFT; 1956 if (unlikely(index >= end_index)) { 1957 if (unlikely(index > end_index || 1958 ofs + vol->mft_record_size > (i_size & ~PAGE_MASK))) { 1959 ntfs_error(vol->sb, "Tried to format non-existing mft record 0x%llx.", 1960 (long long)mft_no); 1961 return -ENOENT; 1962 } 1963 } 1964 1965 /* Read, map, and pin the folio containing the mft record. */ 1966 folio = read_mapping_folio(mft_vi->i_mapping, index, NULL); 1967 if (IS_ERR(folio)) { 1968 ntfs_error(vol->sb, "Failed to map page containing mft record to format 0x%llx.", 1969 (long long)mft_no); 1970 return PTR_ERR(folio); 1971 } 1972 folio_lock(folio); 1973 folio_clear_uptodate(folio); 1974 m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs); 1975 err = ntfs_mft_record_layout(vol, mft_no, m); 1976 if (unlikely(err)) { 1977 ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.", 1978 (long long)mft_no); 1979 folio_mark_uptodate(folio); 1980 folio_unlock(folio); 1981 kunmap_local(m); 1982 folio_put(folio); 1983 return err; 1984 } 1985 pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size); 1986 folio_mark_uptodate(folio); 1987 /* 1988 * Make sure the mft record is written out to disk. We could use 1989 * ilookup5() to check if an inode is in icache and so on but this is 1990 * unnecessary as ntfs_writepage() will write the dirty record anyway. 1991 */ 1992 ntfs_mft_mark_dirty(folio); 1993 folio_unlock(folio); 1994 kunmap_local(m); 1995 folio_put(folio); 1996 ntfs_debug("Done."); 1997 return 0; 1998 } 1999 2000 /* 2001 * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume 2002 * @vol: [IN] volume on which to allocate the mft record 2003 * @mode: [IN] mode if want a file or directory, i.e. 
base inode or 0 2004 * @ni: [OUT] on success, set to the allocated ntfs inode 2005 * @base_ni: [IN] open base inode if allocating an extent mft record or NULL 2006 * @ni_mrec: [OUT] on successful return this is the mapped mft record 2007 * 2008 * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol. 2009 * 2010 * If @base_ni is NULL make the mft record a base mft record, i.e. a file or 2011 * direvctory inode, and allocate it at the default allocator position. In 2012 * this case @mode is the file mode as given to us by the caller. We in 2013 * particular use @mode to distinguish whether a file or a directory is being 2014 * created (S_IFDIR(mode) and S_IFREG(mode), respectively). 2015 * 2016 * If @base_ni is not NULL make the allocated mft record an extent record, 2017 * allocate it starting at the mft record after the base mft record and attach 2018 * the allocated and opened ntfs inode to the base inode @base_ni. In this 2019 * case @mode must be 0 as it is meaningless for extent inodes. 2020 * 2021 * You need to check the return value with IS_ERR(). If false, the function 2022 * was successful and the return value is the now opened ntfs inode of the 2023 * allocated mft record. *@mrec is then set to the allocated, mapped, pinned, 2024 * and locked mft record. If IS_ERR() is true, the function failed and the 2025 * error code is obtained from PTR_ERR(return value). *@mrec is undefined in 2026 * this case. 2027 * 2028 * Allocation strategy: 2029 * 2030 * To find a free mft record, we scan the mft bitmap for a zero bit. To 2031 * optimize this we start scanning at the place specified by @base_ni or if 2032 * @base_ni is NULL we start where we last stopped and we perform wrap around 2033 * when we reach the end. 
Note, we do not try to allocate mft records below 2034 * number 64 because numbers 0 to 15 are the defined system files anyway and 16 2035 * to 64 are special in that they are used for storing extension mft records 2036 * for the $DATA attribute of $MFT. This is required to avoid the possibility 2037 * of creating a runlist with a circular dependency which once written to disk 2038 * can never be read in again. Windows will only use records 16 to 24 for 2039 * normal files if the volume is completely out of space. We never use them 2040 * which means that when the volume is really out of space we cannot create any 2041 * more files while Windows can still create up to 8 small files. We can start 2042 * doing this at some later time, it does not matter much for now. 2043 * 2044 * When scanning the mft bitmap, we only search up to the last allocated mft 2045 * record. If there are no free records left in the range 64 to number of 2046 * allocated mft records, then we extend the $MFT/$DATA attribute in order to 2047 * create free mft records. We extend the allocated size of $MFT/$DATA by 16 2048 * records at a time or one cluster, if cluster size is above 16kiB. If there 2049 * is not sufficient space to do this, we try to extend by a single mft record 2050 * or one cluster, if cluster size is above the mft record size. 2051 * 2052 * No matter how many mft records we allocate, we initialize only the first 2053 * allocated mft record, incrementing mft data size and initialized size 2054 * accordingly, open an struct ntfs_inode for it and return it to the caller, unless 2055 * there are less than 64 mft records, in which case we allocate and initialize 2056 * mft records until we reach record 64 which we consider as the first free mft 2057 * record for use by normal files. 2058 * 2059 * If during any stage we overflow the initialized data in the mft bitmap, we 2060 * extend the initialized size (and data size) by 8 bytes, allocating another 2061 * cluster if required. 
The bitmap data size has to be at least equal to the 2062 * number of mft records in the mft, but it can be bigger, in which case the 2063 * superfluous bits are padded with zeroes. 2064 * 2065 * Thus, when we return successfully (IS_ERR() is false), we will have: 2066 * - initialized / extended the mft bitmap if necessary, 2067 * - initialized / extended the mft data if necessary, 2068 * - set the bit corresponding to the mft record being allocated in the 2069 * mft bitmap, 2070 * - opened an struct ntfs_inode for the allocated mft record, and we will have 2071 * - returned the struct ntfs_inode as well as the allocated mapped, pinned, and 2072 * locked mft record. 2073 * 2074 * On error, the volume will be left in a consistent state and no record will 2075 * be allocated. If rolling back a partial operation fails, we may leave some 2076 * inconsistent metadata in which case we set NVolErrors() so the volume is 2077 * left dirty when unmounted. 2078 * 2079 * Note, this function cannot make use of most of the normal functions, like 2080 * for example for attribute resizing, etc, because when the run list overflows 2081 * the base mft record and an attribute list is used, it is very important that 2082 * the extension mft records used to store the $DATA attribute of $MFT can be 2083 * reached without having to read the information contained inside them, as 2084 * this would make it impossible to find them in the first place after the 2085 * volume is unmounted. $MFT/$BITMAP probably does not need to follow this 2086 * rule because the bitmap is not essential for finding the mft records, but on 2087 * the other hand, handling the bitmap in this special way would make life 2088 * easier because otherwise there might be circular invocations of functions 2089 * when reading the bitmap. 
2090 */ 2091 int ntfs_mft_record_alloc(struct ntfs_volume *vol, const int mode, 2092 struct ntfs_inode **ni, struct ntfs_inode *base_ni, 2093 struct mft_record **ni_mrec) 2094 { 2095 s64 ll, bit, old_data_initialized, old_data_size; 2096 unsigned long flags; 2097 struct folio *folio; 2098 struct ntfs_inode *mft_ni, *mftbmp_ni; 2099 struct ntfs_attr_search_ctx *ctx; 2100 struct mft_record *m = NULL; 2101 struct attr_record *a; 2102 pgoff_t index; 2103 unsigned int ofs; 2104 int err; 2105 __le16 seq_no, usn; 2106 bool record_formatted = false; 2107 unsigned int memalloc_flags; 2108 2109 if (base_ni && *ni) 2110 return -EINVAL; 2111 2112 /* @mode and @base_ni are mutually exclusive. */ 2113 if (mode && base_ni) 2114 return -EINVAL; 2115 2116 if (base_ni) 2117 ntfs_debug("Entering (allocating an extent mft record for base mft record 0x%llx).", 2118 (long long)base_ni->mft_no); 2119 else 2120 ntfs_debug("Entering (allocating a base mft record)."); 2121 2122 memalloc_flags = memalloc_nofs_save(); 2123 2124 mft_ni = NTFS_I(vol->mft_ino); 2125 if (!base_ni || base_ni->mft_no != FILE_MFT) 2126 mutex_lock(&mft_ni->mrec_lock); 2127 mftbmp_ni = NTFS_I(vol->mftbmp_ino); 2128 search_free_rec: 2129 if (!base_ni || base_ni->mft_no != FILE_MFT) 2130 down_write(&vol->mftbmp_lock); 2131 bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni); 2132 if (bit >= 0) { 2133 ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", 2134 (long long)bit); 2135 goto have_alloc_rec; 2136 } 2137 if (bit != -ENOSPC) { 2138 if (!base_ni || base_ni->mft_no != FILE_MFT) { 2139 up_write(&vol->mftbmp_lock); 2140 mutex_unlock(&mft_ni->mrec_lock); 2141 } 2142 memalloc_nofs_restore(memalloc_flags); 2143 return bit; 2144 } 2145 2146 if (base_ni && base_ni->mft_no == FILE_MFT) { 2147 memalloc_nofs_restore(memalloc_flags); 2148 return bit; 2149 } 2150 2151 /* 2152 * No free mft records left. 
If the mft bitmap already covers more 2153 * than the currently used mft records, the next records are all free, 2154 * so we can simply allocate the first unused mft record. 2155 * Note: We also have to make sure that the mft bitmap at least covers 2156 * the first 24 mft records as they are special and whilst they may not 2157 * be in use, we do not allocate from them. 2158 */ 2159 read_lock_irqsave(&mft_ni->size_lock, flags); 2160 ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 2161 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2162 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2163 old_data_initialized = mftbmp_ni->initialized_size; 2164 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 2165 if (old_data_initialized << 3 > ll && 2166 old_data_initialized > RESERVED_MFT_RECORDS / 8) { 2167 bit = ll; 2168 if (bit < RESERVED_MFT_RECORDS) 2169 bit = RESERVED_MFT_RECORDS; 2170 if (unlikely(bit >= (1ll << 32))) 2171 goto max_err_out; 2172 ntfs_debug("Found free record (#2), bit 0x%llx.", 2173 (long long)bit); 2174 goto found_free_rec; 2175 } 2176 /* 2177 * The mft bitmap needs to be expanded until it covers the first unused 2178 * mft record that we can allocate. 2179 * Note: The smallest mft record we allocate is mft record 24. 2180 */ 2181 bit = old_data_initialized << 3; 2182 if (unlikely(bit >= (1ll << 32))) 2183 goto max_err_out; 2184 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2185 old_data_size = mftbmp_ni->allocated_size; 2186 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2187 old_data_size, i_size_read(vol->mftbmp_ino), 2188 old_data_initialized); 2189 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 2190 if (old_data_initialized + 8 > old_data_size) { 2191 /* Need to extend bitmap by one more cluster. 
*/ 2192 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 2193 err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2194 if (err == -EAGAIN) 2195 err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2196 2197 if (unlikely(err)) { 2198 if (!base_ni || base_ni->mft_no != FILE_MFT) 2199 up_write(&vol->mftbmp_lock); 2200 goto err_out; 2201 } 2202 #ifdef DEBUG 2203 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2204 ntfs_debug("Status of mftbmp after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2205 mftbmp_ni->allocated_size, 2206 i_size_read(vol->mftbmp_ino), 2207 mftbmp_ni->initialized_size); 2208 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 2209 #endif /* DEBUG */ 2210 } 2211 /* 2212 * We now have sufficient allocated space, extend the initialized_size 2213 * as well as the data_size if necessary and fill the new space with 2214 * zeroes. 2215 */ 2216 err = ntfs_mft_bitmap_extend_initialized_nolock(vol); 2217 if (unlikely(err)) { 2218 if (!base_ni || base_ni->mft_no != FILE_MFT) 2219 up_write(&vol->mftbmp_lock); 2220 goto err_out; 2221 } 2222 #ifdef DEBUG 2223 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2224 ntfs_debug("Status of mftbmp after initialized extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2225 mftbmp_ni->allocated_size, 2226 i_size_read(vol->mftbmp_ino), 2227 mftbmp_ni->initialized_size); 2228 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 2229 #endif /* DEBUG */ 2230 ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 2231 found_free_rec: 2232 /* @bit is the found free mft record, allocate it in the mft bitmap. 
*/ 2233 ntfs_debug("At found_free_rec."); 2234 err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit); 2235 if (unlikely(err)) { 2236 ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap."); 2237 if (!base_ni || base_ni->mft_no != FILE_MFT) 2238 up_write(&vol->mftbmp_lock); 2239 goto err_out; 2240 } 2241 ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit); 2242 have_alloc_rec: 2243 /* 2244 * The mft bitmap is now uptodate. Deal with mft data attribute now. 2245 * Note, we keep hold of the mft bitmap lock for writing until all 2246 * modifications to the mft data attribute are complete, too, as they 2247 * will impact decisions for mft bitmap and mft record allocation done 2248 * by a parallel allocation and if the lock is not maintained a 2249 * parallel allocation could allocate the same mft record as this one. 2250 */ 2251 ll = (bit + 1) << vol->mft_record_size_bits; 2252 read_lock_irqsave(&mft_ni->size_lock, flags); 2253 old_data_initialized = mft_ni->initialized_size; 2254 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2255 if (ll <= old_data_initialized) { 2256 ntfs_debug("Allocated mft record already initialized."); 2257 goto mft_rec_already_initialized; 2258 } 2259 ntfs_debug("Initializing allocated mft record."); 2260 /* 2261 * The mft record is outside the initialized data. Extend the mft data 2262 * attribute until it covers the allocated record. The loop is only 2263 * actually traversed more than once when a freshly formatted volume is 2264 * first written to so it optimizes away nicely in the common case. 
2265 */ 2266 if (!base_ni || base_ni->mft_no != FILE_MFT) { 2267 read_lock_irqsave(&mft_ni->size_lock, flags); 2268 ntfs_debug("Status of mft data before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2269 mft_ni->allocated_size, i_size_read(vol->mft_ino), 2270 mft_ni->initialized_size); 2271 while (ll > mft_ni->allocated_size) { 2272 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2273 err = ntfs_mft_data_extend_allocation_nolock(vol); 2274 if (err == -EAGAIN) 2275 err = ntfs_mft_data_extend_allocation_nolock(vol); 2276 2277 if (unlikely(err)) { 2278 ntfs_error(vol->sb, "Failed to extend mft data allocation."); 2279 goto undo_mftbmp_alloc_nolock; 2280 } 2281 read_lock_irqsave(&mft_ni->size_lock, flags); 2282 ntfs_debug("Status of mft data after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2283 mft_ni->allocated_size, i_size_read(vol->mft_ino), 2284 mft_ni->initialized_size); 2285 } 2286 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2287 } else if (ll > mft_ni->allocated_size) { 2288 err = -ENOSPC; 2289 goto undo_mftbmp_alloc_nolock; 2290 } 2291 /* 2292 * Extend mft data initialized size (and data size of course) to reach 2293 * the allocated mft record, formatting the mft records allong the way. 2294 * Note: We only modify the struct ntfs_inode structure as that is all that is 2295 * needed by ntfs_mft_record_format(). We will update the attribute 2296 * record itself in one fell swoop later on. 
2297 */ 2298 write_lock_irqsave(&mft_ni->size_lock, flags); 2299 old_data_initialized = mft_ni->initialized_size; 2300 old_data_size = vol->mft_ino->i_size; 2301 while (ll > mft_ni->initialized_size) { 2302 s64 new_initialized_size, mft_no; 2303 2304 new_initialized_size = mft_ni->initialized_size + 2305 vol->mft_record_size; 2306 mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 2307 if (new_initialized_size > i_size_read(vol->mft_ino)) 2308 i_size_write(vol->mft_ino, new_initialized_size); 2309 write_unlock_irqrestore(&mft_ni->size_lock, flags); 2310 ntfs_debug("Initializing mft record 0x%llx.", 2311 (long long)mft_no); 2312 err = ntfs_mft_record_format(vol, mft_no); 2313 if (unlikely(err)) { 2314 ntfs_error(vol->sb, "Failed to format mft record."); 2315 goto undo_data_init; 2316 } 2317 write_lock_irqsave(&mft_ni->size_lock, flags); 2318 mft_ni->initialized_size = new_initialized_size; 2319 } 2320 write_unlock_irqrestore(&mft_ni->size_lock, flags); 2321 record_formatted = true; 2322 /* Update the mft data attribute record to reflect the new sizes. 
*/ 2323 m = map_mft_record(mft_ni); 2324 if (IS_ERR(m)) { 2325 ntfs_error(vol->sb, "Failed to map mft record."); 2326 err = PTR_ERR(m); 2327 goto undo_data_init; 2328 } 2329 ctx = ntfs_attr_get_search_ctx(mft_ni, m); 2330 if (unlikely(!ctx)) { 2331 ntfs_error(vol->sb, "Failed to get search context."); 2332 err = -ENOMEM; 2333 unmap_mft_record(mft_ni); 2334 goto undo_data_init; 2335 } 2336 err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 2337 CASE_SENSITIVE, 0, NULL, 0, ctx); 2338 if (unlikely(err)) { 2339 ntfs_error(vol->sb, "Failed to find first attribute extent of mft data attribute."); 2340 ntfs_attr_put_search_ctx(ctx); 2341 unmap_mft_record(mft_ni); 2342 goto undo_data_init; 2343 } 2344 a = ctx->attr; 2345 read_lock_irqsave(&mft_ni->size_lock, flags); 2346 a->data.non_resident.initialized_size = 2347 cpu_to_le64(mft_ni->initialized_size); 2348 a->data.non_resident.data_size = 2349 cpu_to_le64(i_size_read(vol->mft_ino)); 2350 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2351 /* Ensure the changes make it to disk. */ 2352 mark_mft_record_dirty(ctx->ntfs_ino); 2353 ntfs_attr_put_search_ctx(ctx); 2354 unmap_mft_record(mft_ni); 2355 read_lock_irqsave(&mft_ni->size_lock, flags); 2356 ntfs_debug("Status of mft data after mft record initialization: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2357 mft_ni->allocated_size, i_size_read(vol->mft_ino), 2358 mft_ni->initialized_size); 2359 WARN_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); 2360 WARN_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); 2361 read_unlock_irqrestore(&mft_ni->size_lock, flags); 2362 mft_rec_already_initialized: 2363 /* 2364 * We can finally drop the mft bitmap lock as the mft data attribute 2365 * has been fully updated. 
The only disparity left is that the 2366 * allocated mft record still needs to be marked as in use to match the 2367 * set bit in the mft bitmap but this is actually not a problem since 2368 * this mft record is not referenced from anywhere yet and the fact 2369 * that it is allocated in the mft bitmap means that no-one will try to 2370 * allocate it either. 2371 */ 2372 if (!base_ni || base_ni->mft_no != FILE_MFT) 2373 up_write(&vol->mftbmp_lock); 2374 /* 2375 * We now have allocated and initialized the mft record. Calculate the 2376 * index of and the offset within the page cache page the record is in. 2377 */ 2378 index = NTFS_MFT_NR_TO_PIDX(vol, bit); 2379 ofs = NTFS_MFT_NR_TO_POFS(vol, bit); 2380 /* Read, map, and pin the folio containing the mft record. */ 2381 folio = read_mapping_folio(vol->mft_ino->i_mapping, index, NULL); 2382 if (IS_ERR(folio)) { 2383 ntfs_error(vol->sb, "Failed to map page containing allocated mft record 0x%llx.", 2384 bit); 2385 err = PTR_ERR(folio); 2386 goto undo_mftbmp_alloc; 2387 } 2388 folio_lock(folio); 2389 folio_clear_uptodate(folio); 2390 m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs); 2391 /* If we just formatted the mft record no need to do it again. */ 2392 if (!record_formatted) { 2393 /* Sanity check that the mft record is really not in use. */ 2394 if (ntfs_is_file_record(m->magic) && 2395 (m->flags & MFT_RECORD_IN_USE)) { 2396 ntfs_warning(vol->sb, 2397 "Mft record 0x%llx was marked free in mft bitmap but is marked used itself. Unmount and run chkdsk.", 2398 bit); 2399 folio_mark_uptodate(folio); 2400 folio_unlock(folio); 2401 kunmap_local(m); 2402 folio_put(folio); 2403 NVolSetErrors(vol); 2404 goto search_free_rec; 2405 } 2406 /* 2407 * We need to (re-)format the mft record, preserving the 2408 * sequence number if it is not zero as well as the update 2409 * sequence number if it is not zero or -1 (0xffff). 
This 2410 * means we do not need to care whether or not something went 2411 * wrong with the previous mft record. 2412 */ 2413 seq_no = m->sequence_number; 2414 usn = *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)); 2415 err = ntfs_mft_record_layout(vol, bit, m); 2416 if (unlikely(err)) { 2417 ntfs_error(vol->sb, "Failed to layout allocated mft record 0x%llx.", 2418 bit); 2419 folio_mark_uptodate(folio); 2420 folio_unlock(folio); 2421 kunmap_local(m); 2422 folio_put(folio); 2423 goto undo_mftbmp_alloc; 2424 } 2425 if (seq_no) 2426 m->sequence_number = seq_no; 2427 if (usn && le16_to_cpu(usn) != 0xffff) 2428 *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = usn; 2429 pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size); 2430 } 2431 /* Set the mft record itself in use. */ 2432 m->flags |= MFT_RECORD_IN_USE; 2433 if (S_ISDIR(mode)) 2434 m->flags |= MFT_RECORD_IS_DIRECTORY; 2435 folio_mark_uptodate(folio); 2436 if (base_ni) { 2437 struct mft_record *m_tmp; 2438 2439 /* 2440 * Setup the base mft record in the extent mft record. This 2441 * completes initialization of the allocated extent mft record 2442 * and we can simply use it with map_extent_mft_record(). 2443 */ 2444 m->base_mft_record = MK_LE_MREF(base_ni->mft_no, 2445 base_ni->seq_no); 2446 /* 2447 * Allocate an extent inode structure for the new mft record, 2448 * attach it to the base inode @base_ni and map, pin, and lock 2449 * its, i.e. the allocated, mft record. 2450 */ 2451 m_tmp = map_extent_mft_record(base_ni, 2452 MK_MREF(bit, le16_to_cpu(m->sequence_number)), 2453 ni); 2454 if (IS_ERR(m_tmp)) { 2455 ntfs_error(vol->sb, "Failed to map allocated extent mft record 0x%llx.", 2456 bit); 2457 err = PTR_ERR(m_tmp); 2458 /* Set the mft record itself not in use. */ 2459 m->flags &= cpu_to_le16( 2460 ~le16_to_cpu(MFT_RECORD_IN_USE)); 2461 /* Make sure the mft record is written out to disk. 
*/ 2462 ntfs_mft_mark_dirty(folio); 2463 folio_unlock(folio); 2464 kunmap_local(m); 2465 folio_put(folio); 2466 goto undo_mftbmp_alloc; 2467 } 2468 2469 /* 2470 * Make sure the allocated mft record is written out to disk. 2471 * No need to set the inode dirty because the caller is going 2472 * to do that anyway after finishing with the new extent mft 2473 * record (e.g. at a minimum a new attribute will be added to 2474 * the mft record. 2475 */ 2476 ntfs_mft_mark_dirty(folio); 2477 folio_unlock(folio); 2478 /* 2479 * Need to unmap the page since map_extent_mft_record() mapped 2480 * it as well so we have it mapped twice at the moment. 2481 */ 2482 kunmap_local(m); 2483 folio_put(folio); 2484 } else { 2485 /* 2486 * Manually map, pin, and lock the mft record as we already 2487 * have its page mapped and it is very easy to do. 2488 */ 2489 (*ni)->seq_no = le16_to_cpu(m->sequence_number); 2490 /* 2491 * Make sure the allocated mft record is written out to disk. 2492 * NOTE: We do not set the ntfs inode dirty because this would 2493 * fail in ntfs_write_inode() because the inode does not have a 2494 * standard information attribute yet. Also, there is no need 2495 * to set the inode dirty because the caller is going to do 2496 * that anyway after finishing with the new mft record (e.g. at 2497 * a minimum some new attributes will be added to the mft 2498 * record. 2499 */ 2500 2501 (*ni)->mrec = kmalloc(vol->mft_record_size, GFP_NOFS); 2502 if (!(*ni)->mrec) { 2503 folio_unlock(folio); 2504 kunmap_local(m); 2505 folio_put(folio); 2506 goto undo_mftbmp_alloc; 2507 } 2508 2509 memcpy((*ni)->mrec, m, vol->mft_record_size); 2510 post_read_mst_fixup((struct ntfs_record *)(*ni)->mrec, vol->mft_record_size); 2511 ntfs_mft_mark_dirty(folio); 2512 folio_unlock(folio); 2513 (*ni)->folio = folio; 2514 (*ni)->folio_ofs = ofs; 2515 atomic_inc(&(*ni)->count); 2516 /* Update the default mft allocation position. 
*/ 2517 vol->mft_data_pos = bit + 1; 2518 } 2519 if (!base_ni || base_ni->mft_no != FILE_MFT) 2520 mutex_unlock(&mft_ni->mrec_lock); 2521 memalloc_nofs_restore(memalloc_flags); 2522 2523 /* 2524 * Return the opened, allocated inode of the allocated mft record as 2525 * well as the mapped, pinned, and locked mft record. 2526 */ 2527 ntfs_debug("Returning opened, allocated %sinode 0x%llx.", 2528 base_ni ? "extent " : "", bit); 2529 (*ni)->mft_no = bit; 2530 if (ni_mrec) 2531 *ni_mrec = (*ni)->mrec; 2532 ntfs_dec_free_mft_records(vol, 1); 2533 return 0; 2534 undo_data_init: 2535 write_lock_irqsave(&mft_ni->size_lock, flags); 2536 mft_ni->initialized_size = old_data_initialized; 2537 i_size_write(vol->mft_ino, old_data_size); 2538 write_unlock_irqrestore(&mft_ni->size_lock, flags); 2539 goto undo_mftbmp_alloc_nolock; 2540 undo_mftbmp_alloc: 2541 if (!base_ni || base_ni->mft_no != FILE_MFT) 2542 down_write(&vol->mftbmp_lock); 2543 undo_mftbmp_alloc_nolock: 2544 if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) { 2545 ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 2546 NVolSetErrors(vol); 2547 } 2548 if (!base_ni || base_ni->mft_no != FILE_MFT) 2549 up_write(&vol->mftbmp_lock); 2550 err_out: 2551 if (!base_ni || base_ni->mft_no != FILE_MFT) 2552 mutex_unlock(&mft_ni->mrec_lock); 2553 memalloc_nofs_restore(memalloc_flags); 2554 return err; 2555 max_err_out: 2556 ntfs_warning(vol->sb, 2557 "Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached."); 2558 if (!base_ni || base_ni->mft_no != FILE_MFT) { 2559 up_write(&vol->mftbmp_lock); 2560 mutex_unlock(&mft_ni->mrec_lock); 2561 } 2562 memalloc_nofs_restore(memalloc_flags); 2563 return -ENOSPC; 2564 } 2565 2566 /* 2567 * ntfs_mft_record_free - free an mft record on an ntfs volume 2568 * @vol: volume on which to free the mft record 2569 * @ni: open ntfs inode of the mft record to free 2570 * 2571 * Free the mft record of the open inode @ni on the mounted ntfs volume 
@vol. 2572 * Note that this function calls ntfs_inode_close() internally and hence you 2573 * cannot use the pointer @ni any more after this function returns success. 2574 * 2575 * On success return 0 and on error return -1 with errno set to the error code. 2576 */ 2577 int ntfs_mft_record_free(struct ntfs_volume *vol, struct ntfs_inode *ni) 2578 { 2579 u64 mft_no; 2580 int err; 2581 u16 seq_no; 2582 __le16 old_seq_no; 2583 struct mft_record *ni_mrec; 2584 unsigned int memalloc_flags; 2585 struct ntfs_inode *base_ni; 2586 2587 if (!vol || !ni) 2588 return -EINVAL; 2589 2590 ntfs_debug("Entering for inode 0x%llx.\n", (long long)ni->mft_no); 2591 2592 ni_mrec = map_mft_record(ni); 2593 if (IS_ERR(ni_mrec)) 2594 return -EIO; 2595 2596 /* Cache the mft reference for later. */ 2597 mft_no = ni->mft_no; 2598 2599 /* Mark the mft record as not in use. */ 2600 ni_mrec->flags &= ~MFT_RECORD_IN_USE; 2601 2602 /* Increment the sequence number, skipping zero, if it is not zero. */ 2603 old_seq_no = ni_mrec->sequence_number; 2604 seq_no = le16_to_cpu(old_seq_no); 2605 if (seq_no == 0xffff) 2606 seq_no = 1; 2607 else if (seq_no) 2608 seq_no++; 2609 ni_mrec->sequence_number = cpu_to_le16(seq_no); 2610 2611 down_read(&NTFS_I(vol->mft_ino)->runlist.lock); 2612 err = ntfs_get_block_mft_record(NTFS_I(vol->mft_ino), ni); 2613 up_read(&NTFS_I(vol->mft_ino)->runlist.lock); 2614 if (err) { 2615 unmap_mft_record(ni); 2616 return err; 2617 } 2618 2619 /* 2620 * Set the ntfs inode dirty and write it out. We do not need to worry 2621 * about the base inode here since whatever caused the extent mft 2622 * record to be freed is guaranteed to do it already. 2623 */ 2624 NInoSetDirty(ni); 2625 err = write_mft_record(ni, ni_mrec, 0); 2626 if (err) 2627 goto sync_rollback; 2628 2629 if (likely(ni->nr_extents >= 0)) 2630 base_ni = ni; 2631 else 2632 base_ni = ni->ext.base_ntfs_ino; 2633 2634 /* Clear the bit in the $MFT/$BITMAP corresponding to this record. 
*/ 2635 memalloc_flags = memalloc_nofs_save(); 2636 if (base_ni->mft_no != FILE_MFT) 2637 down_write(&vol->mftbmp_lock); 2638 err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no); 2639 if (base_ni->mft_no != FILE_MFT) 2640 up_write(&vol->mftbmp_lock); 2641 memalloc_nofs_restore(memalloc_flags); 2642 if (err) 2643 goto bitmap_rollback; 2644 2645 unmap_mft_record(ni); 2646 ntfs_inc_free_mft_records(vol, 1); 2647 return 0; 2648 2649 /* Rollback what we did... */ 2650 bitmap_rollback: 2651 memalloc_flags = memalloc_nofs_save(); 2652 if (base_ni->mft_no != FILE_MFT) 2653 down_write(&vol->mftbmp_lock); 2654 if (ntfs_bitmap_set_bit(vol->mftbmp_ino, mft_no)) 2655 ntfs_error(vol->sb, "ntfs_bitmap_set_bit failed in bitmap_rollback\n"); 2656 if (base_ni->mft_no != FILE_MFT) 2657 up_write(&vol->mftbmp_lock); 2658 memalloc_nofs_restore(memalloc_flags); 2659 sync_rollback: 2660 ntfs_error(vol->sb, 2661 "Eeek! Rollback failed in %s. Leaving inconsistent metadata!\n", __func__); 2662 ni_mrec->flags |= MFT_RECORD_IN_USE; 2663 ni_mrec->sequence_number = old_seq_no; 2664 NInoSetDirty(ni); 2665 write_mft_record(ni, ni_mrec, 0); 2666 unmap_mft_record(ni); 2667 return err; 2668 } 2669 2670 static s64 lcn_from_index(struct ntfs_volume *vol, struct ntfs_inode *ni, 2671 unsigned long index) 2672 { 2673 s64 vcn; 2674 s64 lcn; 2675 2676 vcn = ntfs_pidx_to_cluster(vol, index); 2677 2678 down_read(&ni->runlist.lock); 2679 lcn = ntfs_attr_vcn_to_lcn_nolock(ni, vcn, false); 2680 up_read(&ni->runlist.lock); 2681 2682 return lcn; 2683 } 2684 2685 /* 2686 * ntfs_write_mft_block - Write back a folio containing MFT records 2687 * @folio: The folio to write back (contains one or more MFT records) 2688 * @wbc: Writeback control structure 2689 * 2690 * This function is called as part of the address_space_operations 2691 * .writepages implementation for the $MFT inode (or $MFTMirr). 2692 * It handles writing one folio (normally 4KiB page) worth of MFT records 2693 * to the underlying block device. 
2694 * 2695 * Return: 0 on success, or -errno on error. 2696 */ 2697 static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *wbc) 2698 { 2699 struct address_space *mapping = folio->mapping; 2700 struct inode *vi = mapping->host; 2701 struct ntfs_inode *ni = NTFS_I(vi); 2702 struct ntfs_volume *vol = ni->vol; 2703 u8 *kaddr; 2704 struct ntfs_inode **locked_nis __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE, 2705 sizeof(struct ntfs_inode *), GFP_NOFS); 2706 int nr_locked_nis = 0, err = 0, mft_ofs, prev_mft_ofs; 2707 struct inode **ref_inos __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE, 2708 sizeof(struct inode *), GFP_NOFS); 2709 int nr_ref_inos = 0; 2710 struct bio *bio = NULL; 2711 u64 mft_no; 2712 struct ntfs_inode *tni; 2713 s64 lcn; 2714 s64 vcn = ntfs_pidx_to_cluster(vol, folio->index); 2715 s64 end_vcn = ntfs_bytes_to_cluster(vol, ni->allocated_size); 2716 unsigned int folio_sz; 2717 struct runlist_element *rl; 2718 loff_t i_size = i_size_read(vi); 2719 2720 ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.", 2721 ni->mft_no, ni->type, folio->index); 2722 2723 if (!locked_nis || !ref_inos) 2724 return -ENOMEM; 2725 2726 /* We have to zero every time due to mmap-at-end-of-file. */ 2727 if (folio->index >= (i_size >> folio_shift(folio))) 2728 /* The page straddles i_size. */ 2729 folio_zero_segment(folio, 2730 offset_in_folio(folio, i_size), 2731 folio_size(folio)); 2732 2733 lcn = lcn_from_index(vol, ni, folio->index); 2734 if (lcn <= LCN_HOLE) { 2735 folio_start_writeback(folio); 2736 folio_unlock(folio); 2737 folio_end_writeback(folio); 2738 return -EIO; 2739 } 2740 2741 /* Map folio so we can access its contents. */ 2742 kaddr = kmap_local_folio(folio, 0); 2743 /* Clear the page uptodate flag whilst the mst fixups are applied. 
*/ 2744 folio_clear_uptodate(folio); 2745 2746 for (mft_ofs = 0; mft_ofs < PAGE_SIZE && vcn < end_vcn; 2747 mft_ofs += vol->mft_record_size) { 2748 /* Get the mft record number. */ 2749 mft_no = (((s64)folio->index << PAGE_SHIFT) + mft_ofs) >> 2750 vol->mft_record_size_bits; 2751 vcn = ntfs_mft_no_to_cluster(vol, mft_no); 2752 /* Check whether to write this mft record. */ 2753 tni = NULL; 2754 if (ntfs_may_write_mft_record(vol, mft_no, 2755 (struct mft_record *)(kaddr + mft_ofs), 2756 &tni, &ref_inos[nr_ref_inos])) { 2757 unsigned int mft_record_off = 0; 2758 s64 vcn_off = vcn; 2759 2760 /* 2761 * Skip $MFT extent mft records and let them being written 2762 * by writeback to avioid deadlocks. the $MFT runlist 2763 * lock must be taken before $MFT extent mrec_lock is taken. 2764 */ 2765 if (tni && tni->nr_extents < 0 && 2766 tni->ext.base_ntfs_ino == NTFS_I(vol->mft_ino)) { 2767 mutex_unlock(&tni->mrec_lock); 2768 atomic_dec(&tni->count); 2769 iput(vol->mft_ino); 2770 continue; 2771 } 2772 2773 /* 2774 * The record should be written. If a locked ntfs 2775 * inode was returned, add it to the array of locked 2776 * ntfs inodes. 
2777 */ 2778 if (tni) 2779 locked_nis[nr_locked_nis++] = tni; 2780 else if (ref_inos[nr_ref_inos]) 2781 nr_ref_inos++; 2782 2783 if (bio && (mft_ofs != prev_mft_ofs + vol->mft_record_size)) { 2784 flush_bio: 2785 bio->bi_end_io = ntfs_bio_end_io; 2786 submit_bio(bio); 2787 bio = NULL; 2788 } 2789 2790 if (vol->cluster_size < folio_size(folio)) { 2791 down_write(&ni->runlist.lock); 2792 rl = ntfs_attr_vcn_to_rl(ni, vcn_off, &lcn); 2793 up_write(&ni->runlist.lock); 2794 if (IS_ERR(rl) || lcn < 0) { 2795 err = -EIO; 2796 goto unm_done; 2797 } 2798 2799 if (bio && 2800 (bio_end_sector(bio) >> (vol->cluster_size_bits - 9)) != 2801 lcn) { 2802 bio->bi_end_io = ntfs_bio_end_io; 2803 submit_bio(bio); 2804 bio = NULL; 2805 } 2806 } 2807 2808 if (!bio) { 2809 unsigned int off; 2810 2811 off = ((mft_no << vol->mft_record_size_bits) + 2812 mft_record_off) & vol->cluster_size_mask; 2813 2814 bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, 2815 GFP_NOIO); 2816 bio->bi_iter.bi_sector = 2817 ntfs_bytes_to_sector(vol, 2818 ntfs_cluster_to_bytes(vol, lcn) + off); 2819 } 2820 2821 if (vol->cluster_size == NTFS_BLOCK_SIZE && 2822 (mft_record_off || 2823 rl->length - (vcn_off - rl->vcn) == 1 || 2824 mft_ofs + NTFS_BLOCK_SIZE >= PAGE_SIZE)) 2825 folio_sz = NTFS_BLOCK_SIZE; 2826 else 2827 folio_sz = vol->mft_record_size; 2828 if (!bio_add_folio(bio, folio, folio_sz, 2829 mft_ofs + mft_record_off)) { 2830 err = -EIO; 2831 bio_put(bio); 2832 goto unm_done; 2833 } 2834 mft_record_off += folio_sz; 2835 2836 if (mft_record_off != vol->mft_record_size) { 2837 vcn_off++; 2838 goto flush_bio; 2839 } 2840 prev_mft_ofs = mft_ofs; 2841 2842 if (mft_no < vol->mftmirr_size) 2843 ntfs_sync_mft_mirror(vol, mft_no, 2844 (struct mft_record *)(kaddr + mft_ofs)); 2845 } else if (ref_inos[nr_ref_inos]) 2846 nr_ref_inos++; 2847 } 2848 2849 if (bio) { 2850 bio->bi_end_io = ntfs_bio_end_io; 2851 submit_bio(bio); 2852 } 2853 unm_done: 2854 folio_mark_uptodate(folio); 2855 kunmap_local(kaddr); 2856 2857 
folio_start_writeback(folio); 2858 folio_unlock(folio); 2859 folio_end_writeback(folio); 2860 2861 /* Unlock any locked inodes. */ 2862 while (nr_locked_nis-- > 0) { 2863 struct ntfs_inode *base_tni; 2864 2865 tni = locked_nis[nr_locked_nis]; 2866 mutex_unlock(&tni->mrec_lock); 2867 2868 /* Get the base inode. */ 2869 mutex_lock(&tni->extent_lock); 2870 if (tni->nr_extents >= 0) 2871 base_tni = tni; 2872 else 2873 base_tni = tni->ext.base_ntfs_ino; 2874 mutex_unlock(&tni->extent_lock); 2875 ntfs_debug("Unlocking %s inode 0x%llx.", 2876 tni == base_tni ? "base" : "extent", 2877 tni->mft_no); 2878 atomic_dec(&tni->count); 2879 iput(VFS_I(base_tni)); 2880 } 2881 2882 /* Dropping deferred references */ 2883 while (nr_ref_inos-- > 0) { 2884 if (ref_inos[nr_ref_inos]) 2885 iput(ref_inos[nr_ref_inos]); 2886 } 2887 2888 if (unlikely(err && err != -ENOMEM)) 2889 NVolSetErrors(vol); 2890 if (likely(!err)) 2891 ntfs_debug("Done."); 2892 return err; 2893 } 2894 2895 /* 2896 * ntfs_mft_writepages - Write back dirty folios for the $MFT inode 2897 * @mapping: address space of the $MFT inode 2898 * @wbc: writeback control 2899 * 2900 * Writeback iterator for MFT records. Iterates over dirty folios and 2901 * delegates actual writing to ntfs_write_mft_block() for each folio. 2902 * Called from the address_space_operations .writepages vector of the 2903 * $MFT inode. 2904 * 2905 * Returns 0 on success, or the first error encountered. 2906 */ 2907 int ntfs_mft_writepages(struct address_space *mapping, 2908 struct writeback_control *wbc) 2909 { 2910 struct folio *folio = NULL; 2911 int error; 2912 2913 if (NVolShutdown(NTFS_I(mapping->host)->vol)) 2914 return -EIO; 2915 2916 while ((folio = writeback_iter(mapping, wbc, folio, &error))) 2917 error = ntfs_write_mft_block(folio, wbc); 2918 return error; 2919 } 2920 2921 void ntfs_mft_mark_dirty(struct folio *folio) 2922 { 2923 iomap_dirty_folio(folio->mapping, folio); 2924 } 2925