11e9ea7e0SNamjae Jeon // SPDX-License-Identifier: GPL-2.0-or-later 21e9ea7e0SNamjae Jeon /* 3115380f9SNamjae Jeon * NTFS kernel mft record operations. 4115380f9SNamjae Jeon * Part of this file is based on code from the NTFS-3G. 51e9ea7e0SNamjae Jeon * 61e9ea7e0SNamjae Jeon * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. 71e9ea7e0SNamjae Jeon * Copyright (c) 2002 Richard Russon 8115380f9SNamjae Jeon * Copyright (c) 2025 LG Electronics Co., Ltd. 91e9ea7e0SNamjae Jeon */ 101e9ea7e0SNamjae Jeon 11115380f9SNamjae Jeon #include <linux/writeback.h> 121e9ea7e0SNamjae Jeon #include <linux/bio.h> 13115380f9SNamjae Jeon #include <linux/iomap.h> 141e9ea7e0SNamjae Jeon 151e9ea7e0SNamjae Jeon #include "bitmap.h" 161e9ea7e0SNamjae Jeon #include "lcnalloc.h" 171e9ea7e0SNamjae Jeon #include "mft.h" 181e9ea7e0SNamjae Jeon #include "ntfs.h" 191e9ea7e0SNamjae Jeon 20115380f9SNamjae Jeon /* 21115380f9SNamjae Jeon * ntfs_mft_record_check - Check the consistency of an MFT record 22115380f9SNamjae Jeon * 23115380f9SNamjae Jeon * Make sure its general fields are safe, then examine all its 24115380f9SNamjae Jeon * attributes and apply generic checks to them. 25115380f9SNamjae Jeon * 26115380f9SNamjae Jeon * Returns 0 if the checks are successful. If not, return -EIO. 
27115380f9SNamjae Jeon */ 28115380f9SNamjae Jeon int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m, 29d9038d99SNamjae Jeon u64 mft_no) 30115380f9SNamjae Jeon { 31115380f9SNamjae Jeon struct attr_record *a; 32115380f9SNamjae Jeon struct super_block *sb = vol->sb; 331e9ea7e0SNamjae Jeon 34115380f9SNamjae Jeon if (!ntfs_is_file_record(m->magic)) { 35115380f9SNamjae Jeon ntfs_error(sb, "Record %llu has no FILE magic (0x%x)\n", 36d9038d99SNamjae Jeon mft_no, le32_to_cpu(*(__le32 *)m)); 37115380f9SNamjae Jeon goto err_out; 38115380f9SNamjae Jeon } 39115380f9SNamjae Jeon 40115380f9SNamjae Jeon if (le16_to_cpu(m->usa_ofs) & 0x1 || 41115380f9SNamjae Jeon (vol->mft_record_size >> NTFS_BLOCK_SIZE_BITS) + 1 != le16_to_cpu(m->usa_count) || 42115380f9SNamjae Jeon le16_to_cpu(m->usa_ofs) + le16_to_cpu(m->usa_count) * 2 > vol->mft_record_size) { 43115380f9SNamjae Jeon ntfs_error(sb, "Record %llu has corrupt fix-up values fields\n", 44d9038d99SNamjae Jeon mft_no); 45115380f9SNamjae Jeon goto err_out; 46115380f9SNamjae Jeon } 47115380f9SNamjae Jeon 48115380f9SNamjae Jeon if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) { 49115380f9SNamjae Jeon ntfs_error(sb, "Record %llu has corrupt allocation size (%u <> %u)\n", 50d9038d99SNamjae Jeon mft_no, vol->mft_record_size, 51115380f9SNamjae Jeon le32_to_cpu(m->bytes_allocated)); 52115380f9SNamjae Jeon goto err_out; 53115380f9SNamjae Jeon } 54115380f9SNamjae Jeon 55115380f9SNamjae Jeon if (le32_to_cpu(m->bytes_in_use) > vol->mft_record_size) { 56115380f9SNamjae Jeon ntfs_error(sb, "Record %llu has corrupt in-use size (%u > %u)\n", 57d9038d99SNamjae Jeon mft_no, le32_to_cpu(m->bytes_in_use), 58115380f9SNamjae Jeon vol->mft_record_size); 59115380f9SNamjae Jeon goto err_out; 60115380f9SNamjae Jeon } 61115380f9SNamjae Jeon 62115380f9SNamjae Jeon if (le16_to_cpu(m->attrs_offset) & 7) { 63115380f9SNamjae Jeon ntfs_error(sb, "Attributes badly aligned in record %llu\n", 64d9038d99SNamjae Jeon mft_no); 
65115380f9SNamjae Jeon goto err_out; 66115380f9SNamjae Jeon } 67115380f9SNamjae Jeon 68115380f9SNamjae Jeon a = (struct attr_record *)((char *)m + le16_to_cpu(m->attrs_offset)); 69115380f9SNamjae Jeon if ((char *)a < (char *)m || (char *)a > (char *)m + vol->mft_record_size) { 70d9038d99SNamjae Jeon ntfs_error(sb, "Record %llu is corrupt\n", mft_no); 71115380f9SNamjae Jeon goto err_out; 72115380f9SNamjae Jeon } 73115380f9SNamjae Jeon 74115380f9SNamjae Jeon return 0; 75115380f9SNamjae Jeon 76115380f9SNamjae Jeon err_out: 77115380f9SNamjae Jeon return -EIO; 78115380f9SNamjae Jeon } 79115380f9SNamjae Jeon 80115380f9SNamjae Jeon /* 81115380f9SNamjae Jeon * map_mft_record_folio - map the folio in which a specific mft record resides 821e9ea7e0SNamjae Jeon * @ni: ntfs inode whose mft record page to map 831e9ea7e0SNamjae Jeon * 84115380f9SNamjae Jeon * This maps the folio in which the mft record of the ntfs inode @ni is 85115380f9SNamjae Jeon * situated. 861e9ea7e0SNamjae Jeon * 87115380f9SNamjae Jeon * This allocates a new buffer (@ni->mrec), copies the MFT record data from 88115380f9SNamjae Jeon * the mapped folio into this buffer, and applies the MST (Multi Sector 89115380f9SNamjae Jeon * Transfer) fixups on the copy. 90115380f9SNamjae Jeon * 91115380f9SNamjae Jeon * The folio is pinned (referenced) in @ni->folio to ensure the data remains 92115380f9SNamjae Jeon * valid in the page cache, but the returned pointer is the allocated copy. 93115380f9SNamjae Jeon * 94115380f9SNamjae Jeon * Return: A pointer to the allocated and fixed-up mft record (@ni->mrec). 95115380f9SNamjae Jeon * The return value needs to be checked with IS_ERR(). If it is true, 96115380f9SNamjae Jeon * PTR_ERR() contains the negative error code. 
 */
static inline struct mft_record *map_mft_record_folio(struct ntfs_inode *ni)
{
	loff_t i_size;
	struct ntfs_volume *vol = ni->vol;
	struct inode *mft_vi = vol->mft_ino;
	struct folio *folio;
	unsigned long index, end_index;
	unsigned int ofs;

	/* The caller must not already have a record mapped on this inode. */
	WARN_ON(ni->folio);
	/*
	 * The index into the page cache and the offset within the page cache
	 * page of the wanted mft record.
	 */
	index = NTFS_MFT_NR_TO_PIDX(vol, ni->mft_no);
	ofs = NTFS_MFT_NR_TO_POFS(vol, ni->mft_no);

	i_size = i_size_read(mft_vi);
	/* The maximum valid index into the page cache for $MFT's data. */
	end_index = i_size >> PAGE_SHIFT;

	/* If the wanted index is out of bounds the mft record doesn't exist. */
	if (unlikely(index >= end_index)) {
		/*
		 * index == end_index is only acceptable if the tail page
		 * still holds a complete record at @ofs.
		 */
		if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
				vol->mft_record_size) {
			folio = ERR_PTR(-ENOENT);
			ntfs_error(vol->sb,
				"Attempt to read mft record 0x%llx, which is beyond the end of the mft. This is probably a bug in the ntfs driver.",
				ni->mft_no);
			goto err_out;
		}
	}

	/* Read, map, and pin the folio. */
	folio = read_mapping_folio(mft_vi->i_mapping, index, NULL);
	if (!IS_ERR(folio)) {
		u8 *addr;

		/*
		 * Private buffer for the caller; the MST fixups are applied
		 * to this copy, leaving the page cache data MST protected.
		 */
		ni->mrec = kmalloc(vol->mft_record_size, GFP_NOFS);
		if (!ni->mrec) {
			folio_put(folio);
			folio = ERR_PTR(-ENOMEM);
			goto err_out;
		}

		addr = kmap_local_folio(folio, 0);
		memcpy(ni->mrec, addr + ofs, vol->mft_record_size);
		post_read_mst_fixup((struct ntfs_record *)ni->mrec, vol->mft_record_size);

		/* Catch multi sector transfer fixup errors. */
		if (!ntfs_mft_record_check(vol, (struct mft_record *)ni->mrec, ni->mft_no)) {
			kunmap_local(addr);
			/* Keep the folio pinned while the record is mapped. */
			ni->folio = folio;
			ni->folio_ofs = ofs;
			return ni->mrec;
		}
		/* Record is corrupt: undo everything and flag the volume. */
		kunmap_local(addr);
		folio_put(folio);
		kfree(ni->mrec);
		ni->mrec = NULL;
		folio = ERR_PTR(-EIO);
		NVolSetErrors(vol);
	}
err_out:
	/* No record mapped; @folio holds the ERR_PTR to hand back. */
	ni->folio = NULL;
	ni->folio_ofs = 0;
	return (struct mft_record *)folio;
}

/*
 * map_mft_record - map and pin an mft record
 * @ni:	ntfs inode whose MFT record to map
 *
 * This function ensures the MFT record for the given inode is mapped and
 * accessible.
1731e9ea7e0SNamjae Jeon * 174115380f9SNamjae Jeon * It increments the reference count of the ntfs inode. If the record is 175115380f9SNamjae Jeon * already mapped (@ni->folio is set), it returns the cached record 176115380f9SNamjae Jeon * immediately. 1771e9ea7e0SNamjae Jeon * 178115380f9SNamjae Jeon * Otherwise, it calls map_mft_record_folio() to read the folio from disk 179115380f9SNamjae Jeon * (if necessary via read_mapping_folio), allocate a buffer, and copy the 180115380f9SNamjae Jeon * record data. 1811e9ea7e0SNamjae Jeon * 182115380f9SNamjae Jeon * Return: A pointer to the mft record. You need to check the returned 183115380f9SNamjae Jeon * pointer with IS_ERR(). 1841e9ea7e0SNamjae Jeon */ 185115380f9SNamjae Jeon struct mft_record *map_mft_record(struct ntfs_inode *ni) 1861e9ea7e0SNamjae Jeon { 187115380f9SNamjae Jeon struct mft_record *m; 188115380f9SNamjae Jeon 189115380f9SNamjae Jeon if (!ni) 190115380f9SNamjae Jeon return ERR_PTR(-EINVAL); 1911e9ea7e0SNamjae Jeon 192d9038d99SNamjae Jeon ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no); 1931e9ea7e0SNamjae Jeon 1941e9ea7e0SNamjae Jeon /* Make sure the ntfs inode doesn't go away. 
*/ 1951e9ea7e0SNamjae Jeon atomic_inc(&ni->count); 1961e9ea7e0SNamjae Jeon 197115380f9SNamjae Jeon if (ni->folio) 198115380f9SNamjae Jeon return (struct mft_record *)ni->mrec; 1991e9ea7e0SNamjae Jeon 200115380f9SNamjae Jeon m = map_mft_record_folio(ni); 2011e9ea7e0SNamjae Jeon if (!IS_ERR(m)) 2021e9ea7e0SNamjae Jeon return m; 2031e9ea7e0SNamjae Jeon 2041e9ea7e0SNamjae Jeon atomic_dec(&ni->count); 2051e9ea7e0SNamjae Jeon ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); 2061e9ea7e0SNamjae Jeon return m; 2071e9ea7e0SNamjae Jeon } 2081e9ea7e0SNamjae Jeon 209115380f9SNamjae Jeon /* 210115380f9SNamjae Jeon * unmap_mft_record - release a reference to a mapped mft record 2111e9ea7e0SNamjae Jeon * @ni: ntfs inode whose MFT record to unmap 2121e9ea7e0SNamjae Jeon * 213115380f9SNamjae Jeon * This decrements the reference count of the ntfs inode. 214115380f9SNamjae Jeon * 215115380f9SNamjae Jeon * It releases the caller's hold on the inode. If the reference count indicates 216115380f9SNamjae Jeon * that there are still other users (count > 1), the function returns 217115380f9SNamjae Jeon * immediately, keeping the resources (folio and mrec buffer) pinned for 218115380f9SNamjae Jeon * those users. 2191e9ea7e0SNamjae Jeon * 2201e9ea7e0SNamjae Jeon * NOTE: If caller has modified the mft record, it is imperative to set the mft 2211e9ea7e0SNamjae Jeon * record dirty BEFORE calling unmap_mft_record(). 
2221e9ea7e0SNamjae Jeon */ 223115380f9SNamjae Jeon void unmap_mft_record(struct ntfs_inode *ni) 2241e9ea7e0SNamjae Jeon { 225115380f9SNamjae Jeon struct folio *folio; 2261e9ea7e0SNamjae Jeon 227115380f9SNamjae Jeon if (!ni) 228115380f9SNamjae Jeon return; 2291e9ea7e0SNamjae Jeon 230d9038d99SNamjae Jeon ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no); 2311e9ea7e0SNamjae Jeon 232115380f9SNamjae Jeon folio = ni->folio; 233115380f9SNamjae Jeon if (atomic_dec_return(&ni->count) > 1) 2341e9ea7e0SNamjae Jeon return; 235115380f9SNamjae Jeon WARN_ON(!folio); 2361e9ea7e0SNamjae Jeon } 2371e9ea7e0SNamjae Jeon 238115380f9SNamjae Jeon /* 2391e9ea7e0SNamjae Jeon * map_extent_mft_record - load an extent inode and attach it to its base 2401e9ea7e0SNamjae Jeon * @base_ni: base ntfs inode 2411e9ea7e0SNamjae Jeon * @mref: mft reference of the extent inode to load 242115380f9SNamjae Jeon * @ntfs_ino: on successful return, pointer to the struct ntfs_inode structure 2431e9ea7e0SNamjae Jeon * 2441e9ea7e0SNamjae Jeon * Load the extent mft record @mref and attach it to its base inode @base_ni. 2451e9ea7e0SNamjae Jeon * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise 2461e9ea7e0SNamjae Jeon * PTR_ERR(result) gives the negative error code. 2471e9ea7e0SNamjae Jeon * 2481e9ea7e0SNamjae Jeon * On successful return, @ntfs_ino contains a pointer to the ntfs_inode 2491e9ea7e0SNamjae Jeon * structure of the mapped extent inode. 
2501e9ea7e0SNamjae Jeon */ 251115380f9SNamjae Jeon struct mft_record *map_extent_mft_record(struct ntfs_inode *base_ni, u64 mref, 252115380f9SNamjae Jeon struct ntfs_inode **ntfs_ino) 2531e9ea7e0SNamjae Jeon { 254115380f9SNamjae Jeon struct mft_record *m; 255115380f9SNamjae Jeon struct ntfs_inode *ni = NULL; 256115380f9SNamjae Jeon struct ntfs_inode **extent_nis = NULL; 2571e9ea7e0SNamjae Jeon int i; 258d9038d99SNamjae Jeon u64 mft_no = MREF(mref); 2591e9ea7e0SNamjae Jeon u16 seq_no = MSEQNO(mref); 2601e9ea7e0SNamjae Jeon bool destroy_ni = false; 2611e9ea7e0SNamjae Jeon 262d9038d99SNamjae Jeon ntfs_debug("Mapping extent mft record 0x%llx (base mft record 0x%llx).", 2631e9ea7e0SNamjae Jeon mft_no, base_ni->mft_no); 2641e9ea7e0SNamjae Jeon /* Make sure the base ntfs inode doesn't go away. */ 2651e9ea7e0SNamjae Jeon atomic_inc(&base_ni->count); 2661e9ea7e0SNamjae Jeon /* 2671e9ea7e0SNamjae Jeon * Check if this extent inode has already been added to the base inode, 2681e9ea7e0SNamjae Jeon * in which case just return it. If not found, add it to the base 2691e9ea7e0SNamjae Jeon * inode before returning it. 2701e9ea7e0SNamjae Jeon */ 271115380f9SNamjae Jeon retry: 2721e9ea7e0SNamjae Jeon mutex_lock(&base_ni->extent_lock); 2731e9ea7e0SNamjae Jeon if (base_ni->nr_extents > 0) { 2741e9ea7e0SNamjae Jeon extent_nis = base_ni->ext.extent_ntfs_inos; 2751e9ea7e0SNamjae Jeon for (i = 0; i < base_ni->nr_extents; i++) { 2761e9ea7e0SNamjae Jeon if (mft_no != extent_nis[i]->mft_no) 2771e9ea7e0SNamjae Jeon continue; 2781e9ea7e0SNamjae Jeon ni = extent_nis[i]; 2791e9ea7e0SNamjae Jeon /* Make sure the ntfs inode doesn't go away. 
*/ 2801e9ea7e0SNamjae Jeon atomic_inc(&ni->count); 2811e9ea7e0SNamjae Jeon break; 2821e9ea7e0SNamjae Jeon } 2831e9ea7e0SNamjae Jeon } 2841e9ea7e0SNamjae Jeon if (likely(ni != NULL)) { 2851e9ea7e0SNamjae Jeon mutex_unlock(&base_ni->extent_lock); 2861e9ea7e0SNamjae Jeon atomic_dec(&base_ni->count); 2871e9ea7e0SNamjae Jeon /* We found the record; just have to map and return it. */ 2881e9ea7e0SNamjae Jeon m = map_mft_record(ni); 2891e9ea7e0SNamjae Jeon /* map_mft_record() has incremented this on success. */ 2901e9ea7e0SNamjae Jeon atomic_dec(&ni->count); 2911e9ea7e0SNamjae Jeon if (!IS_ERR(m)) { 2921e9ea7e0SNamjae Jeon /* Verify the sequence number. */ 2931e9ea7e0SNamjae Jeon if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { 2941e9ea7e0SNamjae Jeon ntfs_debug("Done 1."); 2951e9ea7e0SNamjae Jeon *ntfs_ino = ni; 2961e9ea7e0SNamjae Jeon return m; 2971e9ea7e0SNamjae Jeon } 2981e9ea7e0SNamjae Jeon unmap_mft_record(ni); 299115380f9SNamjae Jeon ntfs_error(base_ni->vol->sb, 300115380f9SNamjae Jeon "Found stale extent mft reference! Corrupt filesystem. Run chkdsk."); 3011e9ea7e0SNamjae Jeon return ERR_PTR(-EIO); 3021e9ea7e0SNamjae Jeon } 3031e9ea7e0SNamjae Jeon map_err_out: 304115380f9SNamjae Jeon ntfs_error(base_ni->vol->sb, 305115380f9SNamjae Jeon "Failed to map extent mft record, error code %ld.", 306115380f9SNamjae Jeon -PTR_ERR(m)); 3071e9ea7e0SNamjae Jeon return m; 3081e9ea7e0SNamjae Jeon } 309115380f9SNamjae Jeon mutex_unlock(&base_ni->extent_lock); 310115380f9SNamjae Jeon 3111e9ea7e0SNamjae Jeon /* Record wasn't there. Get a new ntfs inode and initialize it. 
*/ 3121e9ea7e0SNamjae Jeon ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); 3131e9ea7e0SNamjae Jeon if (unlikely(!ni)) { 3141e9ea7e0SNamjae Jeon atomic_dec(&base_ni->count); 3151e9ea7e0SNamjae Jeon return ERR_PTR(-ENOMEM); 3161e9ea7e0SNamjae Jeon } 3171e9ea7e0SNamjae Jeon ni->vol = base_ni->vol; 3181e9ea7e0SNamjae Jeon ni->seq_no = seq_no; 3191e9ea7e0SNamjae Jeon ni->nr_extents = -1; 3201e9ea7e0SNamjae Jeon ni->ext.base_ntfs_ino = base_ni; 3211e9ea7e0SNamjae Jeon /* Now map the record. */ 3221e9ea7e0SNamjae Jeon m = map_mft_record(ni); 3231e9ea7e0SNamjae Jeon if (IS_ERR(m)) { 3241e9ea7e0SNamjae Jeon atomic_dec(&base_ni->count); 3251e9ea7e0SNamjae Jeon ntfs_clear_extent_inode(ni); 3261e9ea7e0SNamjae Jeon goto map_err_out; 3271e9ea7e0SNamjae Jeon } 3281e9ea7e0SNamjae Jeon /* Verify the sequence number if it is present. */ 3291e9ea7e0SNamjae Jeon if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 330115380f9SNamjae Jeon ntfs_error(base_ni->vol->sb, 331115380f9SNamjae Jeon "Found stale extent mft reference! Corrupt filesystem. Run chkdsk."); 3321e9ea7e0SNamjae Jeon destroy_ni = true; 3331e9ea7e0SNamjae Jeon m = ERR_PTR(-EIO); 334115380f9SNamjae Jeon goto unm_nolock_err_out; 335115380f9SNamjae Jeon } 336115380f9SNamjae Jeon 337115380f9SNamjae Jeon mutex_lock(&base_ni->extent_lock); 338115380f9SNamjae Jeon for (i = 0; i < base_ni->nr_extents; i++) { 339115380f9SNamjae Jeon if (mft_no == extent_nis[i]->mft_no) { 340115380f9SNamjae Jeon mutex_unlock(&base_ni->extent_lock); 341115380f9SNamjae Jeon ntfs_clear_extent_inode(ni); 342115380f9SNamjae Jeon goto retry; 343115380f9SNamjae Jeon } 3441e9ea7e0SNamjae Jeon } 3451e9ea7e0SNamjae Jeon /* Attach extent inode to base inode, reallocating memory if needed. 
*/ 3461e9ea7e0SNamjae Jeon if (!(base_ni->nr_extents & 3)) { 347115380f9SNamjae Jeon struct ntfs_inode **tmp; 348115380f9SNamjae Jeon int new_size = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *); 3491e9ea7e0SNamjae Jeon 350115380f9SNamjae Jeon tmp = kvzalloc(new_size, GFP_NOFS); 3511e9ea7e0SNamjae Jeon if (unlikely(!tmp)) { 352115380f9SNamjae Jeon ntfs_error(base_ni->vol->sb, "Failed to allocate internal buffer."); 3531e9ea7e0SNamjae Jeon destroy_ni = true; 3541e9ea7e0SNamjae Jeon m = ERR_PTR(-ENOMEM); 3551e9ea7e0SNamjae Jeon goto unm_err_out; 3561e9ea7e0SNamjae Jeon } 3571e9ea7e0SNamjae Jeon if (base_ni->nr_extents) { 358115380f9SNamjae Jeon WARN_ON(!base_ni->ext.extent_ntfs_inos); 3591e9ea7e0SNamjae Jeon memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - 360115380f9SNamjae Jeon 4 * sizeof(struct ntfs_inode *)); 361115380f9SNamjae Jeon kvfree(base_ni->ext.extent_ntfs_inos); 3621e9ea7e0SNamjae Jeon } 3631e9ea7e0SNamjae Jeon base_ni->ext.extent_ntfs_inos = tmp; 3641e9ea7e0SNamjae Jeon } 3651e9ea7e0SNamjae Jeon base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; 3661e9ea7e0SNamjae Jeon mutex_unlock(&base_ni->extent_lock); 3671e9ea7e0SNamjae Jeon atomic_dec(&base_ni->count); 3681e9ea7e0SNamjae Jeon ntfs_debug("Done 2."); 3691e9ea7e0SNamjae Jeon *ntfs_ino = ni; 3701e9ea7e0SNamjae Jeon return m; 3711e9ea7e0SNamjae Jeon unm_err_out: 3721e9ea7e0SNamjae Jeon mutex_unlock(&base_ni->extent_lock); 373115380f9SNamjae Jeon unm_nolock_err_out: 374115380f9SNamjae Jeon unmap_mft_record(ni); 3751e9ea7e0SNamjae Jeon atomic_dec(&base_ni->count); 3761e9ea7e0SNamjae Jeon /* 3771e9ea7e0SNamjae Jeon * If the extent inode was not attached to the base inode we need to 3781e9ea7e0SNamjae Jeon * release it or we will leak memory. 
3791e9ea7e0SNamjae Jeon */ 3801e9ea7e0SNamjae Jeon if (destroy_ni) 3811e9ea7e0SNamjae Jeon ntfs_clear_extent_inode(ni); 3821e9ea7e0SNamjae Jeon return m; 3831e9ea7e0SNamjae Jeon } 3841e9ea7e0SNamjae Jeon 385115380f9SNamjae Jeon /* 386115380f9SNamjae Jeon * __mark_mft_record_dirty - mark the base vfs inode dirty 3871e9ea7e0SNamjae Jeon * @ni: ntfs inode describing the mapped mft record 3881e9ea7e0SNamjae Jeon * 3891e9ea7e0SNamjae Jeon * Internal function. Users should call mark_mft_record_dirty() instead. 3901e9ea7e0SNamjae Jeon * 391115380f9SNamjae Jeon * This function determines the base ntfs inode (in case @ni is an extent 392115380f9SNamjae Jeon * inode) and marks the corresponding VFS inode dirty. 3931e9ea7e0SNamjae Jeon * 3941e9ea7e0SNamjae Jeon * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) 3951e9ea7e0SNamjae Jeon * on the base vfs inode, because even though file data may have been modified, 3961e9ea7e0SNamjae Jeon * it is dirty in the inode meta data rather than the data page cache of the 3971e9ea7e0SNamjae Jeon * inode, and thus there are no data pages that need writing out. Therefore, a 3981e9ea7e0SNamjae Jeon * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 3991e9ea7e0SNamjae Jeon * other hand, is not sufficient, because ->write_inode needs to be called even 4001e9ea7e0SNamjae Jeon * in case of fdatasync. This needs to happen or the file data would not 4011e9ea7e0SNamjae Jeon * necessarily hit the device synchronously, even though the vfs inode has the 4021e9ea7e0SNamjae Jeon * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just 4031e9ea7e0SNamjae Jeon * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, 4041e9ea7e0SNamjae Jeon * which is not what I_DIRTY_SYNC on its own would suggest. 
4051e9ea7e0SNamjae Jeon */ 406115380f9SNamjae Jeon void __mark_mft_record_dirty(struct ntfs_inode *ni) 4071e9ea7e0SNamjae Jeon { 408115380f9SNamjae Jeon struct ntfs_inode *base_ni; 4091e9ea7e0SNamjae Jeon 410d9038d99SNamjae Jeon ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 411115380f9SNamjae Jeon WARN_ON(NInoAttr(ni)); 4121e9ea7e0SNamjae Jeon /* Determine the base vfs inode and mark it dirty, too. */ 4131e9ea7e0SNamjae Jeon if (likely(ni->nr_extents >= 0)) 4141e9ea7e0SNamjae Jeon base_ni = ni; 4151e9ea7e0SNamjae Jeon else 4161e9ea7e0SNamjae Jeon base_ni = ni->ext.base_ntfs_ino; 4171e9ea7e0SNamjae Jeon __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC); 4181e9ea7e0SNamjae Jeon } 4191e9ea7e0SNamjae Jeon 420115380f9SNamjae Jeon /* 421115380f9SNamjae Jeon * ntfs_bio_end_io - bio completion callback for MFT record writes 4221e9ea7e0SNamjae Jeon * 423115380f9SNamjae Jeon * Decrements the folio reference count that was incremented before 424115380f9SNamjae Jeon * submit_bio(). This prevents a race condition where umount could 425115380f9SNamjae Jeon * evict the inode and release the folio while I/O is still in flight, 426115380f9SNamjae Jeon * potentially causing data corruption or use-after-free. 
 */
static void ntfs_bio_end_io(struct bio *bio)
{
	/* bi_private, when set, carries the folio reference taken by the submitter. */
	if (bio->bi_private)
		folio_put((struct folio *)bio->bi_private);
	bio_put(bio);
}

/*
 * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror
 * @vol:	ntfs volume on which the mft record to synchronize resides
 * @mft_no:	mft record number of mft record to synchronize
 * @m:		mapped, mst protected (extent) mft record to synchronize
 *
 * Write the mapped, mst protected (extent) mft record @m with mft record
 * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol.
 *
 * On success return 0. On error return -errno and set the volume errors flag
 * in the ntfs volume @vol.
 *
 * NOTE: We always perform synchronous i/o.
 */
int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no,
		struct mft_record *m)
{
	u8 *kmirr = NULL;
	struct folio *folio;
	unsigned int folio_ofs, lcn_folio_off = 0;
	int err = 0;
	struct bio *bio;

	ntfs_debug("Entering for inode 0x%llx.", mft_no);

	if (unlikely(!vol->mftmirr_ino)) {
		/* This could happen during umount... */
		err = -EIO;
		goto err_out;
	}
	/* Get the page containing the mirror copy of the mft record @m. */
	folio = read_mapping_folio(vol->mftmirr_ino->i_mapping,
			NTFS_MFT_NR_TO_PIDX(vol, mft_no), NULL);
	if (IS_ERR(folio)) {
		ntfs_error(vol->sb, "Failed to map mft mirror page.");
		err = PTR_ERR(folio);
		goto err_out;
	}

	folio_lock(folio);
	/*
	 * Clear uptodate while we overwrite the mirror copy; it is only
	 * marked uptodate again once the write has been submitted, so a
	 * failed bio_add_folio() forces a later re-read.
	 */
	folio_clear_uptodate(folio);
	/* Offset of the mft mirror record inside the page. */
	folio_ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no);
	/* The address in the page of the mirror copy of the mft record @m. */
	kmirr = kmap_local_folio(folio, 0) + folio_ofs;
	/* Copy the mst protected mft record to the mirror. */
	memcpy(kmirr, m, vol->mft_record_size);

	/*
	 * With clusters larger than a page, add the folio's byte offset
	 * within its cluster to the device address below.
	 */
	if (vol->cluster_size_bits > PAGE_SHIFT) {
		lcn_folio_off = folio->index << PAGE_SHIFT;
		lcn_folio_off &= vol->cluster_size_mask;
	}

	/*
	 * NOTE(review): the sector computation addresses the record relative
	 * to vol->mftmirr_lcn only — this assumes $MFTMirr's data is
	 * contiguous starting at that lcn; verify against the mirror layout.
	 */
	bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO);
	bio->bi_iter.bi_sector =
		NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, vol->mftmirr_lcn) +
				lcn_folio_off + folio_ofs);

	if (!bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) {
		err = -EIO;
		bio_put(bio);
		goto unlock_folio;
	}

	/* bi_private stays NULL: no extra folio reference was taken here. */
	bio->bi_end_io = ntfs_bio_end_io;
	submit_bio(bio);
	/* Current state: all buffers are clean, unlocked, and uptodate. */
	folio_mark_uptodate(folio);

unlock_folio:
	folio_unlock(folio);
	kunmap_local(kmirr);
	folio_put(folio);
	if (likely(!err)) {
		ntfs_debug("Done.");
	} else {
		ntfs_error(vol->sb, "I/O error while writing mft mirror record 0x%llx!", mft_no);
		/* Entered directly for failures before the folio was mapped. */
err_out:
		ntfs_error(vol->sb,
			"Failed to synchronize $MFTMirr (error code %i). Volume will be left marked dirty on umount. Run chkdsk on the partition after umounting to correct this.",
			err);
		NVolSetErrors(vol);
	}
	return err;
}

/*
 * write_mft_record_nolock - write out a mapped (extent) mft record
 * @ni:	ntfs inode describing the mapped (extent) mft record
 * @m:	mapped (extent) mft record to write
 * @sync:	if true, wait for i/o completion
 *
 * Write the mapped (extent) mft record @m described by the (regular or extent)
 * ntfs inode @ni to backing store. If the mft record @m has a counterpart in
 * the mft mirror, that is also updated.
 *
 * We only write the mft record if the ntfs inode @ni is dirty.
 *
 * On success, clean the mft record and return 0.
 * On error (specifically ENOMEM), we redirty the record so it can be retried.
 * For other errors, we mark the volume with errors.
5361e9ea7e0SNamjae Jeon */ 537115380f9SNamjae Jeon int write_mft_record_nolock(struct ntfs_inode *ni, struct mft_record *m, int sync) 5381e9ea7e0SNamjae Jeon { 539115380f9SNamjae Jeon struct ntfs_volume *vol = ni->vol; 540115380f9SNamjae Jeon struct folio *folio = ni->folio; 541115380f9SNamjae Jeon int err = 0, i = 0; 542115380f9SNamjae Jeon u8 *kaddr; 543115380f9SNamjae Jeon struct mft_record *fixup_m; 544115380f9SNamjae Jeon struct bio *bio; 545115380f9SNamjae Jeon unsigned int offset = 0, folio_size; 5461e9ea7e0SNamjae Jeon 547d9038d99SNamjae Jeon ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 548115380f9SNamjae Jeon 549115380f9SNamjae Jeon WARN_ON(NInoAttr(ni)); 550115380f9SNamjae Jeon WARN_ON(!folio_test_locked(folio)); 551115380f9SNamjae Jeon 5521e9ea7e0SNamjae Jeon /* 553115380f9SNamjae Jeon * If the struct ntfs_inode is clean no need to do anything. If it is dirty, 5541e9ea7e0SNamjae Jeon * mark it as clean now so that it can be redirtied later on if needed. 5551e9ea7e0SNamjae Jeon * There is no danger of races since the caller is holding the locks 5561e9ea7e0SNamjae Jeon * for the mft record @m and the page it is in. 5571e9ea7e0SNamjae Jeon */ 5581e9ea7e0SNamjae Jeon if (!NInoTestClearDirty(ni)) 5591e9ea7e0SNamjae Jeon goto done; 5601e9ea7e0SNamjae Jeon 561115380f9SNamjae Jeon kaddr = kmap_local_folio(folio, 0); 562115380f9SNamjae Jeon fixup_m = (struct mft_record *)(kaddr + ni->folio_ofs); 563115380f9SNamjae Jeon memcpy(fixup_m, m, vol->mft_record_size); 564115380f9SNamjae Jeon 5651e9ea7e0SNamjae Jeon /* Apply the mst protection fixups. 
*/ 566115380f9SNamjae Jeon err = pre_write_mst_fixup((struct ntfs_record *)fixup_m, vol->mft_record_size); 5671e9ea7e0SNamjae Jeon if (err) { 5681e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to apply mst fixups!"); 569115380f9SNamjae Jeon goto err_out; 5701e9ea7e0SNamjae Jeon } 5711e9ea7e0SNamjae Jeon 572115380f9SNamjae Jeon folio_size = vol->mft_record_size / ni->mft_lcn_count; 573115380f9SNamjae Jeon while (i < ni->mft_lcn_count) { 574115380f9SNamjae Jeon unsigned int clu_off; 575115380f9SNamjae Jeon 576115380f9SNamjae Jeon clu_off = (unsigned int)((s64)ni->mft_no * vol->mft_record_size + offset) & 577115380f9SNamjae Jeon vol->cluster_size_mask; 578115380f9SNamjae Jeon 579115380f9SNamjae Jeon bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO); 580115380f9SNamjae Jeon bio->bi_iter.bi_sector = 581115380f9SNamjae Jeon NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, ni->mft_lcn[i]) + 582115380f9SNamjae Jeon clu_off); 583115380f9SNamjae Jeon 584115380f9SNamjae Jeon if (!bio_add_folio(bio, folio, folio_size, 585115380f9SNamjae Jeon ni->folio_ofs + offset)) { 586115380f9SNamjae Jeon err = -EIO; 587115380f9SNamjae Jeon goto put_bio_out; 5881e9ea7e0SNamjae Jeon } 589115380f9SNamjae Jeon 5901e9ea7e0SNamjae Jeon /* Synchronize the mft mirror now if not @sync. */ 5911e9ea7e0SNamjae Jeon if (!sync && ni->mft_no < vol->mftmirr_size) 592115380f9SNamjae Jeon ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); 5931e9ea7e0SNamjae Jeon 594115380f9SNamjae Jeon folio_get(folio); 595115380f9SNamjae Jeon bio->bi_private = folio; 596115380f9SNamjae Jeon bio->bi_end_io = ntfs_bio_end_io; 597115380f9SNamjae Jeon submit_bio(bio); 598115380f9SNamjae Jeon offset += vol->cluster_size; 599115380f9SNamjae Jeon i++; 6001e9ea7e0SNamjae Jeon } 601115380f9SNamjae Jeon 6021e9ea7e0SNamjae Jeon /* If @sync, now synchronize the mft mirror. 
*/ 6031e9ea7e0SNamjae Jeon if (sync && ni->mft_no < vol->mftmirr_size) 604115380f9SNamjae Jeon ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); 605115380f9SNamjae Jeon kunmap_local(kaddr); 6061e9ea7e0SNamjae Jeon if (unlikely(err)) { 6071e9ea7e0SNamjae Jeon /* I/O error during writing. This is really bad! */ 608115380f9SNamjae Jeon ntfs_error(vol->sb, 609d9038d99SNamjae Jeon "I/O error while writing mft record 0x%llx! Marking base inode as bad. You should unmount the volume and run chkdsk.", 6101e9ea7e0SNamjae Jeon ni->mft_no); 6111e9ea7e0SNamjae Jeon goto err_out; 6121e9ea7e0SNamjae Jeon } 6131e9ea7e0SNamjae Jeon done: 6141e9ea7e0SNamjae Jeon ntfs_debug("Done."); 6151e9ea7e0SNamjae Jeon return 0; 616115380f9SNamjae Jeon put_bio_out: 617115380f9SNamjae Jeon bio_put(bio); 6181e9ea7e0SNamjae Jeon err_out: 6191e9ea7e0SNamjae Jeon /* 6201e9ea7e0SNamjae Jeon * Current state: all buffers are clean, unlocked, and uptodate. 6211e9ea7e0SNamjae Jeon * The caller should mark the base inode as bad so that no more i/o 622115380f9SNamjae Jeon * happens. ->drop_inode() will still be invoked so all extent inodes 6231e9ea7e0SNamjae Jeon * and other allocated memory will be freed. 6241e9ea7e0SNamjae Jeon */ 6251e9ea7e0SNamjae Jeon if (err == -ENOMEM) { 626115380f9SNamjae Jeon ntfs_error(vol->sb, 627115380f9SNamjae Jeon "Not enough memory to write mft record. 
			   Redirtying so the write is retried later.");
		mark_mft_record_dirty(ni);
		err = 0;
	} else
		NVolSetErrors(vol);
	return err;
}

/*
 * ntfs_test_inode_wb - inode match callback for find_inode_nowait()
 * @vi:		candidate vfs inode from the inode hash chain
 * @ino:	hash value the lookup was started with (unused here)
 * @data:	pointer to the struct ntfs_attr describing the wanted inode
 *
 * Match callback used by the writeback path when looking up an inode by mft
 * record number.  Return value convention (see find_inode_nowait()):
 *   0  - not the inode we are looking for, keep scanning,
 *   1  - match found and a reference was taken via igrab(),
 *   -1 - stop the search without taking a reference; in that case
 *        @na->state is set to NI_BeingDeleted/NI_BeingCreated so the
 *        caller can tell why the lookup was aborted.
 */
static int ntfs_test_inode_wb(struct inode *vi, u64 ino, void *data)
{
	struct ntfs_attr *na = data;

	if (!ntfs_test_inode(vi, na))
		return 0;

	/*
	 * Without this, ntfs_write_mst_block() could call iput_final(),
	 * ntfs_evict_big_inode() could then try to unlink this inode, and
	 * the context could be blocked infinitely in map_mft_record().
	 * So refuse the match while the inode is being deleted.
	 */
	if (NInoBeingDeleted(NTFS_I(vi))) {
		na->state = NI_BeingDeleted;
		return -1;
	}

	/*
	 * This condition prevents ntfs_write_mst_block() from
	 * applying/undoing fixups while ntfs_create() is still being
	 * called (inode not yet fully set up).
	 */
	spin_lock(&vi->i_lock);
	if (inode_state_read_once(vi) & I_CREATING) {
		spin_unlock(&vi->i_lock);
		na->state = NI_BeingCreated;
		return -1;
	}
	spin_unlock(&vi->i_lock);

	/* Pin the inode; if it is already on its way out, abort the search. */
	return igrab(vi) ? 1 : -1;
}

/*
 * ntfs_may_write_mft_record - check if an mft record may be written out
 * @vol:	[IN] ntfs volume on which the mft record to check resides
 * @mft_no:	[IN] mft record number of the mft record to check
 * @m:		[IN] mapped mft record to check
 * @locked_ni:	[OUT] caller has to unlock this ntfs inode if one is returned
 * @ref_vi:	[OUT] caller has to drop this vfs inode if one is returned
 *
 * Check if the mapped (base or extent) mft record @m with mft record number
 * @mft_no belonging to the ntfs volume @vol may be written out.  If necessary
 * and possible the ntfs inode of the mft record is locked and the base vfs
 * inode is pinned.  The locked ntfs inode is then returned in @locked_ni.  The
 * caller is responsible for unlocking the ntfs inode and unpinning the base
 * vfs inode.
 *
 * To avoid deadlock when the caller holds a folio lock, if the function
 * returns @ref_vi it defers dropping the vfs inode reference by returning
 * it in @ref_vi instead of calling iput() directly.  The caller must call
 * iput() on @ref_vi after releasing the folio lock.
 *
 * Return 'true' if the mft record may be written out and 'false' if not.
6891e9ea7e0SNamjae Jeon * 6901e9ea7e0SNamjae Jeon * The caller has locked the page and cleared the uptodate flag on it which 6911e9ea7e0SNamjae Jeon * means that we can safely write out any dirty mft records that do not have 692115380f9SNamjae Jeon * their inodes in icache as determined by find_inode_nowait(). 6931e9ea7e0SNamjae Jeon * 6941e9ea7e0SNamjae Jeon * Here is a description of the tests we perform: 6951e9ea7e0SNamjae Jeon * 6961e9ea7e0SNamjae Jeon * If the inode is found in icache we know the mft record must be a base mft 6971e9ea7e0SNamjae Jeon * record. If it is dirty, we do not write it and return 'false' as the vfs 6981e9ea7e0SNamjae Jeon * inode write paths will result in the access times being updated which would 699115380f9SNamjae Jeon * cause the base mft record to be redirtied and written out again. 7001e9ea7e0SNamjae Jeon * 7011e9ea7e0SNamjae Jeon * If the inode is in icache and not dirty, we attempt to lock the mft record 7021e9ea7e0SNamjae Jeon * and if we find the lock was already taken, it is not safe to write the mft 7031e9ea7e0SNamjae Jeon * record and we return 'false'. 7041e9ea7e0SNamjae Jeon * 7051e9ea7e0SNamjae Jeon * If we manage to obtain the lock we have exclusive access to the mft record, 7061e9ea7e0SNamjae Jeon * which also allows us safe writeout of the mft record. We then set 7071e9ea7e0SNamjae Jeon * @locked_ni to the locked ntfs inode and return 'true'. 7081e9ea7e0SNamjae Jeon * 7091e9ea7e0SNamjae Jeon * Note we cannot just lock the mft record and sleep while waiting for the lock 710115380f9SNamjae Jeon * because this would deadlock due to lock reversal. 7111e9ea7e0SNamjae Jeon * 7121e9ea7e0SNamjae Jeon * If the inode is not in icache we need to perform further checks. 7131e9ea7e0SNamjae Jeon * 7141e9ea7e0SNamjae Jeon * If the mft record is not a FILE record or it is a base mft record, we can 7151e9ea7e0SNamjae Jeon * safely write it and return 'true'. 
7161e9ea7e0SNamjae Jeon * 7171e9ea7e0SNamjae Jeon * We now know the mft record is an extent mft record. We check if the inode 718115380f9SNamjae Jeon * corresponding to its base mft record is in icache. If it is not, we cannot 719115380f9SNamjae Jeon * safely determine the state of the extent inode, so we return 'false'. 7201e9ea7e0SNamjae Jeon * 7211e9ea7e0SNamjae Jeon * We now have the base inode for the extent mft record. We check if it has an 722115380f9SNamjae Jeon * ntfs inode for the extent mft record attached. If not, it is safe to write 7231e9ea7e0SNamjae Jeon * the extent mft record and we return 'true'. 7241e9ea7e0SNamjae Jeon * 725115380f9SNamjae Jeon * If the extent inode is attached, we check if it is dirty. If so, we return 726115380f9SNamjae Jeon * 'false' (letting the standard write_inode path handle it). 727115380f9SNamjae Jeon * 728115380f9SNamjae Jeon * If it is not dirty, we attempt to lock the extent mft record. If the lock 729115380f9SNamjae Jeon * was already taken, it is not safe to write and we return 'false'. 7301e9ea7e0SNamjae Jeon * 7311e9ea7e0SNamjae Jeon * If we manage to obtain the lock we have exclusive access to the extent mft 732115380f9SNamjae Jeon * record. We set @locked_ni to the now locked ntfs inode and return 'true'. 
7331e9ea7e0SNamjae Jeon */ 734d9038d99SNamjae Jeon static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no, 735115380f9SNamjae Jeon const struct mft_record *m, struct ntfs_inode **locked_ni, 736115380f9SNamjae Jeon struct inode **ref_vi) 7371e9ea7e0SNamjae Jeon { 7381e9ea7e0SNamjae Jeon struct super_block *sb = vol->sb; 7391e9ea7e0SNamjae Jeon struct inode *mft_vi = vol->mft_ino; 7401e9ea7e0SNamjae Jeon struct inode *vi; 741115380f9SNamjae Jeon struct ntfs_inode *ni, *eni, **extent_nis; 7421e9ea7e0SNamjae Jeon int i; 743115380f9SNamjae Jeon struct ntfs_attr na = {0}; 7441e9ea7e0SNamjae Jeon 745d9038d99SNamjae Jeon ntfs_debug("Entering for inode 0x%llx.", mft_no); 7461e9ea7e0SNamjae Jeon /* 7471e9ea7e0SNamjae Jeon * Normally we do not return a locked inode so set @locked_ni to NULL. 7481e9ea7e0SNamjae Jeon */ 7491e9ea7e0SNamjae Jeon *locked_ni = NULL; 750115380f9SNamjae Jeon *ref_vi = NULL; 751115380f9SNamjae Jeon 7521e9ea7e0SNamjae Jeon /* 7531e9ea7e0SNamjae Jeon * Check if the inode corresponding to this mft record is in the VFS 7541e9ea7e0SNamjae Jeon * inode cache and obtain a reference to it if it is. 7551e9ea7e0SNamjae Jeon */ 756d9038d99SNamjae Jeon ntfs_debug("Looking for inode 0x%llx in icache.", mft_no); 7571e9ea7e0SNamjae Jeon na.mft_no = mft_no; 7581e9ea7e0SNamjae Jeon na.type = AT_UNUSED; 7591e9ea7e0SNamjae Jeon /* 7601e9ea7e0SNamjae Jeon * Optimize inode 0, i.e. $MFT itself, since we have it in memory and 7611e9ea7e0SNamjae Jeon * we get here for it rather often. 7621e9ea7e0SNamjae Jeon */ 7631e9ea7e0SNamjae Jeon if (!mft_no) { 7641e9ea7e0SNamjae Jeon /* Balance the below iput(). 
*/ 7651e9ea7e0SNamjae Jeon vi = igrab(mft_vi); 766115380f9SNamjae Jeon WARN_ON(vi != mft_vi); 7671e9ea7e0SNamjae Jeon } else { 7681e9ea7e0SNamjae Jeon /* 769115380f9SNamjae Jeon * Have to use find_inode_nowait() since ilookup5_nowait() 770115380f9SNamjae Jeon * waits for inode with I_FREEING, which causes ntfs to deadlock 771115380f9SNamjae Jeon * when inodes are unlinked concurrently 7721e9ea7e0SNamjae Jeon */ 773115380f9SNamjae Jeon vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na); 774115380f9SNamjae Jeon if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated) 775115380f9SNamjae Jeon return false; 7761e9ea7e0SNamjae Jeon } 7771e9ea7e0SNamjae Jeon if (vi) { 778d9038d99SNamjae Jeon ntfs_debug("Base inode 0x%llx is in icache.", mft_no); 7791e9ea7e0SNamjae Jeon /* The inode is in icache. */ 7801e9ea7e0SNamjae Jeon ni = NTFS_I(vi); 7811e9ea7e0SNamjae Jeon /* Take a reference to the ntfs inode. */ 7821e9ea7e0SNamjae Jeon atomic_inc(&ni->count); 7831e9ea7e0SNamjae Jeon /* If the inode is dirty, do not write this record. */ 7841e9ea7e0SNamjae Jeon if (NInoDirty(ni)) { 785d9038d99SNamjae Jeon ntfs_debug("Inode 0x%llx is dirty, do not write it.", 7861e9ea7e0SNamjae Jeon mft_no); 7871e9ea7e0SNamjae Jeon atomic_dec(&ni->count); 788115380f9SNamjae Jeon *ref_vi = vi; 7891e9ea7e0SNamjae Jeon return false; 7901e9ea7e0SNamjae Jeon } 791d9038d99SNamjae Jeon ntfs_debug("Inode 0x%llx is not dirty.", mft_no); 7921e9ea7e0SNamjae Jeon /* The inode is not dirty, try to take the mft record lock. 
*/ 7931e9ea7e0SNamjae Jeon if (unlikely(!mutex_trylock(&ni->mrec_lock))) { 794d9038d99SNamjae Jeon ntfs_debug("Mft record 0x%llx is already locked, do not write it.", mft_no); 7951e9ea7e0SNamjae Jeon atomic_dec(&ni->count); 796115380f9SNamjae Jeon *ref_vi = vi; 7971e9ea7e0SNamjae Jeon return false; 7981e9ea7e0SNamjae Jeon } 799d9038d99SNamjae Jeon ntfs_debug("Managed to lock mft record 0x%llx, write it.", 8001e9ea7e0SNamjae Jeon mft_no); 8011e9ea7e0SNamjae Jeon /* 8021e9ea7e0SNamjae Jeon * The write has to occur while we hold the mft record lock so 8031e9ea7e0SNamjae Jeon * return the locked ntfs inode. 8041e9ea7e0SNamjae Jeon */ 8051e9ea7e0SNamjae Jeon *locked_ni = ni; 8061e9ea7e0SNamjae Jeon return true; 8071e9ea7e0SNamjae Jeon } 808d9038d99SNamjae Jeon ntfs_debug("Inode 0x%llx is not in icache.", mft_no); 8091e9ea7e0SNamjae Jeon /* The inode is not in icache. */ 8101e9ea7e0SNamjae Jeon /* Write the record if it is not a mft record (type "FILE"). */ 8111e9ea7e0SNamjae Jeon if (!ntfs_is_mft_record(m->magic)) { 812d9038d99SNamjae Jeon ntfs_debug("Mft record 0x%llx is not a FILE record, write it.", 8131e9ea7e0SNamjae Jeon mft_no); 8141e9ea7e0SNamjae Jeon return true; 8151e9ea7e0SNamjae Jeon } 8161e9ea7e0SNamjae Jeon /* Write the mft record if it is a base inode. */ 8171e9ea7e0SNamjae Jeon if (!m->base_mft_record) { 818d9038d99SNamjae Jeon ntfs_debug("Mft record 0x%llx is a base record, write it.", 8191e9ea7e0SNamjae Jeon mft_no); 8201e9ea7e0SNamjae Jeon return true; 8211e9ea7e0SNamjae Jeon } 8221e9ea7e0SNamjae Jeon /* 8231e9ea7e0SNamjae Jeon * This is an extent mft record. Check if the inode corresponding to 8241e9ea7e0SNamjae Jeon * its base mft record is in icache and obtain a reference to it if it 8251e9ea7e0SNamjae Jeon * is. 8261e9ea7e0SNamjae Jeon */ 8271e9ea7e0SNamjae Jeon na.mft_no = MREF_LE(m->base_mft_record); 828115380f9SNamjae Jeon na.state = 0; 829d9038d99SNamjae Jeon ntfs_debug("Mft record 0x%llx is an extent record. 
Looking for base inode 0x%llx in icache.", 830115380f9SNamjae Jeon mft_no, na.mft_no); 8311e9ea7e0SNamjae Jeon if (!na.mft_no) { 8321e9ea7e0SNamjae Jeon /* Balance the below iput(). */ 8331e9ea7e0SNamjae Jeon vi = igrab(mft_vi); 834115380f9SNamjae Jeon WARN_ON(vi != mft_vi); 835115380f9SNamjae Jeon } else { 836115380f9SNamjae Jeon vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na); 837115380f9SNamjae Jeon if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated) 838115380f9SNamjae Jeon return false; 8391e9ea7e0SNamjae Jeon } 840115380f9SNamjae Jeon 841115380f9SNamjae Jeon if (!vi) 842115380f9SNamjae Jeon return false; 843d9038d99SNamjae Jeon ntfs_debug("Base inode 0x%llx is in icache.", na.mft_no); 8441e9ea7e0SNamjae Jeon /* 8451e9ea7e0SNamjae Jeon * The base inode is in icache. Check if it has the extent inode 8461e9ea7e0SNamjae Jeon * corresponding to this extent mft record attached. 8471e9ea7e0SNamjae Jeon */ 8481e9ea7e0SNamjae Jeon ni = NTFS_I(vi); 8491e9ea7e0SNamjae Jeon mutex_lock(&ni->extent_lock); 8501e9ea7e0SNamjae Jeon if (ni->nr_extents <= 0) { 8511e9ea7e0SNamjae Jeon /* 8521e9ea7e0SNamjae Jeon * The base inode has no attached extent inodes, write this 8531e9ea7e0SNamjae Jeon * extent mft record. 8541e9ea7e0SNamjae Jeon */ 8551e9ea7e0SNamjae Jeon mutex_unlock(&ni->extent_lock); 856115380f9SNamjae Jeon *ref_vi = vi; 857d9038d99SNamjae Jeon ntfs_debug("Base inode 0x%llx has no attached extent inodes, write the extent record.", 858115380f9SNamjae Jeon na.mft_no); 8591e9ea7e0SNamjae Jeon return true; 8601e9ea7e0SNamjae Jeon } 8611e9ea7e0SNamjae Jeon /* Iterate over the attached extent inodes. */ 8621e9ea7e0SNamjae Jeon extent_nis = ni->ext.extent_ntfs_inos; 8631e9ea7e0SNamjae Jeon for (eni = NULL, i = 0; i < ni->nr_extents; ++i) { 8641e9ea7e0SNamjae Jeon if (mft_no == extent_nis[i]->mft_no) { 8651e9ea7e0SNamjae Jeon /* 8661e9ea7e0SNamjae Jeon * Found the extent inode corresponding to this extent 8671e9ea7e0SNamjae Jeon * mft record. 
8681e9ea7e0SNamjae Jeon */ 8691e9ea7e0SNamjae Jeon eni = extent_nis[i]; 8701e9ea7e0SNamjae Jeon break; 8711e9ea7e0SNamjae Jeon } 8721e9ea7e0SNamjae Jeon } 8731e9ea7e0SNamjae Jeon /* 8741e9ea7e0SNamjae Jeon * If the extent inode was not attached to the base inode, write this 8751e9ea7e0SNamjae Jeon * extent mft record. 8761e9ea7e0SNamjae Jeon */ 8771e9ea7e0SNamjae Jeon if (!eni) { 8781e9ea7e0SNamjae Jeon mutex_unlock(&ni->extent_lock); 879115380f9SNamjae Jeon *ref_vi = vi; 880d9038d99SNamjae Jeon ntfs_debug("Extent inode 0x%llx is not attached to its base inode 0x%llx, write the extent record.", 8811e9ea7e0SNamjae Jeon mft_no, na.mft_no); 8821e9ea7e0SNamjae Jeon return true; 8831e9ea7e0SNamjae Jeon } 884d9038d99SNamjae Jeon ntfs_debug("Extent inode 0x%llx is attached to its base inode 0x%llx.", 8851e9ea7e0SNamjae Jeon mft_no, na.mft_no); 8861e9ea7e0SNamjae Jeon /* Take a reference to the extent ntfs inode. */ 8871e9ea7e0SNamjae Jeon atomic_inc(&eni->count); 8881e9ea7e0SNamjae Jeon mutex_unlock(&ni->extent_lock); 889115380f9SNamjae Jeon 890115380f9SNamjae Jeon /* if extent inode is dirty, write_inode will write it */ 891115380f9SNamjae Jeon if (NInoDirty(eni)) { 892115380f9SNamjae Jeon atomic_dec(&eni->count); 893115380f9SNamjae Jeon *ref_vi = vi; 894115380f9SNamjae Jeon return false; 895115380f9SNamjae Jeon } 896115380f9SNamjae Jeon 8971e9ea7e0SNamjae Jeon /* 8981e9ea7e0SNamjae Jeon * Found the extent inode coresponding to this extent mft record. 8991e9ea7e0SNamjae Jeon * Try to take the mft record lock. 
9001e9ea7e0SNamjae Jeon */ 9011e9ea7e0SNamjae Jeon if (unlikely(!mutex_trylock(&eni->mrec_lock))) { 9021e9ea7e0SNamjae Jeon atomic_dec(&eni->count); 903115380f9SNamjae Jeon *ref_vi = vi; 904d9038d99SNamjae Jeon ntfs_debug("Extent mft record 0x%llx is already locked, do not write it.", 905115380f9SNamjae Jeon mft_no); 9061e9ea7e0SNamjae Jeon return false; 9071e9ea7e0SNamjae Jeon } 908d9038d99SNamjae Jeon ntfs_debug("Managed to lock extent mft record 0x%llx, write it.", 9091e9ea7e0SNamjae Jeon mft_no); 9101e9ea7e0SNamjae Jeon /* 9111e9ea7e0SNamjae Jeon * The write has to occur while we hold the mft record lock so return 9121e9ea7e0SNamjae Jeon * the locked extent ntfs inode. 9131e9ea7e0SNamjae Jeon */ 9141e9ea7e0SNamjae Jeon *locked_ni = eni; 9151e9ea7e0SNamjae Jeon return true; 9161e9ea7e0SNamjae Jeon } 9171e9ea7e0SNamjae Jeon 918115380f9SNamjae Jeon static const char *es = " Leaving inconsistent metadata. Unmount and run chkdsk."; 9191e9ea7e0SNamjae Jeon 920115380f9SNamjae Jeon #define RESERVED_MFT_RECORDS 64 921115380f9SNamjae Jeon 922115380f9SNamjae Jeon /* 9231e9ea7e0SNamjae Jeon * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name 9241e9ea7e0SNamjae Jeon * @vol: volume on which to search for a free mft record 9251e9ea7e0SNamjae Jeon * @base_ni: open base inode if allocating an extent mft record or NULL 9261e9ea7e0SNamjae Jeon * 9271e9ea7e0SNamjae Jeon * Search for a free mft record in the mft bitmap attribute on the ntfs volume 9281e9ea7e0SNamjae Jeon * @vol. 9291e9ea7e0SNamjae Jeon * 9301e9ea7e0SNamjae Jeon * If @base_ni is NULL start the search at the default allocator position. 9311e9ea7e0SNamjae Jeon * 9321e9ea7e0SNamjae Jeon * If @base_ni is not NULL start the search at the mft record after the base 9331e9ea7e0SNamjae Jeon * mft record @base_ni. 9341e9ea7e0SNamjae Jeon * 9351e9ea7e0SNamjae Jeon * Return the free mft record on success and -errno on error. 
 * -ENOSPC means that there are no free mft records in the currently
 * initialized mft bitmap.
 *
 * Locking: Caller must hold vol->mftbmp_lock for writing.
 */
static s64 ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(struct ntfs_volume *vol,
		struct ntfs_inode *base_ni)
{
	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
	unsigned long flags;
	struct address_space *mftbmp_mapping;
	u8 *buf = NULL, *byte;
	struct folio *folio;
	unsigned int folio_ofs, size;
	u8 pass, b;

	ntfs_debug("Searching for free mft record in the currently initialized mft bitmap.");
	mftbmp_mapping = vol->mftbmp_ino->i_mapping;
	/*
	 * Set the end of the pass making sure we do not overflow the mft
	 * bitmap.  The pass end is the smaller of the number of allocated mft
	 * records and the number of bits in the initialized part of the
	 * bitmap.
	 */
	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
	pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
			vol->mft_record_size_bits;
	read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
	read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
	read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	if (pass_end > ll)
		pass_end = ll;
	pass = 1;
	/* Pass 1 starts after the base record (or at the allocator hint). */
	if (!base_ni)
		data_pos = vol->mft_data_pos;
	else
		data_pos = base_ni->mft_no + 1;
	/* Never hand out records below the reserved region. */
	if (data_pos < RESERVED_MFT_RECORDS)
		data_pos = RESERVED_MFT_RECORDS;
	if (data_pos >= pass_end) {
		data_pos = RESERVED_MFT_RECORDS;
		pass = 2;
		/* This happens on a freshly formatted volume. */
		if (data_pos >= pass_end)
			return -ENOSPC;
	}

	/*
	 * When extending $MFT itself, scan from record 0 in a single pass so
	 * the low, always-mapped part of the bitmap is used.
	 */
	if (base_ni && base_ni->mft_no == FILE_MFT) {
		data_pos = 0;
		pass = 2;
	}

	pass_start = data_pos;
	ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, pass_end 0x%llx, data_pos 0x%llx.",
			pass, pass_start, pass_end, data_pos);
	/* Loop until a free mft record is found. */
	for (; pass <= 2;) {
		/* Cap size to pass_end. */
		ofs = data_pos >> 3;
		folio_ofs = ofs & ~PAGE_MASK;
		size = PAGE_SIZE - folio_ofs;
		/* Do not scan past the end of the current pass. */
		ll = ((pass_end + 7) >> 3) - ofs;
		if (size > ll)
			size = ll;
		/* Convert @size from bytes to bits for the inner loop. */
		size <<= 3;
		/*
		 * If we are still within the active pass, search the next page
		 * for a zero bit.
		 */
		if (size) {
			folio = read_mapping_folio(mftbmp_mapping,
					ofs >> PAGE_SHIFT, NULL);
			if (IS_ERR(folio)) {
				ntfs_error(vol->sb, "Failed to read mft bitmap, aborting.");
				return PTR_ERR(folio);
			}
			folio_lock(folio);
			buf = (u8 *)kmap_local_folio(folio, 0) + folio_ofs;
			/* @bit is the bit offset within @buf; byte-align @data_pos. */
			bit = data_pos & 7;
			data_pos &= ~7ull;
			ntfs_debug("Before inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			/*
			 * Scan one byte per iteration: the step expression
			 * rounds @bit down to a byte boundary and advances to
			 * the next byte.
			 */
			for (; bit < size && data_pos + bit < pass_end;
					bit &= ~7ull, bit += 8) {
				/*
				 * If we're extending $MFT and running out of the first
				 * mft record (base record) then give up searching since
				 * no guarantee that the found record will be accessible.
				 */
				if (base_ni && base_ni->mft_no == FILE_MFT && bit > 400) {
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					return -ENOSPC;
				}

				byte = buf + (bit >> 3);
				if (*byte == 0xff)
					continue;
				b = ffz((unsigned long)*byte);
				if (b < 8 && b >= (bit & 7)) {
					ll = data_pos + (bit & ~7ull) + b;
					/* Mft record numbers are limited to 32 bits. */
					if (unlikely(ll > (1ll << 32))) {
						folio_unlock(folio);
						kunmap_local(buf);
						folio_put(folio);
						return -ENOSPC;
					}
					/* Claim the bit and mark the bitmap dirty. */
					*byte |= 1 << b;
					folio_mark_dirty(folio);
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					ntfs_debug("Done. (Found and allocated mft record 0x%llx.)",
							ll);
					return ll;
				}
			}
			ntfs_debug("After inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			data_pos += size;
			folio_unlock(folio);
			kunmap_local(buf);
			folio_put(folio);
			/*
			 * If the end of the pass has not been reached yet,
			 * continue searching the mft bitmap for a zero bit.
			 */
			if (data_pos < pass_end)
				continue;
		}
		/* Do the next pass. */
		if (++pass == 2) {
			/*
			 * Starting the second pass, in which we scan the first
			 * part of the zone which we omitted earlier.
			 */
			pass_end = pass_start;
			data_pos = pass_start = RESERVED_MFT_RECORDS;
			ntfs_debug("pass %i, pass_start 0x%llx, pass_end 0x%llx.",
					pass, pass_start, pass_end);
			if (data_pos >= pass_end)
				break;
		}
	}
	/* No free mft records in currently initialized mft bitmap. */
	ntfs_debug("Done. (No free mft records left in currently initialized mft bitmap.)");
	return -ENOSPC;
}

/*
 * ntfs_mft_attr_extend - make room for a larger attribute record
 * @ni:		ntfs inode of the attribute whose record needs to grow
 *
 * Called when an in-place attribute record resize fails.  If the base inode
 * has no attribute list yet, add one and return -EAGAIN so the caller retries
 * the operation from scratch.  Otherwise rebuild the attribute's mapping
 * pairs (which may move extents into other mft records).
 *
 * Return 0 on success, -EAGAIN after adding an attribute list, or a negative
 * error code on failure.
 */
static int ntfs_mft_attr_extend(struct ntfs_inode *ni)
{
	int ret = 0;
	struct ntfs_inode *base_ni;

	if (NInoAttr(ni))
		base_ni = ni->ext.base_ntfs_ino;
	else
		base_ni = ni;

	if (!NInoAttrList(base_ni)) {
		ret = ntfs_inode_add_attrlist(base_ni);
		if (ret) {
			pr_err("Can not add attrlist\n");
			goto out;
		} else {
			/* Attribute list added; signal the caller to retry. */
			ret = -EAGAIN;
			goto out;
		}
	}

	ret = ntfs_attr_update_mapping_pairs(ni, 0);
	if (ret)
		pr_err("MP update failed\n");

out:
	return ret;
}

/*
 * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster
 * @vol:	volume on which to extend the mft bitmap attribute
 *
 * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
 *
 * Note: Only changes allocated_size, i.e. does not touch initialized_size or
 * data_size.
 *
 * Return 0 on success and -errno on error.
 *
 * Locking: - Caller must hold vol->mftbmp_lock for writing.
 *	    - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for
 *	      writing and releases it before returning.
 *	    - This function takes vol->lcnbmp_lock for writing and releases it
 *	      before returning.
11301e9ea7e0SNamjae Jeon */ 1131115380f9SNamjae Jeon static int ntfs_mft_bitmap_extend_allocation_nolock(struct ntfs_volume *vol) 11321e9ea7e0SNamjae Jeon { 1133115380f9SNamjae Jeon s64 lcn; 11341e9ea7e0SNamjae Jeon s64 ll; 11351e9ea7e0SNamjae Jeon unsigned long flags; 1136115380f9SNamjae Jeon struct folio *folio; 1137115380f9SNamjae Jeon struct ntfs_inode *mft_ni, *mftbmp_ni; 1138115380f9SNamjae Jeon struct runlist_element *rl, *rl2 = NULL; 1139115380f9SNamjae Jeon struct ntfs_attr_search_ctx *ctx = NULL; 1140115380f9SNamjae Jeon struct mft_record *mrec; 1141115380f9SNamjae Jeon struct attr_record *a = NULL; 11421e9ea7e0SNamjae Jeon int ret, mp_size; 11431e9ea7e0SNamjae Jeon u32 old_alen = 0; 11441e9ea7e0SNamjae Jeon u8 *b, tb; 11451e9ea7e0SNamjae Jeon struct { 11461e9ea7e0SNamjae Jeon u8 added_cluster:1; 11471e9ea7e0SNamjae Jeon u8 added_run:1; 11481e9ea7e0SNamjae Jeon u8 mp_rebuilt:1; 1149115380f9SNamjae Jeon u8 mp_extended:1; 1150115380f9SNamjae Jeon } status = { 0, 0, 0, 0 }; 1151115380f9SNamjae Jeon size_t new_rl_count; 11521e9ea7e0SNamjae Jeon 11531e9ea7e0SNamjae Jeon ntfs_debug("Extending mft bitmap allocation."); 11541e9ea7e0SNamjae Jeon mft_ni = NTFS_I(vol->mft_ino); 11551e9ea7e0SNamjae Jeon mftbmp_ni = NTFS_I(vol->mftbmp_ino); 11561e9ea7e0SNamjae Jeon /* 11571e9ea7e0SNamjae Jeon * Determine the last lcn of the mft bitmap. The allocated size of the 11581e9ea7e0SNamjae Jeon * mft bitmap cannot be zero so we are ok to do this. 
11591e9ea7e0SNamjae Jeon */ 11601e9ea7e0SNamjae Jeon down_write(&mftbmp_ni->runlist.lock); 11611e9ea7e0SNamjae Jeon read_lock_irqsave(&mftbmp_ni->size_lock, flags); 11621e9ea7e0SNamjae Jeon ll = mftbmp_ni->allocated_size; 11631e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 11641e9ea7e0SNamjae Jeon rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, 1165115380f9SNamjae Jeon NTFS_B_TO_CLU(vol, ll - 1), NULL); 11661e9ea7e0SNamjae Jeon if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 11671e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 1168115380f9SNamjae Jeon ntfs_error(vol->sb, 1169115380f9SNamjae Jeon "Failed to determine last allocated cluster of mft bitmap attribute."); 11701e9ea7e0SNamjae Jeon if (!IS_ERR(rl)) 11711e9ea7e0SNamjae Jeon ret = -EIO; 11721e9ea7e0SNamjae Jeon else 11731e9ea7e0SNamjae Jeon ret = PTR_ERR(rl); 11741e9ea7e0SNamjae Jeon return ret; 11751e9ea7e0SNamjae Jeon } 11761e9ea7e0SNamjae Jeon lcn = rl->lcn + rl->length; 11771e9ea7e0SNamjae Jeon ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.", 11781e9ea7e0SNamjae Jeon (long long)lcn); 11791e9ea7e0SNamjae Jeon /* 11801e9ea7e0SNamjae Jeon * Attempt to get the cluster following the last allocated cluster by 11811e9ea7e0SNamjae Jeon * hand as it may be in the MFT zone so the allocator would not give it 11821e9ea7e0SNamjae Jeon * to us. 
11831e9ea7e0SNamjae Jeon */ 11841e9ea7e0SNamjae Jeon ll = lcn >> 3; 1185115380f9SNamjae Jeon folio = read_mapping_folio(vol->lcnbmp_ino->i_mapping, 1186115380f9SNamjae Jeon ll >> PAGE_SHIFT, NULL); 1187115380f9SNamjae Jeon if (IS_ERR(folio)) { 11881e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 11891e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to read from lcn bitmap."); 1190115380f9SNamjae Jeon return PTR_ERR(folio); 11911e9ea7e0SNamjae Jeon } 1192115380f9SNamjae Jeon 11931e9ea7e0SNamjae Jeon down_write(&vol->lcnbmp_lock); 1194115380f9SNamjae Jeon folio_lock(folio); 1195115380f9SNamjae Jeon b = (u8 *)kmap_local_folio(folio, 0) + (ll & ~PAGE_MASK); 1196115380f9SNamjae Jeon tb = 1 << (lcn & 7ull); 11971e9ea7e0SNamjae Jeon if (*b != 0xff && !(*b & tb)) { 11981e9ea7e0SNamjae Jeon /* Next cluster is free, allocate it. */ 11991e9ea7e0SNamjae Jeon *b |= tb; 1200115380f9SNamjae Jeon folio_mark_dirty(folio); 1201115380f9SNamjae Jeon folio_unlock(folio); 1202115380f9SNamjae Jeon kunmap_local(b); 1203115380f9SNamjae Jeon folio_put(folio); 12041e9ea7e0SNamjae Jeon up_write(&vol->lcnbmp_lock); 12051e9ea7e0SNamjae Jeon /* Update the mft bitmap runlist. */ 12061e9ea7e0SNamjae Jeon rl->length++; 12071e9ea7e0SNamjae Jeon rl[1].vcn++; 12081e9ea7e0SNamjae Jeon status.added_cluster = 1; 12091e9ea7e0SNamjae Jeon ntfs_debug("Appending one cluster to mft bitmap."); 12101e9ea7e0SNamjae Jeon } else { 1211115380f9SNamjae Jeon folio_unlock(folio); 1212115380f9SNamjae Jeon kunmap_local(b); 1213115380f9SNamjae Jeon folio_put(folio); 12141e9ea7e0SNamjae Jeon up_write(&vol->lcnbmp_lock); 12151e9ea7e0SNamjae Jeon /* Allocate a cluster from the DATA_ZONE. 
*/ 12161e9ea7e0SNamjae Jeon rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE, 1217115380f9SNamjae Jeon true, false, false); 12181e9ea7e0SNamjae Jeon if (IS_ERR(rl2)) { 12191e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 1220115380f9SNamjae Jeon ntfs_error(vol->sb, 1221115380f9SNamjae Jeon "Failed to allocate a cluster for the mft bitmap."); 12221e9ea7e0SNamjae Jeon return PTR_ERR(rl2); 12231e9ea7e0SNamjae Jeon } 1224115380f9SNamjae Jeon rl = ntfs_runlists_merge(&mftbmp_ni->runlist, rl2, 0, &new_rl_count); 12251e9ea7e0SNamjae Jeon if (IS_ERR(rl)) { 12261e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 1227115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to merge runlists for mft bitmap."); 12281e9ea7e0SNamjae Jeon if (ntfs_cluster_free_from_rl(vol, rl2)) { 1229115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to deallocate allocated cluster.%s", 1230115380f9SNamjae Jeon es); 12311e9ea7e0SNamjae Jeon NVolSetErrors(vol); 12321e9ea7e0SNamjae Jeon } 1233115380f9SNamjae Jeon kvfree(rl2); 12341e9ea7e0SNamjae Jeon return PTR_ERR(rl); 12351e9ea7e0SNamjae Jeon } 12361e9ea7e0SNamjae Jeon mftbmp_ni->runlist.rl = rl; 1237115380f9SNamjae Jeon mftbmp_ni->runlist.count = new_rl_count; 12381e9ea7e0SNamjae Jeon status.added_run = 1; 12391e9ea7e0SNamjae Jeon ntfs_debug("Adding one run to mft bitmap."); 12401e9ea7e0SNamjae Jeon /* Find the last run in the new runlist. */ 12411e9ea7e0SNamjae Jeon for (; rl[1].length; rl++) 12421e9ea7e0SNamjae Jeon ; 12431e9ea7e0SNamjae Jeon } 12441e9ea7e0SNamjae Jeon /* 12451e9ea7e0SNamjae Jeon * Update the attribute record as well. Note: @rl is the last 12461e9ea7e0SNamjae Jeon * (non-terminator) runlist element of mft bitmap. 
12471e9ea7e0SNamjae Jeon */ 12481e9ea7e0SNamjae Jeon mrec = map_mft_record(mft_ni); 12491e9ea7e0SNamjae Jeon if (IS_ERR(mrec)) { 12501e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to map mft record."); 12511e9ea7e0SNamjae Jeon ret = PTR_ERR(mrec); 12521e9ea7e0SNamjae Jeon goto undo_alloc; 12531e9ea7e0SNamjae Jeon } 12541e9ea7e0SNamjae Jeon ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 12551e9ea7e0SNamjae Jeon if (unlikely(!ctx)) { 12561e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to get search context."); 12571e9ea7e0SNamjae Jeon ret = -ENOMEM; 12581e9ea7e0SNamjae Jeon goto undo_alloc; 12591e9ea7e0SNamjae Jeon } 12601e9ea7e0SNamjae Jeon ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 12611e9ea7e0SNamjae Jeon mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 12621e9ea7e0SNamjae Jeon 0, ctx); 12631e9ea7e0SNamjae Jeon if (unlikely(ret)) { 1264115380f9SNamjae Jeon ntfs_error(vol->sb, 1265115380f9SNamjae Jeon "Failed to find last attribute extent of mft bitmap attribute."); 12661e9ea7e0SNamjae Jeon if (ret == -ENOENT) 12671e9ea7e0SNamjae Jeon ret = -EIO; 12681e9ea7e0SNamjae Jeon goto undo_alloc; 12691e9ea7e0SNamjae Jeon } 12701e9ea7e0SNamjae Jeon a = ctx->attr; 1271115380f9SNamjae Jeon ll = le64_to_cpu(a->data.non_resident.lowest_vcn); 12721e9ea7e0SNamjae Jeon /* Search back for the previous last allocated cluster of mft bitmap. */ 12731e9ea7e0SNamjae Jeon for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) { 12741e9ea7e0SNamjae Jeon if (ll >= rl2->vcn) 12751e9ea7e0SNamjae Jeon break; 12761e9ea7e0SNamjae Jeon } 1277115380f9SNamjae Jeon WARN_ON(ll < rl2->vcn); 1278115380f9SNamjae Jeon WARN_ON(ll >= rl2->vcn + rl2->length); 12791e9ea7e0SNamjae Jeon /* Get the size for the new mapping pairs array for this extent. 
*/ 1280115380f9SNamjae Jeon mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1); 12811e9ea7e0SNamjae Jeon if (unlikely(mp_size <= 0)) { 1282115380f9SNamjae Jeon ntfs_error(vol->sb, 1283115380f9SNamjae Jeon "Get size for mapping pairs failed for mft bitmap attribute extent."); 12841e9ea7e0SNamjae Jeon ret = mp_size; 12851e9ea7e0SNamjae Jeon if (!ret) 12861e9ea7e0SNamjae Jeon ret = -EIO; 12871e9ea7e0SNamjae Jeon goto undo_alloc; 12881e9ea7e0SNamjae Jeon } 12891e9ea7e0SNamjae Jeon /* Expand the attribute record if necessary. */ 12901e9ea7e0SNamjae Jeon old_alen = le32_to_cpu(a->length); 12911e9ea7e0SNamjae Jeon ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 12921e9ea7e0SNamjae Jeon le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 12931e9ea7e0SNamjae Jeon if (unlikely(ret)) { 1294115380f9SNamjae Jeon ret = ntfs_mft_attr_extend(mftbmp_ni); 1295115380f9SNamjae Jeon if (!ret) 1296115380f9SNamjae Jeon goto extended_ok; 1297115380f9SNamjae Jeon if (ret != -EAGAIN) 1298115380f9SNamjae Jeon status.mp_extended = 1; 12991e9ea7e0SNamjae Jeon goto undo_alloc; 13001e9ea7e0SNamjae Jeon } 13011e9ea7e0SNamjae Jeon status.mp_rebuilt = 1; 13021e9ea7e0SNamjae Jeon /* Generate the mapping pairs array directly into the attr record. */ 13031e9ea7e0SNamjae Jeon ret = ntfs_mapping_pairs_build(vol, (u8 *)a + 13041e9ea7e0SNamjae Jeon le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1305115380f9SNamjae Jeon mp_size, rl2, ll, -1, NULL, NULL, NULL); 13061e9ea7e0SNamjae Jeon if (unlikely(ret)) { 1307115380f9SNamjae Jeon ntfs_error(vol->sb, 1308115380f9SNamjae Jeon "Failed to build mapping pairs array for mft bitmap attribute."); 13091e9ea7e0SNamjae Jeon goto undo_alloc; 13101e9ea7e0SNamjae Jeon } 13111e9ea7e0SNamjae Jeon /* Update the highest_vcn. */ 1312115380f9SNamjae Jeon a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1); 13131e9ea7e0SNamjae Jeon /* 13141e9ea7e0SNamjae Jeon * We now have extended the mft bitmap allocated_size by one cluster. 
1315115380f9SNamjae Jeon * Reflect this in the struct ntfs_inode structure and the attribute record. 13161e9ea7e0SNamjae Jeon */ 13171e9ea7e0SNamjae Jeon if (a->data.non_resident.lowest_vcn) { 13181e9ea7e0SNamjae Jeon /* 13191e9ea7e0SNamjae Jeon * We are not in the first attribute extent, switch to it, but 13201e9ea7e0SNamjae Jeon * first ensure the changes will make it to disk later. 13211e9ea7e0SNamjae Jeon */ 13221e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1323115380f9SNamjae Jeon extended_ok: 13241e9ea7e0SNamjae Jeon ntfs_attr_reinit_search_ctx(ctx); 13251e9ea7e0SNamjae Jeon ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 13261e9ea7e0SNamjae Jeon mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 13271e9ea7e0SNamjae Jeon 0, ctx); 13281e9ea7e0SNamjae Jeon if (unlikely(ret)) { 1329115380f9SNamjae Jeon ntfs_error(vol->sb, 1330115380f9SNamjae Jeon "Failed to find first attribute extent of mft bitmap attribute."); 13311e9ea7e0SNamjae Jeon goto restore_undo_alloc; 13321e9ea7e0SNamjae Jeon } 13331e9ea7e0SNamjae Jeon a = ctx->attr; 13341e9ea7e0SNamjae Jeon } 1335115380f9SNamjae Jeon 13361e9ea7e0SNamjae Jeon write_lock_irqsave(&mftbmp_ni->size_lock, flags); 13371e9ea7e0SNamjae Jeon mftbmp_ni->allocated_size += vol->cluster_size; 13381e9ea7e0SNamjae Jeon a->data.non_resident.allocated_size = 1339115380f9SNamjae Jeon cpu_to_le64(mftbmp_ni->allocated_size); 13401e9ea7e0SNamjae Jeon write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 13411e9ea7e0SNamjae Jeon /* Ensure the changes make it to disk. 
*/ 13421e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 13431e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 13441e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 13451e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 13461e9ea7e0SNamjae Jeon ntfs_debug("Done."); 13471e9ea7e0SNamjae Jeon return 0; 1348115380f9SNamjae Jeon 13491e9ea7e0SNamjae Jeon restore_undo_alloc: 13501e9ea7e0SNamjae Jeon ntfs_attr_reinit_search_ctx(ctx); 13511e9ea7e0SNamjae Jeon if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 13521e9ea7e0SNamjae Jeon mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 13531e9ea7e0SNamjae Jeon 0, ctx)) { 1354115380f9SNamjae Jeon ntfs_error(vol->sb, 1355115380f9SNamjae Jeon "Failed to find last attribute extent of mft bitmap attribute.%s", es); 13561e9ea7e0SNamjae Jeon write_lock_irqsave(&mftbmp_ni->size_lock, flags); 13571e9ea7e0SNamjae Jeon mftbmp_ni->allocated_size += vol->cluster_size; 13581e9ea7e0SNamjae Jeon write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 13591e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 13601e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 13611e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 13621e9ea7e0SNamjae Jeon /* 13631e9ea7e0SNamjae Jeon * The only thing that is now wrong is ->allocated_size of the 13641e9ea7e0SNamjae Jeon * base attribute extent which chkdsk should be able to fix. 13651e9ea7e0SNamjae Jeon */ 13661e9ea7e0SNamjae Jeon NVolSetErrors(vol); 13671e9ea7e0SNamjae Jeon return ret; 13681e9ea7e0SNamjae Jeon } 13691e9ea7e0SNamjae Jeon a = ctx->attr; 1370115380f9SNamjae Jeon a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 2); 13711e9ea7e0SNamjae Jeon undo_alloc: 13721e9ea7e0SNamjae Jeon if (status.added_cluster) { 13731e9ea7e0SNamjae Jeon /* Truncate the last run in the runlist by one cluster. 
*/ 13741e9ea7e0SNamjae Jeon rl->length--; 13751e9ea7e0SNamjae Jeon rl[1].vcn--; 13761e9ea7e0SNamjae Jeon } else if (status.added_run) { 13771e9ea7e0SNamjae Jeon lcn = rl->lcn; 13781e9ea7e0SNamjae Jeon /* Remove the last run from the runlist. */ 13791e9ea7e0SNamjae Jeon rl->lcn = rl[1].lcn; 13801e9ea7e0SNamjae Jeon rl->length = 0; 1381115380f9SNamjae Jeon mftbmp_ni->runlist.count--; 13821e9ea7e0SNamjae Jeon } 13831e9ea7e0SNamjae Jeon /* Deallocate the cluster. */ 13841e9ea7e0SNamjae Jeon down_write(&vol->lcnbmp_lock); 13851e9ea7e0SNamjae Jeon if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 13861e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es); 13871e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1388115380f9SNamjae Jeon } else 1389115380f9SNamjae Jeon ntfs_inc_free_clusters(vol, 1); 13901e9ea7e0SNamjae Jeon up_write(&vol->lcnbmp_lock); 13911e9ea7e0SNamjae Jeon if (status.mp_rebuilt) { 13921e9ea7e0SNamjae Jeon if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu( 13931e9ea7e0SNamjae Jeon a->data.non_resident.mapping_pairs_offset), 13941e9ea7e0SNamjae Jeon old_alen - le16_to_cpu( 13951e9ea7e0SNamjae Jeon a->data.non_resident.mapping_pairs_offset), 1396115380f9SNamjae Jeon rl2, ll, -1, NULL, NULL, NULL)) { 1397115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es); 13981e9ea7e0SNamjae Jeon NVolSetErrors(vol); 13991e9ea7e0SNamjae Jeon } 14001e9ea7e0SNamjae Jeon if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 1401115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to restore attribute record.%s", es); 14021e9ea7e0SNamjae Jeon NVolSetErrors(vol); 14031e9ea7e0SNamjae Jeon } 14041e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1405115380f9SNamjae Jeon } else if (status.mp_extended && ntfs_attr_update_mapping_pairs(mftbmp_ni, 0)) { 1406115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to restore mapping pairs.%s", es); 1407115380f9SNamjae Jeon NVolSetErrors(vol); 14081e9ea7e0SNamjae Jeon } 
14091e9ea7e0SNamjae Jeon if (ctx) 14101e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 14111e9ea7e0SNamjae Jeon if (!IS_ERR(mrec)) 14121e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 14131e9ea7e0SNamjae Jeon up_write(&mftbmp_ni->runlist.lock); 14141e9ea7e0SNamjae Jeon return ret; 14151e9ea7e0SNamjae Jeon } 14161e9ea7e0SNamjae Jeon 1417115380f9SNamjae Jeon /* 14181e9ea7e0SNamjae Jeon * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data 14191e9ea7e0SNamjae Jeon * @vol: volume on which to extend the mft bitmap attribute 14201e9ea7e0SNamjae Jeon * 14211e9ea7e0SNamjae Jeon * Extend the initialized portion of the mft bitmap attribute on the ntfs 14221e9ea7e0SNamjae Jeon * volume @vol by 8 bytes. 14231e9ea7e0SNamjae Jeon * 14241e9ea7e0SNamjae Jeon * Note: Only changes initialized_size and data_size, i.e. requires that 14251e9ea7e0SNamjae Jeon * allocated_size is big enough to fit the new initialized_size. 14261e9ea7e0SNamjae Jeon * 14271e9ea7e0SNamjae Jeon * Return 0 on success and -error on error. 14281e9ea7e0SNamjae Jeon * 14291e9ea7e0SNamjae Jeon * Locking: Caller must hold vol->mftbmp_lock for writing. 
 */
static int ntfs_mft_bitmap_extend_initialized_nolock(struct ntfs_volume *vol)
{
	s64 old_data_size, old_initialized_size;
	unsigned long flags;
	struct inode *mftbmp_vi;
	struct ntfs_inode *mft_ni, *mftbmp_ni;
	struct ntfs_attr_search_ctx *ctx;
	struct mft_record *mrec;
	struct attr_record *a;
	int ret;

	ntfs_debug("Extending mft bitmap initialized (and data) size.");
	mft_ni = NTFS_I(vol->mft_ino);
	mftbmp_vi = vol->mftbmp_ino;
	mftbmp_ni = NTFS_I(mftbmp_vi);
	/* Get the attribute record. */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.");
		return PTR_ERR(mrec);
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.");
		ret = -ENOMEM;
		goto unm_err_out;
	}
	/*
	 * lowest_vcn 0 selects the first (base) extent of the mft bitmap
	 * attribute; initialized_size/data_size live only in the base extent.
	 */
	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(ret)) {
		ntfs_error(vol->sb,
			   "Failed to find first attribute extent of mft bitmap attribute.");
		if (ret == -ENOENT)
			ret = -EIO;
		goto put_err_out;
	}
	a = ctx->attr;
	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
	/* Save the old sizes so the error path below can roll back. */
	old_data_size = i_size_read(mftbmp_vi);
	old_initialized_size = mftbmp_ni->initialized_size;
	/*
	 * We can simply update the initialized_size before filling the space
	 * with zeroes because the caller is holding the mft bitmap lock for
	 * writing which ensures that no one else is trying to access the data.
	 */
	mftbmp_ni->initialized_size += 8;
	a->data.non_resident.initialized_size =
			cpu_to_le64(mftbmp_ni->initialized_size);
	if (mftbmp_ni->initialized_size > old_data_size) {
		/* Keep data_size >= initialized_size, mirrored on-disk. */
		i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
		a->data.non_resident.data_size =
				cpu_to_le64(mftbmp_ni->initialized_size);
	}
	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	/* Ensure the changes make it to disk. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
	/* Initialize the mft bitmap attribute value with zeroes. */
	ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0);
	if (likely(!ret)) {
		ntfs_debug("Done. (Wrote eight initialized bytes to mft bitmap.");
		/* 8 new bytes of bitmap = 8 * 8 = 64 newly free mft records. */
		ntfs_inc_free_mft_records(vol, 8 * 8);
		return 0;
	}
	ntfs_error(vol->sb, "Failed to write to mft bitmap.");
	/*
	 * Try to recover from the error: re-map the mft record (it was
	 * unmapped above) and restore the old initialized_size/data_size in
	 * both the in-memory inode and the on-disk attribute record.
	 */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.%s", es);
		NVolSetErrors(vol);
		return ret;
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.%s", es);
		NVolSetErrors(vol);
		goto unm_err_out;
	}
	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
		ntfs_error(vol->sb,
			   "Failed to find first attribute extent of mft bitmap attribute.%s", es);
		NVolSetErrors(vol);
		/*
		 * NOTE: these labels sit inside the if-body on purpose: the
		 * first-attempt error paths above jump here to release the
		 * search context and/or unmap the record before bailing out.
		 */
put_err_out:
		ntfs_attr_put_search_ctx(ctx);
unm_err_out:
		unmap_mft_record(mft_ni);
		goto err_out;
	}
	a = ctx->attr;
	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
	/* Roll back to the sizes saved before the failed extension. */
	mftbmp_ni->initialized_size = old_initialized_size;
	a->data.non_resident.initialized_size =
			cpu_to_le64(old_initialized_size);
	if (i_size_read(mftbmp_vi) != old_data_size) {
		i_size_write(mftbmp_vi, old_data_size);
		a->data.non_resident.data_size = cpu_to_le64(old_data_size);
	}
	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
#ifdef DEBUG
	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
	ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
			mftbmp_ni->allocated_size, i_size_read(mftbmp_vi),
			mftbmp_ni->initialized_size);
	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
#endif /* DEBUG */
err_out:
	return ret;
}

/*
 * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute
 * @vol:	volume on which to extend the mft data attribute
 *
 * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
 * worth of clusters or if not enough space for this by one mft record worth
 * of clusters.
 *
 * Note: Only changes allocated_size, i.e. does not touch initialized_size or
 * data_size.
 *
 * Return 0 on success and -errno on error.
 *
 * Locking: - Caller must hold vol->mftbmp_lock for writing.
 *	    - This function takes NTFS_I(vol->mft_ino)->runlist.lock for
 *	      writing and releases it before returning.
 *	    - This function calls functions which take vol->lcnbmp_lock for
 *	      writing and release it before returning.
 */
static int ntfs_mft_data_extend_allocation_nolock(struct ntfs_volume *vol)
{
	s64 lcn;
	s64 old_last_vcn;
	s64 min_nr, nr, ll;
	unsigned long flags;
	struct ntfs_inode *mft_ni;
	struct runlist_element *rl, *rl2;
	struct ntfs_attr_search_ctx *ctx = NULL;
	struct mft_record *mrec;
	struct attr_record *a = NULL;
	int ret, mp_size;
	u32 old_alen = 0;
	bool mp_rebuilt = false, mp_extended = false;
	size_t new_rl_count;

	ntfs_debug("Extending mft data allocation.");
	mft_ni = NTFS_I(vol->mft_ino);
	/*
	 * Determine the preferred allocation location, i.e. the last lcn of
	 * the mft data attribute.  The allocated size of the mft data
	 * attribute cannot be zero so we are ok to do this.
	 */
	down_write(&mft_ni->runlist.lock);
	read_lock_irqsave(&mft_ni->size_lock, flags);
	ll = mft_ni->allocated_size;
	read_unlock_irqrestore(&mft_ni->size_lock, flags);
	rl = ntfs_attr_find_vcn_nolock(mft_ni,
			NTFS_B_TO_CLU(vol, ll - 1), NULL);
	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
		up_write(&mft_ni->runlist.lock);
		ntfs_error(vol->sb,
			   "Failed to determine last allocated cluster of mft data attribute.");
		if (!IS_ERR(rl))
			ret = -EIO;
		else
			ret = PTR_ERR(rl);
		return ret;
	}
	lcn = rl->lcn + rl->length;
	ntfs_debug("Last lcn of mft data attribute is 0x%llx.", lcn);
	/* Minimum allocation is one mft record worth of clusters. */
	min_nr = NTFS_B_TO_CLU(vol, vol->mft_record_size);
	if (!min_nr)
		min_nr = 1;
	/* Want to allocate 16 mft records worth of clusters. */
	nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
	if (!nr)
		nr = min_nr;
	/* Ensure we do not go above 2^32-1 mft records. */
	read_lock_irqsave(&mft_ni->size_lock, flags);
	ll = mft_ni->allocated_size;
	read_unlock_irqrestore(&mft_ni->size_lock, flags);
	if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >>
			vol->mft_record_size_bits >= (1ll << 32))) {
		/* Too big at the default size; retry the check at minimum. */
		nr = min_nr;
		if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >>
				vol->mft_record_size_bits >= (1ll << 32))) {
			ntfs_warning(vol->sb,
				     "Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached.");
			up_write(&mft_ni->runlist.lock);
			return -ENOSPC;
		}
	}
	ntfs_debug("Trying mft data allocation with %s cluster count %lli.",
			nr > min_nr ? "default" : "minimal", (long long)nr);
	old_last_vcn = rl[1].vcn;
	/*
	 * We can release the mft_ni runlist lock, Because this function is
	 * the only one that expends $MFT data attribute and is called with
	 * mft_ni->mrec_lock.
	 * This is required for the lock order, vol->lcnbmp_lock =>
	 * mft_ni->runlist.lock.
	 */
	up_write(&mft_ni->runlist.lock);

	do {
		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
				true, false, false);
		if (!IS_ERR(rl2))
			break;
		if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
			ntfs_error(vol->sb,
				   "Failed to allocate the minimal number of clusters (%lli) for the mft data attribute.",
				   nr);
			return PTR_ERR(rl2);
		}
		/*
		 * There is not enough space to do the allocation, but there
		 * might be enough space to do a minimal allocation so try that
		 * before failing.
		 */
		nr = min_nr;
		ntfs_debug("Retrying mft data allocation with minimal cluster count %lli.", nr);
	} while (1);

	/* Re-take the runlist lock to splice the new run in. */
	down_write(&mft_ni->runlist.lock);
	rl = ntfs_runlists_merge(&mft_ni->runlist, rl2, 0, &new_rl_count);
	if (IS_ERR(rl)) {
		up_write(&mft_ni->runlist.lock);
		ntfs_error(vol->sb, "Failed to merge runlists for mft data attribute.");
		/* Merge failed: give the just-allocated clusters back. */
		if (ntfs_cluster_free_from_rl(vol, rl2)) {
			ntfs_error(vol->sb,
				   "Failed to deallocate clusters from the mft data attribute.%s", es);
			NVolSetErrors(vol);
		}
		kvfree(rl2);
		return PTR_ERR(rl);
	}
	mft_ni->runlist.rl = rl;
	mft_ni->runlist.count = new_rl_count;
	ntfs_debug("Allocated %lli clusters.", (long long)nr);
	/* Find the last run in the new runlist. */
	for (; rl[1].length; rl++)
		;
	up_write(&mft_ni->runlist.lock);

	/* Update the attribute record as well. */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.");
		ret = PTR_ERR(mrec);
		/* undo_alloc expects the runlist lock NOT held; see below. */
		down_write(&mft_ni->runlist.lock);
		goto undo_alloc;
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.");
		ret = -ENOMEM;
		goto undo_alloc;
	}
	ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx);
	if (unlikely(ret)) {
		ntfs_error(vol->sb, "Failed to find last attribute extent of mft data attribute.");
		if (ret == -ENOENT)
			ret = -EIO;
		goto undo_alloc;
	}
	a = ctx->attr;
	ll = le64_to_cpu(a->data.non_resident.lowest_vcn);

	down_write(&mft_ni->runlist.lock);
	/* Search back for the previous last allocated cluster of mft bitmap. */
	for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) {
		if (ll >= rl2->vcn)
			break;
	}
	/* rl2 must now contain the extent's lowest_vcn; corruption otherwise. */
	WARN_ON(ll < rl2->vcn);
	WARN_ON(ll >= rl2->vcn + rl2->length);
	/* Get the size for the new mapping pairs array for this extent. */
	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1);
	if (unlikely(mp_size <= 0)) {
		ntfs_error(vol->sb,
			   "Get size for mapping pairs failed for mft data attribute extent.");
		ret = mp_size;
		if (!ret)
			ret = -EIO;
		up_write(&mft_ni->runlist.lock);
		goto undo_alloc;
	}
	up_write(&mft_ni->runlist.lock);

	/* Expand the attribute record if necessary. */
	old_alen = le32_to_cpu(a->length);
	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
	if (unlikely(ret)) {
		/*
		 * Not enough room in this mft record: try moving the
		 * attribute into a new extent instead.  On success jump
		 * straight to the first-extent re-lookup below.
		 */
		ret = ntfs_mft_attr_extend(mft_ni);
		if (!ret)
			goto extended_ok;
		if (ret != -EAGAIN)
			mp_extended = true;
		goto undo_alloc;
	}
	mp_rebuilt = true;
	/* Generate the mapping pairs array directly into the attr record. */
	ret = ntfs_mapping_pairs_build(vol, (u8 *)a +
			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
			mp_size, rl2, ll, -1, NULL, NULL, NULL);
	if (unlikely(ret)) {
		ntfs_error(vol->sb, "Failed to build mapping pairs array of mft data attribute.");
		goto undo_alloc;
	}
	/* Update the highest_vcn. */
	a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1);
	/*
	 * We now have extended the mft data allocated_size by nr clusters.
	 * Reflect this in the struct ntfs_inode structure and the attribute record.
	 * @rl is the last (non-terminator) runlist element of mft data
	 * attribute.
	 */
	if (a->data.non_resident.lowest_vcn) {
		/*
		 * We are not in the first attribute extent, switch to it, but
		 * first ensure the changes will make it to disk later.
		 */
		mark_mft_record_dirty(ctx->ntfs_ino);
		/* Jumped-to from the ntfs_mft_attr_extend() success path. */
extended_ok:
		ntfs_attr_reinit_search_ctx(ctx);
		ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name,
				mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0,
				ctx);
		if (unlikely(ret)) {
			ntfs_error(vol->sb,
				   "Failed to find first attribute extent of mft data attribute.");
			goto restore_undo_alloc;
		}
		a = ctx->attr;
	}

	write_lock_irqsave(&mft_ni->size_lock, flags);
	mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr);
	a->data.non_resident.allocated_size =
			cpu_to_le64(mft_ni->allocated_size);
	write_unlock_irqrestore(&mft_ni->size_lock, flags);
	/* Ensure the changes make it to disk. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
	ntfs_debug("Done.");
	return 0;
restore_undo_alloc:
	/* Re-find the last extent so highest_vcn can be restored. */
	ntfs_attr_reinit_search_ctx(ctx);
	if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
		ntfs_error(vol->sb,
			   "Failed to find last attribute extent of mft data attribute.%s", es);
		write_lock_irqsave(&mft_ni->size_lock, flags);
		mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr);
		write_unlock_irqrestore(&mft_ni->size_lock, flags);
		ntfs_attr_put_search_ctx(ctx);
		unmap_mft_record(mft_ni);
		up_write(&mft_ni->runlist.lock);
		/*
		 * The only thing that is now wrong is ->allocated_size of the
		 * base attribute extent which chkdsk should be able to fix.
		 */
		NVolSetErrors(vol);
		return ret;
	}
	ctx->attr->data.non_resident.highest_vcn =
			cpu_to_le64(old_last_vcn - 1);
undo_alloc:
	/* Free the clusters and shrink the runlist back to old_last_vcn. */
	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
		ntfs_error(vol->sb, "Failed to free clusters from mft data attribute.%s", es);
		NVolSetErrors(vol);
	}

	if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
		ntfs_error(vol->sb, "Failed to truncate mft data attribute runlist.%s", es);
		NVolSetErrors(vol);
	}
	if (mp_extended && ntfs_attr_update_mapping_pairs(mft_ni, 0)) {
		ntfs_error(vol->sb, "Failed to restore mapping pairs.%s",
			   es);
		NVolSetErrors(vol);
	}
	if (ctx) {
		a = ctx->attr;
		if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
			/*
			 * Rebuild the old, shorter mapping pairs array in
			 * place, then shrink the record back to old_alen.
			 */
			if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu(
					a->data.non_resident.mapping_pairs_offset),
					old_alen - le16_to_cpu(
					a->data.non_resident.mapping_pairs_offset),
					rl2, ll, -1, NULL, NULL, NULL)) {
				ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es);
				NVolSetErrors(vol);
			}
			if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
				ntfs_error(vol->sb, "Failed to restore attribute record.%s", es);
				NVolSetErrors(vol);
			}
			mark_mft_record_dirty(ctx->ntfs_ino);
		} else if (IS_ERR(ctx->mrec)) {
			ntfs_error(vol->sb, "Failed to restore attribute search context.%s", es);
			NVolSetErrors(vol);
		}
		ntfs_attr_put_search_ctx(ctx);
	}
	if (!IS_ERR(mrec))
		unmap_mft_record(mft_ni);
	return ret;
}

/*
 * ntfs_mft_record_layout - layout an mft record into a memory buffer
 * @vol:	volume to which the mft record will belong
 * @mft_no:	mft reference specifying the mft record number
 * @m:		destination buffer of size >= @vol->mft_record_size bytes
 *
 * Layout an empty, unused mft record with the mft record number @mft_no into
 * the buffer @m.  The volume @vol is needed because the mft record structure
 * was modified in NTFS 3.1 so we need to know which volume version this mft
 * record will be used on.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_mft_record_layout(const struct ntfs_volume *vol, const s64 mft_no,
		struct mft_record *m)
{
	struct attr_record *a;

	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
	/*
	 * Mft record numbers are stored on disk as 32-bit values (see
	 * m->mft_record_number below), so larger numbers cannot be laid out.
	 */
	if (mft_no >= (1ll << 32)) {
		ntfs_error(vol->sb, "Mft record number 0x%llx exceeds maximum of 2^32.",
				(long long)mft_no);
		return -ERANGE;
	}
	/* Start by clearing the whole mft record to give us a clean slate. */
	memset(m, 0, vol->mft_record_size);
	/*
	 * Place the update sequence array straight after the record header,
	 * aligned to a 2-byte boundary.  Pre-3.1 volumes use the shorter
	 * header layout (struct mft_record_old).
	 */
	if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
		m->usa_ofs = cpu_to_le16((sizeof(struct mft_record_old) + 1) & ~1);
	else {
		m->usa_ofs = cpu_to_le16((sizeof(struct mft_record) + 1) & ~1);
		/*
		 * Set the NTFS 3.1+ specific fields while we know that the
		 * volume version is 3.1+.
		 */
		m->reserved = 0;
		m->mft_record_number = cpu_to_le32((u32)mft_no);
	}
	m->magic = magic_FILE;
	/*
	 * One usa entry per NTFS block of the record plus one for the update
	 * sequence number itself.
	 */
	if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
		m->usa_count = cpu_to_le16(vol->mft_record_size /
				NTFS_BLOCK_SIZE + 1);
	else {
		m->usa_count = cpu_to_le16(1);
		/*
		 * NOTE(review): this warning message ends mid-sentence
		 * ("If chkdsk reports this as corruption") — confirm the
		 * intended full text.
		 */
		ntfs_warning(vol->sb,
				"Sector size is bigger than mft record size. Setting usa_count to 1. If chkdsk reports this as corruption");
	}
	/* Set the update sequence number to 1. */
	*(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1);
	m->lsn = 0;
	m->sequence_number = cpu_to_le16(1);
	m->link_count = 0;
	/*
	 * Place the attributes straight after the update sequence array,
	 * aligned to 8-byte boundary.
	 */
	m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
			(le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
	m->flags = 0;
	/*
	 * Using attrs_offset plus eight bytes (for the termination attribute).
	 * attrs_offset is already aligned to 8-byte boundary, so no need to
	 * align again.
	 */
	m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8);
	m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
	m->base_mft_record = 0;
	m->next_attr_instance = 0;
	/* Add the termination attribute. */
	a = (struct attr_record *)((u8 *)m + le16_to_cpu(m->attrs_offset));
	a->type = AT_END;
	a->length = 0;
	ntfs_debug("Done.");
	return 0;
}

/*
 * ntfs_mft_record_format - format an mft record on an ntfs volume
 * @vol:	volume on which to format the mft record
 * @mft_no:	mft record number to format
 *
 * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused
 * mft record into the appropriate place of the mft data attribute. This is
 * used when extending the mft data attribute.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_mft_record_format(const struct ntfs_volume *vol, const s64 mft_no)
{
	loff_t i_size;
	struct inode *mft_vi = vol->mft_ino;
	struct folio *folio;
	struct mft_record *m;
	pgoff_t index, end_index;
	unsigned int ofs;
	int err;

	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
	/*
	 * The index into the page cache and the offset within the page cache
	 * page of the wanted mft record.
	 */
	index = NTFS_MFT_NR_TO_PIDX(vol, mft_no);
	ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no);
	/* The maximum valid index into the page cache for $MFT's data. */
	i_size = i_size_read(mft_vi);
	end_index = i_size >> PAGE_SHIFT;
	if (unlikely(index >= end_index)) {
		/*
		 * Either beyond the last page altogether, or the record does
		 * not fit inside the valid portion of the last (partial) page.
		 */
		if (unlikely(index > end_index ||
				ofs + vol->mft_record_size > (i_size & ~PAGE_MASK))) {
			ntfs_error(vol->sb, "Tried to format non-existing mft record 0x%llx.",
					(long long)mft_no);
			return -ENOENT;
		}
	}

	/* Read, map, and pin the folio containing the mft record. */
	folio = read_mapping_folio(mft_vi->i_mapping, index, NULL);
	if (IS_ERR(folio)) {
		ntfs_error(vol->sb, "Failed to map page containing mft record to format 0x%llx.",
				(long long)mft_no);
		return PTR_ERR(folio);
	}
	folio_lock(folio);
	/*
	 * Drop the uptodate flag while the record is rewritten in place; it
	 * is re-set below on both the success and the error path.
	 */
	folio_clear_uptodate(folio);
	m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs);
	err = ntfs_mft_record_layout(vol, mft_no, m);
	if (unlikely(err)) {
		ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.",
				(long long)mft_no);
		/* Restore the uptodate flag cleared above before bailing out. */
		folio_mark_uptodate(folio);
		folio_unlock(folio);
		kunmap_local(m);
		folio_put(folio);
		return err;
	}
	/* Apply the multi sector transfer fixups before the record hits disk. */
	pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size);
	folio_mark_uptodate(folio);
	/*
	 * Make sure the mft record is written out to disk.  We could use
	 * ilookup5() to check if an inode is in icache and so on but this is
	 * unnecessary as ntfs_writepage() will write the dirty record anyway.
	 */
	ntfs_mft_mark_dirty(folio);
	folio_unlock(folio);
	kunmap_local(m);
	folio_put(folio);
	ntfs_debug("Done.");
	return 0;
}

/*
 * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
 * @vol:	[IN] volume on which to allocate the mft record
 * @mode:	[IN] mode if want a file or directory, i.e. base inode or 0
 * @ni:		[OUT] on success, set to the allocated ntfs inode
 * @base_ni:	[IN] open base inode if allocating an extent mft record or NULL
 * @ni_mrec:	[OUT] on successful return this is the mapped mft record
 *
 * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
 *
 * If @base_ni is NULL make the mft record a base mft record, i.e. a file or
 * directory inode, and allocate it at the default allocator position. In
 * this case @mode is the file mode as given to us by the caller. We in
 * particular use @mode to distinguish whether a file or a directory is being
 * created (S_IFDIR(mode) and S_IFREG(mode), respectively).
20151e9ea7e0SNamjae Jeon * 20161e9ea7e0SNamjae Jeon * If @base_ni is not NULL make the allocated mft record an extent record, 20171e9ea7e0SNamjae Jeon * allocate it starting at the mft record after the base mft record and attach 20181e9ea7e0SNamjae Jeon * the allocated and opened ntfs inode to the base inode @base_ni. In this 20191e9ea7e0SNamjae Jeon * case @mode must be 0 as it is meaningless for extent inodes. 20201e9ea7e0SNamjae Jeon * 20211e9ea7e0SNamjae Jeon * You need to check the return value with IS_ERR(). If false, the function 20221e9ea7e0SNamjae Jeon * was successful and the return value is the now opened ntfs inode of the 20231e9ea7e0SNamjae Jeon * allocated mft record. *@mrec is then set to the allocated, mapped, pinned, 20241e9ea7e0SNamjae Jeon * and locked mft record. If IS_ERR() is true, the function failed and the 20251e9ea7e0SNamjae Jeon * error code is obtained from PTR_ERR(return value). *@mrec is undefined in 20261e9ea7e0SNamjae Jeon * this case. 20271e9ea7e0SNamjae Jeon * 20281e9ea7e0SNamjae Jeon * Allocation strategy: 20291e9ea7e0SNamjae Jeon * 20301e9ea7e0SNamjae Jeon * To find a free mft record, we scan the mft bitmap for a zero bit. To 20311e9ea7e0SNamjae Jeon * optimize this we start scanning at the place specified by @base_ni or if 20321e9ea7e0SNamjae Jeon * @base_ni is NULL we start where we last stopped and we perform wrap around 20331e9ea7e0SNamjae Jeon * when we reach the end. Note, we do not try to allocate mft records below 2034115380f9SNamjae Jeon * number 64 because numbers 0 to 15 are the defined system files anyway and 16 2035115380f9SNamjae Jeon * to 64 are special in that they are used for storing extension mft records 20361e9ea7e0SNamjae Jeon * for the $DATA attribute of $MFT. This is required to avoid the possibility 20371e9ea7e0SNamjae Jeon * of creating a runlist with a circular dependency which once written to disk 20381e9ea7e0SNamjae Jeon * can never be read in again. 
Windows will only use records 16 to 24 for 20391e9ea7e0SNamjae Jeon * normal files if the volume is completely out of space. We never use them 20401e9ea7e0SNamjae Jeon * which means that when the volume is really out of space we cannot create any 20411e9ea7e0SNamjae Jeon * more files while Windows can still create up to 8 small files. We can start 20421e9ea7e0SNamjae Jeon * doing this at some later time, it does not matter much for now. 20431e9ea7e0SNamjae Jeon * 20441e9ea7e0SNamjae Jeon * When scanning the mft bitmap, we only search up to the last allocated mft 2045115380f9SNamjae Jeon * record. If there are no free records left in the range 64 to number of 20461e9ea7e0SNamjae Jeon * allocated mft records, then we extend the $MFT/$DATA attribute in order to 20471e9ea7e0SNamjae Jeon * create free mft records. We extend the allocated size of $MFT/$DATA by 16 20481e9ea7e0SNamjae Jeon * records at a time or one cluster, if cluster size is above 16kiB. If there 20491e9ea7e0SNamjae Jeon * is not sufficient space to do this, we try to extend by a single mft record 20501e9ea7e0SNamjae Jeon * or one cluster, if cluster size is above the mft record size. 20511e9ea7e0SNamjae Jeon * 20521e9ea7e0SNamjae Jeon * No matter how many mft records we allocate, we initialize only the first 20531e9ea7e0SNamjae Jeon * allocated mft record, incrementing mft data size and initialized size 2054115380f9SNamjae Jeon * accordingly, open an struct ntfs_inode for it and return it to the caller, unless 2055115380f9SNamjae Jeon * there are less than 64 mft records, in which case we allocate and initialize 2056115380f9SNamjae Jeon * mft records until we reach record 64 which we consider as the first free mft 20571e9ea7e0SNamjae Jeon * record for use by normal files. 
20581e9ea7e0SNamjae Jeon * 20591e9ea7e0SNamjae Jeon * If during any stage we overflow the initialized data in the mft bitmap, we 20601e9ea7e0SNamjae Jeon * extend the initialized size (and data size) by 8 bytes, allocating another 20611e9ea7e0SNamjae Jeon * cluster if required. The bitmap data size has to be at least equal to the 20621e9ea7e0SNamjae Jeon * number of mft records in the mft, but it can be bigger, in which case the 2063115380f9SNamjae Jeon * superfluous bits are padded with zeroes. 20641e9ea7e0SNamjae Jeon * 20651e9ea7e0SNamjae Jeon * Thus, when we return successfully (IS_ERR() is false), we will have: 20661e9ea7e0SNamjae Jeon * - initialized / extended the mft bitmap if necessary, 20671e9ea7e0SNamjae Jeon * - initialized / extended the mft data if necessary, 20681e9ea7e0SNamjae Jeon * - set the bit corresponding to the mft record being allocated in the 20691e9ea7e0SNamjae Jeon * mft bitmap, 2070115380f9SNamjae Jeon * - opened an struct ntfs_inode for the allocated mft record, and we will have 2071115380f9SNamjae Jeon * - returned the struct ntfs_inode as well as the allocated mapped, pinned, and 20721e9ea7e0SNamjae Jeon * locked mft record. 20731e9ea7e0SNamjae Jeon * 20741e9ea7e0SNamjae Jeon * On error, the volume will be left in a consistent state and no record will 20751e9ea7e0SNamjae Jeon * be allocated. If rolling back a partial operation fails, we may leave some 20761e9ea7e0SNamjae Jeon * inconsistent metadata in which case we set NVolErrors() so the volume is 20771e9ea7e0SNamjae Jeon * left dirty when unmounted. 
20781e9ea7e0SNamjae Jeon * 20791e9ea7e0SNamjae Jeon * Note, this function cannot make use of most of the normal functions, like 20801e9ea7e0SNamjae Jeon * for example for attribute resizing, etc, because when the run list overflows 20811e9ea7e0SNamjae Jeon * the base mft record and an attribute list is used, it is very important that 20821e9ea7e0SNamjae Jeon * the extension mft records used to store the $DATA attribute of $MFT can be 20831e9ea7e0SNamjae Jeon * reached without having to read the information contained inside them, as 20841e9ea7e0SNamjae Jeon * this would make it impossible to find them in the first place after the 20851e9ea7e0SNamjae Jeon * volume is unmounted. $MFT/$BITMAP probably does not need to follow this 20861e9ea7e0SNamjae Jeon * rule because the bitmap is not essential for finding the mft records, but on 20871e9ea7e0SNamjae Jeon * the other hand, handling the bitmap in this special way would make life 20881e9ea7e0SNamjae Jeon * easier because otherwise there might be circular invocations of functions 20891e9ea7e0SNamjae Jeon * when reading the bitmap. 
20901e9ea7e0SNamjae Jeon */ 2091115380f9SNamjae Jeon int ntfs_mft_record_alloc(struct ntfs_volume *vol, const int mode, 2092115380f9SNamjae Jeon struct ntfs_inode **ni, struct ntfs_inode *base_ni, 2093115380f9SNamjae Jeon struct mft_record **ni_mrec) 20941e9ea7e0SNamjae Jeon { 20951e9ea7e0SNamjae Jeon s64 ll, bit, old_data_initialized, old_data_size; 20961e9ea7e0SNamjae Jeon unsigned long flags; 2097115380f9SNamjae Jeon struct folio *folio; 2098115380f9SNamjae Jeon struct ntfs_inode *mft_ni, *mftbmp_ni; 2099115380f9SNamjae Jeon struct ntfs_attr_search_ctx *ctx; 2100115380f9SNamjae Jeon struct mft_record *m = NULL; 2101115380f9SNamjae Jeon struct attr_record *a; 21021e9ea7e0SNamjae Jeon pgoff_t index; 21031e9ea7e0SNamjae Jeon unsigned int ofs; 21041e9ea7e0SNamjae Jeon int err; 2105115380f9SNamjae Jeon __le16 seq_no, usn; 21061e9ea7e0SNamjae Jeon bool record_formatted = false; 2107115380f9SNamjae Jeon unsigned int memalloc_flags; 21081e9ea7e0SNamjae Jeon 2109115380f9SNamjae Jeon if (base_ni && *ni) 2110115380f9SNamjae Jeon return -EINVAL; 2111115380f9SNamjae Jeon 2112115380f9SNamjae Jeon /* @mode and @base_ni are mutually exclusive. 
*/ 2113115380f9SNamjae Jeon if (mode && base_ni) 2114115380f9SNamjae Jeon return -EINVAL; 2115115380f9SNamjae Jeon 2116115380f9SNamjae Jeon if (base_ni) 2117115380f9SNamjae Jeon ntfs_debug("Entering (allocating an extent mft record for base mft record 0x%llx).", 21181e9ea7e0SNamjae Jeon (long long)base_ni->mft_no); 2119115380f9SNamjae Jeon else 21201e9ea7e0SNamjae Jeon ntfs_debug("Entering (allocating a base mft record)."); 2121115380f9SNamjae Jeon 2122115380f9SNamjae Jeon memalloc_flags = memalloc_nofs_save(); 2123115380f9SNamjae Jeon 21241e9ea7e0SNamjae Jeon mft_ni = NTFS_I(vol->mft_ino); 2125115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 2126115380f9SNamjae Jeon mutex_lock(&mft_ni->mrec_lock); 21271e9ea7e0SNamjae Jeon mftbmp_ni = NTFS_I(vol->mftbmp_ino); 2128115380f9SNamjae Jeon search_free_rec: 2129115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 21301e9ea7e0SNamjae Jeon down_write(&vol->mftbmp_lock); 21311e9ea7e0SNamjae Jeon bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni); 21321e9ea7e0SNamjae Jeon if (bit >= 0) { 21331e9ea7e0SNamjae Jeon ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", 21341e9ea7e0SNamjae Jeon (long long)bit); 21351e9ea7e0SNamjae Jeon goto have_alloc_rec; 21361e9ea7e0SNamjae Jeon } 21371e9ea7e0SNamjae Jeon if (bit != -ENOSPC) { 2138115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) { 21391e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 2140115380f9SNamjae Jeon mutex_unlock(&mft_ni->mrec_lock); 21411e9ea7e0SNamjae Jeon } 2142115380f9SNamjae Jeon memalloc_nofs_restore(memalloc_flags); 2143115380f9SNamjae Jeon return bit; 2144115380f9SNamjae Jeon } 2145115380f9SNamjae Jeon 2146115380f9SNamjae Jeon if (base_ni && base_ni->mft_no == FILE_MFT) { 2147115380f9SNamjae Jeon memalloc_nofs_restore(memalloc_flags); 2148115380f9SNamjae Jeon return bit; 2149115380f9SNamjae Jeon } 2150115380f9SNamjae Jeon 21511e9ea7e0SNamjae Jeon /* 21521e9ea7e0SNamjae Jeon * No free mft 
records left. If the mft bitmap already covers more 21531e9ea7e0SNamjae Jeon * than the currently used mft records, the next records are all free, 21541e9ea7e0SNamjae Jeon * so we can simply allocate the first unused mft record. 21551e9ea7e0SNamjae Jeon * Note: We also have to make sure that the mft bitmap at least covers 21561e9ea7e0SNamjae Jeon * the first 24 mft records as they are special and whilst they may not 21571e9ea7e0SNamjae Jeon * be in use, we do not allocate from them. 21581e9ea7e0SNamjae Jeon */ 21591e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 21601e9ea7e0SNamjae Jeon ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 21611e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 21621e9ea7e0SNamjae Jeon read_lock_irqsave(&mftbmp_ni->size_lock, flags); 21631e9ea7e0SNamjae Jeon old_data_initialized = mftbmp_ni->initialized_size; 21641e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 2165115380f9SNamjae Jeon if (old_data_initialized << 3 > ll && 2166115380f9SNamjae Jeon old_data_initialized > RESERVED_MFT_RECORDS / 8) { 21671e9ea7e0SNamjae Jeon bit = ll; 2168115380f9SNamjae Jeon if (bit < RESERVED_MFT_RECORDS) 2169115380f9SNamjae Jeon bit = RESERVED_MFT_RECORDS; 21701e9ea7e0SNamjae Jeon if (unlikely(bit >= (1ll << 32))) 21711e9ea7e0SNamjae Jeon goto max_err_out; 21721e9ea7e0SNamjae Jeon ntfs_debug("Found free record (#2), bit 0x%llx.", 21731e9ea7e0SNamjae Jeon (long long)bit); 21741e9ea7e0SNamjae Jeon goto found_free_rec; 21751e9ea7e0SNamjae Jeon } 21761e9ea7e0SNamjae Jeon /* 21771e9ea7e0SNamjae Jeon * The mft bitmap needs to be expanded until it covers the first unused 21781e9ea7e0SNamjae Jeon * mft record that we can allocate. 21791e9ea7e0SNamjae Jeon * Note: The smallest mft record we allocate is mft record 24. 
21801e9ea7e0SNamjae Jeon */ 21811e9ea7e0SNamjae Jeon bit = old_data_initialized << 3; 21821e9ea7e0SNamjae Jeon if (unlikely(bit >= (1ll << 32))) 21831e9ea7e0SNamjae Jeon goto max_err_out; 21841e9ea7e0SNamjae Jeon read_lock_irqsave(&mftbmp_ni->size_lock, flags); 21851e9ea7e0SNamjae Jeon old_data_size = mftbmp_ni->allocated_size; 2186115380f9SNamjae Jeon ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2187115380f9SNamjae Jeon old_data_size, i_size_read(vol->mftbmp_ino), 2188115380f9SNamjae Jeon old_data_initialized); 21891e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 21901e9ea7e0SNamjae Jeon if (old_data_initialized + 8 > old_data_size) { 21911e9ea7e0SNamjae Jeon /* Need to extend bitmap by one more cluster. */ 21921e9ea7e0SNamjae Jeon ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 21931e9ea7e0SNamjae Jeon err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2194115380f9SNamjae Jeon if (err == -EAGAIN) 2195115380f9SNamjae Jeon err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2196115380f9SNamjae Jeon 21971e9ea7e0SNamjae Jeon if (unlikely(err)) { 2198115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 21991e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 22001e9ea7e0SNamjae Jeon goto err_out; 22011e9ea7e0SNamjae Jeon } 22021e9ea7e0SNamjae Jeon #ifdef DEBUG 22031e9ea7e0SNamjae Jeon read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2204115380f9SNamjae Jeon ntfs_debug("Status of mftbmp after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2205115380f9SNamjae Jeon mftbmp_ni->allocated_size, 2206115380f9SNamjae Jeon i_size_read(vol->mftbmp_ino), 2207115380f9SNamjae Jeon mftbmp_ni->initialized_size); 22081e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 22091e9ea7e0SNamjae Jeon #endif /* DEBUG */ 22101e9ea7e0SNamjae Jeon } 22111e9ea7e0SNamjae Jeon /* 22121e9ea7e0SNamjae Jeon * We now 
have sufficient allocated space, extend the initialized_size 22131e9ea7e0SNamjae Jeon * as well as the data_size if necessary and fill the new space with 22141e9ea7e0SNamjae Jeon * zeroes. 22151e9ea7e0SNamjae Jeon */ 22161e9ea7e0SNamjae Jeon err = ntfs_mft_bitmap_extend_initialized_nolock(vol); 22171e9ea7e0SNamjae Jeon if (unlikely(err)) { 2218115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 22191e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 22201e9ea7e0SNamjae Jeon goto err_out; 22211e9ea7e0SNamjae Jeon } 22221e9ea7e0SNamjae Jeon #ifdef DEBUG 22231e9ea7e0SNamjae Jeon read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2224115380f9SNamjae Jeon ntfs_debug("Status of mftbmp after initialized extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2225115380f9SNamjae Jeon mftbmp_ni->allocated_size, 2226115380f9SNamjae Jeon i_size_read(vol->mftbmp_ino), 2227115380f9SNamjae Jeon mftbmp_ni->initialized_size); 22281e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 22291e9ea7e0SNamjae Jeon #endif /* DEBUG */ 22301e9ea7e0SNamjae Jeon ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 22311e9ea7e0SNamjae Jeon found_free_rec: 22321e9ea7e0SNamjae Jeon /* @bit is the found free mft record, allocate it in the mft bitmap. */ 22331e9ea7e0SNamjae Jeon ntfs_debug("At found_free_rec."); 22341e9ea7e0SNamjae Jeon err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit); 22351e9ea7e0SNamjae Jeon if (unlikely(err)) { 22361e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap."); 2237115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 22381e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 22391e9ea7e0SNamjae Jeon goto err_out; 22401e9ea7e0SNamjae Jeon } 22411e9ea7e0SNamjae Jeon ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit); 22421e9ea7e0SNamjae Jeon have_alloc_rec: 22431e9ea7e0SNamjae Jeon /* 22441e9ea7e0SNamjae Jeon * The mft bitmap is now uptodate. 
Deal with mft data attribute now. 22451e9ea7e0SNamjae Jeon * Note, we keep hold of the mft bitmap lock for writing until all 22461e9ea7e0SNamjae Jeon * modifications to the mft data attribute are complete, too, as they 22471e9ea7e0SNamjae Jeon * will impact decisions for mft bitmap and mft record allocation done 22481e9ea7e0SNamjae Jeon * by a parallel allocation and if the lock is not maintained a 22491e9ea7e0SNamjae Jeon * parallel allocation could allocate the same mft record as this one. 22501e9ea7e0SNamjae Jeon */ 22511e9ea7e0SNamjae Jeon ll = (bit + 1) << vol->mft_record_size_bits; 22521e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 22531e9ea7e0SNamjae Jeon old_data_initialized = mft_ni->initialized_size; 22541e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 22551e9ea7e0SNamjae Jeon if (ll <= old_data_initialized) { 22561e9ea7e0SNamjae Jeon ntfs_debug("Allocated mft record already initialized."); 22571e9ea7e0SNamjae Jeon goto mft_rec_already_initialized; 22581e9ea7e0SNamjae Jeon } 22591e9ea7e0SNamjae Jeon ntfs_debug("Initializing allocated mft record."); 22601e9ea7e0SNamjae Jeon /* 22611e9ea7e0SNamjae Jeon * The mft record is outside the initialized data. Extend the mft data 22621e9ea7e0SNamjae Jeon * attribute until it covers the allocated record. The loop is only 22631e9ea7e0SNamjae Jeon * actually traversed more than once when a freshly formatted volume is 22641e9ea7e0SNamjae Jeon * first written to so it optimizes away nicely in the common case. 
22651e9ea7e0SNamjae Jeon */ 2266115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) { 22671e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 2268115380f9SNamjae Jeon ntfs_debug("Status of mft data before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2269115380f9SNamjae Jeon mft_ni->allocated_size, i_size_read(vol->mft_ino), 2270115380f9SNamjae Jeon mft_ni->initialized_size); 22711e9ea7e0SNamjae Jeon while (ll > mft_ni->allocated_size) { 22721e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 22731e9ea7e0SNamjae Jeon err = ntfs_mft_data_extend_allocation_nolock(vol); 2274115380f9SNamjae Jeon if (err == -EAGAIN) 2275115380f9SNamjae Jeon err = ntfs_mft_data_extend_allocation_nolock(vol); 2276115380f9SNamjae Jeon 22771e9ea7e0SNamjae Jeon if (unlikely(err)) { 2278115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to extend mft data allocation."); 22791e9ea7e0SNamjae Jeon goto undo_mftbmp_alloc_nolock; 22801e9ea7e0SNamjae Jeon } 22811e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 2282115380f9SNamjae Jeon ntfs_debug("Status of mft data after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2283115380f9SNamjae Jeon mft_ni->allocated_size, i_size_read(vol->mft_ino), 2284115380f9SNamjae Jeon mft_ni->initialized_size); 22851e9ea7e0SNamjae Jeon } 22861e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 2287115380f9SNamjae Jeon } else if (ll > mft_ni->allocated_size) { 2288115380f9SNamjae Jeon err = -ENOSPC; 2289115380f9SNamjae Jeon goto undo_mftbmp_alloc_nolock; 2290115380f9SNamjae Jeon } 22911e9ea7e0SNamjae Jeon /* 22921e9ea7e0SNamjae Jeon * Extend mft data initialized size (and data size of course) to reach 22931e9ea7e0SNamjae Jeon * the allocated mft record, formatting the mft records allong the way. 
2294115380f9SNamjae Jeon * Note: We only modify the struct ntfs_inode structure as that is all that is 22951e9ea7e0SNamjae Jeon * needed by ntfs_mft_record_format(). We will update the attribute 22961e9ea7e0SNamjae Jeon * record itself in one fell swoop later on. 22971e9ea7e0SNamjae Jeon */ 22981e9ea7e0SNamjae Jeon write_lock_irqsave(&mft_ni->size_lock, flags); 22991e9ea7e0SNamjae Jeon old_data_initialized = mft_ni->initialized_size; 23001e9ea7e0SNamjae Jeon old_data_size = vol->mft_ino->i_size; 23011e9ea7e0SNamjae Jeon while (ll > mft_ni->initialized_size) { 23021e9ea7e0SNamjae Jeon s64 new_initialized_size, mft_no; 23031e9ea7e0SNamjae Jeon 23041e9ea7e0SNamjae Jeon new_initialized_size = mft_ni->initialized_size + 23051e9ea7e0SNamjae Jeon vol->mft_record_size; 23061e9ea7e0SNamjae Jeon mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 23071e9ea7e0SNamjae Jeon if (new_initialized_size > i_size_read(vol->mft_ino)) 23081e9ea7e0SNamjae Jeon i_size_write(vol->mft_ino, new_initialized_size); 23091e9ea7e0SNamjae Jeon write_unlock_irqrestore(&mft_ni->size_lock, flags); 23101e9ea7e0SNamjae Jeon ntfs_debug("Initializing mft record 0x%llx.", 23111e9ea7e0SNamjae Jeon (long long)mft_no); 23121e9ea7e0SNamjae Jeon err = ntfs_mft_record_format(vol, mft_no); 23131e9ea7e0SNamjae Jeon if (unlikely(err)) { 23141e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to format mft record."); 23151e9ea7e0SNamjae Jeon goto undo_data_init; 23161e9ea7e0SNamjae Jeon } 23171e9ea7e0SNamjae Jeon write_lock_irqsave(&mft_ni->size_lock, flags); 23181e9ea7e0SNamjae Jeon mft_ni->initialized_size = new_initialized_size; 23191e9ea7e0SNamjae Jeon } 23201e9ea7e0SNamjae Jeon write_unlock_irqrestore(&mft_ni->size_lock, flags); 23211e9ea7e0SNamjae Jeon record_formatted = true; 23221e9ea7e0SNamjae Jeon /* Update the mft data attribute record to reflect the new sizes. 
*/ 23231e9ea7e0SNamjae Jeon m = map_mft_record(mft_ni); 23241e9ea7e0SNamjae Jeon if (IS_ERR(m)) { 23251e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to map mft record."); 23261e9ea7e0SNamjae Jeon err = PTR_ERR(m); 23271e9ea7e0SNamjae Jeon goto undo_data_init; 23281e9ea7e0SNamjae Jeon } 23291e9ea7e0SNamjae Jeon ctx = ntfs_attr_get_search_ctx(mft_ni, m); 23301e9ea7e0SNamjae Jeon if (unlikely(!ctx)) { 23311e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to get search context."); 23321e9ea7e0SNamjae Jeon err = -ENOMEM; 23331e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 23341e9ea7e0SNamjae Jeon goto undo_data_init; 23351e9ea7e0SNamjae Jeon } 23361e9ea7e0SNamjae Jeon err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 23371e9ea7e0SNamjae Jeon CASE_SENSITIVE, 0, NULL, 0, ctx); 23381e9ea7e0SNamjae Jeon if (unlikely(err)) { 2339115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to find first attribute extent of mft data attribute."); 23401e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 23411e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 23421e9ea7e0SNamjae Jeon goto undo_data_init; 23431e9ea7e0SNamjae Jeon } 23441e9ea7e0SNamjae Jeon a = ctx->attr; 23451e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 23461e9ea7e0SNamjae Jeon a->data.non_resident.initialized_size = 2347115380f9SNamjae Jeon cpu_to_le64(mft_ni->initialized_size); 23481e9ea7e0SNamjae Jeon a->data.non_resident.data_size = 2349115380f9SNamjae Jeon cpu_to_le64(i_size_read(vol->mft_ino)); 23501e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 23511e9ea7e0SNamjae Jeon /* Ensure the changes make it to disk. 
*/ 23521e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 23531e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 23541e9ea7e0SNamjae Jeon unmap_mft_record(mft_ni); 23551e9ea7e0SNamjae Jeon read_lock_irqsave(&mft_ni->size_lock, flags); 2356115380f9SNamjae Jeon ntfs_debug("Status of mft data after mft record initialization: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.", 2357115380f9SNamjae Jeon mft_ni->allocated_size, i_size_read(vol->mft_ino), 2358115380f9SNamjae Jeon mft_ni->initialized_size); 2359115380f9SNamjae Jeon WARN_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); 2360115380f9SNamjae Jeon WARN_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); 23611e9ea7e0SNamjae Jeon read_unlock_irqrestore(&mft_ni->size_lock, flags); 23621e9ea7e0SNamjae Jeon mft_rec_already_initialized: 23631e9ea7e0SNamjae Jeon /* 23641e9ea7e0SNamjae Jeon * We can finally drop the mft bitmap lock as the mft data attribute 23651e9ea7e0SNamjae Jeon * has been fully updated. The only disparity left is that the 23661e9ea7e0SNamjae Jeon * allocated mft record still needs to be marked as in use to match the 23671e9ea7e0SNamjae Jeon * set bit in the mft bitmap but this is actually not a problem since 23681e9ea7e0SNamjae Jeon * this mft record is not referenced from anywhere yet and the fact 23691e9ea7e0SNamjae Jeon * that it is allocated in the mft bitmap means that no-one will try to 23701e9ea7e0SNamjae Jeon * allocate it either. 23711e9ea7e0SNamjae Jeon */ 2372115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 23731e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 23741e9ea7e0SNamjae Jeon /* 23751e9ea7e0SNamjae Jeon * We now have allocated and initialized the mft record. Calculate the 23761e9ea7e0SNamjae Jeon * index of and the offset within the page cache page the record is in. 
23771e9ea7e0SNamjae Jeon */ 2378115380f9SNamjae Jeon index = NTFS_MFT_NR_TO_PIDX(vol, bit); 2379115380f9SNamjae Jeon ofs = NTFS_MFT_NR_TO_POFS(vol, bit); 2380115380f9SNamjae Jeon /* Read, map, and pin the folio containing the mft record. */ 2381115380f9SNamjae Jeon folio = read_mapping_folio(vol->mft_ino->i_mapping, index, NULL); 2382115380f9SNamjae Jeon if (IS_ERR(folio)) { 2383115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to map page containing allocated mft record 0x%llx.", 2384115380f9SNamjae Jeon bit); 2385115380f9SNamjae Jeon err = PTR_ERR(folio); 23861e9ea7e0SNamjae Jeon goto undo_mftbmp_alloc; 23871e9ea7e0SNamjae Jeon } 2388115380f9SNamjae Jeon folio_lock(folio); 2389115380f9SNamjae Jeon folio_clear_uptodate(folio); 2390115380f9SNamjae Jeon m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs); 23911e9ea7e0SNamjae Jeon /* If we just formatted the mft record no need to do it again. */ 23921e9ea7e0SNamjae Jeon if (!record_formatted) { 23931e9ea7e0SNamjae Jeon /* Sanity check that the mft record is really not in use. */ 23941e9ea7e0SNamjae Jeon if (ntfs_is_file_record(m->magic) && 23951e9ea7e0SNamjae Jeon (m->flags & MFT_RECORD_IN_USE)) { 2396115380f9SNamjae Jeon ntfs_warning(vol->sb, 2397115380f9SNamjae Jeon "Mft record 0x%llx was marked free in mft bitmap but is marked used itself. Unmount and run chkdsk.", 2398115380f9SNamjae Jeon bit); 2399115380f9SNamjae Jeon folio_mark_uptodate(folio); 2400115380f9SNamjae Jeon folio_unlock(folio); 2401115380f9SNamjae Jeon kunmap_local(m); 2402115380f9SNamjae Jeon folio_put(folio); 24031e9ea7e0SNamjae Jeon NVolSetErrors(vol); 2404115380f9SNamjae Jeon goto search_free_rec; 24051e9ea7e0SNamjae Jeon } 24061e9ea7e0SNamjae Jeon /* 24071e9ea7e0SNamjae Jeon * We need to (re-)format the mft record, preserving the 24081e9ea7e0SNamjae Jeon * sequence number if it is not zero as well as the update 24091e9ea7e0SNamjae Jeon * sequence number if it is not zero or -1 (0xffff). 
This 24101e9ea7e0SNamjae Jeon * means we do not need to care whether or not something went 24111e9ea7e0SNamjae Jeon * wrong with the previous mft record. 24121e9ea7e0SNamjae Jeon */ 24131e9ea7e0SNamjae Jeon seq_no = m->sequence_number; 2414115380f9SNamjae Jeon usn = *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)); 24151e9ea7e0SNamjae Jeon err = ntfs_mft_record_layout(vol, bit, m); 24161e9ea7e0SNamjae Jeon if (unlikely(err)) { 2417115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to layout allocated mft record 0x%llx.", 2418115380f9SNamjae Jeon bit); 2419115380f9SNamjae Jeon folio_mark_uptodate(folio); 2420115380f9SNamjae Jeon folio_unlock(folio); 2421115380f9SNamjae Jeon kunmap_local(m); 2422115380f9SNamjae Jeon folio_put(folio); 24231e9ea7e0SNamjae Jeon goto undo_mftbmp_alloc; 24241e9ea7e0SNamjae Jeon } 24251e9ea7e0SNamjae Jeon if (seq_no) 24261e9ea7e0SNamjae Jeon m->sequence_number = seq_no; 24271e9ea7e0SNamjae Jeon if (usn && le16_to_cpu(usn) != 0xffff) 2428115380f9SNamjae Jeon *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = usn; 2429115380f9SNamjae Jeon pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size); 24301e9ea7e0SNamjae Jeon } 24311e9ea7e0SNamjae Jeon /* Set the mft record itself in use. */ 24321e9ea7e0SNamjae Jeon m->flags |= MFT_RECORD_IN_USE; 24331e9ea7e0SNamjae Jeon if (S_ISDIR(mode)) 24341e9ea7e0SNamjae Jeon m->flags |= MFT_RECORD_IS_DIRECTORY; 2435115380f9SNamjae Jeon folio_mark_uptodate(folio); 24361e9ea7e0SNamjae Jeon if (base_ni) { 2437115380f9SNamjae Jeon struct mft_record *m_tmp; 24381e9ea7e0SNamjae Jeon 24391e9ea7e0SNamjae Jeon /* 24401e9ea7e0SNamjae Jeon * Setup the base mft record in the extent mft record. This 24411e9ea7e0SNamjae Jeon * completes initialization of the allocated extent mft record 24421e9ea7e0SNamjae Jeon * and we can simply use it with map_extent_mft_record(). 
24431e9ea7e0SNamjae Jeon */ 24441e9ea7e0SNamjae Jeon m->base_mft_record = MK_LE_MREF(base_ni->mft_no, 24451e9ea7e0SNamjae Jeon base_ni->seq_no); 24461e9ea7e0SNamjae Jeon /* 24471e9ea7e0SNamjae Jeon * Allocate an extent inode structure for the new mft record, 24481e9ea7e0SNamjae Jeon * attach it to the base inode @base_ni and map, pin, and lock 24491e9ea7e0SNamjae Jeon * its, i.e. the allocated, mft record. 24501e9ea7e0SNamjae Jeon */ 2451115380f9SNamjae Jeon m_tmp = map_extent_mft_record(base_ni, 2452115380f9SNamjae Jeon MK_MREF(bit, le16_to_cpu(m->sequence_number)), 2453115380f9SNamjae Jeon ni); 24541e9ea7e0SNamjae Jeon if (IS_ERR(m_tmp)) { 2455115380f9SNamjae Jeon ntfs_error(vol->sb, "Failed to map allocated extent mft record 0x%llx.", 2456115380f9SNamjae Jeon bit); 24571e9ea7e0SNamjae Jeon err = PTR_ERR(m_tmp); 24581e9ea7e0SNamjae Jeon /* Set the mft record itself not in use. */ 24591e9ea7e0SNamjae Jeon m->flags &= cpu_to_le16( 24601e9ea7e0SNamjae Jeon ~le16_to_cpu(MFT_RECORD_IN_USE)); 24611e9ea7e0SNamjae Jeon /* Make sure the mft record is written out to disk. */ 2462115380f9SNamjae Jeon ntfs_mft_mark_dirty(folio); 2463115380f9SNamjae Jeon folio_unlock(folio); 2464115380f9SNamjae Jeon kunmap_local(m); 2465115380f9SNamjae Jeon folio_put(folio); 24661e9ea7e0SNamjae Jeon goto undo_mftbmp_alloc; 24671e9ea7e0SNamjae Jeon } 2468115380f9SNamjae Jeon 24691e9ea7e0SNamjae Jeon /* 24701e9ea7e0SNamjae Jeon * Make sure the allocated mft record is written out to disk. 24711e9ea7e0SNamjae Jeon * No need to set the inode dirty because the caller is going 24721e9ea7e0SNamjae Jeon * to do that anyway after finishing with the new extent mft 24731e9ea7e0SNamjae Jeon * record (e.g. at a minimum a new attribute will be added to 24741e9ea7e0SNamjae Jeon * the mft record. 
24751e9ea7e0SNamjae Jeon */ 2476115380f9SNamjae Jeon ntfs_mft_mark_dirty(folio); 2477115380f9SNamjae Jeon folio_unlock(folio); 24781e9ea7e0SNamjae Jeon /* 24791e9ea7e0SNamjae Jeon * Need to unmap the page since map_extent_mft_record() mapped 24801e9ea7e0SNamjae Jeon * it as well so we have it mapped twice at the moment. 24811e9ea7e0SNamjae Jeon */ 2482115380f9SNamjae Jeon kunmap_local(m); 2483115380f9SNamjae Jeon folio_put(folio); 24841e9ea7e0SNamjae Jeon } else { 24851e9ea7e0SNamjae Jeon /* 24861e9ea7e0SNamjae Jeon * Manually map, pin, and lock the mft record as we already 24871e9ea7e0SNamjae Jeon * have its page mapped and it is very easy to do. 24881e9ea7e0SNamjae Jeon */ 2489115380f9SNamjae Jeon (*ni)->seq_no = le16_to_cpu(m->sequence_number); 24901e9ea7e0SNamjae Jeon /* 24911e9ea7e0SNamjae Jeon * Make sure the allocated mft record is written out to disk. 24921e9ea7e0SNamjae Jeon * NOTE: We do not set the ntfs inode dirty because this would 24931e9ea7e0SNamjae Jeon * fail in ntfs_write_inode() because the inode does not have a 24941e9ea7e0SNamjae Jeon * standard information attribute yet. Also, there is no need 24951e9ea7e0SNamjae Jeon * to set the inode dirty because the caller is going to do 24961e9ea7e0SNamjae Jeon * that anyway after finishing with the new mft record (e.g. at 24971e9ea7e0SNamjae Jeon * a minimum some new attributes will be added to the mft 24981e9ea7e0SNamjae Jeon * record. 
24991e9ea7e0SNamjae Jeon */ 25001e9ea7e0SNamjae Jeon 2501115380f9SNamjae Jeon (*ni)->mrec = kmalloc(vol->mft_record_size, GFP_NOFS); 2502115380f9SNamjae Jeon if (!(*ni)->mrec) { 2503115380f9SNamjae Jeon folio_unlock(folio); 2504115380f9SNamjae Jeon kunmap_local(m); 2505115380f9SNamjae Jeon folio_put(folio); 2506115380f9SNamjae Jeon goto undo_mftbmp_alloc; 2507115380f9SNamjae Jeon } 25081e9ea7e0SNamjae Jeon 2509115380f9SNamjae Jeon memcpy((*ni)->mrec, m, vol->mft_record_size); 2510115380f9SNamjae Jeon post_read_mst_fixup((struct ntfs_record *)(*ni)->mrec, vol->mft_record_size); 2511115380f9SNamjae Jeon ntfs_mft_mark_dirty(folio); 2512115380f9SNamjae Jeon folio_unlock(folio); 2513115380f9SNamjae Jeon (*ni)->folio = folio; 2514115380f9SNamjae Jeon (*ni)->folio_ofs = ofs; 2515115380f9SNamjae Jeon atomic_inc(&(*ni)->count); 25161e9ea7e0SNamjae Jeon /* Update the default mft allocation position. */ 25171e9ea7e0SNamjae Jeon vol->mft_data_pos = bit + 1; 25181e9ea7e0SNamjae Jeon } 2519115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 2520115380f9SNamjae Jeon mutex_unlock(&mft_ni->mrec_lock); 2521115380f9SNamjae Jeon memalloc_nofs_restore(memalloc_flags); 2522115380f9SNamjae Jeon 25231e9ea7e0SNamjae Jeon /* 25241e9ea7e0SNamjae Jeon * Return the opened, allocated inode of the allocated mft record as 25251e9ea7e0SNamjae Jeon * well as the mapped, pinned, and locked mft record. 25261e9ea7e0SNamjae Jeon */ 25271e9ea7e0SNamjae Jeon ntfs_debug("Returning opened, allocated %sinode 0x%llx.", 2528115380f9SNamjae Jeon base_ni ? 
"extent " : "", bit); 2529115380f9SNamjae Jeon (*ni)->mft_no = bit; 2530115380f9SNamjae Jeon if (ni_mrec) 2531115380f9SNamjae Jeon *ni_mrec = (*ni)->mrec; 2532115380f9SNamjae Jeon ntfs_dec_free_mft_records(vol, 1); 2533115380f9SNamjae Jeon return 0; 25341e9ea7e0SNamjae Jeon undo_data_init: 25351e9ea7e0SNamjae Jeon write_lock_irqsave(&mft_ni->size_lock, flags); 25361e9ea7e0SNamjae Jeon mft_ni->initialized_size = old_data_initialized; 25371e9ea7e0SNamjae Jeon i_size_write(vol->mft_ino, old_data_size); 25381e9ea7e0SNamjae Jeon write_unlock_irqrestore(&mft_ni->size_lock, flags); 25391e9ea7e0SNamjae Jeon goto undo_mftbmp_alloc_nolock; 25401e9ea7e0SNamjae Jeon undo_mftbmp_alloc: 2541115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 25421e9ea7e0SNamjae Jeon down_write(&vol->mftbmp_lock); 25431e9ea7e0SNamjae Jeon undo_mftbmp_alloc_nolock: 25441e9ea7e0SNamjae Jeon if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) { 25451e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 25461e9ea7e0SNamjae Jeon NVolSetErrors(vol); 25471e9ea7e0SNamjae Jeon } 2548115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 25491e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 25501e9ea7e0SNamjae Jeon err_out: 2551115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) 2552115380f9SNamjae Jeon mutex_unlock(&mft_ni->mrec_lock); 2553115380f9SNamjae Jeon memalloc_nofs_restore(memalloc_flags); 2554115380f9SNamjae Jeon return err; 25551e9ea7e0SNamjae Jeon max_err_out: 2556115380f9SNamjae Jeon ntfs_warning(vol->sb, 2557115380f9SNamjae Jeon "Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached."); 2558115380f9SNamjae Jeon if (!base_ni || base_ni->mft_no != FILE_MFT) { 25591e9ea7e0SNamjae Jeon up_write(&vol->mftbmp_lock); 2560115380f9SNamjae Jeon mutex_unlock(&mft_ni->mrec_lock); 25611e9ea7e0SNamjae Jeon } 2562115380f9SNamjae Jeon memalloc_nofs_restore(memalloc_flags); 2563115380f9SNamjae Jeon 
	return -ENOSPC;
}

/*
 * ntfs_mft_record_free - free an mft record on an ntfs volume
 * @vol:	volume on which to free the mft record
 * @ni:		open ntfs inode of the mft record to free
 *
 * Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
 *
 * The record is marked not-in-use, its sequence number is bumped (skipping
 * zero) so stale references can be detected, the record is written out and
 * finally the corresponding bit in $MFT/$BITMAP is cleared. On failure the
 * in-memory changes are rolled back as far as possible.
 *
 * Return: 0 on success, negative errno on error.
 */
int ntfs_mft_record_free(struct ntfs_volume *vol, struct ntfs_inode *ni)
{
	u64 mft_no;
	int err;
	u16 seq_no;
	__le16 old_seq_no;
	struct mft_record *ni_mrec;
	unsigned int memalloc_flags;
	struct ntfs_inode *base_ni;

	if (!vol || !ni)
		return -EINVAL;

	ntfs_debug("Entering for inode 0x%llx.\n", (long long)ni->mft_no);

	ni_mrec = map_mft_record(ni);
	if (IS_ERR(ni_mrec))
		return -EIO;

	/* Cache the mft reference for later. */
	mft_no = ni->mft_no;

	/* Mark the mft record as not in use. */
	ni_mrec->flags &= ~MFT_RECORD_IN_USE;

	/* Increment the sequence number, skipping zero, if it is not zero. */
	old_seq_no = ni_mrec->sequence_number;
	seq_no = le16_to_cpu(old_seq_no);
	if (seq_no == 0xffff)
		seq_no = 1;
	else if (seq_no)
		seq_no++;
	ni_mrec->sequence_number = cpu_to_le16(seq_no);

	/*
	 * Resolve the disk blocks backing this record under the $MFT runlist
	 * lock before the record is written out below.
	 */
	down_read(&NTFS_I(vol->mft_ino)->runlist.lock);
	err = ntfs_get_block_mft_record(NTFS_I(vol->mft_ino), ni);
	up_read(&NTFS_I(vol->mft_ino)->runlist.lock);
	if (err) {
		unmap_mft_record(ni);
		return err;
	}

	/*
	 * Set the ntfs inode dirty and write it out. We do not need to worry
	 * about the base inode here since whatever caused the extent mft
	 * record to be freed is guaranteed to do it already.
	 */
	NInoSetDirty(ni);
	err = write_mft_record(ni, ni_mrec, 0);
	if (err)
		goto sync_rollback;

	if (likely(ni->nr_extents >= 0))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;

	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
	memalloc_flags = memalloc_nofs_save();
	/*
	 * NOTE(review): mftbmp_lock is skipped when freeing $MFT's own
	 * record, presumably because the caller already holds it in that
	 * case -- confirm the lock ownership contract.
	 */
	if (base_ni->mft_no != FILE_MFT)
		down_write(&vol->mftbmp_lock);
	err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no);
	if (base_ni->mft_no != FILE_MFT)
		up_write(&vol->mftbmp_lock);
	memalloc_nofs_restore(memalloc_flags);
	if (err)
		goto bitmap_rollback;

	unmap_mft_record(ni);
	ntfs_inc_free_mft_records(vol, 1);
	return 0;

	/* Rollback what we did... */
bitmap_rollback:
	memalloc_flags = memalloc_nofs_save();
	if (base_ni->mft_no != FILE_MFT)
		down_write(&vol->mftbmp_lock);
	if (ntfs_bitmap_set_bit(vol->mftbmp_ino, mft_no))
		ntfs_error(vol->sb, "ntfs_bitmap_set_bit failed in bitmap_rollback\n");
	if (base_ni->mft_no != FILE_MFT)
		up_write(&vol->mftbmp_lock);
	memalloc_nofs_restore(memalloc_flags);
sync_rollback:
	/*
	 * NOTE(review): this message is also printed when we arrive here
	 * straight from the write_mft_record() failure above, i.e. before
	 * any rollback was attempted -- the wording is misleading there.
	 */
	ntfs_error(vol->sb,
		   "Eeek! Rollback failed in %s. Leaving inconsistent metadata!\n", __func__);
	/* Restore the in-use flag and the original sequence number. */
	ni_mrec->flags |= MFT_RECORD_IN_USE;
	ni_mrec->sequence_number = old_seq_no;
	NInoSetDirty(ni);
	write_mft_record(ni, ni_mrec, 0);
	unmap_mft_record(ni);
	return err;
}

/*
 * lcn_from_index - look up the cluster backing a page cache index
 *
 * Convert page cache @index of @ni into a vcn and translate it to an lcn
 * under the inode's runlist lock. Returns the lcn, or a negative LCN_*
 * special value (e.g. LCN_HOLE) on failure.
 */
static s64 lcn_from_index(struct ntfs_volume *vol, struct ntfs_inode *ni,
		unsigned long index)
{
	s64 vcn;
	s64 lcn;

	vcn = ntfs_pidx_to_cluster(vol, index);

	down_read(&ni->runlist.lock);
	lcn = ntfs_attr_vcn_to_lcn_nolock(ni, vcn, false);
	up_read(&ni->runlist.lock);

	return lcn;
}

/*
 * ntfs_write_mft_block - Write back a folio containing MFT records
 * @folio:	The folio to write back (contains one or more MFT records)
 * @wbc:	Writeback control structure
 *
 * This function is called as part of the address_space_operations
 * .writepages implementation for the $MFT inode (or $MFTMirr).
 * It handles writing one folio (normally 4KiB page) worth of MFT records
 * to the underlying block device.
 *
 * Return: 0 on success, or -errno on error.
 */
static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *wbc)
{
	struct address_space *mapping = folio->mapping;
	struct inode *vi = mapping->host;
	struct ntfs_inode *ni = NTFS_I(vi);
	struct ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	/* ntfs inodes whose mrec_lock we hold until after the write. */
	struct ntfs_inode **locked_nis __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE,
			sizeof(struct ntfs_inode *), GFP_NOFS);
	int nr_locked_nis = 0, err = 0, mft_ofs, prev_mft_ofs;
	/* Deferred VFS inode references, dropped after the write. */
	struct inode **ref_inos __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE,
			sizeof(struct inode *), GFP_NOFS);
	int nr_ref_inos = 0;
	struct bio *bio = NULL;
	u64 mft_no;
	struct ntfs_inode *tni;
	s64 lcn;
	s64 vcn = ntfs_pidx_to_cluster(vol, folio->index);
	s64 end_vcn = ntfs_bytes_to_cluster(vol, ni->allocated_size);
	unsigned int folio_sz;
	struct runlist_element *rl;
	loff_t i_size = i_size_read(vi);

	ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.",
		   ni->mft_no, ni->type, folio->index);

	if (!locked_nis || !ref_inos)
		return -ENOMEM;

	/* We have to zero every time due to mmap-at-end-of-file. */
	if (folio->index >= (i_size >> folio_shift(folio)))
		/* The page straddles i_size. */
		folio_zero_segment(folio,
				offset_in_folio(folio, i_size),
				folio_size(folio));

	lcn = lcn_from_index(vol, ni, folio->index);
	if (lcn <= LCN_HOLE) {
		/* A hole (or lookup error) under $MFT data is fatal here. */
		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);
		return -EIO;
	}

	/* Map folio so we can access its contents. */
	kaddr = kmap_local_folio(folio, 0);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	folio_clear_uptodate(folio);

	for (mft_ofs = 0; mft_ofs < PAGE_SIZE && vcn < end_vcn;
			mft_ofs += vol->mft_record_size) {
		/* Get the mft record number. */
		mft_no = (((s64)folio->index << PAGE_SHIFT) + mft_ofs) >>
				vol->mft_record_size_bits;
		vcn = ntfs_mft_no_to_cluster(vol, mft_no);
		/* Check whether to write this mft record. */
		tni = NULL;
		if (ntfs_may_write_mft_record(vol, mft_no,
				(struct mft_record *)(kaddr + mft_ofs),
				&tni, &ref_inos[nr_ref_inos])) {
			unsigned int mft_record_off = 0;
			s64 vcn_off = vcn;

			/*
			 * Skip $MFT extent mft records and let them be
			 * written by writeback to avoid deadlocks: the $MFT
			 * runlist lock must be taken before an $MFT extent
			 * mrec_lock is taken.
			 */
			if (tni && tni->nr_extents < 0 &&
			    tni->ext.base_ntfs_ino == NTFS_I(vol->mft_ino)) {
				mutex_unlock(&tni->mrec_lock);
				atomic_dec(&tni->count);
				iput(vol->mft_ino);
				continue;
			}

			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
			else if (ref_inos[nr_ref_inos])
				nr_ref_inos++;

			/* Record not adjacent to the previous one: flush bio. */
			if (bio && (mft_ofs != prev_mft_ofs + vol->mft_record_size)) {
flush_bio:
				/* Also jumped to from below to flush a partial record. */
				bio->bi_end_io = ntfs_bio_end_io;
				submit_bio(bio);
				bio = NULL;
			}

			if (vol->cluster_size < folio_size(folio)) {
				/*
				 * Folio spans several clusters: re-resolve the
				 * lcn for this vcn and flush the bio if the
				 * target is not physically contiguous.
				 */
				down_write(&ni->runlist.lock);
				rl = ntfs_attr_vcn_to_rl(ni, vcn_off, &lcn);
				up_write(&ni->runlist.lock);
				if (IS_ERR(rl) || lcn < 0) {
					err = -EIO;
					goto unm_done;
				}

				if (bio &&
				    (bio_end_sector(bio) >> (vol->cluster_size_bits - 9)) !=
				    lcn) {
					bio->bi_end_io = ntfs_bio_end_io;
					submit_bio(bio);
					bio = NULL;
				}
			}

			if (!bio) {
				unsigned int off;

				/* Byte offset of this chunk within its cluster. */
				off = ((mft_no << vol->mft_record_size_bits) +
						mft_record_off) & vol->cluster_size_mask;

				bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
						GFP_NOIO);
				bio->bi_iter.bi_sector =
					ntfs_bytes_to_sector(vol,
						ntfs_cluster_to_bytes(vol, lcn) + off);
			}

			/*
			 * When the cluster size equals one NTFS block a record
			 * may have to be split at runlist boundaries; otherwise
			 * the whole record can be added in one piece.
			 */
			if (vol->cluster_size == NTFS_BLOCK_SIZE &&
			    (mft_record_off ||
			     rl->length - (vcn_off - rl->vcn) == 1 ||
			     mft_ofs + NTFS_BLOCK_SIZE >= PAGE_SIZE))
				folio_sz = NTFS_BLOCK_SIZE;
			else
				folio_sz = vol->mft_record_size;
			if (!bio_add_folio(bio, folio, folio_sz,
					mft_ofs + mft_record_off)) {
				err = -EIO;
				bio_put(bio);
				goto unm_done;
			}
			mft_record_off += folio_sz;

			if (mft_record_off != vol->mft_record_size) {
				/* Record only partially queued: flush and continue. */
				vcn_off++;
				goto flush_bio;
			}
			prev_mft_ofs = mft_ofs;

			/* Keep $MFTMirr in sync for the mirrored records. */
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(struct mft_record *)(kaddr + mft_ofs));
		} else if (ref_inos[nr_ref_inos])
			nr_ref_inos++;
	}

	if (bio) {
		bio->bi_end_io = ntfs_bio_end_io;
		submit_bio(bio);
	}
unm_done:
	folio_mark_uptodate(folio);
	kunmap_local(kaddr);

	folio_start_writeback(folio);
	folio_unlock(folio);
	folio_end_writeback(folio);

	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		struct ntfs_inode *base_tni;

		tni = locked_nis[nr_locked_nis];
		mutex_unlock(&tni->mrec_lock);

		/* Get the base inode. */
		mutex_lock(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else
			base_tni = tni->ext.base_ntfs_ino;
		mutex_unlock(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%llx.",
			   tni == base_tni ? "base" : "extent",
			   tni->mft_no);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}

	/* Drop the deferred inode references. */
	while (nr_ref_inos-- > 0) {
		if (ref_inos[nr_ref_inos])
			iput(ref_inos[nr_ref_inos]);
	}

	if (unlikely(err && err != -ENOMEM))
		NVolSetErrors(vol);
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}

/*
 * ntfs_mft_writepages - Write back dirty folios for the $MFT inode
 * @mapping:	address space of the $MFT inode
 * @wbc:	writeback control
 *
 * Writeback iterator for MFT records. Iterates over dirty folios and
 * delegates actual writing to ntfs_write_mft_block() for each folio.
 * Called from the address_space_operations .writepages vector of the
 * $MFT inode.
 *
 * Returns 0 on success, or the first error encountered.
2906115380f9SNamjae Jeon */ 2907115380f9SNamjae Jeon int ntfs_mft_writepages(struct address_space *mapping, 2908115380f9SNamjae Jeon struct writeback_control *wbc) 2909115380f9SNamjae Jeon { 2910115380f9SNamjae Jeon struct folio *folio = NULL; 2911115380f9SNamjae Jeon int error; 2912115380f9SNamjae Jeon 2913115380f9SNamjae Jeon if (NVolShutdown(NTFS_I(mapping->host)->vol)) 2914115380f9SNamjae Jeon return -EIO; 2915115380f9SNamjae Jeon 2916115380f9SNamjae Jeon while ((folio = writeback_iter(mapping, wbc, folio, &error))) 2917115380f9SNamjae Jeon error = ntfs_write_mft_block(folio, wbc); 2918115380f9SNamjae Jeon return error; 2919115380f9SNamjae Jeon } 2920115380f9SNamjae Jeon 2921115380f9SNamjae Jeon void ntfs_mft_mark_dirty(struct folio *folio) 2922115380f9SNamjae Jeon { 2923115380f9SNamjae Jeon iomap_dirty_folio(folio->mapping, folio); 2924115380f9SNamjae Jeon } 2925