xref: /linux/fs/ntfs/mft.c (revision cdd4dc3aebeab43a72ce0bc2b5bab6f0a80b97a5)
11e9ea7e0SNamjae Jeon // SPDX-License-Identifier: GPL-2.0-or-later
21e9ea7e0SNamjae Jeon /*
3115380f9SNamjae Jeon  * NTFS kernel mft record operations.
4115380f9SNamjae Jeon  * Part of this file is based on code from the NTFS-3G.
51e9ea7e0SNamjae Jeon  *
61e9ea7e0SNamjae Jeon  * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
71e9ea7e0SNamjae Jeon  * Copyright (c) 2002 Richard Russon
8115380f9SNamjae Jeon  * Copyright (c) 2025 LG Electronics Co., Ltd.
91e9ea7e0SNamjae Jeon  */
101e9ea7e0SNamjae Jeon 
11115380f9SNamjae Jeon #include <linux/writeback.h>
121e9ea7e0SNamjae Jeon #include <linux/bio.h>
13115380f9SNamjae Jeon #include <linux/iomap.h>
141e9ea7e0SNamjae Jeon 
151e9ea7e0SNamjae Jeon #include "bitmap.h"
161e9ea7e0SNamjae Jeon #include "lcnalloc.h"
171e9ea7e0SNamjae Jeon #include "mft.h"
181e9ea7e0SNamjae Jeon #include "ntfs.h"
191e9ea7e0SNamjae Jeon 
20115380f9SNamjae Jeon /*
21115380f9SNamjae Jeon  * ntfs_mft_record_check - Check the consistency of an MFT record
22115380f9SNamjae Jeon  *
23115380f9SNamjae Jeon  * Make sure its general fields are safe, then examine all its
24115380f9SNamjae Jeon  * attributes and apply generic checks to them.
25115380f9SNamjae Jeon  *
26115380f9SNamjae Jeon  * Returns 0 if the checks are successful. If not, return -EIO.
27115380f9SNamjae Jeon  */
28115380f9SNamjae Jeon int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m,
29d9038d99SNamjae Jeon 		u64 mft_no)
30115380f9SNamjae Jeon {
31115380f9SNamjae Jeon 	struct attr_record *a;
32115380f9SNamjae Jeon 	struct super_block *sb = vol->sb;
331e9ea7e0SNamjae Jeon 
34115380f9SNamjae Jeon 	if (!ntfs_is_file_record(m->magic)) {
35115380f9SNamjae Jeon 		ntfs_error(sb, "Record %llu has no FILE magic (0x%x)\n",
36d9038d99SNamjae Jeon 				mft_no, le32_to_cpu(*(__le32 *)m));
37115380f9SNamjae Jeon 		goto err_out;
38115380f9SNamjae Jeon 	}
39115380f9SNamjae Jeon 
40115380f9SNamjae Jeon 	if (le16_to_cpu(m->usa_ofs) & 0x1 ||
41115380f9SNamjae Jeon 	    (vol->mft_record_size >> NTFS_BLOCK_SIZE_BITS) + 1 != le16_to_cpu(m->usa_count) ||
42115380f9SNamjae Jeon 	    le16_to_cpu(m->usa_ofs) + le16_to_cpu(m->usa_count) * 2 > vol->mft_record_size) {
43115380f9SNamjae Jeon 		ntfs_error(sb, "Record %llu has corrupt fix-up values fields\n",
44d9038d99SNamjae Jeon 				mft_no);
45115380f9SNamjae Jeon 		goto err_out;
46115380f9SNamjae Jeon 	}
47115380f9SNamjae Jeon 
48115380f9SNamjae Jeon 	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
49115380f9SNamjae Jeon 		ntfs_error(sb, "Record %llu has corrupt allocation size (%u <> %u)\n",
50d9038d99SNamjae Jeon 				mft_no, vol->mft_record_size,
51115380f9SNamjae Jeon 				le32_to_cpu(m->bytes_allocated));
52115380f9SNamjae Jeon 		goto err_out;
53115380f9SNamjae Jeon 	}
54115380f9SNamjae Jeon 
55115380f9SNamjae Jeon 	if (le32_to_cpu(m->bytes_in_use) > vol->mft_record_size) {
56115380f9SNamjae Jeon 		ntfs_error(sb, "Record %llu has corrupt in-use size (%u > %u)\n",
57d9038d99SNamjae Jeon 				mft_no, le32_to_cpu(m->bytes_in_use),
58115380f9SNamjae Jeon 				vol->mft_record_size);
59115380f9SNamjae Jeon 		goto err_out;
60115380f9SNamjae Jeon 	}
61115380f9SNamjae Jeon 
62115380f9SNamjae Jeon 	if (le16_to_cpu(m->attrs_offset) & 7) {
63115380f9SNamjae Jeon 		ntfs_error(sb, "Attributes badly aligned in record %llu\n",
64d9038d99SNamjae Jeon 				mft_no);
65115380f9SNamjae Jeon 		goto err_out;
66115380f9SNamjae Jeon 	}
67115380f9SNamjae Jeon 
68115380f9SNamjae Jeon 	a = (struct attr_record *)((char *)m + le16_to_cpu(m->attrs_offset));
69115380f9SNamjae Jeon 	if ((char *)a < (char *)m || (char *)a > (char *)m + vol->mft_record_size) {
70d9038d99SNamjae Jeon 		ntfs_error(sb, "Record %llu is corrupt\n", mft_no);
71115380f9SNamjae Jeon 		goto err_out;
72115380f9SNamjae Jeon 	}
73115380f9SNamjae Jeon 
74115380f9SNamjae Jeon 	return 0;
75115380f9SNamjae Jeon 
76115380f9SNamjae Jeon err_out:
77115380f9SNamjae Jeon 	return -EIO;
78115380f9SNamjae Jeon }
79115380f9SNamjae Jeon 
/*
 * map_mft_record_folio - map the folio in which a specific mft record resides
 * @ni:		ntfs inode whose mft record page to map
 *
 * This maps the folio in which the mft record of the ntfs inode @ni is
 * situated.
 *
 * This allocates a new buffer (@ni->mrec), copies the MFT record data from
 * the mapped folio into this buffer, and applies the MST (Multi Sector
 * Transfer) fixups on the copy.
 *
 * The folio is pinned (referenced) in @ni->folio to ensure the data remains
 * valid in the page cache, but the returned pointer is the allocated copy.
 *
 * Return: A pointer to the allocated and fixed-up mft record (@ni->mrec).
 * The return value needs to be checked with IS_ERR(). If it is true,
 * PTR_ERR() contains the negative error code.
 */
static inline struct mft_record *map_mft_record_folio(struct ntfs_inode *ni)
{
	loff_t i_size;
	struct ntfs_volume *vol = ni->vol;
	struct inode *mft_vi = vol->mft_ino;
	struct folio *folio;
	unsigned long index, end_index;
	unsigned int ofs;

	/* The caller must not already hold a mapped folio for this inode. */
	WARN_ON(ni->folio);
	/*
	 * The index into the page cache and the offset within the page cache
	 * page of the wanted mft record.
	 */
	index = NTFS_MFT_NR_TO_PIDX(vol, ni->mft_no);
	ofs = NTFS_MFT_NR_TO_POFS(vol, ni->mft_no);

	i_size = i_size_read(mft_vi);
	/* The maximum valid index into the page cache for $MFT's data. */
	end_index = i_size >> PAGE_SHIFT;

	/* If the wanted index is out of bounds the mft record doesn't exist. */
	if (unlikely(index >= end_index)) {
		/*
		 * index == end_index is still acceptable when the tail of
		 * $MFT (i_size & ~PAGE_MASK) holds a complete record at @ofs.
		 */
		if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
				vol->mft_record_size) {
			folio = ERR_PTR(-ENOENT);
			ntfs_error(vol->sb,
				"Attempt to read mft record 0x%llx, which is beyond the end of the mft. This is probably a bug in the ntfs driver.",
				ni->mft_no);
			goto err_out;
		}
	}

	/* Read, map, and pin the folio. */
	folio = read_mapping_folio(mft_vi->i_mapping, index, NULL);
	if (!IS_ERR(folio)) {
		u8 *addr;

		/* Private, fixed-up copy of the record; freed on unmap/error. */
		ni->mrec = kmalloc(vol->mft_record_size, GFP_NOFS);
		if (!ni->mrec) {
			folio_put(folio);
			folio = ERR_PTR(-ENOMEM);
			goto err_out;
		}

		/*
		 * Copy the raw (MST-protected) record out of the page cache
		 * and de-protect the private copy in place.
		 * NOTE(review): addr + ofs assumes the record lies within the
		 * first page of the folio — the NTFS_MFT_NR_TO_POFS mapping
		 * presumably guarantees this; confirm for large folios.
		 */
		addr = kmap_local_folio(folio, 0);
		memcpy(ni->mrec, addr + ofs, vol->mft_record_size);
		post_read_mst_fixup((struct ntfs_record *)ni->mrec, vol->mft_record_size);

		/* Catch multi sector transfer fixup errors. */
		if (!ntfs_mft_record_check(vol, (struct mft_record *)ni->mrec, ni->mft_no)) {
			/* Success: keep the folio pinned and return the copy. */
			kunmap_local(addr);
			ni->folio = folio;
			ni->folio_ofs = ofs;
			return ni->mrec;
		}
		/* Corrupt record: undo the mapping, the pin and the copy. */
		kunmap_local(addr);
		folio_put(folio);
		kfree(ni->mrec);
		ni->mrec = NULL;
		folio = ERR_PTR(-EIO);
		NVolSetErrors(vol);
	}
err_out:
	ni->folio = NULL;
	ni->folio_ofs = 0;
	return (struct mft_record *)folio;
}
1661e9ea7e0SNamjae Jeon 
167115380f9SNamjae Jeon /*
168115380f9SNamjae Jeon  * map_mft_record - map and pin an mft record
1691e9ea7e0SNamjae Jeon  * @ni:		ntfs inode whose MFT record to map
1701e9ea7e0SNamjae Jeon  *
171115380f9SNamjae Jeon  * This function ensures the MFT record for the given inode is mapped and
172115380f9SNamjae Jeon  * accessible.
1731e9ea7e0SNamjae Jeon  *
174115380f9SNamjae Jeon  * It increments the reference count of the ntfs inode. If the record is
175115380f9SNamjae Jeon  * already mapped (@ni->folio is set), it returns the cached record
176115380f9SNamjae Jeon  * immediately.
1771e9ea7e0SNamjae Jeon  *
178115380f9SNamjae Jeon  * Otherwise, it calls map_mft_record_folio() to read the folio from disk
179115380f9SNamjae Jeon  * (if necessary via read_mapping_folio), allocate a buffer, and copy the
180115380f9SNamjae Jeon  * record data.
1811e9ea7e0SNamjae Jeon  *
182115380f9SNamjae Jeon  * Return: A pointer to the mft record. You need to check the returned
183115380f9SNamjae Jeon  * pointer with IS_ERR().
1841e9ea7e0SNamjae Jeon  */
185115380f9SNamjae Jeon struct mft_record *map_mft_record(struct ntfs_inode *ni)
1861e9ea7e0SNamjae Jeon {
187115380f9SNamjae Jeon 	struct mft_record *m;
188115380f9SNamjae Jeon 
189115380f9SNamjae Jeon 	if (!ni)
190115380f9SNamjae Jeon 		return ERR_PTR(-EINVAL);
1911e9ea7e0SNamjae Jeon 
192d9038d99SNamjae Jeon 	ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no);
1931e9ea7e0SNamjae Jeon 
1941e9ea7e0SNamjae Jeon 	/* Make sure the ntfs inode doesn't go away. */
1951e9ea7e0SNamjae Jeon 	atomic_inc(&ni->count);
1961e9ea7e0SNamjae Jeon 
197115380f9SNamjae Jeon 	if (ni->folio)
198115380f9SNamjae Jeon 		return (struct mft_record *)ni->mrec;
1991e9ea7e0SNamjae Jeon 
200115380f9SNamjae Jeon 	m = map_mft_record_folio(ni);
2011e9ea7e0SNamjae Jeon 	if (!IS_ERR(m))
2021e9ea7e0SNamjae Jeon 		return m;
2031e9ea7e0SNamjae Jeon 
2041e9ea7e0SNamjae Jeon 	atomic_dec(&ni->count);
2051e9ea7e0SNamjae Jeon 	ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
2061e9ea7e0SNamjae Jeon 	return m;
2071e9ea7e0SNamjae Jeon }
2081e9ea7e0SNamjae Jeon 
209115380f9SNamjae Jeon /*
210115380f9SNamjae Jeon  * unmap_mft_record - release a reference to a mapped mft record
2111e9ea7e0SNamjae Jeon  * @ni:		ntfs inode whose MFT record to unmap
2121e9ea7e0SNamjae Jeon  *
213115380f9SNamjae Jeon  * This decrements the reference count of the ntfs inode.
214115380f9SNamjae Jeon  *
215115380f9SNamjae Jeon  * It releases the caller's hold on the inode. If the reference count indicates
216115380f9SNamjae Jeon  * that there are still other users (count > 1), the function returns
217115380f9SNamjae Jeon  * immediately, keeping the resources (folio and mrec buffer) pinned for
218115380f9SNamjae Jeon  * those users.
2191e9ea7e0SNamjae Jeon  *
2201e9ea7e0SNamjae Jeon  * NOTE: If caller has modified the mft record, it is imperative to set the mft
2211e9ea7e0SNamjae Jeon  * record dirty BEFORE calling unmap_mft_record().
2221e9ea7e0SNamjae Jeon  */
223115380f9SNamjae Jeon void unmap_mft_record(struct ntfs_inode *ni)
2241e9ea7e0SNamjae Jeon {
225115380f9SNamjae Jeon 	struct folio *folio;
2261e9ea7e0SNamjae Jeon 
227115380f9SNamjae Jeon 	if (!ni)
228115380f9SNamjae Jeon 		return;
2291e9ea7e0SNamjae Jeon 
230d9038d99SNamjae Jeon 	ntfs_debug("Entering for mft_no 0x%llx.", ni->mft_no);
2311e9ea7e0SNamjae Jeon 
232115380f9SNamjae Jeon 	folio = ni->folio;
233115380f9SNamjae Jeon 	if (atomic_dec_return(&ni->count) > 1)
2341e9ea7e0SNamjae Jeon 		return;
235115380f9SNamjae Jeon 	WARN_ON(!folio);
2361e9ea7e0SNamjae Jeon }
2371e9ea7e0SNamjae Jeon 
238115380f9SNamjae Jeon /*
2391e9ea7e0SNamjae Jeon  * map_extent_mft_record - load an extent inode and attach it to its base
2401e9ea7e0SNamjae Jeon  * @base_ni:	base ntfs inode
2411e9ea7e0SNamjae Jeon  * @mref:	mft reference of the extent inode to load
242115380f9SNamjae Jeon  * @ntfs_ino:	on successful return, pointer to the struct ntfs_inode structure
2431e9ea7e0SNamjae Jeon  *
2441e9ea7e0SNamjae Jeon  * Load the extent mft record @mref and attach it to its base inode @base_ni.
2451e9ea7e0SNamjae Jeon  * Return the mapped extent mft record if IS_ERR(result) is false.  Otherwise
2461e9ea7e0SNamjae Jeon  * PTR_ERR(result) gives the negative error code.
2471e9ea7e0SNamjae Jeon  *
2481e9ea7e0SNamjae Jeon  * On successful return, @ntfs_ino contains a pointer to the ntfs_inode
2491e9ea7e0SNamjae Jeon  * structure of the mapped extent inode.
2501e9ea7e0SNamjae Jeon  */
251115380f9SNamjae Jeon struct mft_record *map_extent_mft_record(struct ntfs_inode *base_ni, u64 mref,
252115380f9SNamjae Jeon 		struct ntfs_inode **ntfs_ino)
2531e9ea7e0SNamjae Jeon {
254115380f9SNamjae Jeon 	struct mft_record *m;
255115380f9SNamjae Jeon 	struct ntfs_inode *ni = NULL;
256115380f9SNamjae Jeon 	struct ntfs_inode **extent_nis = NULL;
2571e9ea7e0SNamjae Jeon 	int i;
258d9038d99SNamjae Jeon 	u64 mft_no = MREF(mref);
2591e9ea7e0SNamjae Jeon 	u16 seq_no = MSEQNO(mref);
2601e9ea7e0SNamjae Jeon 	bool destroy_ni = false;
2611e9ea7e0SNamjae Jeon 
262d9038d99SNamjae Jeon 	ntfs_debug("Mapping extent mft record 0x%llx (base mft record 0x%llx).",
2631e9ea7e0SNamjae Jeon 			mft_no, base_ni->mft_no);
2641e9ea7e0SNamjae Jeon 	/* Make sure the base ntfs inode doesn't go away. */
2651e9ea7e0SNamjae Jeon 	atomic_inc(&base_ni->count);
2661e9ea7e0SNamjae Jeon 	/*
2671e9ea7e0SNamjae Jeon 	 * Check if this extent inode has already been added to the base inode,
2681e9ea7e0SNamjae Jeon 	 * in which case just return it. If not found, add it to the base
2691e9ea7e0SNamjae Jeon 	 * inode before returning it.
2701e9ea7e0SNamjae Jeon 	 */
271115380f9SNamjae Jeon retry:
2721e9ea7e0SNamjae Jeon 	mutex_lock(&base_ni->extent_lock);
2731e9ea7e0SNamjae Jeon 	if (base_ni->nr_extents > 0) {
2741e9ea7e0SNamjae Jeon 		extent_nis = base_ni->ext.extent_ntfs_inos;
2751e9ea7e0SNamjae Jeon 		for (i = 0; i < base_ni->nr_extents; i++) {
2761e9ea7e0SNamjae Jeon 			if (mft_no != extent_nis[i]->mft_no)
2771e9ea7e0SNamjae Jeon 				continue;
2781e9ea7e0SNamjae Jeon 			ni = extent_nis[i];
2791e9ea7e0SNamjae Jeon 			/* Make sure the ntfs inode doesn't go away. */
2801e9ea7e0SNamjae Jeon 			atomic_inc(&ni->count);
2811e9ea7e0SNamjae Jeon 			break;
2821e9ea7e0SNamjae Jeon 		}
2831e9ea7e0SNamjae Jeon 	}
2841e9ea7e0SNamjae Jeon 	if (likely(ni != NULL)) {
2851e9ea7e0SNamjae Jeon 		mutex_unlock(&base_ni->extent_lock);
2861e9ea7e0SNamjae Jeon 		atomic_dec(&base_ni->count);
2871e9ea7e0SNamjae Jeon 		/* We found the record; just have to map and return it. */
2881e9ea7e0SNamjae Jeon 		m = map_mft_record(ni);
2891e9ea7e0SNamjae Jeon 		/* map_mft_record() has incremented this on success. */
2901e9ea7e0SNamjae Jeon 		atomic_dec(&ni->count);
2911e9ea7e0SNamjae Jeon 		if (!IS_ERR(m)) {
2921e9ea7e0SNamjae Jeon 			/* Verify the sequence number. */
2931e9ea7e0SNamjae Jeon 			if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
2941e9ea7e0SNamjae Jeon 				ntfs_debug("Done 1.");
2951e9ea7e0SNamjae Jeon 				*ntfs_ino = ni;
2961e9ea7e0SNamjae Jeon 				return m;
2971e9ea7e0SNamjae Jeon 			}
2981e9ea7e0SNamjae Jeon 			unmap_mft_record(ni);
299115380f9SNamjae Jeon 			ntfs_error(base_ni->vol->sb,
300115380f9SNamjae Jeon 					"Found stale extent mft reference! Corrupt filesystem. Run chkdsk.");
3011e9ea7e0SNamjae Jeon 			return ERR_PTR(-EIO);
3021e9ea7e0SNamjae Jeon 		}
3031e9ea7e0SNamjae Jeon map_err_out:
304115380f9SNamjae Jeon 		ntfs_error(base_ni->vol->sb,
305115380f9SNamjae Jeon 				"Failed to map extent mft record, error code %ld.",
306115380f9SNamjae Jeon 				-PTR_ERR(m));
3071e9ea7e0SNamjae Jeon 		return m;
3081e9ea7e0SNamjae Jeon 	}
309115380f9SNamjae Jeon 	mutex_unlock(&base_ni->extent_lock);
310115380f9SNamjae Jeon 
3111e9ea7e0SNamjae Jeon 	/* Record wasn't there. Get a new ntfs inode and initialize it. */
3121e9ea7e0SNamjae Jeon 	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
3131e9ea7e0SNamjae Jeon 	if (unlikely(!ni)) {
3141e9ea7e0SNamjae Jeon 		atomic_dec(&base_ni->count);
3151e9ea7e0SNamjae Jeon 		return ERR_PTR(-ENOMEM);
3161e9ea7e0SNamjae Jeon 	}
3171e9ea7e0SNamjae Jeon 	ni->vol = base_ni->vol;
3181e9ea7e0SNamjae Jeon 	ni->seq_no = seq_no;
3191e9ea7e0SNamjae Jeon 	ni->nr_extents = -1;
3201e9ea7e0SNamjae Jeon 	ni->ext.base_ntfs_ino = base_ni;
3211e9ea7e0SNamjae Jeon 	/* Now map the record. */
3221e9ea7e0SNamjae Jeon 	m = map_mft_record(ni);
3231e9ea7e0SNamjae Jeon 	if (IS_ERR(m)) {
3241e9ea7e0SNamjae Jeon 		atomic_dec(&base_ni->count);
3251e9ea7e0SNamjae Jeon 		ntfs_clear_extent_inode(ni);
3261e9ea7e0SNamjae Jeon 		goto map_err_out;
3271e9ea7e0SNamjae Jeon 	}
3281e9ea7e0SNamjae Jeon 	/* Verify the sequence number if it is present. */
3291e9ea7e0SNamjae Jeon 	if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
330115380f9SNamjae Jeon 		ntfs_error(base_ni->vol->sb,
331115380f9SNamjae Jeon 				"Found stale extent mft reference! Corrupt filesystem. Run chkdsk.");
3321e9ea7e0SNamjae Jeon 		destroy_ni = true;
3331e9ea7e0SNamjae Jeon 		m = ERR_PTR(-EIO);
334115380f9SNamjae Jeon 		goto unm_nolock_err_out;
335115380f9SNamjae Jeon 	}
336115380f9SNamjae Jeon 
337115380f9SNamjae Jeon 	mutex_lock(&base_ni->extent_lock);
338115380f9SNamjae Jeon 	for (i = 0; i < base_ni->nr_extents; i++) {
339115380f9SNamjae Jeon 		if (mft_no == extent_nis[i]->mft_no) {
340115380f9SNamjae Jeon 			mutex_unlock(&base_ni->extent_lock);
341115380f9SNamjae Jeon 			ntfs_clear_extent_inode(ni);
342115380f9SNamjae Jeon 			goto retry;
343115380f9SNamjae Jeon 		}
3441e9ea7e0SNamjae Jeon 	}
3451e9ea7e0SNamjae Jeon 	/* Attach extent inode to base inode, reallocating memory if needed. */
3461e9ea7e0SNamjae Jeon 	if (!(base_ni->nr_extents & 3)) {
347115380f9SNamjae Jeon 		struct ntfs_inode **tmp;
348115380f9SNamjae Jeon 		int new_size = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *);
3491e9ea7e0SNamjae Jeon 
350115380f9SNamjae Jeon 		tmp = kvzalloc(new_size, GFP_NOFS);
3511e9ea7e0SNamjae Jeon 		if (unlikely(!tmp)) {
352115380f9SNamjae Jeon 			ntfs_error(base_ni->vol->sb, "Failed to allocate internal buffer.");
3531e9ea7e0SNamjae Jeon 			destroy_ni = true;
3541e9ea7e0SNamjae Jeon 			m = ERR_PTR(-ENOMEM);
3551e9ea7e0SNamjae Jeon 			goto unm_err_out;
3561e9ea7e0SNamjae Jeon 		}
3571e9ea7e0SNamjae Jeon 		if (base_ni->nr_extents) {
358115380f9SNamjae Jeon 			WARN_ON(!base_ni->ext.extent_ntfs_inos);
3591e9ea7e0SNamjae Jeon 			memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size -
360115380f9SNamjae Jeon 					4 * sizeof(struct ntfs_inode *));
361115380f9SNamjae Jeon 			kvfree(base_ni->ext.extent_ntfs_inos);
3621e9ea7e0SNamjae Jeon 		}
3631e9ea7e0SNamjae Jeon 		base_ni->ext.extent_ntfs_inos = tmp;
3641e9ea7e0SNamjae Jeon 	}
3651e9ea7e0SNamjae Jeon 	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
3661e9ea7e0SNamjae Jeon 	mutex_unlock(&base_ni->extent_lock);
3671e9ea7e0SNamjae Jeon 	atomic_dec(&base_ni->count);
3681e9ea7e0SNamjae Jeon 	ntfs_debug("Done 2.");
3691e9ea7e0SNamjae Jeon 	*ntfs_ino = ni;
3701e9ea7e0SNamjae Jeon 	return m;
3711e9ea7e0SNamjae Jeon unm_err_out:
3721e9ea7e0SNamjae Jeon 	mutex_unlock(&base_ni->extent_lock);
373115380f9SNamjae Jeon unm_nolock_err_out:
374115380f9SNamjae Jeon 	unmap_mft_record(ni);
3751e9ea7e0SNamjae Jeon 	atomic_dec(&base_ni->count);
3761e9ea7e0SNamjae Jeon 	/*
3771e9ea7e0SNamjae Jeon 	 * If the extent inode was not attached to the base inode we need to
3781e9ea7e0SNamjae Jeon 	 * release it or we will leak memory.
3791e9ea7e0SNamjae Jeon 	 */
3801e9ea7e0SNamjae Jeon 	if (destroy_ni)
3811e9ea7e0SNamjae Jeon 		ntfs_clear_extent_inode(ni);
3821e9ea7e0SNamjae Jeon 	return m;
3831e9ea7e0SNamjae Jeon }
3841e9ea7e0SNamjae Jeon 
385115380f9SNamjae Jeon /*
386115380f9SNamjae Jeon  * __mark_mft_record_dirty - mark the base vfs inode dirty
3871e9ea7e0SNamjae Jeon  * @ni:		ntfs inode describing the mapped mft record
3881e9ea7e0SNamjae Jeon  *
3891e9ea7e0SNamjae Jeon  * Internal function.  Users should call mark_mft_record_dirty() instead.
3901e9ea7e0SNamjae Jeon  *
391115380f9SNamjae Jeon  * This function determines the base ntfs inode (in case @ni is an extent
392115380f9SNamjae Jeon  * inode) and marks the corresponding VFS inode dirty.
3931e9ea7e0SNamjae Jeon  *
3941e9ea7e0SNamjae Jeon  * NOTE:  We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
3951e9ea7e0SNamjae Jeon  * on the base vfs inode, because even though file data may have been modified,
3961e9ea7e0SNamjae Jeon  * it is dirty in the inode meta data rather than the data page cache of the
3971e9ea7e0SNamjae Jeon  * inode, and thus there are no data pages that need writing out.  Therefore, a
3981e9ea7e0SNamjae Jeon  * full mark_inode_dirty() is overkill.  A mark_inode_dirty_sync(), on the
3991e9ea7e0SNamjae Jeon  * other hand, is not sufficient, because ->write_inode needs to be called even
4001e9ea7e0SNamjae Jeon  * in case of fdatasync. This needs to happen or the file data would not
4011e9ea7e0SNamjae Jeon  * necessarily hit the device synchronously, even though the vfs inode has the
4021e9ea7e0SNamjae Jeon  * O_SYNC flag set.  Also, I_DIRTY_DATASYNC simply "feels" better than just
4031e9ea7e0SNamjae Jeon  * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
4041e9ea7e0SNamjae Jeon  * which is not what I_DIRTY_SYNC on its own would suggest.
4051e9ea7e0SNamjae Jeon  */
406115380f9SNamjae Jeon void __mark_mft_record_dirty(struct ntfs_inode *ni)
4071e9ea7e0SNamjae Jeon {
408115380f9SNamjae Jeon 	struct ntfs_inode *base_ni;
4091e9ea7e0SNamjae Jeon 
410d9038d99SNamjae Jeon 	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);
411115380f9SNamjae Jeon 	WARN_ON(NInoAttr(ni));
4121e9ea7e0SNamjae Jeon 	/* Determine the base vfs inode and mark it dirty, too. */
4131e9ea7e0SNamjae Jeon 	if (likely(ni->nr_extents >= 0))
4141e9ea7e0SNamjae Jeon 		base_ni = ni;
4151e9ea7e0SNamjae Jeon 	else
4161e9ea7e0SNamjae Jeon 		base_ni = ni->ext.base_ntfs_ino;
4171e9ea7e0SNamjae Jeon 	__mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC);
4181e9ea7e0SNamjae Jeon }
4191e9ea7e0SNamjae Jeon 
420115380f9SNamjae Jeon /*
421115380f9SNamjae Jeon  * ntfs_bio_end_io - bio completion callback for MFT record writes
4221e9ea7e0SNamjae Jeon  *
423115380f9SNamjae Jeon  * Decrements the folio reference count that was incremented before
424115380f9SNamjae Jeon  * submit_bio(). This prevents a race condition where umount could
425115380f9SNamjae Jeon  * evict the inode and release the folio while I/O is still in flight,
426115380f9SNamjae Jeon  * potentially causing data corruption or use-after-free.
4271e9ea7e0SNamjae Jeon  */
428115380f9SNamjae Jeon static void ntfs_bio_end_io(struct bio *bio)
4291e9ea7e0SNamjae Jeon {
430115380f9SNamjae Jeon 	if (bio->bi_private)
431115380f9SNamjae Jeon 		folio_put((struct folio *)bio->bi_private);
432115380f9SNamjae Jeon 	bio_put(bio);
4331e9ea7e0SNamjae Jeon }
4341e9ea7e0SNamjae Jeon 
435115380f9SNamjae Jeon /*
4361e9ea7e0SNamjae Jeon  * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror
4371e9ea7e0SNamjae Jeon  * @vol:	ntfs volume on which the mft record to synchronize resides
4381e9ea7e0SNamjae Jeon  * @mft_no:	mft record number of mft record to synchronize
4391e9ea7e0SNamjae Jeon  * @m:		mapped, mst protected (extent) mft record to synchronize
4401e9ea7e0SNamjae Jeon  *
4411e9ea7e0SNamjae Jeon  * Write the mapped, mst protected (extent) mft record @m with mft record
4421e9ea7e0SNamjae Jeon  * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol.
4431e9ea7e0SNamjae Jeon  *
4441e9ea7e0SNamjae Jeon  * On success return 0.  On error return -errno and set the volume errors flag
4451e9ea7e0SNamjae Jeon  * in the ntfs volume @vol.
4461e9ea7e0SNamjae Jeon  *
447115380f9SNamjae Jeon  * NOTE:  We always perform synchronous i/o.
4481e9ea7e0SNamjae Jeon  */
449d9038d99SNamjae Jeon int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no,
450115380f9SNamjae Jeon 		struct mft_record *m)
4511e9ea7e0SNamjae Jeon {
452115380f9SNamjae Jeon 	u8 *kmirr = NULL;
453115380f9SNamjae Jeon 	struct folio *folio;
454115380f9SNamjae Jeon 	unsigned int folio_ofs, lcn_folio_off = 0;
455115380f9SNamjae Jeon 	int err = 0;
456115380f9SNamjae Jeon 	struct bio *bio;
4571e9ea7e0SNamjae Jeon 
458d9038d99SNamjae Jeon 	ntfs_debug("Entering for inode 0x%llx.", mft_no);
459115380f9SNamjae Jeon 
4601e9ea7e0SNamjae Jeon 	if (unlikely(!vol->mftmirr_ino)) {
4611e9ea7e0SNamjae Jeon 		/* This could happen during umount... */
462115380f9SNamjae Jeon 		err = -EIO;
4631e9ea7e0SNamjae Jeon 		goto err_out;
4641e9ea7e0SNamjae Jeon 	}
4651e9ea7e0SNamjae Jeon 	/* Get the page containing the mirror copy of the mft record @m. */
466115380f9SNamjae Jeon 	folio = read_mapping_folio(vol->mftmirr_ino->i_mapping,
467115380f9SNamjae Jeon 			NTFS_MFT_NR_TO_PIDX(vol, mft_no), NULL);
468115380f9SNamjae Jeon 	if (IS_ERR(folio)) {
4691e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft mirror page.");
470115380f9SNamjae Jeon 		err = PTR_ERR(folio);
4711e9ea7e0SNamjae Jeon 		goto err_out;
4721e9ea7e0SNamjae Jeon 	}
473115380f9SNamjae Jeon 
474115380f9SNamjae Jeon 	folio_lock(folio);
475115380f9SNamjae Jeon 	folio_clear_uptodate(folio);
4761e9ea7e0SNamjae Jeon 	/* Offset of the mft mirror record inside the page. */
477115380f9SNamjae Jeon 	folio_ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no);
4781e9ea7e0SNamjae Jeon 	/* The address in the page of the mirror copy of the mft record @m. */
479115380f9SNamjae Jeon 	kmirr = kmap_local_folio(folio, 0) + folio_ofs;
4801e9ea7e0SNamjae Jeon 	/* Copy the mst protected mft record to the mirror. */
4811e9ea7e0SNamjae Jeon 	memcpy(kmirr, m, vol->mft_record_size);
4821e9ea7e0SNamjae Jeon 
483115380f9SNamjae Jeon 	if (vol->cluster_size_bits > PAGE_SHIFT) {
484115380f9SNamjae Jeon 		lcn_folio_off = folio->index << PAGE_SHIFT;
485115380f9SNamjae Jeon 		lcn_folio_off &= vol->cluster_size_mask;
4861e9ea7e0SNamjae Jeon 	}
4871e9ea7e0SNamjae Jeon 
488115380f9SNamjae Jeon 	bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO);
489115380f9SNamjae Jeon 	bio->bi_iter.bi_sector =
490115380f9SNamjae Jeon 		NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, vol->mftmirr_lcn) +
491115380f9SNamjae Jeon 				 lcn_folio_off + folio_ofs);
492115380f9SNamjae Jeon 
493115380f9SNamjae Jeon 	if (!bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) {
4941e9ea7e0SNamjae Jeon 		err = -EIO;
495115380f9SNamjae Jeon 		bio_put(bio);
496115380f9SNamjae Jeon 		goto unlock_folio;
4971e9ea7e0SNamjae Jeon 	}
4981e9ea7e0SNamjae Jeon 
499115380f9SNamjae Jeon 	bio->bi_end_io = ntfs_bio_end_io;
500115380f9SNamjae Jeon 	submit_bio(bio);
5011e9ea7e0SNamjae Jeon 	/* Current state: all buffers are clean, unlocked, and uptodate. */
502115380f9SNamjae Jeon 	folio_mark_uptodate(folio);
503115380f9SNamjae Jeon 
504115380f9SNamjae Jeon unlock_folio:
505115380f9SNamjae Jeon 	folio_unlock(folio);
506115380f9SNamjae Jeon 	kunmap_local(kmirr);
507115380f9SNamjae Jeon 	folio_put(folio);
5081e9ea7e0SNamjae Jeon 	if (likely(!err)) {
5091e9ea7e0SNamjae Jeon 		ntfs_debug("Done.");
5101e9ea7e0SNamjae Jeon 	} else {
511d9038d99SNamjae Jeon 		ntfs_error(vol->sb, "I/O error while writing mft mirror record 0x%llx!", mft_no);
5121e9ea7e0SNamjae Jeon err_out:
513115380f9SNamjae Jeon 		ntfs_error(vol->sb,
514115380f9SNamjae Jeon 			"Failed to synchronize $MFTMirr (error code %i).  Volume will be left marked dirty on umount.  Run chkdsk on the partition after umounting to correct this.",
515115380f9SNamjae Jeon 			err);
5161e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
5171e9ea7e0SNamjae Jeon 	}
5181e9ea7e0SNamjae Jeon 	return err;
5191e9ea7e0SNamjae Jeon }
5201e9ea7e0SNamjae Jeon 
521115380f9SNamjae Jeon /*
5221e9ea7e0SNamjae Jeon  * write_mft_record_nolock - write out a mapped (extent) mft record
5231e9ea7e0SNamjae Jeon  * @ni:		ntfs inode describing the mapped (extent) mft record
5241e9ea7e0SNamjae Jeon  * @m:		mapped (extent) mft record to write
5251e9ea7e0SNamjae Jeon  * @sync:	if true, wait for i/o completion
5261e9ea7e0SNamjae Jeon  *
5271e9ea7e0SNamjae Jeon  * Write the mapped (extent) mft record @m described by the (regular or extent)
5281e9ea7e0SNamjae Jeon  * ntfs inode @ni to backing store.  If the mft record @m has a counterpart in
5291e9ea7e0SNamjae Jeon  * the mft mirror, that is also updated.
5301e9ea7e0SNamjae Jeon  *
531115380f9SNamjae Jeon  * We only write the mft record if the ntfs inode @ni is dirty.
5321e9ea7e0SNamjae Jeon  *
533115380f9SNamjae Jeon  * On success, clean the mft record and return 0.
534115380f9SNamjae Jeon  * On error (specifically ENOMEM), we redirty the record so it can be retried.
535115380f9SNamjae Jeon  * For other errors, we mark the volume with errors.
5361e9ea7e0SNamjae Jeon  */
537115380f9SNamjae Jeon int write_mft_record_nolock(struct ntfs_inode *ni, struct mft_record *m, int sync)
5381e9ea7e0SNamjae Jeon {
539115380f9SNamjae Jeon 	struct ntfs_volume *vol = ni->vol;
540115380f9SNamjae Jeon 	struct folio *folio = ni->folio;
541115380f9SNamjae Jeon 	int err = 0, i = 0;
542115380f9SNamjae Jeon 	u8 *kaddr;
543115380f9SNamjae Jeon 	struct mft_record *fixup_m;
544115380f9SNamjae Jeon 	struct bio *bio;
545115380f9SNamjae Jeon 	unsigned int offset = 0, folio_size;
5461e9ea7e0SNamjae Jeon 
547d9038d99SNamjae Jeon 	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);
548115380f9SNamjae Jeon 
549115380f9SNamjae Jeon 	WARN_ON(NInoAttr(ni));
550115380f9SNamjae Jeon 	WARN_ON(!folio_test_locked(folio));
551115380f9SNamjae Jeon 
5521e9ea7e0SNamjae Jeon 	/*
553115380f9SNamjae Jeon 	 * If the struct ntfs_inode is clean no need to do anything.  If it is dirty,
5541e9ea7e0SNamjae Jeon 	 * mark it as clean now so that it can be redirtied later on if needed.
5551e9ea7e0SNamjae Jeon 	 * There is no danger of races since the caller is holding the locks
5561e9ea7e0SNamjae Jeon 	 * for the mft record @m and the page it is in.
5571e9ea7e0SNamjae Jeon 	 */
5581e9ea7e0SNamjae Jeon 	if (!NInoTestClearDirty(ni))
5591e9ea7e0SNamjae Jeon 		goto done;
5601e9ea7e0SNamjae Jeon 
561115380f9SNamjae Jeon 	kaddr = kmap_local_folio(folio, 0);
562115380f9SNamjae Jeon 	fixup_m = (struct mft_record *)(kaddr + ni->folio_ofs);
563115380f9SNamjae Jeon 	memcpy(fixup_m, m, vol->mft_record_size);
564115380f9SNamjae Jeon 
5651e9ea7e0SNamjae Jeon 	/* Apply the mst protection fixups. */
566115380f9SNamjae Jeon 	err = pre_write_mst_fixup((struct ntfs_record *)fixup_m, vol->mft_record_size);
5671e9ea7e0SNamjae Jeon 	if (err) {
5681e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to apply mst fixups!");
569115380f9SNamjae Jeon 		goto err_out;
5701e9ea7e0SNamjae Jeon 	}
5711e9ea7e0SNamjae Jeon 
572115380f9SNamjae Jeon 	folio_size = vol->mft_record_size / ni->mft_lcn_count;
573115380f9SNamjae Jeon 	while (i < ni->mft_lcn_count) {
574115380f9SNamjae Jeon 		unsigned int clu_off;
575115380f9SNamjae Jeon 
576115380f9SNamjae Jeon 		clu_off = (unsigned int)((s64)ni->mft_no * vol->mft_record_size + offset) &
577115380f9SNamjae Jeon 			vol->cluster_size_mask;
578115380f9SNamjae Jeon 
579115380f9SNamjae Jeon 		bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOIO);
580115380f9SNamjae Jeon 		bio->bi_iter.bi_sector =
581115380f9SNamjae Jeon 			NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, ni->mft_lcn[i]) +
582115380f9SNamjae Jeon 					 clu_off);
583115380f9SNamjae Jeon 
584115380f9SNamjae Jeon 		if (!bio_add_folio(bio, folio, folio_size,
585115380f9SNamjae Jeon 				   ni->folio_ofs + offset)) {
586115380f9SNamjae Jeon 			err = -EIO;
587115380f9SNamjae Jeon 			goto put_bio_out;
5881e9ea7e0SNamjae Jeon 		}
589115380f9SNamjae Jeon 
5901e9ea7e0SNamjae Jeon 		/* Synchronize the mft mirror now if not @sync. */
5911e9ea7e0SNamjae Jeon 		if (!sync && ni->mft_no < vol->mftmirr_size)
592115380f9SNamjae Jeon 			ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m);
5931e9ea7e0SNamjae Jeon 
594115380f9SNamjae Jeon 		folio_get(folio);
595115380f9SNamjae Jeon 		bio->bi_private = folio;
596115380f9SNamjae Jeon 		bio->bi_end_io = ntfs_bio_end_io;
597115380f9SNamjae Jeon 		submit_bio(bio);
598115380f9SNamjae Jeon 		offset += vol->cluster_size;
599115380f9SNamjae Jeon 		i++;
6001e9ea7e0SNamjae Jeon 	}
601115380f9SNamjae Jeon 
6021e9ea7e0SNamjae Jeon 	/* If @sync, now synchronize the mft mirror. */
6031e9ea7e0SNamjae Jeon 	if (sync && ni->mft_no < vol->mftmirr_size)
604115380f9SNamjae Jeon 		ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m);
605115380f9SNamjae Jeon 	kunmap_local(kaddr);
6061e9ea7e0SNamjae Jeon 	if (unlikely(err)) {
6071e9ea7e0SNamjae Jeon 		/* I/O error during writing.  This is really bad! */
608115380f9SNamjae Jeon 		ntfs_error(vol->sb,
609d9038d99SNamjae Jeon 			"I/O error while writing mft record 0x%llx!  Marking base inode as bad.  You should unmount the volume and run chkdsk.",
6101e9ea7e0SNamjae Jeon 			ni->mft_no);
6111e9ea7e0SNamjae Jeon 		goto err_out;
6121e9ea7e0SNamjae Jeon 	}
6131e9ea7e0SNamjae Jeon done:
6141e9ea7e0SNamjae Jeon 	ntfs_debug("Done.");
6151e9ea7e0SNamjae Jeon 	return 0;
616115380f9SNamjae Jeon put_bio_out:
617115380f9SNamjae Jeon 	bio_put(bio);
6181e9ea7e0SNamjae Jeon err_out:
6191e9ea7e0SNamjae Jeon 	/*
6201e9ea7e0SNamjae Jeon 	 * Current state: all buffers are clean, unlocked, and uptodate.
6211e9ea7e0SNamjae Jeon 	 * The caller should mark the base inode as bad so that no more i/o
622115380f9SNamjae Jeon 	 * happens.  ->drop_inode() will still be invoked so all extent inodes
6231e9ea7e0SNamjae Jeon 	 * and other allocated memory will be freed.
6241e9ea7e0SNamjae Jeon 	 */
6251e9ea7e0SNamjae Jeon 	if (err == -ENOMEM) {
626115380f9SNamjae Jeon 		ntfs_error(vol->sb,
627115380f9SNamjae Jeon 			"Not enough memory to write mft record. Redirtying so the write is retried later.");
6281e9ea7e0SNamjae Jeon 		mark_mft_record_dirty(ni);
6291e9ea7e0SNamjae Jeon 		err = 0;
6301e9ea7e0SNamjae Jeon 	} else
6311e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
6321e9ea7e0SNamjae Jeon 	return err;
6331e9ea7e0SNamjae Jeon }
6341e9ea7e0SNamjae Jeon 
/*
 * ntfs_test_inode_wb - find_inode_nowait() match callback used by writeback
 * @vi:		candidate vfs inode from the inode hash
 * @ino:	inode number of @vi (unused; the target is described by @data)
 * @data:	struct ntfs_attr describing the inode being searched for
 *
 * Return 1 and take an inode reference when @vi matches and is safe to use,
 * 0 to keep searching, or -1 to abort the search.  When aborting because the
 * inode is being deleted or created, the reason is reported back to the
 * caller through @data->state.
 *
 * NOTE(review): this runs from find_inode_nowait() with inode_hash_lock
 * held, so it must not sleep — only trylock-style/atomic operations below.
 */
static int ntfs_test_inode_wb(struct inode *vi, u64 ino, void *data)
{
	struct ntfs_attr *na = data;

	/* Not the inode we are looking for; continue the hash walk. */
	if (!ntfs_test_inode(vi, na))
		return 0;

	/*
	 * Without this, ntfs_write_mst_block() could call iput_final(),
	 * ntfs_evict_big_inode() could then try to unlink this inode, and
	 * the context could block indefinitely in map_mft_record().
	 */
	if (NInoBeingDeleted(NTFS_I(vi))) {
		na->state = NI_BeingDeleted;
		return -1;
	}

	/*
	 * This condition can prevent ntfs_write_mst_block()
	 * from applying/undo fixups while ntfs_create() being
	 * called
	 */
	spin_lock(&vi->i_lock);
	if (inode_state_read_once(vi) & I_CREATING) {
		spin_unlock(&vi->i_lock);
		na->state = NI_BeingCreated;
		return -1;
	}
	spin_unlock(&vi->i_lock);

	/* igrab() fails if the inode is being freed; treat that as abort. */
	return igrab(vi) ? 1 : -1;
}
667115380f9SNamjae Jeon 
668115380f9SNamjae Jeon /*
6691e9ea7e0SNamjae Jeon  * ntfs_may_write_mft_record - check if an mft record may be written out
6701e9ea7e0SNamjae Jeon  * @vol:	[IN]  ntfs volume on which the mft record to check resides
6711e9ea7e0SNamjae Jeon  * @mft_no:	[IN]  mft record number of the mft record to check
6721e9ea7e0SNamjae Jeon  * @m:		[IN]  mapped mft record to check
6731e9ea7e0SNamjae Jeon  * @locked_ni:	[OUT] caller has to unlock this ntfs inode if one is returned
674115380f9SNamjae Jeon  * @ref_vi:	[OUT] caller has to drop this vfs inode if one is returned
6751e9ea7e0SNamjae Jeon  *
6761e9ea7e0SNamjae Jeon  * Check if the mapped (base or extent) mft record @m with mft record number
6771e9ea7e0SNamjae Jeon  * @mft_no belonging to the ntfs volume @vol may be written out.  If necessary
6781e9ea7e0SNamjae Jeon  * and possible the ntfs inode of the mft record is locked and the base vfs
6791e9ea7e0SNamjae Jeon  * inode is pinned.  The locked ntfs inode is then returned in @locked_ni.  The
6801e9ea7e0SNamjae Jeon  * caller is responsible for unlocking the ntfs inode and unpinning the base
6811e9ea7e0SNamjae Jeon  * vfs inode.
6821e9ea7e0SNamjae Jeon  *
683115380f9SNamjae Jeon  * To avoid deadlock when the caller holds a folio lock, if the function
684115380f9SNamjae Jeon  * returns @ref_vi it defers dropping the vfs inode reference by returning
685115380f9SNamjae Jeon  * it in @ref_vi instead of calling iput() directly.  The caller must call
686115380f9SNamjae Jeon  * iput() on @ref_vi after releasing the folio lock.
687115380f9SNamjae Jeon  *
6881e9ea7e0SNamjae Jeon  * Return 'true' if the mft record may be written out and 'false' if not.
6891e9ea7e0SNamjae Jeon  *
6901e9ea7e0SNamjae Jeon  * The caller has locked the page and cleared the uptodate flag on it which
6911e9ea7e0SNamjae Jeon  * means that we can safely write out any dirty mft records that do not have
692115380f9SNamjae Jeon  * their inodes in icache as determined by find_inode_nowait().
6931e9ea7e0SNamjae Jeon  *
6941e9ea7e0SNamjae Jeon  * Here is a description of the tests we perform:
6951e9ea7e0SNamjae Jeon  *
6961e9ea7e0SNamjae Jeon  * If the inode is found in icache we know the mft record must be a base mft
6971e9ea7e0SNamjae Jeon  * record.  If it is dirty, we do not write it and return 'false' as the vfs
6981e9ea7e0SNamjae Jeon  * inode write paths will result in the access times being updated which would
699115380f9SNamjae Jeon  * cause the base mft record to be redirtied and written out again.
7001e9ea7e0SNamjae Jeon  *
7011e9ea7e0SNamjae Jeon  * If the inode is in icache and not dirty, we attempt to lock the mft record
7021e9ea7e0SNamjae Jeon  * and if we find the lock was already taken, it is not safe to write the mft
7031e9ea7e0SNamjae Jeon  * record and we return 'false'.
7041e9ea7e0SNamjae Jeon  *
7051e9ea7e0SNamjae Jeon  * If we manage to obtain the lock we have exclusive access to the mft record,
7061e9ea7e0SNamjae Jeon  * which also allows us safe writeout of the mft record.  We then set
7071e9ea7e0SNamjae Jeon  * @locked_ni to the locked ntfs inode and return 'true'.
7081e9ea7e0SNamjae Jeon  *
7091e9ea7e0SNamjae Jeon  * Note we cannot just lock the mft record and sleep while waiting for the lock
710115380f9SNamjae Jeon  * because this would deadlock due to lock reversal.
7111e9ea7e0SNamjae Jeon  *
7121e9ea7e0SNamjae Jeon  * If the inode is not in icache we need to perform further checks.
7131e9ea7e0SNamjae Jeon  *
7141e9ea7e0SNamjae Jeon  * If the mft record is not a FILE record or it is a base mft record, we can
7151e9ea7e0SNamjae Jeon  * safely write it and return 'true'.
7161e9ea7e0SNamjae Jeon  *
7171e9ea7e0SNamjae Jeon  * We now know the mft record is an extent mft record.  We check if the inode
718115380f9SNamjae Jeon  * corresponding to its base mft record is in icache. If it is not, we cannot
719115380f9SNamjae Jeon  * safely determine the state of the extent inode, so we return 'false'.
7201e9ea7e0SNamjae Jeon  *
7211e9ea7e0SNamjae Jeon  * We now have the base inode for the extent mft record.  We check if it has an
722115380f9SNamjae Jeon  * ntfs inode for the extent mft record attached. If not, it is safe to write
7231e9ea7e0SNamjae Jeon  * the extent mft record and we return 'true'.
7241e9ea7e0SNamjae Jeon  *
725115380f9SNamjae Jeon  * If the extent inode is attached, we check if it is dirty. If so, we return
726115380f9SNamjae Jeon  * 'false' (letting the standard write_inode path handle it).
727115380f9SNamjae Jeon  *
728115380f9SNamjae Jeon  * If it is not dirty, we attempt to lock the extent mft record. If the lock
729115380f9SNamjae Jeon  * was already taken, it is not safe to write and we return 'false'.
7301e9ea7e0SNamjae Jeon  *
7311e9ea7e0SNamjae Jeon  * If we manage to obtain the lock we have exclusive access to the extent mft
732115380f9SNamjae Jeon  * record. We set @locked_ni to the now locked ntfs inode and return 'true'.
7331e9ea7e0SNamjae Jeon  */
734d9038d99SNamjae Jeon static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no,
735115380f9SNamjae Jeon 		const struct mft_record *m, struct ntfs_inode **locked_ni,
736115380f9SNamjae Jeon 		struct inode **ref_vi)
7371e9ea7e0SNamjae Jeon {
7381e9ea7e0SNamjae Jeon 	struct super_block *sb = vol->sb;
7391e9ea7e0SNamjae Jeon 	struct inode *mft_vi = vol->mft_ino;
7401e9ea7e0SNamjae Jeon 	struct inode *vi;
741115380f9SNamjae Jeon 	struct ntfs_inode *ni, *eni, **extent_nis;
7421e9ea7e0SNamjae Jeon 	int i;
743115380f9SNamjae Jeon 	struct ntfs_attr na = {0};
7441e9ea7e0SNamjae Jeon 
745d9038d99SNamjae Jeon 	ntfs_debug("Entering for inode 0x%llx.", mft_no);
7461e9ea7e0SNamjae Jeon 	/*
7471e9ea7e0SNamjae Jeon 	 * Normally we do not return a locked inode so set @locked_ni to NULL.
7481e9ea7e0SNamjae Jeon 	 */
7491e9ea7e0SNamjae Jeon 	*locked_ni = NULL;
750115380f9SNamjae Jeon 	*ref_vi = NULL;
751115380f9SNamjae Jeon 
7521e9ea7e0SNamjae Jeon 	/*
7531e9ea7e0SNamjae Jeon 	 * Check if the inode corresponding to this mft record is in the VFS
7541e9ea7e0SNamjae Jeon 	 * inode cache and obtain a reference to it if it is.
7551e9ea7e0SNamjae Jeon 	 */
756d9038d99SNamjae Jeon 	ntfs_debug("Looking for inode 0x%llx in icache.", mft_no);
7571e9ea7e0SNamjae Jeon 	na.mft_no = mft_no;
7581e9ea7e0SNamjae Jeon 	na.type = AT_UNUSED;
7591e9ea7e0SNamjae Jeon 	/*
7601e9ea7e0SNamjae Jeon 	 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
7611e9ea7e0SNamjae Jeon 	 * we get here for it rather often.
7621e9ea7e0SNamjae Jeon 	 */
7631e9ea7e0SNamjae Jeon 	if (!mft_no) {
7641e9ea7e0SNamjae Jeon 		/* Balance the below iput(). */
7651e9ea7e0SNamjae Jeon 		vi = igrab(mft_vi);
766115380f9SNamjae Jeon 		WARN_ON(vi != mft_vi);
7671e9ea7e0SNamjae Jeon 	} else {
7681e9ea7e0SNamjae Jeon 		/*
769115380f9SNamjae Jeon 		 * Have to use find_inode_nowait() since ilookup5_nowait()
770115380f9SNamjae Jeon 		 * waits for inode with I_FREEING, which causes ntfs to deadlock
771115380f9SNamjae Jeon 		 * when inodes are unlinked concurrently
7721e9ea7e0SNamjae Jeon 		 */
773115380f9SNamjae Jeon 		vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na);
774115380f9SNamjae Jeon 		if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated)
775115380f9SNamjae Jeon 			return false;
7761e9ea7e0SNamjae Jeon 	}
7771e9ea7e0SNamjae Jeon 	if (vi) {
778d9038d99SNamjae Jeon 		ntfs_debug("Base inode 0x%llx is in icache.", mft_no);
7791e9ea7e0SNamjae Jeon 		/* The inode is in icache. */
7801e9ea7e0SNamjae Jeon 		ni = NTFS_I(vi);
7811e9ea7e0SNamjae Jeon 		/* Take a reference to the ntfs inode. */
7821e9ea7e0SNamjae Jeon 		atomic_inc(&ni->count);
7831e9ea7e0SNamjae Jeon 		/* If the inode is dirty, do not write this record. */
7841e9ea7e0SNamjae Jeon 		if (NInoDirty(ni)) {
785d9038d99SNamjae Jeon 			ntfs_debug("Inode 0x%llx is dirty, do not write it.",
7861e9ea7e0SNamjae Jeon 					mft_no);
7871e9ea7e0SNamjae Jeon 			atomic_dec(&ni->count);
788115380f9SNamjae Jeon 			*ref_vi = vi;
7891e9ea7e0SNamjae Jeon 			return false;
7901e9ea7e0SNamjae Jeon 		}
791d9038d99SNamjae Jeon 		ntfs_debug("Inode 0x%llx is not dirty.", mft_no);
7921e9ea7e0SNamjae Jeon 		/* The inode is not dirty, try to take the mft record lock. */
7931e9ea7e0SNamjae Jeon 		if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
794d9038d99SNamjae Jeon 			ntfs_debug("Mft record 0x%llx is already locked, do not write it.", mft_no);
7951e9ea7e0SNamjae Jeon 			atomic_dec(&ni->count);
796115380f9SNamjae Jeon 			*ref_vi = vi;
7971e9ea7e0SNamjae Jeon 			return false;
7981e9ea7e0SNamjae Jeon 		}
799d9038d99SNamjae Jeon 		ntfs_debug("Managed to lock mft record 0x%llx, write it.",
8001e9ea7e0SNamjae Jeon 				mft_no);
8011e9ea7e0SNamjae Jeon 		/*
8021e9ea7e0SNamjae Jeon 		 * The write has to occur while we hold the mft record lock so
8031e9ea7e0SNamjae Jeon 		 * return the locked ntfs inode.
8041e9ea7e0SNamjae Jeon 		 */
8051e9ea7e0SNamjae Jeon 		*locked_ni = ni;
8061e9ea7e0SNamjae Jeon 		return true;
8071e9ea7e0SNamjae Jeon 	}
808d9038d99SNamjae Jeon 	ntfs_debug("Inode 0x%llx is not in icache.", mft_no);
8091e9ea7e0SNamjae Jeon 	/* The inode is not in icache. */
8101e9ea7e0SNamjae Jeon 	/* Write the record if it is not a mft record (type "FILE"). */
8111e9ea7e0SNamjae Jeon 	if (!ntfs_is_mft_record(m->magic)) {
812d9038d99SNamjae Jeon 		ntfs_debug("Mft record 0x%llx is not a FILE record, write it.",
8131e9ea7e0SNamjae Jeon 				mft_no);
8141e9ea7e0SNamjae Jeon 		return true;
8151e9ea7e0SNamjae Jeon 	}
8161e9ea7e0SNamjae Jeon 	/* Write the mft record if it is a base inode. */
8171e9ea7e0SNamjae Jeon 	if (!m->base_mft_record) {
818d9038d99SNamjae Jeon 		ntfs_debug("Mft record 0x%llx is a base record, write it.",
8191e9ea7e0SNamjae Jeon 				mft_no);
8201e9ea7e0SNamjae Jeon 		return true;
8211e9ea7e0SNamjae Jeon 	}
8221e9ea7e0SNamjae Jeon 	/*
8231e9ea7e0SNamjae Jeon 	 * This is an extent mft record.  Check if the inode corresponding to
8241e9ea7e0SNamjae Jeon 	 * its base mft record is in icache and obtain a reference to it if it
8251e9ea7e0SNamjae Jeon 	 * is.
8261e9ea7e0SNamjae Jeon 	 */
8271e9ea7e0SNamjae Jeon 	na.mft_no = MREF_LE(m->base_mft_record);
828115380f9SNamjae Jeon 	na.state = 0;
829d9038d99SNamjae Jeon 	ntfs_debug("Mft record 0x%llx is an extent record.  Looking for base inode 0x%llx in icache.",
830115380f9SNamjae Jeon 			mft_no, na.mft_no);
8311e9ea7e0SNamjae Jeon 	if (!na.mft_no) {
8321e9ea7e0SNamjae Jeon 		/* Balance the below iput(). */
8331e9ea7e0SNamjae Jeon 		vi = igrab(mft_vi);
834115380f9SNamjae Jeon 		WARN_ON(vi != mft_vi);
835115380f9SNamjae Jeon 	} else {
836115380f9SNamjae Jeon 		vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na);
837115380f9SNamjae Jeon 		if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated)
838115380f9SNamjae Jeon 			return false;
8391e9ea7e0SNamjae Jeon 	}
840115380f9SNamjae Jeon 
841115380f9SNamjae Jeon 	if (!vi)
842115380f9SNamjae Jeon 		return false;
843d9038d99SNamjae Jeon 	ntfs_debug("Base inode 0x%llx is in icache.", na.mft_no);
8441e9ea7e0SNamjae Jeon 	/*
8451e9ea7e0SNamjae Jeon 	 * The base inode is in icache.  Check if it has the extent inode
8461e9ea7e0SNamjae Jeon 	 * corresponding to this extent mft record attached.
8471e9ea7e0SNamjae Jeon 	 */
8481e9ea7e0SNamjae Jeon 	ni = NTFS_I(vi);
8491e9ea7e0SNamjae Jeon 	mutex_lock(&ni->extent_lock);
8501e9ea7e0SNamjae Jeon 	if (ni->nr_extents <= 0) {
8511e9ea7e0SNamjae Jeon 		/*
8521e9ea7e0SNamjae Jeon 		 * The base inode has no attached extent inodes, write this
8531e9ea7e0SNamjae Jeon 		 * extent mft record.
8541e9ea7e0SNamjae Jeon 		 */
8551e9ea7e0SNamjae Jeon 		mutex_unlock(&ni->extent_lock);
856115380f9SNamjae Jeon 		*ref_vi = vi;
857d9038d99SNamjae Jeon 		ntfs_debug("Base inode 0x%llx has no attached extent inodes, write the extent record.",
858115380f9SNamjae Jeon 				na.mft_no);
8591e9ea7e0SNamjae Jeon 		return true;
8601e9ea7e0SNamjae Jeon 	}
8611e9ea7e0SNamjae Jeon 	/* Iterate over the attached extent inodes. */
8621e9ea7e0SNamjae Jeon 	extent_nis = ni->ext.extent_ntfs_inos;
8631e9ea7e0SNamjae Jeon 	for (eni = NULL, i = 0; i < ni->nr_extents; ++i) {
8641e9ea7e0SNamjae Jeon 		if (mft_no == extent_nis[i]->mft_no) {
8651e9ea7e0SNamjae Jeon 			/*
8661e9ea7e0SNamjae Jeon 			 * Found the extent inode corresponding to this extent
8671e9ea7e0SNamjae Jeon 			 * mft record.
8681e9ea7e0SNamjae Jeon 			 */
8691e9ea7e0SNamjae Jeon 			eni = extent_nis[i];
8701e9ea7e0SNamjae Jeon 			break;
8711e9ea7e0SNamjae Jeon 		}
8721e9ea7e0SNamjae Jeon 	}
8731e9ea7e0SNamjae Jeon 	/*
8741e9ea7e0SNamjae Jeon 	 * If the extent inode was not attached to the base inode, write this
8751e9ea7e0SNamjae Jeon 	 * extent mft record.
8761e9ea7e0SNamjae Jeon 	 */
8771e9ea7e0SNamjae Jeon 	if (!eni) {
8781e9ea7e0SNamjae Jeon 		mutex_unlock(&ni->extent_lock);
879115380f9SNamjae Jeon 		*ref_vi = vi;
880d9038d99SNamjae Jeon 		ntfs_debug("Extent inode 0x%llx is not attached to its base inode 0x%llx, write the extent record.",
8811e9ea7e0SNamjae Jeon 				mft_no, na.mft_no);
8821e9ea7e0SNamjae Jeon 		return true;
8831e9ea7e0SNamjae Jeon 	}
884d9038d99SNamjae Jeon 	ntfs_debug("Extent inode 0x%llx is attached to its base inode 0x%llx.",
8851e9ea7e0SNamjae Jeon 			mft_no, na.mft_no);
8861e9ea7e0SNamjae Jeon 	/* Take a reference to the extent ntfs inode. */
8871e9ea7e0SNamjae Jeon 	atomic_inc(&eni->count);
8881e9ea7e0SNamjae Jeon 	mutex_unlock(&ni->extent_lock);
889115380f9SNamjae Jeon 
890115380f9SNamjae Jeon 	/* if extent inode is dirty, write_inode will write it */
891115380f9SNamjae Jeon 	if (NInoDirty(eni)) {
892115380f9SNamjae Jeon 		atomic_dec(&eni->count);
893115380f9SNamjae Jeon 		*ref_vi = vi;
894115380f9SNamjae Jeon 		return false;
895115380f9SNamjae Jeon 	}
896115380f9SNamjae Jeon 
8971e9ea7e0SNamjae Jeon 	/*
8981e9ea7e0SNamjae Jeon 	 * Found the extent inode coresponding to this extent mft record.
8991e9ea7e0SNamjae Jeon 	 * Try to take the mft record lock.
9001e9ea7e0SNamjae Jeon 	 */
9011e9ea7e0SNamjae Jeon 	if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
9021e9ea7e0SNamjae Jeon 		atomic_dec(&eni->count);
903115380f9SNamjae Jeon 		*ref_vi = vi;
904d9038d99SNamjae Jeon 		ntfs_debug("Extent mft record 0x%llx is already locked, do not write it.",
905115380f9SNamjae Jeon 				mft_no);
9061e9ea7e0SNamjae Jeon 		return false;
9071e9ea7e0SNamjae Jeon 	}
908d9038d99SNamjae Jeon 	ntfs_debug("Managed to lock extent mft record 0x%llx, write it.",
9091e9ea7e0SNamjae Jeon 			mft_no);
9101e9ea7e0SNamjae Jeon 	/*
9111e9ea7e0SNamjae Jeon 	 * The write has to occur while we hold the mft record lock so return
9121e9ea7e0SNamjae Jeon 	 * the locked extent ntfs inode.
9131e9ea7e0SNamjae Jeon 	 */
9141e9ea7e0SNamjae Jeon 	*locked_ni = eni;
9151e9ea7e0SNamjae Jeon 	return true;
9161e9ea7e0SNamjae Jeon }
9171e9ea7e0SNamjae Jeon 
/* Common suffix for error messages about inconsistent on-disk metadata. */
static const char *es = "  Leaving inconsistent metadata.  Unmount and run chkdsk.";

/*
 * Mft record numbers below this value are skipped by the mft record
 * allocator (see the data_pos clamps in
 * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock()), keeping them reserved.
 */
#define RESERVED_MFT_RECORDS	64
921115380f9SNamjae Jeon 
922115380f9SNamjae Jeon /*
9231e9ea7e0SNamjae Jeon  * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name
9241e9ea7e0SNamjae Jeon  * @vol:	volume on which to search for a free mft record
9251e9ea7e0SNamjae Jeon  * @base_ni:	open base inode if allocating an extent mft record or NULL
9261e9ea7e0SNamjae Jeon  *
9271e9ea7e0SNamjae Jeon  * Search for a free mft record in the mft bitmap attribute on the ntfs volume
9281e9ea7e0SNamjae Jeon  * @vol.
9291e9ea7e0SNamjae Jeon  *
9301e9ea7e0SNamjae Jeon  * If @base_ni is NULL start the search at the default allocator position.
9311e9ea7e0SNamjae Jeon  *
9321e9ea7e0SNamjae Jeon  * If @base_ni is not NULL start the search at the mft record after the base
9331e9ea7e0SNamjae Jeon  * mft record @base_ni.
9341e9ea7e0SNamjae Jeon  *
9351e9ea7e0SNamjae Jeon  * Return the free mft record on success and -errno on error.  An error code of
9361e9ea7e0SNamjae Jeon  * -ENOSPC means that there are no free mft records in the currently
9371e9ea7e0SNamjae Jeon  * initialized mft bitmap.
9381e9ea7e0SNamjae Jeon  *
9391e9ea7e0SNamjae Jeon  * Locking: Caller must hold vol->mftbmp_lock for writing.
9401e9ea7e0SNamjae Jeon  */
static s64 ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(struct ntfs_volume *vol,
		struct ntfs_inode *base_ni)
{
	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
	unsigned long flags;
	struct address_space *mftbmp_mapping;
	u8 *buf = NULL, *byte;
	struct folio *folio;
	unsigned int folio_ofs, size;
	u8 pass, b;

	ntfs_debug("Searching for free mft record in the currently initialized mft bitmap.");
	mftbmp_mapping = vol->mftbmp_ino->i_mapping;
	/*
	 * Set the end of the pass making sure we do not overflow the mft
	 * bitmap.
	 */
	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
	pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
			vol->mft_record_size_bits;
	read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
	read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	/* One bitmap bit per mft record, hence "<< 3" (bits per byte). */
	ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
	read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
	if (pass_end > ll)
		pass_end = ll;
	pass = 1;
	/*
	 * Two-pass search: pass 1 scans from the start position to the end of
	 * the initialized bitmap, pass 2 wraps around and scans the part that
	 * was skipped at the beginning.
	 */
	if (!base_ni)
		data_pos = vol->mft_data_pos;
	else
		data_pos = base_ni->mft_no + 1;
	/* Never hand out records from the reserved range. */
	if (data_pos < RESERVED_MFT_RECORDS)
		data_pos = RESERVED_MFT_RECORDS;
	if (data_pos >= pass_end) {
		data_pos = RESERVED_MFT_RECORDS;
		pass = 2;
		/* This happens on a freshly formatted volume. */
		if (data_pos >= pass_end)
			return -ENOSPC;
	}

	/*
	 * When extending $MFT itself, start from record 0 so the new extent
	 * record lands as close to the base record as possible.
	 */
	if (base_ni && base_ni->mft_no == FILE_MFT) {
		data_pos = 0;
		pass = 2;
	}

	pass_start = data_pos;
	ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, pass_end 0x%llx, data_pos 0x%llx.",
			pass, pass_start, pass_end, data_pos);
	/* Loop until a free mft record is found. */
	for (; pass <= 2;) {
		/* Cap size to pass_end. */
		ofs = data_pos >> 3;
		folio_ofs = ofs & ~PAGE_MASK;
		/* Number of bitmap bytes left in this page, capped below. */
		size = PAGE_SIZE - folio_ofs;
		ll = ((pass_end + 7) >> 3) - ofs;
		if (size > ll)
			size = ll;
		/* Convert @size from bytes to bits for the inner loop. */
		size <<= 3;
		/*
		 * If we are still within the active pass, search the next page
		 * for a zero bit.
		 */
		if (size) {
			folio = read_mapping_folio(mftbmp_mapping,
					ofs >> PAGE_SHIFT, NULL);
			if (IS_ERR(folio)) {
				ntfs_error(vol->sb, "Failed to read mft bitmap, aborting.");
				return PTR_ERR(folio);
			}
			folio_lock(folio);
			buf = (u8 *)kmap_local_folio(folio, 0) + folio_ofs;
			/* @bit is the bit offset within the current byte. */
			bit = data_pos & 7;
			data_pos &= ~7ull;
			ntfs_debug("Before inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			/* Advance a whole byte (8 bits) per iteration. */
			for (; bit < size && data_pos + bit < pass_end;
					bit &= ~7ull, bit += 8) {
				/*
				 * If we're extending $MFT and running out of the first
				 * mft record (base record) then give up searching since
				 * no guarantee that the found record will be accessible.
				 */
				if (base_ni && base_ni->mft_no == FILE_MFT && bit > 400) {
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					return -ENOSPC;
				}

				byte = buf + (bit >> 3);
				/* All eight records in this byte are in use. */
				if (*byte == 0xff)
					continue;
				b = ffz((unsigned long)*byte);
				/* Only accept a zero bit at or after our start bit. */
				if (b < 8 && b >= (bit & 7)) {
					ll = data_pos + (bit & ~7ull) + b;
					/* Mft record numbers are limited to 32 bits. */
					if (unlikely(ll > (1ll << 32))) {
						folio_unlock(folio);
						kunmap_local(buf);
						folio_put(folio);
						return -ENOSPC;
					}
					/* Claim the record in the bitmap. */
					*byte |= 1 << b;
					folio_mark_dirty(folio);
					folio_unlock(folio);
					kunmap_local(buf);
					folio_put(folio);
					ntfs_debug("Done.  (Found and allocated mft record 0x%llx.)",
							ll);
					return ll;
				}
			}
			ntfs_debug("After inner for loop: size 0x%x, data_pos 0x%llx, bit 0x%llx",
					size, data_pos, bit);
			data_pos += size;
			folio_unlock(folio);
			kunmap_local(buf);
			folio_put(folio);
			/*
			 * If the end of the pass has not been reached yet,
			 * continue searching the mft bitmap for a zero bit.
			 */
			if (data_pos < pass_end)
				continue;
		}
		/* Do the next pass. */
		if (++pass == 2) {
			/*
			 * Starting the second pass, in which we scan the first
			 * part of the zone which we omitted earlier.
			 */
			pass_end = pass_start;
			data_pos = pass_start = RESERVED_MFT_RECORDS;
			ntfs_debug("pass %i, pass_start 0x%llx, pass_end 0x%llx.",
					pass, pass_start, pass_end);
			if (data_pos >= pass_end)
				break;
		}
	}
	/* No free mft records in currently initialized mft bitmap. */
	ntfs_debug("Done.  (No free mft records left in currently initialized mft bitmap.)");
	return -ENOSPC;
}
10841e9ea7e0SNamjae Jeon 
1085115380f9SNamjae Jeon static int ntfs_mft_attr_extend(struct ntfs_inode *ni)
1086115380f9SNamjae Jeon {
1087115380f9SNamjae Jeon 	int ret = 0;
1088115380f9SNamjae Jeon 	struct ntfs_inode *base_ni;
1089115380f9SNamjae Jeon 
1090115380f9SNamjae Jeon 	if (NInoAttr(ni))
1091115380f9SNamjae Jeon 		base_ni = ni->ext.base_ntfs_ino;
1092115380f9SNamjae Jeon 	else
1093115380f9SNamjae Jeon 		base_ni = ni;
1094115380f9SNamjae Jeon 
1095115380f9SNamjae Jeon 	if (!NInoAttrList(base_ni)) {
1096115380f9SNamjae Jeon 		ret = ntfs_inode_add_attrlist(base_ni);
1097115380f9SNamjae Jeon 		if (ret) {
1098115380f9SNamjae Jeon 			pr_err("Can not add attrlist\n");
1099115380f9SNamjae Jeon 			goto out;
1100115380f9SNamjae Jeon 		} else {
1101115380f9SNamjae Jeon 			ret = -EAGAIN;
1102115380f9SNamjae Jeon 			goto out;
1103115380f9SNamjae Jeon 		}
1104115380f9SNamjae Jeon 	}
1105115380f9SNamjae Jeon 
1106115380f9SNamjae Jeon 	ret = ntfs_attr_update_mapping_pairs(ni, 0);
1107115380f9SNamjae Jeon 	if (ret)
1108115380f9SNamjae Jeon 		pr_err("MP update failed\n");
1109115380f9SNamjae Jeon 
1110115380f9SNamjae Jeon out:
1111115380f9SNamjae Jeon 	return ret;
1112115380f9SNamjae Jeon }
1113115380f9SNamjae Jeon 
1114115380f9SNamjae Jeon /*
11151e9ea7e0SNamjae Jeon  * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster
11161e9ea7e0SNamjae Jeon  * @vol:	volume on which to extend the mft bitmap attribute
11171e9ea7e0SNamjae Jeon  *
11181e9ea7e0SNamjae Jeon  * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
11191e9ea7e0SNamjae Jeon  *
11201e9ea7e0SNamjae Jeon  * Note: Only changes allocated_size, i.e. does not touch initialized_size or
11211e9ea7e0SNamjae Jeon  * data_size.
11221e9ea7e0SNamjae Jeon  *
11231e9ea7e0SNamjae Jeon  * Return 0 on success and -errno on error.
11241e9ea7e0SNamjae Jeon  *
11251e9ea7e0SNamjae Jeon  * Locking: - Caller must hold vol->mftbmp_lock for writing.
11261e9ea7e0SNamjae Jeon  *	    - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for
11271e9ea7e0SNamjae Jeon  *	      writing and releases it before returning.
11281e9ea7e0SNamjae Jeon  *	    - This function takes vol->lcnbmp_lock for writing and releases it
11291e9ea7e0SNamjae Jeon  *	      before returning.
11301e9ea7e0SNamjae Jeon  */
1131115380f9SNamjae Jeon static int ntfs_mft_bitmap_extend_allocation_nolock(struct ntfs_volume *vol)
11321e9ea7e0SNamjae Jeon {
1133115380f9SNamjae Jeon 	s64 lcn;
11341e9ea7e0SNamjae Jeon 	s64 ll;
11351e9ea7e0SNamjae Jeon 	unsigned long flags;
1136115380f9SNamjae Jeon 	struct folio *folio;
1137115380f9SNamjae Jeon 	struct ntfs_inode *mft_ni, *mftbmp_ni;
1138115380f9SNamjae Jeon 	struct runlist_element *rl, *rl2 = NULL;
1139115380f9SNamjae Jeon 	struct ntfs_attr_search_ctx *ctx = NULL;
1140115380f9SNamjae Jeon 	struct mft_record *mrec;
1141115380f9SNamjae Jeon 	struct attr_record *a = NULL;
11421e9ea7e0SNamjae Jeon 	int ret, mp_size;
11431e9ea7e0SNamjae Jeon 	u32 old_alen = 0;
11441e9ea7e0SNamjae Jeon 	u8 *b, tb;
11451e9ea7e0SNamjae Jeon 	struct {
11461e9ea7e0SNamjae Jeon 		u8 added_cluster:1;
11471e9ea7e0SNamjae Jeon 		u8 added_run:1;
11481e9ea7e0SNamjae Jeon 		u8 mp_rebuilt:1;
1149115380f9SNamjae Jeon 		u8 mp_extended:1;
1150115380f9SNamjae Jeon 	} status = { 0, 0, 0, 0 };
1151115380f9SNamjae Jeon 	size_t new_rl_count;
11521e9ea7e0SNamjae Jeon 
11531e9ea7e0SNamjae Jeon 	ntfs_debug("Extending mft bitmap allocation.");
11541e9ea7e0SNamjae Jeon 	mft_ni = NTFS_I(vol->mft_ino);
11551e9ea7e0SNamjae Jeon 	mftbmp_ni = NTFS_I(vol->mftbmp_ino);
11561e9ea7e0SNamjae Jeon 	/*
11571e9ea7e0SNamjae Jeon 	 * Determine the last lcn of the mft bitmap.  The allocated size of the
11581e9ea7e0SNamjae Jeon 	 * mft bitmap cannot be zero so we are ok to do this.
11591e9ea7e0SNamjae Jeon 	 */
11601e9ea7e0SNamjae Jeon 	down_write(&mftbmp_ni->runlist.lock);
11611e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
11621e9ea7e0SNamjae Jeon 	ll = mftbmp_ni->allocated_size;
11631e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
11641e9ea7e0SNamjae Jeon 	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
1165115380f9SNamjae Jeon 			NTFS_B_TO_CLU(vol, ll - 1), NULL);
11661e9ea7e0SNamjae Jeon 	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
11671e9ea7e0SNamjae Jeon 		up_write(&mftbmp_ni->runlist.lock);
1168115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1169115380f9SNamjae Jeon 			"Failed to determine last allocated cluster of mft bitmap attribute.");
11701e9ea7e0SNamjae Jeon 		if (!IS_ERR(rl))
11711e9ea7e0SNamjae Jeon 			ret = -EIO;
11721e9ea7e0SNamjae Jeon 		else
11731e9ea7e0SNamjae Jeon 			ret = PTR_ERR(rl);
11741e9ea7e0SNamjae Jeon 		return ret;
11751e9ea7e0SNamjae Jeon 	}
11761e9ea7e0SNamjae Jeon 	lcn = rl->lcn + rl->length;
11771e9ea7e0SNamjae Jeon 	ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.",
11781e9ea7e0SNamjae Jeon 			(long long)lcn);
11791e9ea7e0SNamjae Jeon 	/*
11801e9ea7e0SNamjae Jeon 	 * Attempt to get the cluster following the last allocated cluster by
11811e9ea7e0SNamjae Jeon 	 * hand as it may be in the MFT zone so the allocator would not give it
11821e9ea7e0SNamjae Jeon 	 * to us.
11831e9ea7e0SNamjae Jeon 	 */
11841e9ea7e0SNamjae Jeon 	ll = lcn >> 3;
1185115380f9SNamjae Jeon 	folio = read_mapping_folio(vol->lcnbmp_ino->i_mapping,
1186115380f9SNamjae Jeon 			ll >> PAGE_SHIFT, NULL);
1187115380f9SNamjae Jeon 	if (IS_ERR(folio)) {
11881e9ea7e0SNamjae Jeon 		up_write(&mftbmp_ni->runlist.lock);
11891e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
1190115380f9SNamjae Jeon 		return PTR_ERR(folio);
11911e9ea7e0SNamjae Jeon 	}
1192115380f9SNamjae Jeon 
11931e9ea7e0SNamjae Jeon 	down_write(&vol->lcnbmp_lock);
1194115380f9SNamjae Jeon 	folio_lock(folio);
1195115380f9SNamjae Jeon 	b = (u8 *)kmap_local_folio(folio, 0) + (ll & ~PAGE_MASK);
1196115380f9SNamjae Jeon 	tb = 1 << (lcn & 7ull);
11971e9ea7e0SNamjae Jeon 	if (*b != 0xff && !(*b & tb)) {
11981e9ea7e0SNamjae Jeon 		/* Next cluster is free, allocate it. */
11991e9ea7e0SNamjae Jeon 		*b |= tb;
1200115380f9SNamjae Jeon 		folio_mark_dirty(folio);
1201115380f9SNamjae Jeon 		folio_unlock(folio);
1202115380f9SNamjae Jeon 		kunmap_local(b);
1203115380f9SNamjae Jeon 		folio_put(folio);
12041e9ea7e0SNamjae Jeon 		up_write(&vol->lcnbmp_lock);
12051e9ea7e0SNamjae Jeon 		/* Update the mft bitmap runlist. */
12061e9ea7e0SNamjae Jeon 		rl->length++;
12071e9ea7e0SNamjae Jeon 		rl[1].vcn++;
12081e9ea7e0SNamjae Jeon 		status.added_cluster = 1;
12091e9ea7e0SNamjae Jeon 		ntfs_debug("Appending one cluster to mft bitmap.");
12101e9ea7e0SNamjae Jeon 	} else {
1211115380f9SNamjae Jeon 		folio_unlock(folio);
1212115380f9SNamjae Jeon 		kunmap_local(b);
1213115380f9SNamjae Jeon 		folio_put(folio);
12141e9ea7e0SNamjae Jeon 		up_write(&vol->lcnbmp_lock);
12151e9ea7e0SNamjae Jeon 		/* Allocate a cluster from the DATA_ZONE. */
12161e9ea7e0SNamjae Jeon 		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
1217115380f9SNamjae Jeon 				true, false, false);
12181e9ea7e0SNamjae Jeon 		if (IS_ERR(rl2)) {
12191e9ea7e0SNamjae Jeon 			up_write(&mftbmp_ni->runlist.lock);
1220115380f9SNamjae Jeon 			ntfs_error(vol->sb,
1221115380f9SNamjae Jeon 					"Failed to allocate a cluster for the mft bitmap.");
12221e9ea7e0SNamjae Jeon 			return PTR_ERR(rl2);
12231e9ea7e0SNamjae Jeon 		}
1224115380f9SNamjae Jeon 		rl = ntfs_runlists_merge(&mftbmp_ni->runlist, rl2, 0, &new_rl_count);
12251e9ea7e0SNamjae Jeon 		if (IS_ERR(rl)) {
12261e9ea7e0SNamjae Jeon 			up_write(&mftbmp_ni->runlist.lock);
1227115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to merge runlists for mft bitmap.");
12281e9ea7e0SNamjae Jeon 			if (ntfs_cluster_free_from_rl(vol, rl2)) {
1229115380f9SNamjae Jeon 				ntfs_error(vol->sb, "Failed to deallocate allocated cluster.%s",
1230115380f9SNamjae Jeon 						es);
12311e9ea7e0SNamjae Jeon 				NVolSetErrors(vol);
12321e9ea7e0SNamjae Jeon 			}
1233115380f9SNamjae Jeon 			kvfree(rl2);
12341e9ea7e0SNamjae Jeon 			return PTR_ERR(rl);
12351e9ea7e0SNamjae Jeon 		}
12361e9ea7e0SNamjae Jeon 		mftbmp_ni->runlist.rl = rl;
1237115380f9SNamjae Jeon 		mftbmp_ni->runlist.count = new_rl_count;
12381e9ea7e0SNamjae Jeon 		status.added_run = 1;
12391e9ea7e0SNamjae Jeon 		ntfs_debug("Adding one run to mft bitmap.");
12401e9ea7e0SNamjae Jeon 		/* Find the last run in the new runlist. */
12411e9ea7e0SNamjae Jeon 		for (; rl[1].length; rl++)
12421e9ea7e0SNamjae Jeon 			;
12431e9ea7e0SNamjae Jeon 	}
12441e9ea7e0SNamjae Jeon 	/*
12451e9ea7e0SNamjae Jeon 	 * Update the attribute record as well.  Note: @rl is the last
12461e9ea7e0SNamjae Jeon 	 * (non-terminator) runlist element of mft bitmap.
12471e9ea7e0SNamjae Jeon 	 */
12481e9ea7e0SNamjae Jeon 	mrec = map_mft_record(mft_ni);
12491e9ea7e0SNamjae Jeon 	if (IS_ERR(mrec)) {
12501e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft record.");
12511e9ea7e0SNamjae Jeon 		ret = PTR_ERR(mrec);
12521e9ea7e0SNamjae Jeon 		goto undo_alloc;
12531e9ea7e0SNamjae Jeon 	}
12541e9ea7e0SNamjae Jeon 	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
12551e9ea7e0SNamjae Jeon 	if (unlikely(!ctx)) {
12561e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to get search context.");
12571e9ea7e0SNamjae Jeon 		ret = -ENOMEM;
12581e9ea7e0SNamjae Jeon 		goto undo_alloc;
12591e9ea7e0SNamjae Jeon 	}
12601e9ea7e0SNamjae Jeon 	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
12611e9ea7e0SNamjae Jeon 			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
12621e9ea7e0SNamjae Jeon 			0, ctx);
12631e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1264115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1265115380f9SNamjae Jeon 			"Failed to find last attribute extent of mft bitmap attribute.");
12661e9ea7e0SNamjae Jeon 		if (ret == -ENOENT)
12671e9ea7e0SNamjae Jeon 			ret = -EIO;
12681e9ea7e0SNamjae Jeon 		goto undo_alloc;
12691e9ea7e0SNamjae Jeon 	}
12701e9ea7e0SNamjae Jeon 	a = ctx->attr;
1271115380f9SNamjae Jeon 	ll = le64_to_cpu(a->data.non_resident.lowest_vcn);
12721e9ea7e0SNamjae Jeon 	/* Search back for the previous last allocated cluster of mft bitmap. */
12731e9ea7e0SNamjae Jeon 	for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) {
12741e9ea7e0SNamjae Jeon 		if (ll >= rl2->vcn)
12751e9ea7e0SNamjae Jeon 			break;
12761e9ea7e0SNamjae Jeon 	}
1277115380f9SNamjae Jeon 	WARN_ON(ll < rl2->vcn);
1278115380f9SNamjae Jeon 	WARN_ON(ll >= rl2->vcn + rl2->length);
12791e9ea7e0SNamjae Jeon 	/* Get the size for the new mapping pairs array for this extent. */
1280115380f9SNamjae Jeon 	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1);
12811e9ea7e0SNamjae Jeon 	if (unlikely(mp_size <= 0)) {
1282115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1283115380f9SNamjae Jeon 			"Get size for mapping pairs failed for mft bitmap attribute extent.");
12841e9ea7e0SNamjae Jeon 		ret = mp_size;
12851e9ea7e0SNamjae Jeon 		if (!ret)
12861e9ea7e0SNamjae Jeon 			ret = -EIO;
12871e9ea7e0SNamjae Jeon 		goto undo_alloc;
12881e9ea7e0SNamjae Jeon 	}
12891e9ea7e0SNamjae Jeon 	/* Expand the attribute record if necessary. */
12901e9ea7e0SNamjae Jeon 	old_alen = le32_to_cpu(a->length);
12911e9ea7e0SNamjae Jeon 	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
12921e9ea7e0SNamjae Jeon 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
12931e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1294115380f9SNamjae Jeon 		ret = ntfs_mft_attr_extend(mftbmp_ni);
1295115380f9SNamjae Jeon 		if (!ret)
1296115380f9SNamjae Jeon 			goto extended_ok;
1297115380f9SNamjae Jeon 		if (ret != -EAGAIN)
1298115380f9SNamjae Jeon 			status.mp_extended = 1;
12991e9ea7e0SNamjae Jeon 		goto undo_alloc;
13001e9ea7e0SNamjae Jeon 	}
13011e9ea7e0SNamjae Jeon 	status.mp_rebuilt = 1;
13021e9ea7e0SNamjae Jeon 	/* Generate the mapping pairs array directly into the attr record. */
13031e9ea7e0SNamjae Jeon 	ret = ntfs_mapping_pairs_build(vol, (u8 *)a +
13041e9ea7e0SNamjae Jeon 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1305115380f9SNamjae Jeon 			mp_size, rl2, ll, -1, NULL, NULL, NULL);
13061e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1307115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1308115380f9SNamjae Jeon 			"Failed to build mapping pairs array for mft bitmap attribute.");
13091e9ea7e0SNamjae Jeon 		goto undo_alloc;
13101e9ea7e0SNamjae Jeon 	}
13111e9ea7e0SNamjae Jeon 	/* Update the highest_vcn. */
1312115380f9SNamjae Jeon 	a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1);
13131e9ea7e0SNamjae Jeon 	/*
13141e9ea7e0SNamjae Jeon 	 * We now have extended the mft bitmap allocated_size by one cluster.
1315115380f9SNamjae Jeon 	 * Reflect this in the struct ntfs_inode structure and the attribute record.
13161e9ea7e0SNamjae Jeon 	 */
13171e9ea7e0SNamjae Jeon 	if (a->data.non_resident.lowest_vcn) {
13181e9ea7e0SNamjae Jeon 		/*
13191e9ea7e0SNamjae Jeon 		 * We are not in the first attribute extent, switch to it, but
13201e9ea7e0SNamjae Jeon 		 * first ensure the changes will make it to disk later.
13211e9ea7e0SNamjae Jeon 		 */
13221e9ea7e0SNamjae Jeon 		mark_mft_record_dirty(ctx->ntfs_ino);
1323115380f9SNamjae Jeon extended_ok:
13241e9ea7e0SNamjae Jeon 		ntfs_attr_reinit_search_ctx(ctx);
13251e9ea7e0SNamjae Jeon 		ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
13261e9ea7e0SNamjae Jeon 				mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL,
13271e9ea7e0SNamjae Jeon 				0, ctx);
13281e9ea7e0SNamjae Jeon 		if (unlikely(ret)) {
1329115380f9SNamjae Jeon 			ntfs_error(vol->sb,
1330115380f9SNamjae Jeon 				"Failed to find first attribute extent of mft bitmap attribute.");
13311e9ea7e0SNamjae Jeon 			goto restore_undo_alloc;
13321e9ea7e0SNamjae Jeon 		}
13331e9ea7e0SNamjae Jeon 		a = ctx->attr;
13341e9ea7e0SNamjae Jeon 	}
1335115380f9SNamjae Jeon 
13361e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
13371e9ea7e0SNamjae Jeon 	mftbmp_ni->allocated_size += vol->cluster_size;
13381e9ea7e0SNamjae Jeon 	a->data.non_resident.allocated_size =
1339115380f9SNamjae Jeon 			cpu_to_le64(mftbmp_ni->allocated_size);
13401e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
13411e9ea7e0SNamjae Jeon 	/* Ensure the changes make it to disk. */
13421e9ea7e0SNamjae Jeon 	mark_mft_record_dirty(ctx->ntfs_ino);
13431e9ea7e0SNamjae Jeon 	ntfs_attr_put_search_ctx(ctx);
13441e9ea7e0SNamjae Jeon 	unmap_mft_record(mft_ni);
13451e9ea7e0SNamjae Jeon 	up_write(&mftbmp_ni->runlist.lock);
13461e9ea7e0SNamjae Jeon 	ntfs_debug("Done.");
13471e9ea7e0SNamjae Jeon 	return 0;
1348115380f9SNamjae Jeon 
13491e9ea7e0SNamjae Jeon restore_undo_alloc:
13501e9ea7e0SNamjae Jeon 	ntfs_attr_reinit_search_ctx(ctx);
13511e9ea7e0SNamjae Jeon 	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
13521e9ea7e0SNamjae Jeon 			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
13531e9ea7e0SNamjae Jeon 			0, ctx)) {
1354115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1355115380f9SNamjae Jeon 			"Failed to find last attribute extent of mft bitmap attribute.%s", es);
13561e9ea7e0SNamjae Jeon 		write_lock_irqsave(&mftbmp_ni->size_lock, flags);
13571e9ea7e0SNamjae Jeon 		mftbmp_ni->allocated_size += vol->cluster_size;
13581e9ea7e0SNamjae Jeon 		write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
13591e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
13601e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
13611e9ea7e0SNamjae Jeon 		up_write(&mftbmp_ni->runlist.lock);
13621e9ea7e0SNamjae Jeon 		/*
13631e9ea7e0SNamjae Jeon 		 * The only thing that is now wrong is ->allocated_size of the
13641e9ea7e0SNamjae Jeon 		 * base attribute extent which chkdsk should be able to fix.
13651e9ea7e0SNamjae Jeon 		 */
13661e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
13671e9ea7e0SNamjae Jeon 		return ret;
13681e9ea7e0SNamjae Jeon 	}
13691e9ea7e0SNamjae Jeon 	a = ctx->attr;
1370115380f9SNamjae Jeon 	a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 2);
13711e9ea7e0SNamjae Jeon undo_alloc:
13721e9ea7e0SNamjae Jeon 	if (status.added_cluster) {
13731e9ea7e0SNamjae Jeon 		/* Truncate the last run in the runlist by one cluster. */
13741e9ea7e0SNamjae Jeon 		rl->length--;
13751e9ea7e0SNamjae Jeon 		rl[1].vcn--;
13761e9ea7e0SNamjae Jeon 	} else if (status.added_run) {
13771e9ea7e0SNamjae Jeon 		lcn = rl->lcn;
13781e9ea7e0SNamjae Jeon 		/* Remove the last run from the runlist. */
13791e9ea7e0SNamjae Jeon 		rl->lcn = rl[1].lcn;
13801e9ea7e0SNamjae Jeon 		rl->length = 0;
1381115380f9SNamjae Jeon 		mftbmp_ni->runlist.count--;
13821e9ea7e0SNamjae Jeon 	}
13831e9ea7e0SNamjae Jeon 	/* Deallocate the cluster. */
13841e9ea7e0SNamjae Jeon 	down_write(&vol->lcnbmp_lock);
13851e9ea7e0SNamjae Jeon 	if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
13861e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es);
13871e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
1388115380f9SNamjae Jeon 	} else
1389115380f9SNamjae Jeon 		ntfs_inc_free_clusters(vol, 1);
13901e9ea7e0SNamjae Jeon 	up_write(&vol->lcnbmp_lock);
13911e9ea7e0SNamjae Jeon 	if (status.mp_rebuilt) {
13921e9ea7e0SNamjae Jeon 		if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu(
13931e9ea7e0SNamjae Jeon 				a->data.non_resident.mapping_pairs_offset),
13941e9ea7e0SNamjae Jeon 				old_alen - le16_to_cpu(
13951e9ea7e0SNamjae Jeon 				a->data.non_resident.mapping_pairs_offset),
1396115380f9SNamjae Jeon 				rl2, ll, -1, NULL, NULL, NULL)) {
1397115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es);
13981e9ea7e0SNamjae Jeon 			NVolSetErrors(vol);
13991e9ea7e0SNamjae Jeon 		}
14001e9ea7e0SNamjae Jeon 		if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
1401115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to restore attribute record.%s", es);
14021e9ea7e0SNamjae Jeon 			NVolSetErrors(vol);
14031e9ea7e0SNamjae Jeon 		}
14041e9ea7e0SNamjae Jeon 		mark_mft_record_dirty(ctx->ntfs_ino);
1405115380f9SNamjae Jeon 	} else if (status.mp_extended && ntfs_attr_update_mapping_pairs(mftbmp_ni, 0)) {
1406115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to restore mapping pairs.%s", es);
1407115380f9SNamjae Jeon 		NVolSetErrors(vol);
14081e9ea7e0SNamjae Jeon 	}
14091e9ea7e0SNamjae Jeon 	if (ctx)
14101e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
14111e9ea7e0SNamjae Jeon 	if (!IS_ERR(mrec))
14121e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
14131e9ea7e0SNamjae Jeon 	up_write(&mftbmp_ni->runlist.lock);
14141e9ea7e0SNamjae Jeon 	return ret;
14151e9ea7e0SNamjae Jeon }
14161e9ea7e0SNamjae Jeon 
1417115380f9SNamjae Jeon /*
14181e9ea7e0SNamjae Jeon  * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data
14191e9ea7e0SNamjae Jeon  * @vol:	volume on which to extend the mft bitmap attribute
14201e9ea7e0SNamjae Jeon  *
14211e9ea7e0SNamjae Jeon  * Extend the initialized portion of the mft bitmap attribute on the ntfs
14221e9ea7e0SNamjae Jeon  * volume @vol by 8 bytes.
14231e9ea7e0SNamjae Jeon  *
14241e9ea7e0SNamjae Jeon  * Note:  Only changes initialized_size and data_size, i.e. requires that
14251e9ea7e0SNamjae Jeon  * allocated_size is big enough to fit the new initialized_size.
14261e9ea7e0SNamjae Jeon  *
14271e9ea7e0SNamjae Jeon  * Return 0 on success and -error on error.
14281e9ea7e0SNamjae Jeon  *
14291e9ea7e0SNamjae Jeon  * Locking: Caller must hold vol->mftbmp_lock for writing.
14301e9ea7e0SNamjae Jeon  */
1431115380f9SNamjae Jeon static int ntfs_mft_bitmap_extend_initialized_nolock(struct ntfs_volume *vol)
14321e9ea7e0SNamjae Jeon {
14331e9ea7e0SNamjae Jeon 	s64 old_data_size, old_initialized_size;
14341e9ea7e0SNamjae Jeon 	unsigned long flags;
14351e9ea7e0SNamjae Jeon 	struct inode *mftbmp_vi;
1436115380f9SNamjae Jeon 	struct ntfs_inode *mft_ni, *mftbmp_ni;
1437115380f9SNamjae Jeon 	struct ntfs_attr_search_ctx *ctx;
1438115380f9SNamjae Jeon 	struct mft_record *mrec;
1439115380f9SNamjae Jeon 	struct attr_record *a;
14401e9ea7e0SNamjae Jeon 	int ret;
14411e9ea7e0SNamjae Jeon 
1442e6eb3a05SColin Ian King 	ntfs_debug("Extending mft bitmap initialized (and data) size.");
14431e9ea7e0SNamjae Jeon 	mft_ni = NTFS_I(vol->mft_ino);
14441e9ea7e0SNamjae Jeon 	mftbmp_vi = vol->mftbmp_ino;
14451e9ea7e0SNamjae Jeon 	mftbmp_ni = NTFS_I(mftbmp_vi);
14461e9ea7e0SNamjae Jeon 	/* Get the attribute record. */
14471e9ea7e0SNamjae Jeon 	mrec = map_mft_record(mft_ni);
14481e9ea7e0SNamjae Jeon 	if (IS_ERR(mrec)) {
14491e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft record.");
14501e9ea7e0SNamjae Jeon 		return PTR_ERR(mrec);
14511e9ea7e0SNamjae Jeon 	}
14521e9ea7e0SNamjae Jeon 	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
14531e9ea7e0SNamjae Jeon 	if (unlikely(!ctx)) {
14541e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to get search context.");
14551e9ea7e0SNamjae Jeon 		ret = -ENOMEM;
14561e9ea7e0SNamjae Jeon 		goto unm_err_out;
14571e9ea7e0SNamjae Jeon 	}
14581e9ea7e0SNamjae Jeon 	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
14591e9ea7e0SNamjae Jeon 			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx);
14601e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1461115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1462115380f9SNamjae Jeon 			"Failed to find first attribute extent of mft bitmap attribute.");
14631e9ea7e0SNamjae Jeon 		if (ret == -ENOENT)
14641e9ea7e0SNamjae Jeon 			ret = -EIO;
14651e9ea7e0SNamjae Jeon 		goto put_err_out;
14661e9ea7e0SNamjae Jeon 	}
14671e9ea7e0SNamjae Jeon 	a = ctx->attr;
14681e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
14691e9ea7e0SNamjae Jeon 	old_data_size = i_size_read(mftbmp_vi);
14701e9ea7e0SNamjae Jeon 	old_initialized_size = mftbmp_ni->initialized_size;
14711e9ea7e0SNamjae Jeon 	/*
14721e9ea7e0SNamjae Jeon 	 * We can simply update the initialized_size before filling the space
14731e9ea7e0SNamjae Jeon 	 * with zeroes because the caller is holding the mft bitmap lock for
14741e9ea7e0SNamjae Jeon 	 * writing which ensures that no one else is trying to access the data.
14751e9ea7e0SNamjae Jeon 	 */
14761e9ea7e0SNamjae Jeon 	mftbmp_ni->initialized_size += 8;
14771e9ea7e0SNamjae Jeon 	a->data.non_resident.initialized_size =
1478115380f9SNamjae Jeon 			cpu_to_le64(mftbmp_ni->initialized_size);
14791e9ea7e0SNamjae Jeon 	if (mftbmp_ni->initialized_size > old_data_size) {
14801e9ea7e0SNamjae Jeon 		i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
14811e9ea7e0SNamjae Jeon 		a->data.non_resident.data_size =
1482115380f9SNamjae Jeon 				cpu_to_le64(mftbmp_ni->initialized_size);
14831e9ea7e0SNamjae Jeon 	}
14841e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
14851e9ea7e0SNamjae Jeon 	/* Ensure the changes make it to disk. */
14861e9ea7e0SNamjae Jeon 	mark_mft_record_dirty(ctx->ntfs_ino);
14871e9ea7e0SNamjae Jeon 	ntfs_attr_put_search_ctx(ctx);
14881e9ea7e0SNamjae Jeon 	unmap_mft_record(mft_ni);
14891e9ea7e0SNamjae Jeon 	/* Initialize the mft bitmap attribute value with zeroes. */
14901e9ea7e0SNamjae Jeon 	ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0);
14911e9ea7e0SNamjae Jeon 	if (likely(!ret)) {
1492115380f9SNamjae Jeon 		ntfs_debug("Done.  (Wrote eight initialized bytes to mft bitmap.");
1493115380f9SNamjae Jeon 		ntfs_inc_free_mft_records(vol, 8 * 8);
14941e9ea7e0SNamjae Jeon 		return 0;
14951e9ea7e0SNamjae Jeon 	}
14961e9ea7e0SNamjae Jeon 	ntfs_error(vol->sb, "Failed to write to mft bitmap.");
14971e9ea7e0SNamjae Jeon 	/* Try to recover from the error. */
14981e9ea7e0SNamjae Jeon 	mrec = map_mft_record(mft_ni);
14991e9ea7e0SNamjae Jeon 	if (IS_ERR(mrec)) {
15001e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft record.%s", es);
15011e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
15021e9ea7e0SNamjae Jeon 		return ret;
15031e9ea7e0SNamjae Jeon 	}
15041e9ea7e0SNamjae Jeon 	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
15051e9ea7e0SNamjae Jeon 	if (unlikely(!ctx)) {
15061e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to get search context.%s", es);
15071e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
15081e9ea7e0SNamjae Jeon 		goto unm_err_out;
15091e9ea7e0SNamjae Jeon 	}
15101e9ea7e0SNamjae Jeon 	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
15111e9ea7e0SNamjae Jeon 			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
1512115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1513115380f9SNamjae Jeon 			"Failed to find first attribute extent of mft bitmap attribute.%s", es);
15141e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
15151e9ea7e0SNamjae Jeon put_err_out:
15161e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
15171e9ea7e0SNamjae Jeon unm_err_out:
15181e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
15191e9ea7e0SNamjae Jeon 		goto err_out;
15201e9ea7e0SNamjae Jeon 	}
15211e9ea7e0SNamjae Jeon 	a = ctx->attr;
15221e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
15231e9ea7e0SNamjae Jeon 	mftbmp_ni->initialized_size = old_initialized_size;
15241e9ea7e0SNamjae Jeon 	a->data.non_resident.initialized_size =
1525115380f9SNamjae Jeon 			cpu_to_le64(old_initialized_size);
15261e9ea7e0SNamjae Jeon 	if (i_size_read(mftbmp_vi) != old_data_size) {
15271e9ea7e0SNamjae Jeon 		i_size_write(mftbmp_vi, old_data_size);
1528115380f9SNamjae Jeon 		a->data.non_resident.data_size = cpu_to_le64(old_data_size);
15291e9ea7e0SNamjae Jeon 	}
15301e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
15311e9ea7e0SNamjae Jeon 	mark_mft_record_dirty(ctx->ntfs_ino);
15321e9ea7e0SNamjae Jeon 	ntfs_attr_put_search_ctx(ctx);
15331e9ea7e0SNamjae Jeon 	unmap_mft_record(mft_ni);
15341e9ea7e0SNamjae Jeon #ifdef DEBUG
15351e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
1536115380f9SNamjae Jeon 	ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
1537115380f9SNamjae Jeon 			mftbmp_ni->allocated_size, i_size_read(mftbmp_vi),
1538115380f9SNamjae Jeon 			mftbmp_ni->initialized_size);
15391e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
15401e9ea7e0SNamjae Jeon #endif /* DEBUG */
15411e9ea7e0SNamjae Jeon err_out:
15421e9ea7e0SNamjae Jeon 	return ret;
15431e9ea7e0SNamjae Jeon }
15441e9ea7e0SNamjae Jeon 
1545115380f9SNamjae Jeon /*
15461e9ea7e0SNamjae Jeon  * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute
15471e9ea7e0SNamjae Jeon  * @vol:	volume on which to extend the mft data attribute
15481e9ea7e0SNamjae Jeon  *
15491e9ea7e0SNamjae Jeon  * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
15501e9ea7e0SNamjae Jeon  * worth of clusters or if not enough space for this by one mft record worth
15511e9ea7e0SNamjae Jeon  * of clusters.
15521e9ea7e0SNamjae Jeon  *
15531e9ea7e0SNamjae Jeon  * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
15541e9ea7e0SNamjae Jeon  * data_size.
15551e9ea7e0SNamjae Jeon  *
15561e9ea7e0SNamjae Jeon  * Return 0 on success and -errno on error.
15571e9ea7e0SNamjae Jeon  *
15581e9ea7e0SNamjae Jeon  * Locking: - Caller must hold vol->mftbmp_lock for writing.
15591e9ea7e0SNamjae Jeon  *	    - This function takes NTFS_I(vol->mft_ino)->runlist.lock for
15601e9ea7e0SNamjae Jeon  *	      writing and releases it before returning.
15611e9ea7e0SNamjae Jeon  *	    - This function calls functions which take vol->lcnbmp_lock for
15621e9ea7e0SNamjae Jeon  *	      writing and release it before returning.
15631e9ea7e0SNamjae Jeon  */
1564115380f9SNamjae Jeon static int ntfs_mft_data_extend_allocation_nolock(struct ntfs_volume *vol)
15651e9ea7e0SNamjae Jeon {
1566115380f9SNamjae Jeon 	s64 lcn;
1567115380f9SNamjae Jeon 	s64 old_last_vcn;
15681e9ea7e0SNamjae Jeon 	s64 min_nr, nr, ll;
15691e9ea7e0SNamjae Jeon 	unsigned long flags;
1570115380f9SNamjae Jeon 	struct ntfs_inode *mft_ni;
1571115380f9SNamjae Jeon 	struct runlist_element *rl, *rl2;
1572115380f9SNamjae Jeon 	struct ntfs_attr_search_ctx *ctx = NULL;
1573115380f9SNamjae Jeon 	struct mft_record *mrec;
1574115380f9SNamjae Jeon 	struct attr_record *a = NULL;
15751e9ea7e0SNamjae Jeon 	int ret, mp_size;
15761e9ea7e0SNamjae Jeon 	u32 old_alen = 0;
1577115380f9SNamjae Jeon 	bool mp_rebuilt = false, mp_extended = false;
1578115380f9SNamjae Jeon 	size_t new_rl_count;
15791e9ea7e0SNamjae Jeon 
15801e9ea7e0SNamjae Jeon 	ntfs_debug("Extending mft data allocation.");
15811e9ea7e0SNamjae Jeon 	mft_ni = NTFS_I(vol->mft_ino);
15821e9ea7e0SNamjae Jeon 	/*
15831e9ea7e0SNamjae Jeon 	 * Determine the preferred allocation location, i.e. the last lcn of
15841e9ea7e0SNamjae Jeon 	 * the mft data attribute.  The allocated size of the mft data
15851e9ea7e0SNamjae Jeon 	 * attribute cannot be zero so we are ok to do this.
15861e9ea7e0SNamjae Jeon 	 */
15871e9ea7e0SNamjae Jeon 	down_write(&mft_ni->runlist.lock);
15881e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
15891e9ea7e0SNamjae Jeon 	ll = mft_ni->allocated_size;
15901e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
15911e9ea7e0SNamjae Jeon 	rl = ntfs_attr_find_vcn_nolock(mft_ni,
1592115380f9SNamjae Jeon 			NTFS_B_TO_CLU(vol, ll - 1), NULL);
15931e9ea7e0SNamjae Jeon 	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
15941e9ea7e0SNamjae Jeon 		up_write(&mft_ni->runlist.lock);
1595115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1596115380f9SNamjae Jeon 			"Failed to determine last allocated cluster of mft data attribute.");
15971e9ea7e0SNamjae Jeon 		if (!IS_ERR(rl))
15981e9ea7e0SNamjae Jeon 			ret = -EIO;
15991e9ea7e0SNamjae Jeon 		else
16001e9ea7e0SNamjae Jeon 			ret = PTR_ERR(rl);
16011e9ea7e0SNamjae Jeon 		return ret;
16021e9ea7e0SNamjae Jeon 	}
16031e9ea7e0SNamjae Jeon 	lcn = rl->lcn + rl->length;
1604115380f9SNamjae Jeon 	ntfs_debug("Last lcn of mft data attribute is 0x%llx.", lcn);
16051e9ea7e0SNamjae Jeon 	/* Minimum allocation is one mft record worth of clusters. */
1606115380f9SNamjae Jeon 	min_nr = NTFS_B_TO_CLU(vol, vol->mft_record_size);
16071e9ea7e0SNamjae Jeon 	if (!min_nr)
16081e9ea7e0SNamjae Jeon 		min_nr = 1;
16091e9ea7e0SNamjae Jeon 	/* Want to allocate 16 mft records worth of clusters. */
16101e9ea7e0SNamjae Jeon 	nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
16111e9ea7e0SNamjae Jeon 	if (!nr)
16121e9ea7e0SNamjae Jeon 		nr = min_nr;
16131e9ea7e0SNamjae Jeon 	/* Ensure we do not go above 2^32-1 mft records. */
16141e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
16151e9ea7e0SNamjae Jeon 	ll = mft_ni->allocated_size;
16161e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
1617115380f9SNamjae Jeon 	if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >>
16181e9ea7e0SNamjae Jeon 			vol->mft_record_size_bits >= (1ll << 32))) {
16191e9ea7e0SNamjae Jeon 		nr = min_nr;
1620115380f9SNamjae Jeon 		if (unlikely((ll + NTFS_CLU_TO_B(vol, nr)) >>
16211e9ea7e0SNamjae Jeon 				vol->mft_record_size_bits >= (1ll << 32))) {
1622115380f9SNamjae Jeon 			ntfs_warning(vol->sb,
1623115380f9SNamjae Jeon 				"Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached.");
16241e9ea7e0SNamjae Jeon 			up_write(&mft_ni->runlist.lock);
16251e9ea7e0SNamjae Jeon 			return -ENOSPC;
16261e9ea7e0SNamjae Jeon 		}
16271e9ea7e0SNamjae Jeon 	}
16281e9ea7e0SNamjae Jeon 	ntfs_debug("Trying mft data allocation with %s cluster count %lli.",
16291e9ea7e0SNamjae Jeon 			nr > min_nr ? "default" : "minimal", (long long)nr);
16301e9ea7e0SNamjae Jeon 	old_last_vcn = rl[1].vcn;
1631115380f9SNamjae Jeon 	/*
1632115380f9SNamjae Jeon 	 * We can release the mft_ni runlist lock, Because this function is
1633115380f9SNamjae Jeon 	 * the only one that expends $MFT data attribute and is called with
1634115380f9SNamjae Jeon 	 * mft_ni->mrec_lock.
1635115380f9SNamjae Jeon 	 * This is required for the lock order, vol->lcnbmp_lock =>
1636115380f9SNamjae Jeon 	 * mft_ni->runlist.lock.
1637115380f9SNamjae Jeon 	 */
1638115380f9SNamjae Jeon 	up_write(&mft_ni->runlist.lock);
1639115380f9SNamjae Jeon 
16401e9ea7e0SNamjae Jeon 	do {
16411e9ea7e0SNamjae Jeon 		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
1642115380f9SNamjae Jeon 				true, false, false);
16431e9ea7e0SNamjae Jeon 		if (!IS_ERR(rl2))
16441e9ea7e0SNamjae Jeon 			break;
16451e9ea7e0SNamjae Jeon 		if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
1646115380f9SNamjae Jeon 			ntfs_error(vol->sb,
1647115380f9SNamjae Jeon 				"Failed to allocate the minimal number of clusters (%lli) for the mft data attribute.",
1648115380f9SNamjae Jeon 				nr);
16491e9ea7e0SNamjae Jeon 			return PTR_ERR(rl2);
16501e9ea7e0SNamjae Jeon 		}
16511e9ea7e0SNamjae Jeon 		/*
16521e9ea7e0SNamjae Jeon 		 * There is not enough space to do the allocation, but there
16531e9ea7e0SNamjae Jeon 		 * might be enough space to do a minimal allocation so try that
16541e9ea7e0SNamjae Jeon 		 * before failing.
16551e9ea7e0SNamjae Jeon 		 */
16561e9ea7e0SNamjae Jeon 		nr = min_nr;
1657115380f9SNamjae Jeon 		ntfs_debug("Retrying mft data allocation with minimal cluster count %lli.", nr);
16581e9ea7e0SNamjae Jeon 	} while (1);
1659115380f9SNamjae Jeon 
1660115380f9SNamjae Jeon 	down_write(&mft_ni->runlist.lock);
1661115380f9SNamjae Jeon 	rl = ntfs_runlists_merge(&mft_ni->runlist, rl2, 0, &new_rl_count);
16621e9ea7e0SNamjae Jeon 	if (IS_ERR(rl)) {
16631e9ea7e0SNamjae Jeon 		up_write(&mft_ni->runlist.lock);
1664115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to merge runlists for mft data attribute.");
16651e9ea7e0SNamjae Jeon 		if (ntfs_cluster_free_from_rl(vol, rl2)) {
1666115380f9SNamjae Jeon 			ntfs_error(vol->sb,
1667115380f9SNamjae Jeon 				"Failed to deallocate clusters from the mft data attribute.%s", es);
16681e9ea7e0SNamjae Jeon 			NVolSetErrors(vol);
16691e9ea7e0SNamjae Jeon 		}
1670115380f9SNamjae Jeon 		kvfree(rl2);
16711e9ea7e0SNamjae Jeon 		return PTR_ERR(rl);
16721e9ea7e0SNamjae Jeon 	}
16731e9ea7e0SNamjae Jeon 	mft_ni->runlist.rl = rl;
1674115380f9SNamjae Jeon 	mft_ni->runlist.count = new_rl_count;
16751e9ea7e0SNamjae Jeon 	ntfs_debug("Allocated %lli clusters.", (long long)nr);
16761e9ea7e0SNamjae Jeon 	/* Find the last run in the new runlist. */
16771e9ea7e0SNamjae Jeon 	for (; rl[1].length; rl++)
16781e9ea7e0SNamjae Jeon 		;
1679115380f9SNamjae Jeon 	up_write(&mft_ni->runlist.lock);
1680115380f9SNamjae Jeon 
16811e9ea7e0SNamjae Jeon 	/* Update the attribute record as well. */
16821e9ea7e0SNamjae Jeon 	mrec = map_mft_record(mft_ni);
16831e9ea7e0SNamjae Jeon 	if (IS_ERR(mrec)) {
16841e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft record.");
16851e9ea7e0SNamjae Jeon 		ret = PTR_ERR(mrec);
1686115380f9SNamjae Jeon 		down_write(&mft_ni->runlist.lock);
16871e9ea7e0SNamjae Jeon 		goto undo_alloc;
16881e9ea7e0SNamjae Jeon 	}
16891e9ea7e0SNamjae Jeon 	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
16901e9ea7e0SNamjae Jeon 	if (unlikely(!ctx)) {
16911e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to get search context.");
16921e9ea7e0SNamjae Jeon 		ret = -ENOMEM;
16931e9ea7e0SNamjae Jeon 		goto undo_alloc;
16941e9ea7e0SNamjae Jeon 	}
16951e9ea7e0SNamjae Jeon 	ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
16961e9ea7e0SNamjae Jeon 			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx);
16971e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1698115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to find last attribute extent of mft data attribute.");
16991e9ea7e0SNamjae Jeon 		if (ret == -ENOENT)
17001e9ea7e0SNamjae Jeon 			ret = -EIO;
17011e9ea7e0SNamjae Jeon 		goto undo_alloc;
17021e9ea7e0SNamjae Jeon 	}
17031e9ea7e0SNamjae Jeon 	a = ctx->attr;
1704115380f9SNamjae Jeon 	ll = le64_to_cpu(a->data.non_resident.lowest_vcn);
1705115380f9SNamjae Jeon 
1706115380f9SNamjae Jeon 	down_write(&mft_ni->runlist.lock);
17071e9ea7e0SNamjae Jeon 	/* Search back for the previous last allocated cluster of mft bitmap. */
17081e9ea7e0SNamjae Jeon 	for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) {
17091e9ea7e0SNamjae Jeon 		if (ll >= rl2->vcn)
17101e9ea7e0SNamjae Jeon 			break;
17111e9ea7e0SNamjae Jeon 	}
1712115380f9SNamjae Jeon 	WARN_ON(ll < rl2->vcn);
1713115380f9SNamjae Jeon 	WARN_ON(ll >= rl2->vcn + rl2->length);
17141e9ea7e0SNamjae Jeon 	/* Get the size for the new mapping pairs array for this extent. */
1715115380f9SNamjae Jeon 	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1, -1);
17161e9ea7e0SNamjae Jeon 	if (unlikely(mp_size <= 0)) {
1717115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1718115380f9SNamjae Jeon 			"Get size for mapping pairs failed for mft data attribute extent.");
17191e9ea7e0SNamjae Jeon 		ret = mp_size;
17201e9ea7e0SNamjae Jeon 		if (!ret)
17211e9ea7e0SNamjae Jeon 			ret = -EIO;
1722115380f9SNamjae Jeon 		up_write(&mft_ni->runlist.lock);
17231e9ea7e0SNamjae Jeon 		goto undo_alloc;
17241e9ea7e0SNamjae Jeon 	}
1725115380f9SNamjae Jeon 	up_write(&mft_ni->runlist.lock);
1726115380f9SNamjae Jeon 
17271e9ea7e0SNamjae Jeon 	/* Expand the attribute record if necessary. */
17281e9ea7e0SNamjae Jeon 	old_alen = le32_to_cpu(a->length);
17291e9ea7e0SNamjae Jeon 	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
17301e9ea7e0SNamjae Jeon 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
17311e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1732115380f9SNamjae Jeon 		ret = ntfs_mft_attr_extend(mft_ni);
1733115380f9SNamjae Jeon 		if (!ret)
1734115380f9SNamjae Jeon 			goto extended_ok;
1735115380f9SNamjae Jeon 		if (ret != -EAGAIN)
1736115380f9SNamjae Jeon 			mp_extended = true;
17371e9ea7e0SNamjae Jeon 		goto undo_alloc;
17381e9ea7e0SNamjae Jeon 	}
17391e9ea7e0SNamjae Jeon 	mp_rebuilt = true;
17401e9ea7e0SNamjae Jeon 	/* Generate the mapping pairs array directly into the attr record. */
17411e9ea7e0SNamjae Jeon 	ret = ntfs_mapping_pairs_build(vol, (u8 *)a +
17421e9ea7e0SNamjae Jeon 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1743115380f9SNamjae Jeon 			mp_size, rl2, ll, -1, NULL, NULL, NULL);
17441e9ea7e0SNamjae Jeon 	if (unlikely(ret)) {
1745115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to build mapping pairs array of mft data attribute.");
17461e9ea7e0SNamjae Jeon 		goto undo_alloc;
17471e9ea7e0SNamjae Jeon 	}
17481e9ea7e0SNamjae Jeon 	/* Update the highest_vcn. */
1749115380f9SNamjae Jeon 	a->data.non_resident.highest_vcn = cpu_to_le64(rl[1].vcn - 1);
17501e9ea7e0SNamjae Jeon 	/*
17511e9ea7e0SNamjae Jeon 	 * We now have extended the mft data allocated_size by nr clusters.
1752115380f9SNamjae Jeon 	 * Reflect this in the struct ntfs_inode structure and the attribute record.
17531e9ea7e0SNamjae Jeon 	 * @rl is the last (non-terminator) runlist element of mft data
17541e9ea7e0SNamjae Jeon 	 * attribute.
17551e9ea7e0SNamjae Jeon 	 */
17561e9ea7e0SNamjae Jeon 	if (a->data.non_resident.lowest_vcn) {
17571e9ea7e0SNamjae Jeon 		/*
17581e9ea7e0SNamjae Jeon 		 * We are not in the first attribute extent, switch to it, but
17591e9ea7e0SNamjae Jeon 		 * first ensure the changes will make it to disk later.
17601e9ea7e0SNamjae Jeon 		 */
17611e9ea7e0SNamjae Jeon 		mark_mft_record_dirty(ctx->ntfs_ino);
1762115380f9SNamjae Jeon extended_ok:
17631e9ea7e0SNamjae Jeon 		ntfs_attr_reinit_search_ctx(ctx);
17641e9ea7e0SNamjae Jeon 		ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name,
17651e9ea7e0SNamjae Jeon 				mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0,
17661e9ea7e0SNamjae Jeon 				ctx);
17671e9ea7e0SNamjae Jeon 		if (unlikely(ret)) {
1768115380f9SNamjae Jeon 			ntfs_error(vol->sb,
1769115380f9SNamjae Jeon 				"Failed to find first attribute extent of mft data attribute.");
17701e9ea7e0SNamjae Jeon 			goto restore_undo_alloc;
17711e9ea7e0SNamjae Jeon 		}
17721e9ea7e0SNamjae Jeon 		a = ctx->attr;
17731e9ea7e0SNamjae Jeon 	}
1774115380f9SNamjae Jeon 
17751e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mft_ni->size_lock, flags);
1776115380f9SNamjae Jeon 	mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr);
17771e9ea7e0SNamjae Jeon 	a->data.non_resident.allocated_size =
1778115380f9SNamjae Jeon 			cpu_to_le64(mft_ni->allocated_size);
17791e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mft_ni->size_lock, flags);
17801e9ea7e0SNamjae Jeon 	/* Ensure the changes make it to disk. */
17811e9ea7e0SNamjae Jeon 	mark_mft_record_dirty(ctx->ntfs_ino);
17821e9ea7e0SNamjae Jeon 	ntfs_attr_put_search_ctx(ctx);
17831e9ea7e0SNamjae Jeon 	unmap_mft_record(mft_ni);
17841e9ea7e0SNamjae Jeon 	ntfs_debug("Done.");
17851e9ea7e0SNamjae Jeon 	return 0;
17861e9ea7e0SNamjae Jeon restore_undo_alloc:
17871e9ea7e0SNamjae Jeon 	ntfs_attr_reinit_search_ctx(ctx);
17881e9ea7e0SNamjae Jeon 	if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
17891e9ea7e0SNamjae Jeon 			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
1790115380f9SNamjae Jeon 		ntfs_error(vol->sb,
1791115380f9SNamjae Jeon 			"Failed to find last attribute extent of mft data attribute.%s", es);
17921e9ea7e0SNamjae Jeon 		write_lock_irqsave(&mft_ni->size_lock, flags);
1793115380f9SNamjae Jeon 		mft_ni->allocated_size += NTFS_CLU_TO_B(vol, nr);
17941e9ea7e0SNamjae Jeon 		write_unlock_irqrestore(&mft_ni->size_lock, flags);
17951e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
17961e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
17971e9ea7e0SNamjae Jeon 		up_write(&mft_ni->runlist.lock);
17981e9ea7e0SNamjae Jeon 		/*
17991e9ea7e0SNamjae Jeon 		 * The only thing that is now wrong is ->allocated_size of the
18001e9ea7e0SNamjae Jeon 		 * base attribute extent which chkdsk should be able to fix.
18011e9ea7e0SNamjae Jeon 		 */
18021e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
18031e9ea7e0SNamjae Jeon 		return ret;
18041e9ea7e0SNamjae Jeon 	}
18051e9ea7e0SNamjae Jeon 	ctx->attr->data.non_resident.highest_vcn =
1806115380f9SNamjae Jeon 			cpu_to_le64(old_last_vcn - 1);
18071e9ea7e0SNamjae Jeon undo_alloc:
18081e9ea7e0SNamjae Jeon 	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
1809115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to free clusters from mft data attribute.%s", es);
18101e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
18111e9ea7e0SNamjae Jeon 	}
18121e9ea7e0SNamjae Jeon 
18131e9ea7e0SNamjae Jeon 	if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
1814115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to truncate mft data attribute runlist.%s", es);
1815115380f9SNamjae Jeon 		NVolSetErrors(vol);
1816115380f9SNamjae Jeon 	}
1817115380f9SNamjae Jeon 	if (mp_extended && ntfs_attr_update_mapping_pairs(mft_ni, 0)) {
1818115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to restore mapping pairs.%s",
1819115380f9SNamjae Jeon 			   es);
18201e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
18211e9ea7e0SNamjae Jeon 	}
18221e9ea7e0SNamjae Jeon 	if (ctx) {
18231e9ea7e0SNamjae Jeon 		a = ctx->attr;
18241e9ea7e0SNamjae Jeon 		if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
18251e9ea7e0SNamjae Jeon 			if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu(
18261e9ea7e0SNamjae Jeon 				a->data.non_resident.mapping_pairs_offset),
18271e9ea7e0SNamjae Jeon 				old_alen - le16_to_cpu(
18281e9ea7e0SNamjae Jeon 					a->data.non_resident.mapping_pairs_offset),
1829115380f9SNamjae Jeon 				rl2, ll, -1, NULL, NULL, NULL)) {
1830115380f9SNamjae Jeon 				ntfs_error(vol->sb, "Failed to restore mapping pairs array.%s", es);
18311e9ea7e0SNamjae Jeon 				NVolSetErrors(vol);
18321e9ea7e0SNamjae Jeon 			}
18331e9ea7e0SNamjae Jeon 			if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
1834115380f9SNamjae Jeon 				ntfs_error(vol->sb, "Failed to restore attribute record.%s", es);
18351e9ea7e0SNamjae Jeon 				NVolSetErrors(vol);
18361e9ea7e0SNamjae Jeon 			}
18371e9ea7e0SNamjae Jeon 			mark_mft_record_dirty(ctx->ntfs_ino);
18381e9ea7e0SNamjae Jeon 		} else if (IS_ERR(ctx->mrec)) {
1839115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to restore attribute search context.%s", es);
18401e9ea7e0SNamjae Jeon 			NVolSetErrors(vol);
18411e9ea7e0SNamjae Jeon 		}
18421e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
18431e9ea7e0SNamjae Jeon 	}
18441e9ea7e0SNamjae Jeon 	if (!IS_ERR(mrec))
18451e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
18461e9ea7e0SNamjae Jeon 	return ret;
18471e9ea7e0SNamjae Jeon }
18481e9ea7e0SNamjae Jeon 
1849115380f9SNamjae Jeon /*
18501e9ea7e0SNamjae Jeon  * ntfs_mft_record_layout - layout an mft record into a memory buffer
18511e9ea7e0SNamjae Jeon  * @vol:	volume to which the mft record will belong
18521e9ea7e0SNamjae Jeon  * @mft_no:	mft reference specifying the mft record number
18531e9ea7e0SNamjae Jeon  * @m:		destination buffer of size >= @vol->mft_record_size bytes
18541e9ea7e0SNamjae Jeon  *
18551e9ea7e0SNamjae Jeon  * Layout an empty, unused mft record with the mft record number @mft_no into
18561e9ea7e0SNamjae Jeon  * the buffer @m.  The volume @vol is needed because the mft record structure
18571e9ea7e0SNamjae Jeon  * was modified in NTFS 3.1 so we need to know which volume version this mft
18581e9ea7e0SNamjae Jeon  * record will be used on.
18591e9ea7e0SNamjae Jeon  *
18601e9ea7e0SNamjae Jeon  * Return 0 on success and -errno on error.
18611e9ea7e0SNamjae Jeon  */
1862115380f9SNamjae Jeon static int ntfs_mft_record_layout(const struct ntfs_volume *vol, const s64 mft_no,
1863115380f9SNamjae Jeon 		struct mft_record *m)
18641e9ea7e0SNamjae Jeon {
1865115380f9SNamjae Jeon 	struct attr_record *a;
18661e9ea7e0SNamjae Jeon 
18671e9ea7e0SNamjae Jeon 	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
18681e9ea7e0SNamjae Jeon 	if (mft_no >= (1ll << 32)) {
1869115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Mft record number 0x%llx exceeds maximum of 2^32.",
1870115380f9SNamjae Jeon 				(long long)mft_no);
18711e9ea7e0SNamjae Jeon 		return -ERANGE;
18721e9ea7e0SNamjae Jeon 	}
18731e9ea7e0SNamjae Jeon 	/* Start by clearing the whole mft record to gives us a clean slate. */
18741e9ea7e0SNamjae Jeon 	memset(m, 0, vol->mft_record_size);
18751e9ea7e0SNamjae Jeon 	/* Aligned to 2-byte boundary. */
18761e9ea7e0SNamjae Jeon 	if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
1877115380f9SNamjae Jeon 		m->usa_ofs = cpu_to_le16((sizeof(struct mft_record_old) + 1) & ~1);
18781e9ea7e0SNamjae Jeon 	else {
1879115380f9SNamjae Jeon 		m->usa_ofs = cpu_to_le16((sizeof(struct mft_record) + 1) & ~1);
18801e9ea7e0SNamjae Jeon 		/*
18811e9ea7e0SNamjae Jeon 		 * Set the NTFS 3.1+ specific fields while we know that the
18821e9ea7e0SNamjae Jeon 		 * volume version is 3.1+.
18831e9ea7e0SNamjae Jeon 		 */
18841e9ea7e0SNamjae Jeon 		m->reserved = 0;
18851e9ea7e0SNamjae Jeon 		m->mft_record_number = cpu_to_le32((u32)mft_no);
18861e9ea7e0SNamjae Jeon 	}
18871e9ea7e0SNamjae Jeon 	m->magic = magic_FILE;
18881e9ea7e0SNamjae Jeon 	if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
18891e9ea7e0SNamjae Jeon 		m->usa_count = cpu_to_le16(vol->mft_record_size /
18901e9ea7e0SNamjae Jeon 				NTFS_BLOCK_SIZE + 1);
18911e9ea7e0SNamjae Jeon 	else {
18921e9ea7e0SNamjae Jeon 		m->usa_count = cpu_to_le16(1);
1893115380f9SNamjae Jeon 		ntfs_warning(vol->sb,
1894115380f9SNamjae Jeon 			"Sector size is bigger than mft record size.  Setting usa_count to 1.  If chkdsk reports this as corruption");
18951e9ea7e0SNamjae Jeon 	}
18961e9ea7e0SNamjae Jeon 	/* Set the update sequence number to 1. */
1897115380f9SNamjae Jeon 	*(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1);
18981e9ea7e0SNamjae Jeon 	m->lsn = 0;
18991e9ea7e0SNamjae Jeon 	m->sequence_number = cpu_to_le16(1);
19001e9ea7e0SNamjae Jeon 	m->link_count = 0;
19011e9ea7e0SNamjae Jeon 	/*
19021e9ea7e0SNamjae Jeon 	 * Place the attributes straight after the update sequence array,
19031e9ea7e0SNamjae Jeon 	 * aligned to 8-byte boundary.
19041e9ea7e0SNamjae Jeon 	 */
19051e9ea7e0SNamjae Jeon 	m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
19061e9ea7e0SNamjae Jeon 			(le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
19071e9ea7e0SNamjae Jeon 	m->flags = 0;
19081e9ea7e0SNamjae Jeon 	/*
19091e9ea7e0SNamjae Jeon 	 * Using attrs_offset plus eight bytes (for the termination attribute).
19101e9ea7e0SNamjae Jeon 	 * attrs_offset is already aligned to 8-byte boundary, so no need to
19111e9ea7e0SNamjae Jeon 	 * align again.
19121e9ea7e0SNamjae Jeon 	 */
19131e9ea7e0SNamjae Jeon 	m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8);
19141e9ea7e0SNamjae Jeon 	m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
19151e9ea7e0SNamjae Jeon 	m->base_mft_record = 0;
19161e9ea7e0SNamjae Jeon 	m->next_attr_instance = 0;
19171e9ea7e0SNamjae Jeon 	/* Add the termination attribute. */
1918115380f9SNamjae Jeon 	a = (struct attr_record *)((u8 *)m + le16_to_cpu(m->attrs_offset));
19191e9ea7e0SNamjae Jeon 	a->type = AT_END;
19201e9ea7e0SNamjae Jeon 	a->length = 0;
19211e9ea7e0SNamjae Jeon 	ntfs_debug("Done.");
19221e9ea7e0SNamjae Jeon 	return 0;
19231e9ea7e0SNamjae Jeon }
19241e9ea7e0SNamjae Jeon 
1925115380f9SNamjae Jeon /*
19261e9ea7e0SNamjae Jeon  * ntfs_mft_record_format - format an mft record on an ntfs volume
19271e9ea7e0SNamjae Jeon  * @vol:	volume on which to format the mft record
19281e9ea7e0SNamjae Jeon  * @mft_no:	mft record number to format
19291e9ea7e0SNamjae Jeon  *
19301e9ea7e0SNamjae Jeon  * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused
19311e9ea7e0SNamjae Jeon  * mft record into the appropriate place of the mft data attribute.  This is
19321e9ea7e0SNamjae Jeon  * used when extending the mft data attribute.
19331e9ea7e0SNamjae Jeon  *
19341e9ea7e0SNamjae Jeon  * Return 0 on success and -errno on error.
19351e9ea7e0SNamjae Jeon  */
static int ntfs_mft_record_format(const struct ntfs_volume *vol, const s64 mft_no)
{
	loff_t i_size;
	struct inode *mft_vi = vol->mft_ino;
	struct folio *folio;
	struct mft_record *m;
	pgoff_t index, end_index;
	unsigned int ofs;
	int err;

	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
	/*
	 * The index into the page cache and the offset within the page cache
	 * page of the wanted mft record.
	 */
	index = NTFS_MFT_NR_TO_PIDX(vol, mft_no);
	ofs = NTFS_MFT_NR_TO_POFS(vol, mft_no);
	/* The maximum valid index into the page cache for $MFT's data. */
	i_size = i_size_read(mft_vi);
	end_index = i_size >> PAGE_SHIFT;
	if (unlikely(index >= end_index)) {
		/*
		 * @index may legitimately equal @end_index when i_size is not
		 * page aligned; in that case the record must still lie fully
		 * within the partial last page, otherwise it does not exist.
		 */
		if (unlikely(index > end_index ||
			     ofs + vol->mft_record_size > (i_size & ~PAGE_MASK))) {
			ntfs_error(vol->sb, "Tried to format non-existing mft record 0x%llx.",
					(long long)mft_no);
			return -ENOENT;
		}
	}

	/* Read, map, and pin the folio containing the mft record. */
	folio = read_mapping_folio(mft_vi->i_mapping, index, NULL);
	if (IS_ERR(folio)) {
		ntfs_error(vol->sb, "Failed to map page containing mft record to format 0x%llx.",
				(long long)mft_no);
		return PTR_ERR(folio);
	}
	/*
	 * Lock the folio and drop its uptodate flag while the record is
	 * rewritten in place; presumably this keeps concurrent readers from
	 * seeing a half-laid-out record -- TODO confirm.
	 */
	folio_lock(folio);
	folio_clear_uptodate(folio);
	/*
	 * Map the folio and point @m at the record.  Assumes an mft record
	 * never straddles a page boundary (only the partial-last-page case
	 * above checks this explicitly) -- TODO confirm for index < end_index.
	 */
	m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs);
	err = ntfs_mft_record_layout(vol, mft_no, m);
	if (unlikely(err)) {
		ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.",
				(long long)mft_no);
		/* Restore uptodate: the folio still holds valid on-disk data. */
		folio_mark_uptodate(folio);
		folio_unlock(folio);
		kunmap_local(m);
		folio_put(folio);
		return err;
	}
	/* Apply the multi sector transfer protection fixups before writeout. */
	pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size);
	folio_mark_uptodate(folio);
	/*
	 * Make sure the mft record is written out to disk.  We could use
	 * ilookup5() to check if an inode is in icache and so on but this is
	 * unnecessary as ntfs_writepage() will write the dirty record anyway.
	 */
	ntfs_mft_mark_dirty(folio);
	folio_unlock(folio);
	kunmap_local(m);
	folio_put(folio);
	ntfs_debug("Done.");
	return 0;
}
19991e9ea7e0SNamjae Jeon 
2000115380f9SNamjae Jeon /*
20011e9ea7e0SNamjae Jeon  * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
20021e9ea7e0SNamjae Jeon  * @vol:	[IN]  volume on which to allocate the mft record
20031e9ea7e0SNamjae Jeon  * @mode:	[IN]  mode if want a file or directory, i.e. base inode or 0
2004115380f9SNamjae Jeon  * @ni:		[OUT] on success, set to the allocated ntfs inode
20051e9ea7e0SNamjae Jeon  * @base_ni:	[IN]  open base inode if allocating an extent mft record or NULL
2006115380f9SNamjae Jeon  * @ni_mrec:	[OUT] on successful return this is the mapped mft record
20071e9ea7e0SNamjae Jeon  *
20081e9ea7e0SNamjae Jeon  * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
20091e9ea7e0SNamjae Jeon  *
20101e9ea7e0SNamjae Jeon  * If @base_ni is NULL make the mft record a base mft record, i.e. a file or
 * directory inode, and allocate it at the default allocator position.  In
20121e9ea7e0SNamjae Jeon  * this case @mode is the file mode as given to us by the caller.  We in
20131e9ea7e0SNamjae Jeon  * particular use @mode to distinguish whether a file or a directory is being
20141e9ea7e0SNamjae Jeon  * created (S_IFDIR(mode) and S_IFREG(mode), respectively).
20151e9ea7e0SNamjae Jeon  *
20161e9ea7e0SNamjae Jeon  * If @base_ni is not NULL make the allocated mft record an extent record,
20171e9ea7e0SNamjae Jeon  * allocate it starting at the mft record after the base mft record and attach
20181e9ea7e0SNamjae Jeon  * the allocated and opened ntfs inode to the base inode @base_ni.  In this
20191e9ea7e0SNamjae Jeon  * case @mode must be 0 as it is meaningless for extent inodes.
20201e9ea7e0SNamjae Jeon  *
20211e9ea7e0SNamjae Jeon  * You need to check the return value with IS_ERR().  If false, the function
20221e9ea7e0SNamjae Jeon  * was successful and the return value is the now opened ntfs inode of the
20231e9ea7e0SNamjae Jeon  * allocated mft record.  *@mrec is then set to the allocated, mapped, pinned,
20241e9ea7e0SNamjae Jeon  * and locked mft record.  If IS_ERR() is true, the function failed and the
20251e9ea7e0SNamjae Jeon  * error code is obtained from PTR_ERR(return value).  *@mrec is undefined in
20261e9ea7e0SNamjae Jeon  * this case.
20271e9ea7e0SNamjae Jeon  *
20281e9ea7e0SNamjae Jeon  * Allocation strategy:
20291e9ea7e0SNamjae Jeon  *
20301e9ea7e0SNamjae Jeon  * To find a free mft record, we scan the mft bitmap for a zero bit.  To
20311e9ea7e0SNamjae Jeon  * optimize this we start scanning at the place specified by @base_ni or if
20321e9ea7e0SNamjae Jeon  * @base_ni is NULL we start where we last stopped and we perform wrap around
20331e9ea7e0SNamjae Jeon  * when we reach the end.  Note, we do not try to allocate mft records below
2034115380f9SNamjae Jeon  * number 64 because numbers 0 to 15 are the defined system files anyway and 16
2035115380f9SNamjae Jeon  * to 64 are special in that they are used for storing extension mft records
20361e9ea7e0SNamjae Jeon  * for the $DATA attribute of $MFT.  This is required to avoid the possibility
20371e9ea7e0SNamjae Jeon  * of creating a runlist with a circular dependency which once written to disk
20381e9ea7e0SNamjae Jeon  * can never be read in again.  Windows will only use records 16 to 24 for
20391e9ea7e0SNamjae Jeon  * normal files if the volume is completely out of space.  We never use them
20401e9ea7e0SNamjae Jeon  * which means that when the volume is really out of space we cannot create any
20411e9ea7e0SNamjae Jeon  * more files while Windows can still create up to 8 small files.  We can start
20421e9ea7e0SNamjae Jeon  * doing this at some later time, it does not matter much for now.
20431e9ea7e0SNamjae Jeon  *
20441e9ea7e0SNamjae Jeon  * When scanning the mft bitmap, we only search up to the last allocated mft
2045115380f9SNamjae Jeon  * record.  If there are no free records left in the range 64 to number of
20461e9ea7e0SNamjae Jeon  * allocated mft records, then we extend the $MFT/$DATA attribute in order to
20471e9ea7e0SNamjae Jeon  * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
20481e9ea7e0SNamjae Jeon  * records at a time or one cluster, if cluster size is above 16kiB.  If there
20491e9ea7e0SNamjae Jeon  * is not sufficient space to do this, we try to extend by a single mft record
20501e9ea7e0SNamjae Jeon  * or one cluster, if cluster size is above the mft record size.
20511e9ea7e0SNamjae Jeon  *
20521e9ea7e0SNamjae Jeon  * No matter how many mft records we allocate, we initialize only the first
20531e9ea7e0SNamjae Jeon  * allocated mft record, incrementing mft data size and initialized size
 * accordingly, open a struct ntfs_inode for it and return it to the caller, unless
2055115380f9SNamjae Jeon  * there are less than 64 mft records, in which case we allocate and initialize
2056115380f9SNamjae Jeon  * mft records until we reach record 64 which we consider as the first free mft
20571e9ea7e0SNamjae Jeon  * record for use by normal files.
20581e9ea7e0SNamjae Jeon  *
20591e9ea7e0SNamjae Jeon  * If during any stage we overflow the initialized data in the mft bitmap, we
20601e9ea7e0SNamjae Jeon  * extend the initialized size (and data size) by 8 bytes, allocating another
20611e9ea7e0SNamjae Jeon  * cluster if required.  The bitmap data size has to be at least equal to the
20621e9ea7e0SNamjae Jeon  * number of mft records in the mft, but it can be bigger, in which case the
2063115380f9SNamjae Jeon  * superfluous bits are padded with zeroes.
20641e9ea7e0SNamjae Jeon  *
20651e9ea7e0SNamjae Jeon  * Thus, when we return successfully (IS_ERR() is false), we will have:
20661e9ea7e0SNamjae Jeon  *	- initialized / extended the mft bitmap if necessary,
20671e9ea7e0SNamjae Jeon  *	- initialized / extended the mft data if necessary,
20681e9ea7e0SNamjae Jeon  *	- set the bit corresponding to the mft record being allocated in the
20691e9ea7e0SNamjae Jeon  *	  mft bitmap,
 *	- opened a struct ntfs_inode for the allocated mft record, and we will have
2071115380f9SNamjae Jeon  *	- returned the struct ntfs_inode as well as the allocated mapped, pinned, and
20721e9ea7e0SNamjae Jeon  *	  locked mft record.
20731e9ea7e0SNamjae Jeon  *
20741e9ea7e0SNamjae Jeon  * On error, the volume will be left in a consistent state and no record will
20751e9ea7e0SNamjae Jeon  * be allocated.  If rolling back a partial operation fails, we may leave some
20761e9ea7e0SNamjae Jeon  * inconsistent metadata in which case we set NVolErrors() so the volume is
20771e9ea7e0SNamjae Jeon  * left dirty when unmounted.
20781e9ea7e0SNamjae Jeon  *
20791e9ea7e0SNamjae Jeon  * Note, this function cannot make use of most of the normal functions, like
20801e9ea7e0SNamjae Jeon  * for example for attribute resizing, etc, because when the run list overflows
20811e9ea7e0SNamjae Jeon  * the base mft record and an attribute list is used, it is very important that
20821e9ea7e0SNamjae Jeon  * the extension mft records used to store the $DATA attribute of $MFT can be
20831e9ea7e0SNamjae Jeon  * reached without having to read the information contained inside them, as
20841e9ea7e0SNamjae Jeon  * this would make it impossible to find them in the first place after the
20851e9ea7e0SNamjae Jeon  * volume is unmounted.  $MFT/$BITMAP probably does not need to follow this
20861e9ea7e0SNamjae Jeon  * rule because the bitmap is not essential for finding the mft records, but on
20871e9ea7e0SNamjae Jeon  * the other hand, handling the bitmap in this special way would make life
20881e9ea7e0SNamjae Jeon  * easier because otherwise there might be circular invocations of functions
20891e9ea7e0SNamjae Jeon  * when reading the bitmap.
20901e9ea7e0SNamjae Jeon  */
2091115380f9SNamjae Jeon int ntfs_mft_record_alloc(struct ntfs_volume *vol, const int mode,
2092115380f9SNamjae Jeon 			  struct ntfs_inode **ni, struct ntfs_inode *base_ni,
2093115380f9SNamjae Jeon 			  struct mft_record **ni_mrec)
20941e9ea7e0SNamjae Jeon {
20951e9ea7e0SNamjae Jeon 	s64 ll, bit, old_data_initialized, old_data_size;
20961e9ea7e0SNamjae Jeon 	unsigned long flags;
2097115380f9SNamjae Jeon 	struct folio *folio;
2098115380f9SNamjae Jeon 	struct ntfs_inode *mft_ni, *mftbmp_ni;
2099115380f9SNamjae Jeon 	struct ntfs_attr_search_ctx *ctx;
2100115380f9SNamjae Jeon 	struct mft_record *m = NULL;
2101115380f9SNamjae Jeon 	struct attr_record *a;
21021e9ea7e0SNamjae Jeon 	pgoff_t index;
21031e9ea7e0SNamjae Jeon 	unsigned int ofs;
21041e9ea7e0SNamjae Jeon 	int err;
2105115380f9SNamjae Jeon 	__le16 seq_no, usn;
21061e9ea7e0SNamjae Jeon 	bool record_formatted = false;
2107115380f9SNamjae Jeon 	unsigned int memalloc_flags;
21081e9ea7e0SNamjae Jeon 
2109115380f9SNamjae Jeon 	if (base_ni && *ni)
2110115380f9SNamjae Jeon 		return -EINVAL;
2111115380f9SNamjae Jeon 
2112115380f9SNamjae Jeon 	/* @mode and @base_ni are mutually exclusive. */
2113115380f9SNamjae Jeon 	if (mode && base_ni)
2114115380f9SNamjae Jeon 		return -EINVAL;
2115115380f9SNamjae Jeon 
2116115380f9SNamjae Jeon 	if (base_ni)
2117115380f9SNamjae Jeon 		ntfs_debug("Entering (allocating an extent mft record for base mft record 0x%llx).",
21181e9ea7e0SNamjae Jeon 				(long long)base_ni->mft_no);
2119115380f9SNamjae Jeon 	else
21201e9ea7e0SNamjae Jeon 		ntfs_debug("Entering (allocating a base mft record).");
2121115380f9SNamjae Jeon 
2122115380f9SNamjae Jeon 	memalloc_flags = memalloc_nofs_save();
2123115380f9SNamjae Jeon 
21241e9ea7e0SNamjae Jeon 	mft_ni = NTFS_I(vol->mft_ino);
2125115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
2126115380f9SNamjae Jeon 		mutex_lock(&mft_ni->mrec_lock);
21271e9ea7e0SNamjae Jeon 	mftbmp_ni = NTFS_I(vol->mftbmp_ino);
2128115380f9SNamjae Jeon search_free_rec:
2129115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
21301e9ea7e0SNamjae Jeon 		down_write(&vol->mftbmp_lock);
21311e9ea7e0SNamjae Jeon 	bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni);
21321e9ea7e0SNamjae Jeon 	if (bit >= 0) {
21331e9ea7e0SNamjae Jeon 		ntfs_debug("Found and allocated free record (#1), bit 0x%llx.",
21341e9ea7e0SNamjae Jeon 				(long long)bit);
21351e9ea7e0SNamjae Jeon 		goto have_alloc_rec;
21361e9ea7e0SNamjae Jeon 	}
21371e9ea7e0SNamjae Jeon 	if (bit != -ENOSPC) {
2138115380f9SNamjae Jeon 		if (!base_ni || base_ni->mft_no != FILE_MFT) {
21391e9ea7e0SNamjae Jeon 			up_write(&vol->mftbmp_lock);
2140115380f9SNamjae Jeon 			mutex_unlock(&mft_ni->mrec_lock);
21411e9ea7e0SNamjae Jeon 		}
2142115380f9SNamjae Jeon 		memalloc_nofs_restore(memalloc_flags);
2143115380f9SNamjae Jeon 		return bit;
2144115380f9SNamjae Jeon 	}
2145115380f9SNamjae Jeon 
2146115380f9SNamjae Jeon 	if (base_ni && base_ni->mft_no == FILE_MFT) {
2147115380f9SNamjae Jeon 		memalloc_nofs_restore(memalloc_flags);
2148115380f9SNamjae Jeon 		return bit;
2149115380f9SNamjae Jeon 	}
2150115380f9SNamjae Jeon 
21511e9ea7e0SNamjae Jeon 	/*
21521e9ea7e0SNamjae Jeon 	 * No free mft records left.  If the mft bitmap already covers more
21531e9ea7e0SNamjae Jeon 	 * than the currently used mft records, the next records are all free,
21541e9ea7e0SNamjae Jeon 	 * so we can simply allocate the first unused mft record.
21551e9ea7e0SNamjae Jeon 	 * Note: We also have to make sure that the mft bitmap at least covers
21561e9ea7e0SNamjae Jeon 	 * the first 24 mft records as they are special and whilst they may not
21571e9ea7e0SNamjae Jeon 	 * be in use, we do not allocate from them.
21581e9ea7e0SNamjae Jeon 	 */
21591e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
21601e9ea7e0SNamjae Jeon 	ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
21611e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
21621e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
21631e9ea7e0SNamjae Jeon 	old_data_initialized = mftbmp_ni->initialized_size;
21641e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2165115380f9SNamjae Jeon 	if (old_data_initialized << 3 > ll &&
2166115380f9SNamjae Jeon 	    old_data_initialized > RESERVED_MFT_RECORDS / 8) {
21671e9ea7e0SNamjae Jeon 		bit = ll;
2168115380f9SNamjae Jeon 		if (bit < RESERVED_MFT_RECORDS)
2169115380f9SNamjae Jeon 			bit = RESERVED_MFT_RECORDS;
21701e9ea7e0SNamjae Jeon 		if (unlikely(bit >= (1ll << 32)))
21711e9ea7e0SNamjae Jeon 			goto max_err_out;
21721e9ea7e0SNamjae Jeon 		ntfs_debug("Found free record (#2), bit 0x%llx.",
21731e9ea7e0SNamjae Jeon 				(long long)bit);
21741e9ea7e0SNamjae Jeon 		goto found_free_rec;
21751e9ea7e0SNamjae Jeon 	}
21761e9ea7e0SNamjae Jeon 	/*
21771e9ea7e0SNamjae Jeon 	 * The mft bitmap needs to be expanded until it covers the first unused
21781e9ea7e0SNamjae Jeon 	 * mft record that we can allocate.
21791e9ea7e0SNamjae Jeon 	 * Note: The smallest mft record we allocate is mft record 24.
21801e9ea7e0SNamjae Jeon 	 */
21811e9ea7e0SNamjae Jeon 	bit = old_data_initialized << 3;
21821e9ea7e0SNamjae Jeon 	if (unlikely(bit >= (1ll << 32)))
21831e9ea7e0SNamjae Jeon 		goto max_err_out;
21841e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
21851e9ea7e0SNamjae Jeon 	old_data_size = mftbmp_ni->allocated_size;
2186115380f9SNamjae Jeon 	ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2187115380f9SNamjae Jeon 			old_data_size, i_size_read(vol->mftbmp_ino),
2188115380f9SNamjae Jeon 			old_data_initialized);
21891e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
21901e9ea7e0SNamjae Jeon 	if (old_data_initialized + 8 > old_data_size) {
21911e9ea7e0SNamjae Jeon 		/* Need to extend bitmap by one more cluster. */
21921e9ea7e0SNamjae Jeon 		ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
21931e9ea7e0SNamjae Jeon 		err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
2194115380f9SNamjae Jeon 		if (err == -EAGAIN)
2195115380f9SNamjae Jeon 			err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
2196115380f9SNamjae Jeon 
21971e9ea7e0SNamjae Jeon 		if (unlikely(err)) {
2198115380f9SNamjae Jeon 			if (!base_ni || base_ni->mft_no != FILE_MFT)
21991e9ea7e0SNamjae Jeon 				up_write(&vol->mftbmp_lock);
22001e9ea7e0SNamjae Jeon 			goto err_out;
22011e9ea7e0SNamjae Jeon 		}
22021e9ea7e0SNamjae Jeon #ifdef DEBUG
22031e9ea7e0SNamjae Jeon 		read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2204115380f9SNamjae Jeon 		ntfs_debug("Status of mftbmp after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2205115380f9SNamjae Jeon 				mftbmp_ni->allocated_size,
2206115380f9SNamjae Jeon 				i_size_read(vol->mftbmp_ino),
2207115380f9SNamjae Jeon 				mftbmp_ni->initialized_size);
22081e9ea7e0SNamjae Jeon 		read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
22091e9ea7e0SNamjae Jeon #endif /* DEBUG */
22101e9ea7e0SNamjae Jeon 	}
22111e9ea7e0SNamjae Jeon 	/*
22121e9ea7e0SNamjae Jeon 	 * We now have sufficient allocated space, extend the initialized_size
22131e9ea7e0SNamjae Jeon 	 * as well as the data_size if necessary and fill the new space with
22141e9ea7e0SNamjae Jeon 	 * zeroes.
22151e9ea7e0SNamjae Jeon 	 */
22161e9ea7e0SNamjae Jeon 	err = ntfs_mft_bitmap_extend_initialized_nolock(vol);
22171e9ea7e0SNamjae Jeon 	if (unlikely(err)) {
2218115380f9SNamjae Jeon 		if (!base_ni || base_ni->mft_no != FILE_MFT)
22191e9ea7e0SNamjae Jeon 			up_write(&vol->mftbmp_lock);
22201e9ea7e0SNamjae Jeon 		goto err_out;
22211e9ea7e0SNamjae Jeon 	}
22221e9ea7e0SNamjae Jeon #ifdef DEBUG
22231e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2224115380f9SNamjae Jeon 	ntfs_debug("Status of mftbmp after initialized extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2225115380f9SNamjae Jeon 			mftbmp_ni->allocated_size,
2226115380f9SNamjae Jeon 			i_size_read(vol->mftbmp_ino),
2227115380f9SNamjae Jeon 			mftbmp_ni->initialized_size);
22281e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
22291e9ea7e0SNamjae Jeon #endif /* DEBUG */
22301e9ea7e0SNamjae Jeon 	ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
22311e9ea7e0SNamjae Jeon found_free_rec:
22321e9ea7e0SNamjae Jeon 	/* @bit is the found free mft record, allocate it in the mft bitmap. */
22331e9ea7e0SNamjae Jeon 	ntfs_debug("At found_free_rec.");
22341e9ea7e0SNamjae Jeon 	err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit);
22351e9ea7e0SNamjae Jeon 	if (unlikely(err)) {
22361e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap.");
2237115380f9SNamjae Jeon 		if (!base_ni || base_ni->mft_no != FILE_MFT)
22381e9ea7e0SNamjae Jeon 			up_write(&vol->mftbmp_lock);
22391e9ea7e0SNamjae Jeon 		goto err_out;
22401e9ea7e0SNamjae Jeon 	}
22411e9ea7e0SNamjae Jeon 	ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit);
22421e9ea7e0SNamjae Jeon have_alloc_rec:
22431e9ea7e0SNamjae Jeon 	/*
22441e9ea7e0SNamjae Jeon 	 * The mft bitmap is now uptodate.  Deal with mft data attribute now.
22451e9ea7e0SNamjae Jeon 	 * Note, we keep hold of the mft bitmap lock for writing until all
22461e9ea7e0SNamjae Jeon 	 * modifications to the mft data attribute are complete, too, as they
22471e9ea7e0SNamjae Jeon 	 * will impact decisions for mft bitmap and mft record allocation done
22481e9ea7e0SNamjae Jeon 	 * by a parallel allocation and if the lock is not maintained a
22491e9ea7e0SNamjae Jeon 	 * parallel allocation could allocate the same mft record as this one.
22501e9ea7e0SNamjae Jeon 	 */
22511e9ea7e0SNamjae Jeon 	ll = (bit + 1) << vol->mft_record_size_bits;
22521e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
22531e9ea7e0SNamjae Jeon 	old_data_initialized = mft_ni->initialized_size;
22541e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
22551e9ea7e0SNamjae Jeon 	if (ll <= old_data_initialized) {
22561e9ea7e0SNamjae Jeon 		ntfs_debug("Allocated mft record already initialized.");
22571e9ea7e0SNamjae Jeon 		goto mft_rec_already_initialized;
22581e9ea7e0SNamjae Jeon 	}
22591e9ea7e0SNamjae Jeon 	ntfs_debug("Initializing allocated mft record.");
22601e9ea7e0SNamjae Jeon 	/*
22611e9ea7e0SNamjae Jeon 	 * The mft record is outside the initialized data.  Extend the mft data
22621e9ea7e0SNamjae Jeon 	 * attribute until it covers the allocated record.  The loop is only
22631e9ea7e0SNamjae Jeon 	 * actually traversed more than once when a freshly formatted volume is
22641e9ea7e0SNamjae Jeon 	 * first written to so it optimizes away nicely in the common case.
22651e9ea7e0SNamjae Jeon 	 */
2266115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT) {
22671e9ea7e0SNamjae Jeon 		read_lock_irqsave(&mft_ni->size_lock, flags);
2268115380f9SNamjae Jeon 		ntfs_debug("Status of mft data before extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2269115380f9SNamjae Jeon 				mft_ni->allocated_size, i_size_read(vol->mft_ino),
2270115380f9SNamjae Jeon 				mft_ni->initialized_size);
22711e9ea7e0SNamjae Jeon 		while (ll > mft_ni->allocated_size) {
22721e9ea7e0SNamjae Jeon 			read_unlock_irqrestore(&mft_ni->size_lock, flags);
22731e9ea7e0SNamjae Jeon 			err = ntfs_mft_data_extend_allocation_nolock(vol);
2274115380f9SNamjae Jeon 			if (err == -EAGAIN)
2275115380f9SNamjae Jeon 				err = ntfs_mft_data_extend_allocation_nolock(vol);
2276115380f9SNamjae Jeon 
22771e9ea7e0SNamjae Jeon 			if (unlikely(err)) {
2278115380f9SNamjae Jeon 				ntfs_error(vol->sb, "Failed to extend mft data allocation.");
22791e9ea7e0SNamjae Jeon 				goto undo_mftbmp_alloc_nolock;
22801e9ea7e0SNamjae Jeon 			}
22811e9ea7e0SNamjae Jeon 			read_lock_irqsave(&mft_ni->size_lock, flags);
2282115380f9SNamjae Jeon 			ntfs_debug("Status of mft data after allocation extension: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2283115380f9SNamjae Jeon 					mft_ni->allocated_size, i_size_read(vol->mft_ino),
2284115380f9SNamjae Jeon 					mft_ni->initialized_size);
22851e9ea7e0SNamjae Jeon 		}
22861e9ea7e0SNamjae Jeon 		read_unlock_irqrestore(&mft_ni->size_lock, flags);
2287115380f9SNamjae Jeon 	} else if (ll > mft_ni->allocated_size) {
2288115380f9SNamjae Jeon 		err = -ENOSPC;
2289115380f9SNamjae Jeon 		goto undo_mftbmp_alloc_nolock;
2290115380f9SNamjae Jeon 	}
22911e9ea7e0SNamjae Jeon 	/*
22921e9ea7e0SNamjae Jeon 	 * Extend mft data initialized size (and data size of course) to reach
22931e9ea7e0SNamjae Jeon 	 * the allocated mft record, formatting the mft records allong the way.
2294115380f9SNamjae Jeon 	 * Note: We only modify the struct ntfs_inode structure as that is all that is
22951e9ea7e0SNamjae Jeon 	 * needed by ntfs_mft_record_format().  We will update the attribute
22961e9ea7e0SNamjae Jeon 	 * record itself in one fell swoop later on.
22971e9ea7e0SNamjae Jeon 	 */
22981e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mft_ni->size_lock, flags);
22991e9ea7e0SNamjae Jeon 	old_data_initialized = mft_ni->initialized_size;
23001e9ea7e0SNamjae Jeon 	old_data_size = vol->mft_ino->i_size;
23011e9ea7e0SNamjae Jeon 	while (ll > mft_ni->initialized_size) {
23021e9ea7e0SNamjae Jeon 		s64 new_initialized_size, mft_no;
23031e9ea7e0SNamjae Jeon 
23041e9ea7e0SNamjae Jeon 		new_initialized_size = mft_ni->initialized_size +
23051e9ea7e0SNamjae Jeon 				vol->mft_record_size;
23061e9ea7e0SNamjae Jeon 		mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
23071e9ea7e0SNamjae Jeon 		if (new_initialized_size > i_size_read(vol->mft_ino))
23081e9ea7e0SNamjae Jeon 			i_size_write(vol->mft_ino, new_initialized_size);
23091e9ea7e0SNamjae Jeon 		write_unlock_irqrestore(&mft_ni->size_lock, flags);
23101e9ea7e0SNamjae Jeon 		ntfs_debug("Initializing mft record 0x%llx.",
23111e9ea7e0SNamjae Jeon 				(long long)mft_no);
23121e9ea7e0SNamjae Jeon 		err = ntfs_mft_record_format(vol, mft_no);
23131e9ea7e0SNamjae Jeon 		if (unlikely(err)) {
23141e9ea7e0SNamjae Jeon 			ntfs_error(vol->sb, "Failed to format mft record.");
23151e9ea7e0SNamjae Jeon 			goto undo_data_init;
23161e9ea7e0SNamjae Jeon 		}
23171e9ea7e0SNamjae Jeon 		write_lock_irqsave(&mft_ni->size_lock, flags);
23181e9ea7e0SNamjae Jeon 		mft_ni->initialized_size = new_initialized_size;
23191e9ea7e0SNamjae Jeon 	}
23201e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mft_ni->size_lock, flags);
23211e9ea7e0SNamjae Jeon 	record_formatted = true;
23221e9ea7e0SNamjae Jeon 	/* Update the mft data attribute record to reflect the new sizes. */
23231e9ea7e0SNamjae Jeon 	m = map_mft_record(mft_ni);
23241e9ea7e0SNamjae Jeon 	if (IS_ERR(m)) {
23251e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map mft record.");
23261e9ea7e0SNamjae Jeon 		err = PTR_ERR(m);
23271e9ea7e0SNamjae Jeon 		goto undo_data_init;
23281e9ea7e0SNamjae Jeon 	}
23291e9ea7e0SNamjae Jeon 	ctx = ntfs_attr_get_search_ctx(mft_ni, m);
23301e9ea7e0SNamjae Jeon 	if (unlikely(!ctx)) {
23311e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to get search context.");
23321e9ea7e0SNamjae Jeon 		err = -ENOMEM;
23331e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
23341e9ea7e0SNamjae Jeon 		goto undo_data_init;
23351e9ea7e0SNamjae Jeon 	}
23361e9ea7e0SNamjae Jeon 	err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
23371e9ea7e0SNamjae Jeon 			CASE_SENSITIVE, 0, NULL, 0, ctx);
23381e9ea7e0SNamjae Jeon 	if (unlikely(err)) {
2339115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to find first attribute extent of mft data attribute.");
23401e9ea7e0SNamjae Jeon 		ntfs_attr_put_search_ctx(ctx);
23411e9ea7e0SNamjae Jeon 		unmap_mft_record(mft_ni);
23421e9ea7e0SNamjae Jeon 		goto undo_data_init;
23431e9ea7e0SNamjae Jeon 	}
23441e9ea7e0SNamjae Jeon 	a = ctx->attr;
23451e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
23461e9ea7e0SNamjae Jeon 	a->data.non_resident.initialized_size =
2347115380f9SNamjae Jeon 			cpu_to_le64(mft_ni->initialized_size);
23481e9ea7e0SNamjae Jeon 	a->data.non_resident.data_size =
2349115380f9SNamjae Jeon 			cpu_to_le64(i_size_read(vol->mft_ino));
23501e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
23511e9ea7e0SNamjae Jeon 	/* Ensure the changes make it to disk. */
23521e9ea7e0SNamjae Jeon 	mark_mft_record_dirty(ctx->ntfs_ino);
23531e9ea7e0SNamjae Jeon 	ntfs_attr_put_search_ctx(ctx);
23541e9ea7e0SNamjae Jeon 	unmap_mft_record(mft_ni);
23551e9ea7e0SNamjae Jeon 	read_lock_irqsave(&mft_ni->size_lock, flags);
2356115380f9SNamjae Jeon 	ntfs_debug("Status of mft data after mft record initialization: allocated_size 0x%llx, data_size 0x%llx, initialized_size 0x%llx.",
2357115380f9SNamjae Jeon 			mft_ni->allocated_size,	i_size_read(vol->mft_ino),
2358115380f9SNamjae Jeon 			mft_ni->initialized_size);
2359115380f9SNamjae Jeon 	WARN_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
2360115380f9SNamjae Jeon 	WARN_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
23611e9ea7e0SNamjae Jeon 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
23621e9ea7e0SNamjae Jeon mft_rec_already_initialized:
23631e9ea7e0SNamjae Jeon 	/*
23641e9ea7e0SNamjae Jeon 	 * We can finally drop the mft bitmap lock as the mft data attribute
23651e9ea7e0SNamjae Jeon 	 * has been fully updated.  The only disparity left is that the
23661e9ea7e0SNamjae Jeon 	 * allocated mft record still needs to be marked as in use to match the
23671e9ea7e0SNamjae Jeon 	 * set bit in the mft bitmap but this is actually not a problem since
23681e9ea7e0SNamjae Jeon 	 * this mft record is not referenced from anywhere yet and the fact
23691e9ea7e0SNamjae Jeon 	 * that it is allocated in the mft bitmap means that no-one will try to
23701e9ea7e0SNamjae Jeon 	 * allocate it either.
23711e9ea7e0SNamjae Jeon 	 */
2372115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
23731e9ea7e0SNamjae Jeon 		up_write(&vol->mftbmp_lock);
23741e9ea7e0SNamjae Jeon 	/*
23751e9ea7e0SNamjae Jeon 	 * We now have allocated and initialized the mft record.  Calculate the
23761e9ea7e0SNamjae Jeon 	 * index of and the offset within the page cache page the record is in.
23771e9ea7e0SNamjae Jeon 	 */
2378115380f9SNamjae Jeon 	index = NTFS_MFT_NR_TO_PIDX(vol, bit);
2379115380f9SNamjae Jeon 	ofs = NTFS_MFT_NR_TO_POFS(vol, bit);
2380115380f9SNamjae Jeon 	/* Read, map, and pin the folio containing the mft record. */
2381115380f9SNamjae Jeon 	folio = read_mapping_folio(vol->mft_ino->i_mapping, index, NULL);
2382115380f9SNamjae Jeon 	if (IS_ERR(folio)) {
2383115380f9SNamjae Jeon 		ntfs_error(vol->sb, "Failed to map page containing allocated mft record 0x%llx.",
2384115380f9SNamjae Jeon 				bit);
2385115380f9SNamjae Jeon 		err = PTR_ERR(folio);
23861e9ea7e0SNamjae Jeon 		goto undo_mftbmp_alloc;
23871e9ea7e0SNamjae Jeon 	}
2388115380f9SNamjae Jeon 	folio_lock(folio);
2389115380f9SNamjae Jeon 	folio_clear_uptodate(folio);
2390115380f9SNamjae Jeon 	m = (struct mft_record *)((u8 *)kmap_local_folio(folio, 0) + ofs);
23911e9ea7e0SNamjae Jeon 	/* If we just formatted the mft record no need to do it again. */
23921e9ea7e0SNamjae Jeon 	if (!record_formatted) {
23931e9ea7e0SNamjae Jeon 		/* Sanity check that the mft record is really not in use. */
23941e9ea7e0SNamjae Jeon 		if (ntfs_is_file_record(m->magic) &&
23951e9ea7e0SNamjae Jeon 				(m->flags & MFT_RECORD_IN_USE)) {
2396115380f9SNamjae Jeon 			ntfs_warning(vol->sb,
2397115380f9SNamjae Jeon 				"Mft record 0x%llx was marked free in mft bitmap but is marked used itself. Unmount and run chkdsk.",
2398115380f9SNamjae Jeon 				bit);
2399115380f9SNamjae Jeon 			folio_mark_uptodate(folio);
2400115380f9SNamjae Jeon 			folio_unlock(folio);
2401115380f9SNamjae Jeon 			kunmap_local(m);
2402115380f9SNamjae Jeon 			folio_put(folio);
24031e9ea7e0SNamjae Jeon 			NVolSetErrors(vol);
2404115380f9SNamjae Jeon 			goto search_free_rec;
24051e9ea7e0SNamjae Jeon 		}
24061e9ea7e0SNamjae Jeon 		/*
24071e9ea7e0SNamjae Jeon 		 * We need to (re-)format the mft record, preserving the
24081e9ea7e0SNamjae Jeon 		 * sequence number if it is not zero as well as the update
24091e9ea7e0SNamjae Jeon 		 * sequence number if it is not zero or -1 (0xffff).  This
24101e9ea7e0SNamjae Jeon 		 * means we do not need to care whether or not something went
24111e9ea7e0SNamjae Jeon 		 * wrong with the previous mft record.
24121e9ea7e0SNamjae Jeon 		 */
24131e9ea7e0SNamjae Jeon 		seq_no = m->sequence_number;
2414115380f9SNamjae Jeon 		usn = *(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs));
24151e9ea7e0SNamjae Jeon 		err = ntfs_mft_record_layout(vol, bit, m);
24161e9ea7e0SNamjae Jeon 		if (unlikely(err)) {
2417115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to layout allocated mft record 0x%llx.",
2418115380f9SNamjae Jeon 					bit);
2419115380f9SNamjae Jeon 			folio_mark_uptodate(folio);
2420115380f9SNamjae Jeon 			folio_unlock(folio);
2421115380f9SNamjae Jeon 			kunmap_local(m);
2422115380f9SNamjae Jeon 			folio_put(folio);
24231e9ea7e0SNamjae Jeon 			goto undo_mftbmp_alloc;
24241e9ea7e0SNamjae Jeon 		}
24251e9ea7e0SNamjae Jeon 		if (seq_no)
24261e9ea7e0SNamjae Jeon 			m->sequence_number = seq_no;
24271e9ea7e0SNamjae Jeon 		if (usn && le16_to_cpu(usn) != 0xffff)
2428115380f9SNamjae Jeon 			*(__le16 *)((u8 *)m + le16_to_cpu(m->usa_ofs)) = usn;
2429115380f9SNamjae Jeon 		pre_write_mst_fixup((struct ntfs_record *)m, vol->mft_record_size);
24301e9ea7e0SNamjae Jeon 	}
24311e9ea7e0SNamjae Jeon 	/* Set the mft record itself in use. */
24321e9ea7e0SNamjae Jeon 	m->flags |= MFT_RECORD_IN_USE;
24331e9ea7e0SNamjae Jeon 	if (S_ISDIR(mode))
24341e9ea7e0SNamjae Jeon 		m->flags |= MFT_RECORD_IS_DIRECTORY;
2435115380f9SNamjae Jeon 	folio_mark_uptodate(folio);
24361e9ea7e0SNamjae Jeon 	if (base_ni) {
2437115380f9SNamjae Jeon 		struct mft_record *m_tmp;
24381e9ea7e0SNamjae Jeon 
24391e9ea7e0SNamjae Jeon 		/*
24401e9ea7e0SNamjae Jeon 		 * Setup the base mft record in the extent mft record.  This
24411e9ea7e0SNamjae Jeon 		 * completes initialization of the allocated extent mft record
24421e9ea7e0SNamjae Jeon 		 * and we can simply use it with map_extent_mft_record().
24431e9ea7e0SNamjae Jeon 		 */
24441e9ea7e0SNamjae Jeon 		m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
24451e9ea7e0SNamjae Jeon 				base_ni->seq_no);
24461e9ea7e0SNamjae Jeon 		/*
24471e9ea7e0SNamjae Jeon 		 * Allocate an extent inode structure for the new mft record,
24481e9ea7e0SNamjae Jeon 		 * attach it to the base inode @base_ni and map, pin, and lock
24491e9ea7e0SNamjae Jeon 		 * its, i.e. the allocated, mft record.
24501e9ea7e0SNamjae Jeon 		 */
2451115380f9SNamjae Jeon 		m_tmp = map_extent_mft_record(base_ni,
2452115380f9SNamjae Jeon 					      MK_MREF(bit, le16_to_cpu(m->sequence_number)),
2453115380f9SNamjae Jeon 					      ni);
24541e9ea7e0SNamjae Jeon 		if (IS_ERR(m_tmp)) {
2455115380f9SNamjae Jeon 			ntfs_error(vol->sb, "Failed to map allocated extent mft record 0x%llx.",
2456115380f9SNamjae Jeon 					bit);
24571e9ea7e0SNamjae Jeon 			err = PTR_ERR(m_tmp);
24581e9ea7e0SNamjae Jeon 			/* Set the mft record itself not in use. */
24591e9ea7e0SNamjae Jeon 			m->flags &= cpu_to_le16(
24601e9ea7e0SNamjae Jeon 					~le16_to_cpu(MFT_RECORD_IN_USE));
24611e9ea7e0SNamjae Jeon 			/* Make sure the mft record is written out to disk. */
2462115380f9SNamjae Jeon 			ntfs_mft_mark_dirty(folio);
2463115380f9SNamjae Jeon 			folio_unlock(folio);
2464115380f9SNamjae Jeon 			kunmap_local(m);
2465115380f9SNamjae Jeon 			folio_put(folio);
24661e9ea7e0SNamjae Jeon 			goto undo_mftbmp_alloc;
24671e9ea7e0SNamjae Jeon 		}
2468115380f9SNamjae Jeon 
24691e9ea7e0SNamjae Jeon 		/*
24701e9ea7e0SNamjae Jeon 		 * Make sure the allocated mft record is written out to disk.
24711e9ea7e0SNamjae Jeon 		 * No need to set the inode dirty because the caller is going
24721e9ea7e0SNamjae Jeon 		 * to do that anyway after finishing with the new extent mft
24731e9ea7e0SNamjae Jeon 		 * record (e.g. at a minimum a new attribute will be added to
24741e9ea7e0SNamjae Jeon 		 * the mft record.
24751e9ea7e0SNamjae Jeon 		 */
2476115380f9SNamjae Jeon 		ntfs_mft_mark_dirty(folio);
2477115380f9SNamjae Jeon 		folio_unlock(folio);
24781e9ea7e0SNamjae Jeon 		/*
24791e9ea7e0SNamjae Jeon 		 * Need to unmap the page since map_extent_mft_record() mapped
24801e9ea7e0SNamjae Jeon 		 * it as well so we have it mapped twice at the moment.
24811e9ea7e0SNamjae Jeon 		 */
2482115380f9SNamjae Jeon 		kunmap_local(m);
2483115380f9SNamjae Jeon 		folio_put(folio);
24841e9ea7e0SNamjae Jeon 	} else {
24851e9ea7e0SNamjae Jeon 		/*
24861e9ea7e0SNamjae Jeon 		 * Manually map, pin, and lock the mft record as we already
24871e9ea7e0SNamjae Jeon 		 * have its page mapped and it is very easy to do.
24881e9ea7e0SNamjae Jeon 		 */
2489115380f9SNamjae Jeon 		(*ni)->seq_no = le16_to_cpu(m->sequence_number);
24901e9ea7e0SNamjae Jeon 		/*
24911e9ea7e0SNamjae Jeon 		 * Make sure the allocated mft record is written out to disk.
24921e9ea7e0SNamjae Jeon 		 * NOTE: We do not set the ntfs inode dirty because this would
24931e9ea7e0SNamjae Jeon 		 * fail in ntfs_write_inode() because the inode does not have a
24941e9ea7e0SNamjae Jeon 		 * standard information attribute yet.  Also, there is no need
24951e9ea7e0SNamjae Jeon 		 * to set the inode dirty because the caller is going to do
24961e9ea7e0SNamjae Jeon 		 * that anyway after finishing with the new mft record (e.g. at
24971e9ea7e0SNamjae Jeon 		 * a minimum some new attributes will be added to the mft
24981e9ea7e0SNamjae Jeon 		 * record.
24991e9ea7e0SNamjae Jeon 		 */
25001e9ea7e0SNamjae Jeon 
2501115380f9SNamjae Jeon 		(*ni)->mrec = kmalloc(vol->mft_record_size, GFP_NOFS);
2502115380f9SNamjae Jeon 		if (!(*ni)->mrec) {
2503115380f9SNamjae Jeon 			folio_unlock(folio);
2504115380f9SNamjae Jeon 			kunmap_local(m);
2505115380f9SNamjae Jeon 			folio_put(folio);
2506115380f9SNamjae Jeon 			goto undo_mftbmp_alloc;
2507115380f9SNamjae Jeon 		}
25081e9ea7e0SNamjae Jeon 
2509115380f9SNamjae Jeon 		memcpy((*ni)->mrec, m, vol->mft_record_size);
2510115380f9SNamjae Jeon 		post_read_mst_fixup((struct ntfs_record *)(*ni)->mrec, vol->mft_record_size);
2511115380f9SNamjae Jeon 		ntfs_mft_mark_dirty(folio);
2512115380f9SNamjae Jeon 		folio_unlock(folio);
2513115380f9SNamjae Jeon 		(*ni)->folio = folio;
2514115380f9SNamjae Jeon 		(*ni)->folio_ofs = ofs;
2515115380f9SNamjae Jeon 		atomic_inc(&(*ni)->count);
25161e9ea7e0SNamjae Jeon 		/* Update the default mft allocation position. */
25171e9ea7e0SNamjae Jeon 		vol->mft_data_pos = bit + 1;
25181e9ea7e0SNamjae Jeon 	}
2519115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
2520115380f9SNamjae Jeon 		mutex_unlock(&mft_ni->mrec_lock);
2521115380f9SNamjae Jeon 	memalloc_nofs_restore(memalloc_flags);
2522115380f9SNamjae Jeon 
25231e9ea7e0SNamjae Jeon 	/*
25241e9ea7e0SNamjae Jeon 	 * Return the opened, allocated inode of the allocated mft record as
25251e9ea7e0SNamjae Jeon 	 * well as the mapped, pinned, and locked mft record.
25261e9ea7e0SNamjae Jeon 	 */
25271e9ea7e0SNamjae Jeon 	ntfs_debug("Returning opened, allocated %sinode 0x%llx.",
2528115380f9SNamjae Jeon 			base_ni ? "extent " : "", bit);
2529115380f9SNamjae Jeon 	(*ni)->mft_no = bit;
2530115380f9SNamjae Jeon 	if (ni_mrec)
2531115380f9SNamjae Jeon 		*ni_mrec = (*ni)->mrec;
2532115380f9SNamjae Jeon 	ntfs_dec_free_mft_records(vol, 1);
2533115380f9SNamjae Jeon 	return 0;
25341e9ea7e0SNamjae Jeon undo_data_init:
25351e9ea7e0SNamjae Jeon 	write_lock_irqsave(&mft_ni->size_lock, flags);
25361e9ea7e0SNamjae Jeon 	mft_ni->initialized_size = old_data_initialized;
25371e9ea7e0SNamjae Jeon 	i_size_write(vol->mft_ino, old_data_size);
25381e9ea7e0SNamjae Jeon 	write_unlock_irqrestore(&mft_ni->size_lock, flags);
25391e9ea7e0SNamjae Jeon 	goto undo_mftbmp_alloc_nolock;
25401e9ea7e0SNamjae Jeon undo_mftbmp_alloc:
2541115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
25421e9ea7e0SNamjae Jeon 		down_write(&vol->mftbmp_lock);
25431e9ea7e0SNamjae Jeon undo_mftbmp_alloc_nolock:
25441e9ea7e0SNamjae Jeon 	if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) {
25451e9ea7e0SNamjae Jeon 		ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es);
25461e9ea7e0SNamjae Jeon 		NVolSetErrors(vol);
25471e9ea7e0SNamjae Jeon 	}
2548115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
25491e9ea7e0SNamjae Jeon 		up_write(&vol->mftbmp_lock);
25501e9ea7e0SNamjae Jeon err_out:
2551115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT)
2552115380f9SNamjae Jeon 		mutex_unlock(&mft_ni->mrec_lock);
2553115380f9SNamjae Jeon 	memalloc_nofs_restore(memalloc_flags);
2554115380f9SNamjae Jeon 	return err;
25551e9ea7e0SNamjae Jeon max_err_out:
2556115380f9SNamjae Jeon 	ntfs_warning(vol->sb,
2557115380f9SNamjae Jeon 		"Cannot allocate mft record because the maximum number of inodes (2^32) has already been reached.");
2558115380f9SNamjae Jeon 	if (!base_ni || base_ni->mft_no != FILE_MFT) {
25591e9ea7e0SNamjae Jeon 		up_write(&vol->mftbmp_lock);
2560115380f9SNamjae Jeon 		mutex_unlock(&mft_ni->mrec_lock);
25611e9ea7e0SNamjae Jeon 	}
2562115380f9SNamjae Jeon 	memalloc_nofs_restore(memalloc_flags);
2563115380f9SNamjae Jeon 	return -ENOSPC;
25641e9ea7e0SNamjae Jeon }
25651e9ea7e0SNamjae Jeon 
/*
 * ntfs_mft_record_free - free an mft record on an ntfs volume
 * @vol:	volume on which to free the mft record
 * @ni:		open ntfs inode of the mft record to free
 *
 * Free the mft record of the open inode @ni on the mounted ntfs volume @vol:
 * mark the record not in use, bump its sequence number (skipping zero),
 * write the record back to disk and clear the corresponding bit in
 * $MFT/$BITMAP.
 *
 * NOTE(review): an earlier version of this comment claimed the function
 * calls ntfs_inode_close() internally — no such call is visible here; the
 * record is unmapped via unmap_mft_record() on every exit path.  Confirm
 * who owns/releases @ni after a successful return.
 *
 * Return: 0 on success, negative errno on failure: -EINVAL for NULL
 * arguments, -EIO if the mft record cannot be mapped, or the error
 * propagated from the block lookup, record write-out or bitmap update.
 * If the rollback itself fails, an error is logged and the on-disk
 * metadata is left inconsistent.
 */
int ntfs_mft_record_free(struct ntfs_volume *vol, struct ntfs_inode *ni)
{
	u64 mft_no;
	int err;
	u16 seq_no;
	__le16 old_seq_no;	/* original value, kept for rollback */
	struct mft_record *ni_mrec;
	unsigned int memalloc_flags;
	struct ntfs_inode *base_ni;

	if (!vol || !ni)
		return -EINVAL;

	ntfs_debug("Entering for inode 0x%llx.\n", (long long)ni->mft_no);

	ni_mrec = map_mft_record(ni);
	if (IS_ERR(ni_mrec))
		return -EIO;

	/* Cache the mft reference for later. */
	mft_no = ni->mft_no;

	/* Mark the mft record as not in use. */
	ni_mrec->flags &= ~MFT_RECORD_IN_USE;

	/* Increment the sequence number, skipping zero, if it is not zero. */
	old_seq_no = ni_mrec->sequence_number;
	seq_no = le16_to_cpu(old_seq_no);
	if (seq_no == 0xffff)
		seq_no = 1;
	else if (seq_no)
		seq_no++;
	ni_mrec->sequence_number = cpu_to_le16(seq_no);

	/*
	 * Resolve the on-disk blocks backing this mft record; the $MFT
	 * runlist lock must be held across the lookup.
	 */
	down_read(&NTFS_I(vol->mft_ino)->runlist.lock);
	err = ntfs_get_block_mft_record(NTFS_I(vol->mft_ino), ni);
	up_read(&NTFS_I(vol->mft_ino)->runlist.lock);
	if (err) {
		unmap_mft_record(ni);
		return err;
	}

	/*
	 * Set the ntfs inode dirty and write it out.  We do not need to worry
	 * about the base inode here since whatever caused the extent mft
	 * record to be freed is guaranteed to do it already.
	 */
	NInoSetDirty(ni);
	err = write_mft_record(ni, ni_mrec, 0);
	if (err)
		goto sync_rollback;

	/* nr_extents < 0 marks @ni as an extent inode; use its base inode. */
	if (likely(ni->nr_extents >= 0))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;

	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
	memalloc_flags = memalloc_nofs_save();
	/*
	 * mftbmp_lock is skipped when the record belongs to $MFT itself
	 * (same pattern as the allocation path) — presumably that case is
	 * already serialized by the caller; verify.
	 */
	if (base_ni->mft_no != FILE_MFT)
		down_write(&vol->mftbmp_lock);
	err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no);
	if (base_ni->mft_no != FILE_MFT)
		up_write(&vol->mftbmp_lock);
	memalloc_nofs_restore(memalloc_flags);
	if (err)
		goto bitmap_rollback;

	unmap_mft_record(ni);
	ntfs_inc_free_mft_records(vol, 1);
	return 0;

	/* Rollback what we did... */
bitmap_rollback:
	/* Re-set the bitmap bit cleared above. */
	memalloc_flags = memalloc_nofs_save();
	if (base_ni->mft_no != FILE_MFT)
		down_write(&vol->mftbmp_lock);
	if (ntfs_bitmap_set_bit(vol->mftbmp_ino, mft_no))
		ntfs_error(vol->sb, "ntfs_bitmap_set_bit failed in bitmap_rollback\n");
	if (base_ni->mft_no != FILE_MFT)
		up_write(&vol->mftbmp_lock);
	memalloc_nofs_restore(memalloc_flags);
sync_rollback:
	/*
	 * Restore the in-use flag and the original sequence number, then
	 * attempt to write the record back out (best effort — the return
	 * value of this final write is intentionally not checked).
	 */
	ntfs_error(vol->sb,
		"Eeek! Rollback failed in %s. Leaving inconsistent metadata!\n", __func__);
	ni_mrec->flags |= MFT_RECORD_IN_USE;
	ni_mrec->sequence_number = old_seq_no;
	NInoSetDirty(ni);
	write_mft_record(ni, ni_mrec, 0);
	unmap_mft_record(ni);
	return err;
}
2669115380f9SNamjae Jeon 
2670115380f9SNamjae Jeon static s64 lcn_from_index(struct ntfs_volume *vol, struct ntfs_inode *ni,
2671115380f9SNamjae Jeon 		unsigned long index)
2672115380f9SNamjae Jeon {
2673115380f9SNamjae Jeon 	s64 vcn;
2674115380f9SNamjae Jeon 	s64 lcn;
2675115380f9SNamjae Jeon 
2676115380f9SNamjae Jeon 	vcn = ntfs_pidx_to_cluster(vol, index);
2677115380f9SNamjae Jeon 
2678115380f9SNamjae Jeon 	down_read(&ni->runlist.lock);
2679115380f9SNamjae Jeon 	lcn = ntfs_attr_vcn_to_lcn_nolock(ni, vcn, false);
2680115380f9SNamjae Jeon 	up_read(&ni->runlist.lock);
2681115380f9SNamjae Jeon 
2682115380f9SNamjae Jeon 	return lcn;
2683115380f9SNamjae Jeon }
2684115380f9SNamjae Jeon 
2685115380f9SNamjae Jeon /*
2686115380f9SNamjae Jeon  * ntfs_write_mft_block - Write back a folio containing MFT records
2687115380f9SNamjae Jeon  * @folio:	The folio to write back (contains one or more MFT records)
2688115380f9SNamjae Jeon  * @wbc:	Writeback control structure
2689115380f9SNamjae Jeon  *
2690115380f9SNamjae Jeon  * This function is called as part of the address_space_operations
2691115380f9SNamjae Jeon  * .writepages implementation for the $MFT inode (or $MFTMirr).
2692115380f9SNamjae Jeon  * It handles writing one folio (normally 4KiB page) worth of MFT records
2693115380f9SNamjae Jeon  * to the underlying block device.
2694115380f9SNamjae Jeon  *
2695115380f9SNamjae Jeon  * Return: 0 on success, or -errno on error.
2696115380f9SNamjae Jeon  */
2697115380f9SNamjae Jeon static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *wbc)
2698115380f9SNamjae Jeon {
2699115380f9SNamjae Jeon 	struct address_space *mapping = folio->mapping;
2700115380f9SNamjae Jeon 	struct inode *vi = mapping->host;
2701115380f9SNamjae Jeon 	struct ntfs_inode *ni = NTFS_I(vi);
2702115380f9SNamjae Jeon 	struct ntfs_volume *vol = ni->vol;
2703115380f9SNamjae Jeon 	u8 *kaddr;
2704f462fdf3SArnd Bergmann 	struct ntfs_inode **locked_nis __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE,
2705f462fdf3SArnd Bergmann 							sizeof(struct ntfs_inode *), GFP_NOFS);
2706115380f9SNamjae Jeon 	int nr_locked_nis = 0, err = 0, mft_ofs, prev_mft_ofs;
2707f462fdf3SArnd Bergmann 	struct inode **ref_inos __free(kfree) = kmalloc_array(PAGE_SIZE / NTFS_BLOCK_SIZE,
2708f462fdf3SArnd Bergmann 							      sizeof(struct inode *), GFP_NOFS);
2709115380f9SNamjae Jeon 	int nr_ref_inos = 0;
2710115380f9SNamjae Jeon 	struct bio *bio = NULL;
2711d9038d99SNamjae Jeon 	u64 mft_no;
2712115380f9SNamjae Jeon 	struct ntfs_inode *tni;
2713115380f9SNamjae Jeon 	s64 lcn;
2714115380f9SNamjae Jeon 	s64 vcn = ntfs_pidx_to_cluster(vol, folio->index);
2715115380f9SNamjae Jeon 	s64 end_vcn = ntfs_bytes_to_cluster(vol, ni->allocated_size);
2716115380f9SNamjae Jeon 	unsigned int folio_sz;
2717115380f9SNamjae Jeon 	struct runlist_element *rl;
2718115380f9SNamjae Jeon 	loff_t i_size = i_size_read(vi);
2719115380f9SNamjae Jeon 
2720e7d82353SNamjae Jeon 	ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.",
2721e7d82353SNamjae Jeon 			ni->mft_no, ni->type, folio->index);
2722115380f9SNamjae Jeon 
2723f462fdf3SArnd Bergmann 	if (!locked_nis || !ref_inos)
2724f462fdf3SArnd Bergmann 		return -ENOMEM;
2725f462fdf3SArnd Bergmann 
2726115380f9SNamjae Jeon 	/* We have to zero every time due to mmap-at-end-of-file. */
2727115380f9SNamjae Jeon 	if (folio->index >= (i_size >> folio_shift(folio)))
2728115380f9SNamjae Jeon 		/* The page straddles i_size. */
2729115380f9SNamjae Jeon 		folio_zero_segment(folio,
2730115380f9SNamjae Jeon 				   offset_in_folio(folio, i_size),
2731115380f9SNamjae Jeon 				   folio_size(folio));
2732115380f9SNamjae Jeon 
2733115380f9SNamjae Jeon 	lcn = lcn_from_index(vol, ni, folio->index);
2734115380f9SNamjae Jeon 	if (lcn <= LCN_HOLE) {
2735115380f9SNamjae Jeon 		folio_start_writeback(folio);
2736115380f9SNamjae Jeon 		folio_unlock(folio);
2737115380f9SNamjae Jeon 		folio_end_writeback(folio);
2738115380f9SNamjae Jeon 		return -EIO;
2739115380f9SNamjae Jeon 	}
2740115380f9SNamjae Jeon 
2741115380f9SNamjae Jeon 	/* Map folio so we can access its contents. */
2742115380f9SNamjae Jeon 	kaddr = kmap_local_folio(folio, 0);
2743115380f9SNamjae Jeon 	/* Clear the page uptodate flag whilst the mst fixups are applied. */
2744115380f9SNamjae Jeon 	folio_clear_uptodate(folio);
2745115380f9SNamjae Jeon 
2746115380f9SNamjae Jeon 	for (mft_ofs = 0; mft_ofs < PAGE_SIZE && vcn < end_vcn;
2747115380f9SNamjae Jeon 	     mft_ofs += vol->mft_record_size) {
2748115380f9SNamjae Jeon 		/* Get the mft record number. */
2749115380f9SNamjae Jeon 		mft_no = (((s64)folio->index << PAGE_SHIFT) + mft_ofs) >>
2750115380f9SNamjae Jeon 			vol->mft_record_size_bits;
2751115380f9SNamjae Jeon 		vcn = ntfs_mft_no_to_cluster(vol, mft_no);
2752115380f9SNamjae Jeon 		/* Check whether to write this mft record. */
2753115380f9SNamjae Jeon 		tni = NULL;
2754115380f9SNamjae Jeon 		if (ntfs_may_write_mft_record(vol, mft_no,
2755115380f9SNamjae Jeon 					(struct mft_record *)(kaddr + mft_ofs),
2756115380f9SNamjae Jeon 					&tni, &ref_inos[nr_ref_inos])) {
2757115380f9SNamjae Jeon 			unsigned int mft_record_off = 0;
2758115380f9SNamjae Jeon 			s64 vcn_off = vcn;
2759115380f9SNamjae Jeon 
			/*
			 * Skip $MFT extent mft records and let them be written
			 * out by writeback to avoid deadlocks: the $MFT runlist
			 * lock must be taken before the $MFT extent mrec_lock is
			 * taken.
			 */
2765115380f9SNamjae Jeon 			if (tni && tni->nr_extents < 0 &&
2766115380f9SNamjae Jeon 				tni->ext.base_ntfs_ino == NTFS_I(vol->mft_ino)) {
2767115380f9SNamjae Jeon 				mutex_unlock(&tni->mrec_lock);
2768115380f9SNamjae Jeon 				atomic_dec(&tni->count);
2769115380f9SNamjae Jeon 				iput(vol->mft_ino);
2770115380f9SNamjae Jeon 				continue;
2771115380f9SNamjae Jeon 			}
2772115380f9SNamjae Jeon 
2773115380f9SNamjae Jeon 			/*
2774115380f9SNamjae Jeon 			 * The record should be written.  If a locked ntfs
2775115380f9SNamjae Jeon 			 * inode was returned, add it to the array of locked
2776115380f9SNamjae Jeon 			 * ntfs inodes.
2777115380f9SNamjae Jeon 			 */
2778115380f9SNamjae Jeon 			if (tni)
2779115380f9SNamjae Jeon 				locked_nis[nr_locked_nis++] = tni;
2780115380f9SNamjae Jeon 			else if (ref_inos[nr_ref_inos])
2781115380f9SNamjae Jeon 				nr_ref_inos++;
2782115380f9SNamjae Jeon 
2783115380f9SNamjae Jeon 			if (bio && (mft_ofs != prev_mft_ofs + vol->mft_record_size)) {
2784115380f9SNamjae Jeon flush_bio:
2785115380f9SNamjae Jeon 				bio->bi_end_io = ntfs_bio_end_io;
2786115380f9SNamjae Jeon 				submit_bio(bio);
2787115380f9SNamjae Jeon 				bio = NULL;
2788115380f9SNamjae Jeon 			}
2789115380f9SNamjae Jeon 
2790115380f9SNamjae Jeon 			if (vol->cluster_size < folio_size(folio)) {
2791115380f9SNamjae Jeon 				down_write(&ni->runlist.lock);
2792115380f9SNamjae Jeon 				rl = ntfs_attr_vcn_to_rl(ni, vcn_off, &lcn);
2793115380f9SNamjae Jeon 				up_write(&ni->runlist.lock);
2794115380f9SNamjae Jeon 				if (IS_ERR(rl) || lcn < 0) {
2795115380f9SNamjae Jeon 					err = -EIO;
2796115380f9SNamjae Jeon 					goto unm_done;
2797115380f9SNamjae Jeon 				}
2798115380f9SNamjae Jeon 
2799115380f9SNamjae Jeon 				if (bio &&
2800115380f9SNamjae Jeon 				   (bio_end_sector(bio) >> (vol->cluster_size_bits - 9)) !=
2801115380f9SNamjae Jeon 				    lcn) {
2802115380f9SNamjae Jeon 					bio->bi_end_io = ntfs_bio_end_io;
2803115380f9SNamjae Jeon 					submit_bio(bio);
2804115380f9SNamjae Jeon 					bio = NULL;
2805115380f9SNamjae Jeon 				}
2806115380f9SNamjae Jeon 			}
2807115380f9SNamjae Jeon 
2808115380f9SNamjae Jeon 			if (!bio) {
2809115380f9SNamjae Jeon 				unsigned int off;
2810115380f9SNamjae Jeon 
2811115380f9SNamjae Jeon 				off = ((mft_no << vol->mft_record_size_bits) +
2812115380f9SNamjae Jeon 				       mft_record_off) & vol->cluster_size_mask;
2813115380f9SNamjae Jeon 
2814115380f9SNamjae Jeon 				bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
2815115380f9SNamjae Jeon 						GFP_NOIO);
2816115380f9SNamjae Jeon 				bio->bi_iter.bi_sector =
2817115380f9SNamjae Jeon 					ntfs_bytes_to_sector(vol,
2818115380f9SNamjae Jeon 							ntfs_cluster_to_bytes(vol, lcn) + off);
2819115380f9SNamjae Jeon 			}
2820115380f9SNamjae Jeon 
2821115380f9SNamjae Jeon 			if (vol->cluster_size == NTFS_BLOCK_SIZE &&
2822115380f9SNamjae Jeon 			    (mft_record_off ||
2823115380f9SNamjae Jeon 			     rl->length - (vcn_off - rl->vcn) == 1 ||
2824115380f9SNamjae Jeon 			     mft_ofs + NTFS_BLOCK_SIZE >= PAGE_SIZE))
2825115380f9SNamjae Jeon 				folio_sz = NTFS_BLOCK_SIZE;
2826115380f9SNamjae Jeon 			else
2827115380f9SNamjae Jeon 				folio_sz = vol->mft_record_size;
2828115380f9SNamjae Jeon 			if (!bio_add_folio(bio, folio, folio_sz,
2829115380f9SNamjae Jeon 					   mft_ofs + mft_record_off)) {
2830115380f9SNamjae Jeon 				err = -EIO;
2831115380f9SNamjae Jeon 				bio_put(bio);
2832115380f9SNamjae Jeon 				goto unm_done;
2833115380f9SNamjae Jeon 			}
2834115380f9SNamjae Jeon 			mft_record_off += folio_sz;
2835115380f9SNamjae Jeon 
2836115380f9SNamjae Jeon 			if (mft_record_off != vol->mft_record_size) {
2837115380f9SNamjae Jeon 				vcn_off++;
2838115380f9SNamjae Jeon 				goto flush_bio;
2839115380f9SNamjae Jeon 			}
2840115380f9SNamjae Jeon 			prev_mft_ofs = mft_ofs;
2841115380f9SNamjae Jeon 
2842115380f9SNamjae Jeon 			if (mft_no < vol->mftmirr_size)
2843115380f9SNamjae Jeon 				ntfs_sync_mft_mirror(vol, mft_no,
2844115380f9SNamjae Jeon 						(struct mft_record *)(kaddr + mft_ofs));
2845115380f9SNamjae Jeon 		} else if (ref_inos[nr_ref_inos])
2846115380f9SNamjae Jeon 			nr_ref_inos++;
2847115380f9SNamjae Jeon 	}
2848115380f9SNamjae Jeon 
2849115380f9SNamjae Jeon 	if (bio) {
2850115380f9SNamjae Jeon 		bio->bi_end_io = ntfs_bio_end_io;
2851115380f9SNamjae Jeon 		submit_bio(bio);
2852115380f9SNamjae Jeon 	}
2853115380f9SNamjae Jeon unm_done:
2854115380f9SNamjae Jeon 	folio_mark_uptodate(folio);
2855115380f9SNamjae Jeon 	kunmap_local(kaddr);
2856115380f9SNamjae Jeon 
2857115380f9SNamjae Jeon 	folio_start_writeback(folio);
2858115380f9SNamjae Jeon 	folio_unlock(folio);
2859115380f9SNamjae Jeon 	folio_end_writeback(folio);
2860115380f9SNamjae Jeon 
2861115380f9SNamjae Jeon 	/* Unlock any locked inodes. */
2862115380f9SNamjae Jeon 	while (nr_locked_nis-- > 0) {
2863115380f9SNamjae Jeon 		struct ntfs_inode *base_tni;
2864115380f9SNamjae Jeon 
2865115380f9SNamjae Jeon 		tni = locked_nis[nr_locked_nis];
2866115380f9SNamjae Jeon 		mutex_unlock(&tni->mrec_lock);
2867115380f9SNamjae Jeon 
2868115380f9SNamjae Jeon 		/* Get the base inode. */
2869115380f9SNamjae Jeon 		mutex_lock(&tni->extent_lock);
2870115380f9SNamjae Jeon 		if (tni->nr_extents >= 0)
2871115380f9SNamjae Jeon 			base_tni = tni;
2872115380f9SNamjae Jeon 		else
2873115380f9SNamjae Jeon 			base_tni = tni->ext.base_ntfs_ino;
2874115380f9SNamjae Jeon 		mutex_unlock(&tni->extent_lock);
2875d9038d99SNamjae Jeon 		ntfs_debug("Unlocking %s inode 0x%llx.",
2876115380f9SNamjae Jeon 				tni == base_tni ? "base" : "extent",
2877115380f9SNamjae Jeon 				tni->mft_no);
2878115380f9SNamjae Jeon 		atomic_dec(&tni->count);
2879115380f9SNamjae Jeon 		iput(VFS_I(base_tni));
2880115380f9SNamjae Jeon 	}
2881115380f9SNamjae Jeon 
2882115380f9SNamjae Jeon 	/* Dropping deferred references */
2883115380f9SNamjae Jeon 	while (nr_ref_inos-- > 0) {
2884115380f9SNamjae Jeon 		if (ref_inos[nr_ref_inos])
2885115380f9SNamjae Jeon 			iput(ref_inos[nr_ref_inos]);
2886115380f9SNamjae Jeon 	}
2887115380f9SNamjae Jeon 
2888115380f9SNamjae Jeon 	if (unlikely(err && err != -ENOMEM))
2889115380f9SNamjae Jeon 		NVolSetErrors(vol);
2890115380f9SNamjae Jeon 	if (likely(!err))
2891115380f9SNamjae Jeon 		ntfs_debug("Done.");
2892115380f9SNamjae Jeon 	return err;
2893115380f9SNamjae Jeon }
2894115380f9SNamjae Jeon 
2895115380f9SNamjae Jeon /*
2896115380f9SNamjae Jeon  * ntfs_mft_writepages - Write back dirty folios for the $MFT inode
2897115380f9SNamjae Jeon  * @mapping:	address space of the $MFT inode
2898115380f9SNamjae Jeon  * @wbc:	writeback control
2899115380f9SNamjae Jeon  *
2900115380f9SNamjae Jeon  * Writeback iterator for MFT records. Iterates over dirty folios and
2901115380f9SNamjae Jeon  * delegates actual writing to ntfs_write_mft_block() for each folio.
2902115380f9SNamjae Jeon  * Called from the address_space_operations .writepages vector of the
2903115380f9SNamjae Jeon  * $MFT inode.
2904115380f9SNamjae Jeon  *
2905115380f9SNamjae Jeon  * Returns 0 on success, or the first error encountered.
2906115380f9SNamjae Jeon  */
2907115380f9SNamjae Jeon int ntfs_mft_writepages(struct address_space *mapping,
2908115380f9SNamjae Jeon 		struct writeback_control *wbc)
2909115380f9SNamjae Jeon {
2910115380f9SNamjae Jeon 	struct folio *folio = NULL;
2911115380f9SNamjae Jeon 	int error;
2912115380f9SNamjae Jeon 
2913115380f9SNamjae Jeon 	if (NVolShutdown(NTFS_I(mapping->host)->vol))
2914115380f9SNamjae Jeon 		return -EIO;
2915115380f9SNamjae Jeon 
2916115380f9SNamjae Jeon 	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
2917115380f9SNamjae Jeon 		error = ntfs_write_mft_block(folio, wbc);
2918115380f9SNamjae Jeon 	return error;
2919115380f9SNamjae Jeon }
2920115380f9SNamjae Jeon 
2921115380f9SNamjae Jeon void ntfs_mft_mark_dirty(struct folio *folio)
2922115380f9SNamjae Jeon {
2923115380f9SNamjae Jeon 	iomap_dirty_folio(folio->mapping, folio);
2924115380f9SNamjae Jeon }
2925