xref: /linux/fs/ntfs/inode.c (revision a73258681279bceb4e9210d86204bae14d3ea795)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NTFS kernel inode handling.
4  *
5  * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
6  * Copyright (c) 2025 LG Electronics Co., Ltd.
7  */
8 
9 #include <linux/writeback.h>
10 #include <linux/seq_file.h>
11 
12 #include "lcnalloc.h"
13 #include "time.h"
14 #include "ntfs.h"
15 #include "index.h"
16 #include "attrlist.h"
17 #include "reparse.h"
18 #include "ea.h"
19 #include "attrib.h"
20 #include "iomap.h"
21 #include "object_id.h"
22 
23 /*
24  * ntfs_test_inode - compare two (possibly fake) inodes for equality
25  * @vi:		vfs inode which to test
26  * @data:	data which is being tested with
27  *
28  * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
29  * inode @vi for equality with the ntfs attribute @data.
30  *
31  * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
32  * @na->name and @na->name_len are then ignored.
33  *
34  * Return 1 if the attributes match and 0 if not.
35  *
36  * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
37  * allowed to sleep.
38  */
39 int ntfs_test_inode(struct inode *vi, void *data)
40 {
41 	struct ntfs_attr *na = data;
42 	struct ntfs_inode *ni = NTFS_I(vi);
43 
44 	if (vi->i_ino != na->mft_no)
45 		return 0;
46 
47 	/* If !NInoAttr(ni), @vi is a normal file or directory inode. */
48 	if (likely(!NInoAttr(ni))) {
49 		/* If not looking for a normal inode this is a mismatch. */
50 		if (unlikely(na->type != AT_UNUSED))
51 			return 0;
52 	} else {
53 		/* A fake inode describing an attribute. */
54 		if (ni->type != na->type)
55 			return 0;
56 		if (ni->name_len != na->name_len)
57 			return 0;
58 		if (na->name_len && memcmp(ni->name, na->name,
59 				na->name_len * sizeof(__le16)))
60 			return 0;
61 		if (!ni->ext.base_ntfs_ino)
62 			return 0;
63 	}
64 
65 	/* Match! */
66 	return 1;
67 }
68 
69 /*
70  * ntfs_init_locked_inode - initialize an inode
71  * @vi:		vfs inode to initialize
72  * @data:	data which to initialize @vi to
73  *
74  * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
75  * order to enable ntfs_test_inode() to do its work.
76  *
77  * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
78  * In that case, @na->name and @na->name_len should be set to NULL and 0,
79  * respectively. Although that is not strictly necessary as
80  * ntfs_read_locked_inode() will fill them in later.
81  *
82  * Return 0 on success and error.
83  *
84  * NOTE: This function runs with the inode->i_lock spin lock held so it is not
85  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
86  */
87 static int ntfs_init_locked_inode(struct inode *vi, void *data)
88 {
89 	struct ntfs_attr *na = data;
90 	struct ntfs_inode *ni = NTFS_I(vi);
91 
92 	vi->i_ino = (unsigned long)na->mft_no;
93 
94 	if (na->type == AT_INDEX_ALLOCATION)
95 		NInoSetMstProtected(ni);
96 	else
97 		ni->type = na->type;
98 
99 	ni->name = na->name;
100 	ni->name_len = na->name_len;
101 	ni->folio = NULL;
102 	atomic_set(&ni->count, 1);
103 
104 	/* If initializing a normal inode, we are done. */
105 	if (likely(na->type == AT_UNUSED))
106 		return 0;
107 
108 	/* It is a fake inode. */
109 	NInoSetAttr(ni);
110 
111 	/*
112 	 * We have I30 global constant as an optimization as it is the name
113 	 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
114 	 * allocation but that is ok. And most attributes are unnamed anyway,
115 	 * thus the fraction of named attributes with name != I30 is actually
116 	 * absolutely tiny.
117 	 */
118 	if (na->name_len && na->name != I30) {
119 		unsigned int i;
120 
121 		i = na->name_len * sizeof(__le16);
122 		ni->name = kmalloc(i + sizeof(__le16), GFP_ATOMIC);
123 		if (!ni->name)
124 			return -ENOMEM;
125 		memcpy(ni->name, na->name, i);
126 		ni->name[na->name_len] = 0;
127 	}
128 	return 0;
129 }
130 
131 static int ntfs_read_locked_inode(struct inode *vi);
132 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
133 static int ntfs_read_locked_index_inode(struct inode *base_vi,
134 		struct inode *vi);
135 
136 /*
137  * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
138  * @sb:		super block of mounted volume
139  * @mft_no:	mft record number / inode number to obtain
140  *
141  * Obtain the struct inode corresponding to a specific normal inode (i.e. a
142  * file or directory).
143  *
144  * If the inode is in the cache, it is just returned with an increased
145  * reference count. Otherwise, a new struct inode is allocated and initialized,
146  * and finally ntfs_read_locked_inode() is called to read in the inode and
147  * fill in the remainder of the inode structure.
148  *
149  * Return the struct inode on success. Check the return value with IS_ERR() and
150  * if true, the function failed and the error code is obtained from PTR_ERR().
151  */
152 struct inode *ntfs_iget(struct super_block *sb, u64 mft_no)
153 {
154 	struct inode *vi;
155 	int err;
156 	struct ntfs_attr na;
157 
158 	na.mft_no = mft_no;
159 	na.type = AT_UNUSED;
160 	na.name = NULL;
161 	na.name_len = 0;
162 
163 	vi = iget5_locked(sb, mft_no, ntfs_test_inode,
164 			ntfs_init_locked_inode, &na);
165 	if (unlikely(!vi))
166 		return ERR_PTR(-ENOMEM);
167 
168 	err = 0;
169 
170 	/* If this is a freshly allocated inode, need to read it now. */
171 	if (inode_state_read_once(vi) & I_NEW) {
172 		err = ntfs_read_locked_inode(vi);
173 		unlock_new_inode(vi);
174 	}
175 	/*
176 	 * There is no point in keeping bad inodes around if the failure was
177 	 * due to ENOMEM. We want to be able to retry again later.
178 	 */
179 	if (unlikely(err == -ENOMEM)) {
180 		iput(vi);
181 		vi = ERR_PTR(err);
182 	}
183 	return vi;
184 }
185 
186 /*
187  * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
188  * @base_vi:	vfs base inode containing the attribute
189  * @type:	attribute type
190  * @name:	Unicode name of the attribute (NULL if unnamed)
191  * @name_len:	length of @name in Unicode characters (0 if unnamed)
192  *
193  * Obtain the (fake) struct inode corresponding to the attribute specified by
194  * @type, @name, and @name_len, which is present in the base mft record
195  * specified by the vfs inode @base_vi.
196  *
197  * If the attribute inode is in the cache, it is just returned with an
198  * increased reference count. Otherwise, a new struct inode is allocated and
199  * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
200  * attribute and fill in the inode structure.
201  *
202  * Note, for index allocation attributes, you need to use ntfs_index_iget()
203  * instead of ntfs_attr_iget() as working with indices is a lot more complex.
204  *
205  * Return the struct inode of the attribute inode on success. Check the return
206  * value with IS_ERR() and if true, the function failed and the error code is
207  * obtained from PTR_ERR().
208  */
209 struct inode *ntfs_attr_iget(struct inode *base_vi, __le32 type,
210 		__le16 *name, u32 name_len)
211 {
212 	struct inode *vi;
213 	int err;
214 	struct ntfs_attr na;
215 
216 	/* Make sure no one calls ntfs_attr_iget() for indices. */
217 	WARN_ON(type == AT_INDEX_ALLOCATION);
218 
219 	na.mft_no = base_vi->i_ino;
220 	na.type = type;
221 	na.name = name;
222 	na.name_len = name_len;
223 
224 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
225 			ntfs_init_locked_inode, &na);
226 	if (unlikely(!vi))
227 		return ERR_PTR(-ENOMEM);
228 	err = 0;
229 
230 	/* If this is a freshly allocated inode, need to read it now. */
231 	if (inode_state_read_once(vi) & I_NEW) {
232 		err = ntfs_read_locked_attr_inode(base_vi, vi);
233 		unlock_new_inode(vi);
234 	}
235 	/*
236 	 * There is no point in keeping bad attribute inodes around. This also
237 	 * simplifies things in that we never need to check for bad attribute
238 	 * inodes elsewhere.
239 	 */
240 	if (unlikely(err)) {
241 		iput(vi);
242 		vi = ERR_PTR(err);
243 	}
244 	return vi;
245 }
246 
247 /*
248  * ntfs_index_iget - obtain a struct inode corresponding to an index
249  * @base_vi:	vfs base inode containing the index related attributes
250  * @name:	Unicode name of the index
251  * @name_len:	length of @name in Unicode characters
252  *
253  * Obtain the (fake) struct inode corresponding to the index specified by @name
254  * and @name_len, which is present in the base mft record specified by the vfs
255  * inode @base_vi.
256  *
257  * If the index inode is in the cache, it is just returned with an increased
258  * reference count.  Otherwise, a new struct inode is allocated and
259  * initialized, and finally ntfs_read_locked_index_inode() is called to read
260  * the index related attributes and fill in the inode structure.
261  *
262  * Return the struct inode of the index inode on success. Check the return
263  * value with IS_ERR() and if true, the function failed and the error code is
264  * obtained from PTR_ERR().
265  */
266 struct inode *ntfs_index_iget(struct inode *base_vi, __le16 *name,
267 		u32 name_len)
268 {
269 	struct inode *vi;
270 	int err;
271 	struct ntfs_attr na;
272 
273 	na.mft_no = base_vi->i_ino;
274 	na.type = AT_INDEX_ALLOCATION;
275 	na.name = name;
276 	na.name_len = name_len;
277 
278 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
279 			ntfs_init_locked_inode, &na);
280 	if (unlikely(!vi))
281 		return ERR_PTR(-ENOMEM);
282 
283 	err = 0;
284 
285 	/* If this is a freshly allocated inode, need to read it now. */
286 	if (inode_state_read_once(vi) & I_NEW) {
287 		err = ntfs_read_locked_index_inode(base_vi, vi);
288 		unlock_new_inode(vi);
289 	}
290 	/*
291 	 * There is no point in keeping bad index inodes around.  This also
292 	 * simplifies things in that we never need to check for bad index
293 	 * inodes elsewhere.
294 	 */
295 	if (unlikely(err)) {
296 		iput(vi);
297 		vi = ERR_PTR(err);
298 	}
299 	return vi;
300 }
301 
302 struct inode *ntfs_alloc_big_inode(struct super_block *sb)
303 {
304 	struct ntfs_inode *ni;
305 
306 	ntfs_debug("Entering.");
307 	ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
308 	if (likely(ni != NULL)) {
309 		ni->state = 0;
310 		ni->type = 0;
311 		ni->mft_no = 0;
312 		return VFS_I(ni);
313 	}
314 	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
315 	return NULL;
316 }
317 
318 void ntfs_free_big_inode(struct inode *inode)
319 {
320 	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
321 }
322 
323 static int ntfs_non_resident_dealloc_clusters(struct ntfs_inode *ni)
324 {
325 	struct super_block *sb = ni->vol->sb;
326 	struct ntfs_attr_search_ctx *actx;
327 	int err = 0;
328 
329 	actx = ntfs_attr_get_search_ctx(ni, NULL);
330 	if (!actx)
331 		return -ENOMEM;
332 	WARN_ON(actx->mrec->link_count != 0);
333 
334 	/**
335 	 * ntfs_truncate_vfs cannot be called in evict() context due
336 	 * to some limitations, which are the @ni vfs inode is marked
337 	 * with I_FREEING, and etc.
338 	 */
339 	if (NInoRunlistDirty(ni)) {
340 		err = ntfs_cluster_free_from_rl(ni->vol, ni->runlist.rl);
341 		if (err)
342 			ntfs_error(sb,
343 					"Failed to free clusters. Leaving inconsistent metadata.\n");
344 	}
345 
346 	while ((err = ntfs_attrs_walk(actx)) == 0) {
347 		if (actx->attr->non_resident &&
348 				(!NInoRunlistDirty(ni) || actx->attr->type != AT_DATA)) {
349 			struct runlist_element *rl;
350 			size_t new_rl_count;
351 
352 			rl = ntfs_mapping_pairs_decompress(ni->vol, actx->attr, NULL,
353 					&new_rl_count);
354 			if (IS_ERR(rl)) {
355 				err = PTR_ERR(rl);
356 				ntfs_error(sb,
357 					   "Failed to decompress runlist. Leaving inconsistent metadata.\n");
358 				continue;
359 			}
360 
361 			err = ntfs_cluster_free_from_rl(ni->vol, rl);
362 			if (err)
363 				ntfs_error(sb,
364 					   "Failed to free attribute clusters. Leaving inconsistent metadata.\n");
365 			kvfree(rl);
366 		}
367 	}
368 
369 	ntfs_release_dirty_clusters(ni->vol, ni->i_dealloc_clusters);
370 	ntfs_attr_put_search_ctx(actx);
371 	return err;
372 }
373 
374 int ntfs_drop_big_inode(struct inode *inode)
375 {
376 	struct ntfs_inode *ni = NTFS_I(inode);
377 
378 	if (!inode_unhashed(inode) && inode_state_read_once(inode) & I_SYNC) {
379 		if (ni->type == AT_DATA || ni->type == AT_INDEX_ALLOCATION) {
380 			if (!inode->i_nlink) {
381 				struct ntfs_inode *ni = NTFS_I(inode);
382 
383 				if (ni->data_size == 0)
384 					return 0;
385 
386 				/* To avoid evict_inode call simultaneously */
387 				atomic_inc(&inode->i_count);
388 				spin_unlock(&inode->i_lock);
389 
390 				truncate_setsize(VFS_I(ni), 0);
391 				ntfs_truncate_vfs(VFS_I(ni), 0, 1);
392 
393 				sb_start_intwrite(inode->i_sb);
394 				i_size_write(inode, 0);
395 				ni->allocated_size = ni->initialized_size = ni->data_size = 0;
396 
397 				truncate_inode_pages_final(inode->i_mapping);
398 				sb_end_intwrite(inode->i_sb);
399 
400 				spin_lock(&inode->i_lock);
401 				atomic_dec(&inode->i_count);
402 			}
403 		}
404 		return 0;
405 	}
406 
407 	return inode_generic_drop(inode);
408 }
409 
410 static inline struct ntfs_inode *ntfs_alloc_extent_inode(void)
411 {
412 	struct ntfs_inode *ni;
413 
414 	ntfs_debug("Entering.");
415 	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
416 	if (likely(ni != NULL)) {
417 		ni->state = 0;
418 		return ni;
419 	}
420 	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
421 	return NULL;
422 }
423 
424 static void ntfs_destroy_extent_inode(struct ntfs_inode *ni)
425 {
426 	ntfs_debug("Entering.");
427 
428 	if (!atomic_dec_and_test(&ni->count))
429 		WARN_ON(1);
430 	if (ni->folio)
431 		folio_put(ni->folio);
432 	kfree(ni->mrec);
433 	kmem_cache_free(ntfs_inode_cache, ni);
434 }
435 
436 static struct lock_class_key attr_inode_mrec_lock_class;
437 static struct lock_class_key attr_list_inode_mrec_lock_class;
438 
439 /*
440  * The attribute runlist lock has separate locking rules from the
441  * normal runlist lock, so split the two lock-classes:
442  */
443 static struct lock_class_key attr_list_rl_lock_class;
444 
445 /*
446  * __ntfs_init_inode - initialize ntfs specific part of an inode
447  * @sb:		super block of mounted volume
448  * @ni:		freshly allocated ntfs inode which to initialize
449  *
450  * Initialize an ntfs inode to defaults.
451  *
452  * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
453  * untouched. Make sure to initialize them elsewhere.
454  */
455 void __ntfs_init_inode(struct super_block *sb, struct ntfs_inode *ni)
456 {
457 	ntfs_debug("Entering.");
458 	rwlock_init(&ni->size_lock);
459 	ni->initialized_size = ni->allocated_size = 0;
460 	ni->seq_no = 0;
461 	atomic_set(&ni->count, 1);
462 	ni->vol = NTFS_SB(sb);
463 	ntfs_init_runlist(&ni->runlist);
464 	mutex_init(&ni->mrec_lock);
465 	if (ni->type == AT_ATTRIBUTE_LIST) {
466 		lockdep_set_class(&ni->mrec_lock,
467 				  &attr_list_inode_mrec_lock_class);
468 		lockdep_set_class(&ni->runlist.lock,
469 				  &attr_list_rl_lock_class);
470 	} else if (NInoAttr(ni)) {
471 		lockdep_set_class(&ni->mrec_lock,
472 				  &attr_inode_mrec_lock_class);
473 	}
474 
475 	ni->folio = NULL;
476 	ni->folio_ofs = 0;
477 	ni->mrec = NULL;
478 	ni->attr_list_size = 0;
479 	ni->attr_list = NULL;
480 	ni->itype.index.block_size = 0;
481 	ni->itype.index.vcn_size = 0;
482 	ni->itype.index.collation_rule = 0;
483 	ni->itype.index.block_size_bits = 0;
484 	ni->itype.index.vcn_size_bits = 0;
485 	mutex_init(&ni->extent_lock);
486 	ni->nr_extents = 0;
487 	ni->ext.base_ntfs_ino = NULL;
488 	ni->flags = 0;
489 	ni->mft_lcn[0] = LCN_RL_NOT_MAPPED;
490 	ni->mft_lcn_count = 0;
491 	ni->target = NULL;
492 	ni->i_dealloc_clusters = 0;
493 }
494 
495 /*
496  * Extent inodes get MFT-mapped in a nested way, while the base inode
497  * is still mapped. Teach this nesting to the lock validator by creating
498  * a separate class for nested inode's mrec_lock's:
499  */
500 static struct lock_class_key extent_inode_mrec_lock_key;
501 
502 inline struct ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
503 		u64 mft_no)
504 {
505 	struct ntfs_inode *ni = ntfs_alloc_extent_inode();
506 
507 	ntfs_debug("Entering.");
508 	if (likely(ni != NULL)) {
509 		__ntfs_init_inode(sb, ni);
510 		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
511 		ni->mft_no = mft_no;
512 		ni->type = AT_UNUSED;
513 		ni->name = NULL;
514 		ni->name_len = 0;
515 	}
516 	return ni;
517 }
518 
519 /*
520  * ntfs_is_extended_system_file - check if a file is in the $Extend directory
521  * @ctx:	initialized attribute search context
522  *
523  * Search all file name attributes in the inode described by the attribute
524  * search context @ctx and check if any of the names are in the $Extend system
525  * directory.
526  *
527  * Return values:
528  *	   3: file is $ObjId in $Extend directory
529  *	   2: file is $Reparse in $Extend directory
530  *	   1: file is in $Extend directory
531  *	   0: file is not in $Extend directory
532  *    -errno: failed to determine if the file is in the $Extend directory
533  */
534 static int ntfs_is_extended_system_file(struct ntfs_attr_search_ctx *ctx)
535 {
536 	int nr_links, err;
537 
538 	/* Restart search. */
539 	ntfs_attr_reinit_search_ctx(ctx);
540 
541 	/* Get number of hard links. */
542 	nr_links = le16_to_cpu(ctx->mrec->link_count);
543 
544 	/* Loop through all hard links. */
545 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
546 			ctx))) {
547 		struct file_name_attr *file_name_attr;
548 		struct attr_record *attr = ctx->attr;
549 		u8 *p, *p2;
550 
551 		nr_links--;
552 		/*
553 		 * Maximum sanity checking as we are called on an inode that
554 		 * we suspect might be corrupt.
555 		 */
556 		p = (u8 *)attr + le32_to_cpu(attr->length);
557 		if (p < (u8 *)ctx->mrec || (u8 *)p > (u8 *)ctx->mrec +
558 				le32_to_cpu(ctx->mrec->bytes_in_use)) {
559 err_corrupt_attr:
560 			ntfs_error(ctx->ntfs_ino->vol->sb,
561 					"Corrupt file name attribute. You should run chkdsk.");
562 			return -EIO;
563 		}
564 		if (attr->non_resident) {
565 			ntfs_error(ctx->ntfs_ino->vol->sb,
566 					"Non-resident file name. You should run chkdsk.");
567 			return -EIO;
568 		}
569 		if (attr->flags) {
570 			ntfs_error(ctx->ntfs_ino->vol->sb,
571 					"File name with invalid flags. You should run chkdsk.");
572 			return -EIO;
573 		}
574 		if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
575 			ntfs_error(ctx->ntfs_ino->vol->sb,
576 					"Unindexed file name. You should run chkdsk.");
577 			return -EIO;
578 		}
579 		file_name_attr = (struct file_name_attr *)((u8 *)attr +
580 				le16_to_cpu(attr->data.resident.value_offset));
581 		p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
582 		if (p2 < (u8 *)attr || p2 > p)
583 			goto err_corrupt_attr;
584 		/* This attribute is ok, but is it in the $Extend directory? */
585 		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) {
586 			unsigned char *s;
587 
588 			s = ntfs_attr_name_get(ctx->ntfs_ino->vol,
589 					file_name_attr->file_name,
590 					file_name_attr->file_name_length);
591 			if (!s)
592 				return 1;
593 			if (!strcmp("$Reparse", s)) {
594 				ntfs_attr_name_free(&s);
595 				return 2; /* it's reparse point file */
596 			}
597 			if (!strcmp("$ObjId", s)) {
598 				ntfs_attr_name_free(&s);
599 				return 3; /* it's object id file */
600 			}
601 			ntfs_attr_name_free(&s);
602 			return 1;	/* YES, it's an extended system file. */
603 		}
604 	}
605 	if (unlikely(err != -ENOENT))
606 		return err;
607 	if (unlikely(nr_links)) {
608 		ntfs_error(ctx->ntfs_ino->vol->sb,
609 			"Inode hard link count doesn't match number of name attributes. You should run chkdsk.");
610 		return -EIO;
611 	}
612 	return 0;	/* NO, it is not an extended system file. */
613 }
614 
615 static struct lock_class_key ntfs_dir_inval_lock_key;
616 
617 void ntfs_set_vfs_operations(struct inode *inode, mode_t mode, dev_t dev)
618 {
619 	if (S_ISDIR(mode)) {
620 		if (!NInoAttr(NTFS_I(inode))) {
621 			inode->i_op = &ntfs_dir_inode_ops;
622 			inode->i_fop = &ntfs_dir_ops;
623 		}
624 		inode->i_mapping->a_ops = &ntfs_aops;
625 		lockdep_set_class(&inode->i_mapping->invalidate_lock,
626 				  &ntfs_dir_inval_lock_key);
627 	} else if (S_ISLNK(mode)) {
628 		inode->i_op = &ntfs_symlink_inode_operations;
629 		inode->i_mapping->a_ops = &ntfs_aops;
630 	} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
631 		inode->i_op = &ntfs_special_inode_operations;
632 		init_special_inode(inode, inode->i_mode, dev);
633 	} else {
634 		if (!NInoAttr(NTFS_I(inode))) {
635 			inode->i_op = &ntfs_file_inode_ops;
636 			inode->i_fop = &ntfs_file_ops;
637 		}
638 		if (inode->i_ino == FILE_MFT)
639 			inode->i_mapping->a_ops = &ntfs_mft_aops;
640 		else
641 			inode->i_mapping->a_ops = &ntfs_aops;
642 	}
643 }
644 
645 /*
646  * ntfs_read_locked_inode - read an inode from its device
647  * @vi:		inode to read
648  *
649  * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
650  * described by @vi into memory from the device.
651  *
652  * The only fields in @vi that we need to/can look at when the function is
653  * called are i_sb, pointing to the mounted device's super block, and i_ino,
654  * the number of the inode to load.
655  *
656  * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
657  * for reading and sets up the necessary @vi fields as well as initializing
658  * the ntfs inode.
659  *
660  * Q: What locks are held when the function is called?
661  * A: i_state has I_NEW set, hence the inode is locked, also
662  *    i_count is set to 1, so it is not going to go away
663  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
664  *    is allowed to write to them. We should of course be honouring them but
665  *    we need to do that using the IS_* macros defined in include/linux/fs.h.
666  *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
667  *
668  * Return 0 on success and -errno on error.
669  */
670 static int ntfs_read_locked_inode(struct inode *vi)
671 {
672 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
673 	struct ntfs_inode *ni = NTFS_I(vi);
674 	struct mft_record *m;
675 	struct attr_record *a;
676 	struct standard_information *si;
677 	struct ntfs_attr_search_ctx *ctx;
678 	int err = 0;
679 	__le16 *name = I30;
680 	unsigned int name_len = 4, flags = 0;
681 	int extend_sys = 0;
682 	dev_t dev = 0;
683 	bool vol_err = true;
684 
685 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
686 
687 	if (uid_valid(vol->uid)) {
688 		vi->i_uid = vol->uid;
689 		flags |= NTFS_VOL_UID;
690 	} else
691 		vi->i_uid = GLOBAL_ROOT_UID;
692 
693 	if (gid_valid(vol->gid)) {
694 		vi->i_gid = vol->gid;
695 		flags |= NTFS_VOL_GID;
696 	} else
697 		vi->i_gid = GLOBAL_ROOT_GID;
698 
699 	vi->i_mode = 0777;
700 
701 	/*
702 	 * Initialize the ntfs specific part of @vi special casing
703 	 * FILE_MFT which we need to do at mount time.
704 	 */
705 	if (vi->i_ino != FILE_MFT)
706 		ntfs_init_big_inode(vi);
707 
708 	m = map_mft_record(ni);
709 	if (IS_ERR(m)) {
710 		err = PTR_ERR(m);
711 		goto err_out;
712 	}
713 
714 	ctx = ntfs_attr_get_search_ctx(ni, m);
715 	if (!ctx) {
716 		err = -ENOMEM;
717 		goto unm_err_out;
718 	}
719 
720 	if (!(m->flags & MFT_RECORD_IN_USE)) {
721 		err = -ENOENT;
722 		vol_err = false;
723 		goto unm_err_out;
724 	}
725 
726 	if (m->base_mft_record) {
727 		ntfs_error(vi->i_sb, "Inode is an extent inode!");
728 		goto unm_err_out;
729 	}
730 
731 	/* Transfer information from mft record into vfs and ntfs inodes. */
732 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
733 
734 	if (le16_to_cpu(m->link_count) < 1) {
735 		ntfs_error(vi->i_sb, "Inode link count is 0!");
736 		goto unm_err_out;
737 	}
738 	set_nlink(vi, le16_to_cpu(m->link_count));
739 
740 	/* If read-only, no one gets write permissions. */
741 	if (IS_RDONLY(vi))
742 		vi->i_mode &= ~0222;
743 
744 	/*
745 	 * Find the standard information attribute in the mft record. At this
746 	 * stage we haven't setup the attribute list stuff yet, so this could
747 	 * in fact fail if the standard information is in an extent record, but
748 	 * I don't think this actually ever happens.
749 	 */
750 	ntfs_attr_reinit_search_ctx(ctx);
751 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
752 			ctx);
753 	if (unlikely(err)) {
754 		if (err == -ENOENT)
755 			ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is missing.");
756 		goto unm_err_out;
757 	}
758 	a = ctx->attr;
759 	/* Get the standard information attribute value. */
760 	if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
761 			+ le32_to_cpu(a->data.resident.value_length) >
762 			(u8 *)ctx->mrec + vol->mft_record_size) {
763 		ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
764 		goto unm_err_out;
765 	}
766 	si = (struct standard_information *)((u8 *)a +
767 			le16_to_cpu(a->data.resident.value_offset));
768 
769 	/* Transfer information from the standard information into vi. */
770 	/*
771 	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
772 	 * but they are close enough, and in the end it doesn't really matter
773 	 * that much...
774 	 */
775 	/*
776 	 * mtime is the last change of the data within the file. Not changed
777 	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
778 	 */
779 	ni->i_crtime = ntfs2utc(si->creation_time);
780 
781 	inode_set_mtime_to_ts(vi, ntfs2utc(si->last_data_change_time));
782 	/*
783 	 * ctime is the last change of the metadata of the file. This obviously
784 	 * always changes, when mtime is changed. ctime can be changed on its
785 	 * own, mtime is then not changed, e.g. when a file is renamed.
786 	 */
787 	inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
788 	/*
789 	 * Last access to the data within the file. Not changed during a rename
790 	 * for example but changed whenever the file is written to.
791 	 */
792 	inode_set_atime_to_ts(vi, ntfs2utc(si->last_access_time));
793 	ni->flags = si->file_attributes;
794 
795 	/* Find the attribute list attribute if present. */
796 	ntfs_attr_reinit_search_ctx(ctx);
797 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
798 	if (err) {
799 		if (unlikely(err != -ENOENT)) {
800 			ntfs_error(vi->i_sb, "Failed to lookup attribute list attribute.");
801 			goto unm_err_out;
802 		}
803 	} else {
804 		if (vi->i_ino == FILE_MFT)
805 			goto skip_attr_list_load;
806 		ntfs_debug("Attribute list found in inode 0x%llx.", ni->mft_no);
807 		NInoSetAttrList(ni);
808 		a = ctx->attr;
809 		if (a->flags & ATTR_COMPRESSION_MASK) {
810 			ntfs_error(vi->i_sb,
811 				"Attribute list attribute is compressed.");
812 			goto unm_err_out;
813 		}
814 		if (a->flags & ATTR_IS_ENCRYPTED ||
815 				a->flags & ATTR_IS_SPARSE) {
816 			if (a->non_resident) {
817 				ntfs_error(vi->i_sb,
818 					"Non-resident attribute list attribute is encrypted/sparse.");
819 				goto unm_err_out;
820 			}
821 			ntfs_warning(vi->i_sb,
822 				"Resident attribute list attribute in inode 0x%llx is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.",
823 				ni->mft_no);
824 		}
825 		/* Now allocate memory for the attribute list. */
826 		ni->attr_list_size = (u32)ntfs_attr_size(a);
827 		if (!ni->attr_list_size) {
828 			ntfs_error(vi->i_sb, "Attr_list_size is zero");
829 			goto unm_err_out;
830 		}
831 		ni->attr_list = kvzalloc(ni->attr_list_size, GFP_NOFS);
832 		if (!ni->attr_list) {
833 			ntfs_error(vi->i_sb,
834 				"Not enough memory to allocate buffer for attribute list.");
835 			err = -ENOMEM;
836 			goto unm_err_out;
837 		}
838 		if (a->non_resident) {
839 			NInoSetAttrListNonResident(ni);
840 			if (a->data.non_resident.lowest_vcn) {
841 				ntfs_error(vi->i_sb, "Attribute list has non zero lowest_vcn.");
842 				goto unm_err_out;
843 			}
844 
845 			/* Now load the attribute list. */
846 			err = load_attribute_list(ni, ni->attr_list, ni->attr_list_size);
847 			if (err) {
848 				ntfs_error(vi->i_sb, "Failed to load attribute list attribute.");
849 				goto unm_err_out;
850 			}
851 		} else /* if (!a->non_resident) */ {
852 			if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
853 					+ le32_to_cpu(
854 					a->data.resident.value_length) >
855 					(u8 *)ctx->mrec + vol->mft_record_size) {
856 				ntfs_error(vi->i_sb, "Corrupt attribute list in inode.");
857 				goto unm_err_out;
858 			}
859 			/* Now copy the attribute list. */
860 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
861 					a->data.resident.value_offset),
862 					le32_to_cpu(
863 					a->data.resident.value_length));
864 		}
865 	}
866 skip_attr_list_load:
867 	err = ntfs_attr_lookup(AT_EA_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx);
868 	if (!err) {
869 		NInoSetHasEA(ni);
870 		ntfs_ea_get_wsl_inode(vi, &dev, flags);
871 	}
872 
873 	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
874 		vi->i_mode |= S_IFDIR;
875 		/*
876 		 * Apply the directory permissions mask set in the mount
877 		 * options.
878 		 */
879 		vi->i_mode &= ~vol->dmask;
880 		/* Things break without this kludge! */
881 		if (vi->i_nlink > 1)
882 			set_nlink(vi, 1);
883 	} else {
884 		if (ni->flags & FILE_ATTR_REPARSE_POINT) {
885 			unsigned int mode;
886 
887 			mode = ntfs_make_symlink(ni);
888 			if (mode)
889 				vi->i_mode |= mode;
890 			else {
891 				vi->i_mode &= ~S_IFLNK;
892 				vi->i_mode |= S_IFREG;
893 			}
894 		} else
895 			vi->i_mode |= S_IFREG;
896 		/* Apply the file permissions mask set in the mount options. */
897 		vi->i_mode &= ~vol->fmask;
898 	}
899 
900 	/*
901 	 * If an attribute list is present we now have the attribute list value
902 	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
903 	 */
904 	if (S_ISDIR(vi->i_mode)) {
905 		struct index_root *ir;
906 		u8 *ir_end, *index_end;
907 
908 view_index_meta:
909 		/* It is a directory, find index root attribute. */
910 		ntfs_attr_reinit_search_ctx(ctx);
911 		err = ntfs_attr_lookup(AT_INDEX_ROOT, name, name_len, CASE_SENSITIVE,
912 				0, NULL, 0, ctx);
913 		if (unlikely(err)) {
914 			if (err == -ENOENT)
915 				ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
916 			goto unm_err_out;
917 		}
918 		a = ctx->attr;
919 		/* Set up the state. */
920 		if (unlikely(a->non_resident)) {
921 			ntfs_error(vol->sb,
922 				"$INDEX_ROOT attribute is not resident.");
923 			goto unm_err_out;
924 		}
925 		/* Ensure the attribute name is placed before the value. */
926 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
927 				le16_to_cpu(a->data.resident.value_offset)))) {
928 			ntfs_error(vol->sb,
929 				"$INDEX_ROOT attribute name is placed after the attribute value.");
930 			goto unm_err_out;
931 		}
932 		/*
933 		 * Compressed/encrypted index root just means that the newly
934 		 * created files in that directory should be created compressed/
935 		 * encrypted. However index root cannot be both compressed and
936 		 * encrypted.
937 		 */
938 		if (a->flags & ATTR_COMPRESSION_MASK) {
939 			NInoSetCompressed(ni);
940 			ni->flags |= FILE_ATTR_COMPRESSED;
941 		}
942 		if (a->flags & ATTR_IS_ENCRYPTED) {
943 			if (a->flags & ATTR_COMPRESSION_MASK) {
944 				ntfs_error(vi->i_sb, "Found encrypted and compressed attribute.");
945 				goto unm_err_out;
946 			}
947 			NInoSetEncrypted(ni);
948 			ni->flags |= FILE_ATTR_ENCRYPTED;
949 		}
950 		if (a->flags & ATTR_IS_SPARSE) {
951 			NInoSetSparse(ni);
952 			ni->flags |= FILE_ATTR_SPARSE_FILE;
953 		}
954 		ir = (struct index_root *)((u8 *)a +
955 				le16_to_cpu(a->data.resident.value_offset));
956 		ir_end = (u8 *)ir + le32_to_cpu(a->data.resident.value_length);
957 		if (ir_end > (u8 *)ctx->mrec + vol->mft_record_size) {
958 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
959 			goto unm_err_out;
960 		}
961 		index_end = (u8 *)&ir->index +
962 				le32_to_cpu(ir->index.index_length);
963 		if (index_end > ir_end) {
964 			ntfs_error(vi->i_sb, "Directory index is corrupt.");
965 			goto unm_err_out;
966 		}
967 
968 		if (extend_sys) {
969 			if (ir->type) {
970 				ntfs_error(vi->i_sb, "Indexed attribute is not zero.");
971 				goto unm_err_out;
972 			}
973 		} else {
974 			if (ir->type != AT_FILE_NAME) {
975 				ntfs_error(vi->i_sb, "Indexed attribute is not $FILE_NAME.");
976 				goto unm_err_out;
977 			}
978 
979 			if (ir->collation_rule != COLLATION_FILE_NAME) {
980 				ntfs_error(vi->i_sb,
981 					"Index collation rule is not COLLATION_FILE_NAME.");
982 				goto unm_err_out;
983 			}
984 		}
985 
986 		ni->itype.index.collation_rule = ir->collation_rule;
987 		ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
988 		if (ni->itype.index.block_size &
989 				(ni->itype.index.block_size - 1)) {
990 			ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
991 					ni->itype.index.block_size);
992 			goto unm_err_out;
993 		}
994 		if (ni->itype.index.block_size > PAGE_SIZE) {
995 			ntfs_error(vi->i_sb,
996 				"Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
997 				ni->itype.index.block_size,
998 				PAGE_SIZE);
999 			err = -EOPNOTSUPP;
1000 			goto unm_err_out;
1001 		}
1002 		if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1003 			ntfs_error(vi->i_sb,
1004 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
1005 				ni->itype.index.block_size,
1006 				NTFS_BLOCK_SIZE);
1007 			err = -EOPNOTSUPP;
1008 			goto unm_err_out;
1009 		}
1010 		ni->itype.index.block_size_bits =
1011 				ffs(ni->itype.index.block_size) - 1;
1012 		/* Determine the size of a vcn in the directory index. */
1013 		if (vol->cluster_size <= ni->itype.index.block_size) {
1014 			ni->itype.index.vcn_size = vol->cluster_size;
1015 			ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1016 		} else {
1017 			ni->itype.index.vcn_size = vol->sector_size;
1018 			ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1019 		}
1020 
1021 		/* Setup the index allocation attribute, even if not present. */
1022 		ni->type = AT_INDEX_ROOT;
1023 		ni->name = name;
1024 		ni->name_len = name_len;
1025 		vi->i_size = ni->initialized_size = ni->data_size =
1026 			le32_to_cpu(a->data.resident.value_length);
1027 		ni->allocated_size = (ni->data_size + 7) & ~7;
1028 		/* We are done with the mft record, so we release it. */
1029 		ntfs_attr_put_search_ctx(ctx);
1030 		unmap_mft_record(ni);
1031 		m = NULL;
1032 		ctx = NULL;
1033 		/* Setup the operations for this inode. */
1034 		ntfs_set_vfs_operations(vi, S_IFDIR, 0);
1035 		if (ir->index.flags & LARGE_INDEX)
1036 			NInoSetIndexAllocPresent(ni);
1037 	} else {
1038 		/* It is a file. */
1039 		ntfs_attr_reinit_search_ctx(ctx);
1040 
1041 		/* Setup the data attribute, even if not present. */
1042 		ni->type = AT_DATA;
1043 		ni->name = AT_UNNAMED;
1044 		ni->name_len = 0;
1045 
1046 		/* Find first extent of the unnamed data attribute. */
1047 		err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
1048 		if (unlikely(err)) {
1049 			vi->i_size = ni->initialized_size =
1050 					ni->allocated_size = 0;
1051 			if (err != -ENOENT) {
1052 				ntfs_error(vi->i_sb, "Failed to lookup $DATA attribute.");
1053 				goto unm_err_out;
1054 			}
1055 			/*
1056 			 * FILE_Secure does not have an unnamed $DATA
1057 			 * attribute, so we special case it here.
1058 			 */
1059 			if (vi->i_ino == FILE_Secure)
1060 				goto no_data_attr_special_case;
1061 			/*
1062 			 * Most if not all the system files in the $Extend
1063 			 * system directory do not have unnamed data
1064 			 * attributes so we need to check if the parent
1065 			 * directory of the file is FILE_Extend and if it is
1066 			 * ignore this error. To do this we need to get the
1067 			 * name of this inode from the mft record as the name
1068 			 * contains the back reference to the parent directory.
1069 			 */
1070 			extend_sys = ntfs_is_extended_system_file(ctx);
1071 			if (extend_sys > 0) {
1072 				if (m->flags & MFT_RECORD_IS_VIEW_INDEX) {
1073 					if (extend_sys == 2) {
1074 						name = reparse_index_name;
1075 						name_len = 2;
1076 						goto view_index_meta;
1077 					} else if (extend_sys == 3) {
1078 						name = objid_index_name;
1079 						name_len = 2;
1080 						goto view_index_meta;
1081 					}
1082 				}
1083 				goto no_data_attr_special_case;
1084 			}
1085 
1086 			err = extend_sys;
1087 			ntfs_error(vi->i_sb, "$DATA attribute is missing, err : %d", err);
1088 			goto unm_err_out;
1089 		}
1090 		a = ctx->attr;
1091 		/* Setup the state. */
1092 		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1093 			if (a->flags & ATTR_COMPRESSION_MASK) {
1094 				NInoSetCompressed(ni);
1095 				ni->flags |= FILE_ATTR_COMPRESSED;
1096 				if (vol->cluster_size > 4096) {
1097 					ntfs_error(vi->i_sb,
1098 						"Found compressed data but compression is disabled due to cluster size (%i) > 4kiB.",
1099 						vol->cluster_size);
1100 					goto unm_err_out;
1101 				}
1102 				if ((a->flags & ATTR_COMPRESSION_MASK)
1103 						!= ATTR_IS_COMPRESSED) {
1104 					ntfs_error(vi->i_sb,
1105 						"Found unknown compression method or corrupt file.");
1106 					goto unm_err_out;
1107 				}
1108 			}
1109 			if (a->flags & ATTR_IS_SPARSE) {
1110 				NInoSetSparse(ni);
1111 				ni->flags |= FILE_ATTR_SPARSE_FILE;
1112 			}
1113 		}
1114 		if (a->flags & ATTR_IS_ENCRYPTED) {
1115 			if (NInoCompressed(ni)) {
1116 				ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1117 				goto unm_err_out;
1118 			}
1119 			NInoSetEncrypted(ni);
1120 			ni->flags |= FILE_ATTR_ENCRYPTED;
1121 		}
1122 		if (a->non_resident) {
1123 			NInoSetNonResident(ni);
1124 			if (NInoCompressed(ni) || NInoSparse(ni)) {
1125 				if (NInoCompressed(ni) &&
1126 				    a->data.non_resident.compression_unit != 4) {
1127 					ntfs_error(vi->i_sb,
1128 						"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1129 						a->data.non_resident.compression_unit);
1130 					err = -EOPNOTSUPP;
1131 					goto unm_err_out;
1132 				}
1133 
1134 				if (NInoSparse(ni) &&
1135 				    a->data.non_resident.compression_unit &&
1136 				    a->data.non_resident.compression_unit !=
1137 				     vol->sparse_compression_unit) {
1138 					ntfs_error(vi->i_sb,
1139 						   "Found non-standard compression unit (%u instead of 0 or %d).  Cannot handle this.",
1140 						   a->data.non_resident.compression_unit,
1141 						   vol->sparse_compression_unit);
1142 					err = -EOPNOTSUPP;
1143 					goto unm_err_out;
1144 				}
1145 
1146 
1147 				if (a->data.non_resident.compression_unit) {
1148 					ni->itype.compressed.block_size = 1U <<
1149 							(a->data.non_resident.compression_unit +
1150 							vol->cluster_size_bits);
1151 					ni->itype.compressed.block_size_bits =
1152 							ffs(ni->itype.compressed.block_size) - 1;
1153 					ni->itype.compressed.block_clusters =
1154 							1U << a->data.non_resident.compression_unit;
1155 				} else {
1156 					ni->itype.compressed.block_size = 0;
1157 					ni->itype.compressed.block_size_bits =
1158 							0;
1159 					ni->itype.compressed.block_clusters =
1160 							0;
1161 				}
1162 				ni->itype.compressed.size = le64_to_cpu(
1163 						a->data.non_resident.compressed_size);
1164 			}
1165 			if (a->data.non_resident.lowest_vcn) {
1166 				ntfs_error(vi->i_sb,
1167 					"First extent of $DATA attribute has non zero lowest_vcn.");
1168 				goto unm_err_out;
1169 			}
1170 			vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1171 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1172 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1173 		} else { /* Resident attribute. */
1174 			vi->i_size = ni->data_size = ni->initialized_size = le32_to_cpu(
1175 					a->data.resident.value_length);
1176 			ni->allocated_size = le32_to_cpu(a->length) -
1177 					le16_to_cpu(
1178 					a->data.resident.value_offset);
1179 			if (vi->i_size > ni->allocated_size) {
1180 				ntfs_error(vi->i_sb,
1181 					"Resident data attribute is corrupt (size exceeds allocation).");
1182 				goto unm_err_out;
1183 			}
1184 		}
1185 no_data_attr_special_case:
1186 		/* We are done with the mft record, so we release it. */
1187 		ntfs_attr_put_search_ctx(ctx);
1188 		unmap_mft_record(ni);
1189 		m = NULL;
1190 		ctx = NULL;
1191 		/* Setup the operations for this inode. */
1192 		ntfs_set_vfs_operations(vi, vi->i_mode, dev);
1193 	}
1194 
1195 	if (NVolSysImmutable(vol) && (ni->flags & FILE_ATTR_SYSTEM) &&
1196 	    !S_ISFIFO(vi->i_mode) && !S_ISSOCK(vi->i_mode) && !S_ISLNK(vi->i_mode))
1197 		vi->i_flags |= S_IMMUTABLE;
1198 
1199 	/*
1200 	 * The number of 512-byte blocks used on disk (for stat). This is in so
1201 	 * far inaccurate as it doesn't account for any named streams or other
1202 	 * special non-resident attributes, but that is how Windows works, too,
1203 	 * so we are at least consistent with Windows, if not entirely
1204 	 * consistent with the Linux Way. Doing it the Linux Way would cause a
1205 	 * significant slowdown as it would involve iterating over all
1206 	 * attributes in the mft record and adding the allocated/compressed
1207 	 * sizes of all non-resident attributes present to give us the Linux
1208 	 * correct size that should go into i_blocks (after division by 512).
1209 	 */
1210 	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1211 		vi->i_blocks = ni->itype.compressed.size >> 9;
1212 	else
1213 		vi->i_blocks = ni->allocated_size >> 9;
1214 
1215 	ntfs_debug("Done.");
1216 	return 0;
1217 unm_err_out:
1218 	if (!err)
1219 		err = -EIO;
1220 	if (ctx)
1221 		ntfs_attr_put_search_ctx(ctx);
1222 	if (m)
1223 		unmap_mft_record(ni);
1224 err_out:
1225 	if (err != -EOPNOTSUPP && err != -ENOMEM && vol_err == true) {
1226 		ntfs_error(vol->sb,
1227 			"Failed with error code %i.  Marking corrupt inode 0x%llx as bad.  Run chkdsk.",
1228 			err, ni->mft_no);
1229 		NVolSetErrors(vol);
1230 	}
1231 	return err;
1232 }
1233 
1234 /*
1235  * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1236  * @base_vi:	base inode
1237  * @vi:		attribute inode to read
1238  *
1239  * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1240  * attribute inode described by @vi into memory from the base mft record
1241  * described by @base_ni.
1242  *
1243  * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1244  * reading and looks up the attribute described by @vi before setting up the
1245  * necessary fields in @vi as well as initializing the ntfs inode.
1246  *
1247  * Q: What locks are held when the function is called?
1248  * A: i_state has I_NEW set, hence the inode is locked, also
1249  *    i_count is set to 1, so it is not going to go away
1250  *
1251  * Return 0 on success and -errno on error.
1252  *
1253  * Note this cannot be called for AT_INDEX_ALLOCATION.
1254  */
1255 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1256 {
1257 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1258 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi);
1259 	struct mft_record *m;
1260 	struct attr_record *a;
1261 	struct ntfs_attr_search_ctx *ctx;
1262 	int err = 0;
1263 
1264 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1265 
1266 	ntfs_init_big_inode(vi);
1267 
1268 	/* Just mirror the values from the base inode. */
1269 	vi->i_uid	= base_vi->i_uid;
1270 	vi->i_gid	= base_vi->i_gid;
1271 	set_nlink(vi, base_vi->i_nlink);
1272 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1273 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1274 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1275 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1276 
1277 	/* Set inode type to zero but preserve permissions. */
1278 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1279 
1280 	m = map_mft_record(base_ni);
1281 	if (IS_ERR(m)) {
1282 		err = PTR_ERR(m);
1283 		goto err_out;
1284 	}
1285 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1286 	if (!ctx) {
1287 		err = -ENOMEM;
1288 		goto unm_err_out;
1289 	}
1290 	/* Find the attribute. */
1291 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1292 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1293 	if (unlikely(err))
1294 		goto unm_err_out;
1295 	a = ctx->attr;
1296 	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1297 		if (a->flags & ATTR_COMPRESSION_MASK) {
1298 			NInoSetCompressed(ni);
1299 			ni->flags |= FILE_ATTR_COMPRESSED;
1300 			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1301 					ni->name_len)) {
1302 				ntfs_error(vi->i_sb,
1303 					   "Found compressed non-data or named data attribute.");
1304 				goto unm_err_out;
1305 			}
1306 			if (vol->cluster_size > 4096) {
1307 				ntfs_error(vi->i_sb,
1308 					"Found compressed attribute but compression is disabled due to cluster size (%i) > 4kiB.",
1309 					vol->cluster_size);
1310 				goto unm_err_out;
1311 			}
1312 			if ((a->flags & ATTR_COMPRESSION_MASK) !=
1313 					ATTR_IS_COMPRESSED) {
1314 				ntfs_error(vi->i_sb, "Found unknown compression method.");
1315 				goto unm_err_out;
1316 			}
1317 		}
1318 		/*
1319 		 * The compressed/sparse flag set in an index root just means
1320 		 * to compress all files.
1321 		 */
1322 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1323 			ntfs_error(vi->i_sb,
1324 				"Found mst protected attribute but the attribute is %s.",
1325 				NInoCompressed(ni) ? "compressed" : "sparse");
1326 			goto unm_err_out;
1327 		}
1328 		if (a->flags & ATTR_IS_SPARSE) {
1329 			NInoSetSparse(ni);
1330 			ni->flags |= FILE_ATTR_SPARSE_FILE;
1331 		}
1332 	}
1333 	if (a->flags & ATTR_IS_ENCRYPTED) {
1334 		if (NInoCompressed(ni)) {
1335 			ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1336 			goto unm_err_out;
1337 		}
1338 		/*
1339 		 * The encryption flag set in an index root just means to
1340 		 * encrypt all files.
1341 		 */
1342 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1343 			ntfs_error(vi->i_sb,
1344 				"Found mst protected attribute but the attribute is encrypted.");
1345 			goto unm_err_out;
1346 		}
1347 		if (ni->type != AT_DATA) {
1348 			ntfs_error(vi->i_sb,
1349 				"Found encrypted non-data attribute.");
1350 			goto unm_err_out;
1351 		}
1352 		NInoSetEncrypted(ni);
1353 		ni->flags |= FILE_ATTR_ENCRYPTED;
1354 	}
1355 	if (!a->non_resident) {
1356 		/* Ensure the attribute name is placed before the value. */
1357 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1358 				le16_to_cpu(a->data.resident.value_offset)))) {
1359 			ntfs_error(vol->sb,
1360 				"Attribute name is placed after the attribute value.");
1361 			goto unm_err_out;
1362 		}
1363 		if (NInoMstProtected(ni)) {
1364 			ntfs_error(vi->i_sb,
1365 				"Found mst protected attribute but the attribute is resident.");
1366 			goto unm_err_out;
1367 		}
1368 		vi->i_size = ni->initialized_size = ni->data_size = le32_to_cpu(
1369 				a->data.resident.value_length);
1370 		ni->allocated_size = le32_to_cpu(a->length) -
1371 				le16_to_cpu(a->data.resident.value_offset);
1372 		if (vi->i_size > ni->allocated_size) {
1373 			ntfs_error(vi->i_sb,
1374 				"Resident attribute is corrupt (size exceeds allocation).");
1375 			goto unm_err_out;
1376 		}
1377 	} else {
1378 		NInoSetNonResident(ni);
1379 		/*
1380 		 * Ensure the attribute name is placed before the mapping pairs
1381 		 * array.
1382 		 */
1383 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1384 				le16_to_cpu(
1385 				a->data.non_resident.mapping_pairs_offset)))) {
1386 			ntfs_error(vol->sb,
1387 				"Attribute name is placed after the mapping pairs array.");
1388 			goto unm_err_out;
1389 		}
1390 		if (NInoCompressed(ni) || NInoSparse(ni)) {
1391 			if (NInoCompressed(ni) && a->data.non_resident.compression_unit != 4) {
1392 				ntfs_error(vi->i_sb,
1393 					"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1394 					a->data.non_resident.compression_unit);
1395 				err = -EOPNOTSUPP;
1396 				goto unm_err_out;
1397 			}
1398 			if (a->data.non_resident.compression_unit) {
1399 				ni->itype.compressed.block_size = 1U <<
1400 						(a->data.non_resident.compression_unit +
1401 						vol->cluster_size_bits);
1402 				ni->itype.compressed.block_size_bits =
1403 						ffs(ni->itype.compressed.block_size) - 1;
1404 				ni->itype.compressed.block_clusters = 1U <<
1405 						a->data.non_resident.compression_unit;
1406 			} else {
1407 				ni->itype.compressed.block_size = 0;
1408 				ni->itype.compressed.block_size_bits = 0;
1409 				ni->itype.compressed.block_clusters = 0;
1410 			}
1411 			ni->itype.compressed.size = le64_to_cpu(
1412 					a->data.non_resident.compressed_size);
1413 		}
1414 		if (a->data.non_resident.lowest_vcn) {
1415 			ntfs_error(vi->i_sb, "First extent of attribute has non-zero lowest_vcn.");
1416 			goto unm_err_out;
1417 		}
1418 		vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1419 		ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1420 		ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1421 	}
1422 	vi->i_mapping->a_ops = &ntfs_aops;
1423 	if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1424 		vi->i_blocks = ni->itype.compressed.size >> 9;
1425 	else
1426 		vi->i_blocks = ni->allocated_size >> 9;
1427 	/*
1428 	 * Make sure the base inode does not go away and attach it to the
1429 	 * attribute inode.
1430 	 */
1431 	if (!igrab(base_vi)) {
1432 		err = -ENOENT;
1433 		goto unm_err_out;
1434 	}
1435 	ni->ext.base_ntfs_ino = base_ni;
1436 	ni->nr_extents = -1;
1437 
1438 	ntfs_attr_put_search_ctx(ctx);
1439 	unmap_mft_record(base_ni);
1440 
1441 	ntfs_debug("Done.");
1442 	return 0;
1443 
1444 unm_err_out:
1445 	if (!err)
1446 		err = -EIO;
1447 	if (ctx)
1448 		ntfs_attr_put_search_ctx(ctx);
1449 	unmap_mft_record(base_ni);
1450 err_out:
1451 	if (err != -ENOENT)
1452 		ntfs_error(vol->sb,
1453 			"Failed with error code %i while reading attribute inode (mft_no 0x%llx, type 0x%x, name_len %i).  Marking corrupt inode and base inode 0x%llx as bad.  Run chkdsk.",
1454 			err, ni->mft_no, ni->type, ni->name_len,
1455 			base_ni->mft_no);
1456 	if (err != -ENOENT && err != -ENOMEM)
1457 		NVolSetErrors(vol);
1458 	return err;
1459 }
1460 
1461 /*
1462  * ntfs_read_locked_index_inode - read an index inode from its base inode
1463  * @base_vi:	base inode
1464  * @vi:		index inode to read
1465  *
1466  * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1467  * index inode described by @vi into memory from the base mft record described
1468  * by @base_ni.
1469  *
1470  * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1471  * reading and looks up the attributes relating to the index described by @vi
1472  * before setting up the necessary fields in @vi as well as initializing the
1473  * ntfs inode.
1474  *
1475  * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1476  * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
1477  * are setup like directory inodes since directories are a special case of
1478  * indices ao they need to be treated in much the same way.  Most importantly,
1479  * for small indices the index allocation attribute might not actually exist.
1480  * However, the index root attribute always exists but this does not need to
1481  * have an inode associated with it and this is why we define a new inode type
1482  * index.  Also, like for directories, we need to have an attribute inode for
1483  * the bitmap attribute corresponding to the index allocation attribute and we
1484  * can store this in the appropriate field of the inode, just like we do for
1485  * normal directory inodes.
1486  *
1487  * Q: What locks are held when the function is called?
1488  * A: i_state has I_NEW set, hence the inode is locked, also
1489  *    i_count is set to 1, so it is not going to go away
1490  *
1491  * Return 0 on success and -errno on error.
1492  */
1493 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1494 {
1495 	loff_t bvi_size;
1496 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1497 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi), *bni;
1498 	struct inode *bvi;
1499 	struct mft_record *m;
1500 	struct attr_record *a;
1501 	struct ntfs_attr_search_ctx *ctx;
1502 	struct index_root *ir;
1503 	u8 *ir_end, *index_end;
1504 	int err = 0;
1505 
1506 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1507 	lockdep_assert_held(&base_ni->mrec_lock);
1508 
1509 	ntfs_init_big_inode(vi);
1510 	/* Just mirror the values from the base inode. */
1511 	vi->i_uid	= base_vi->i_uid;
1512 	vi->i_gid	= base_vi->i_gid;
1513 	set_nlink(vi, base_vi->i_nlink);
1514 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1515 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1516 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1517 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1518 	/* Set inode type to zero but preserve permissions. */
1519 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1520 	/* Map the mft record for the base inode. */
1521 	m = map_mft_record(base_ni);
1522 	if (IS_ERR(m)) {
1523 		err = PTR_ERR(m);
1524 		goto err_out;
1525 	}
1526 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1527 	if (!ctx) {
1528 		err = -ENOMEM;
1529 		goto unm_err_out;
1530 	}
1531 	/* Find the index root attribute. */
1532 	err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
1533 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1534 	if (unlikely(err)) {
1535 		if (err == -ENOENT)
1536 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
1537 		goto unm_err_out;
1538 	}
1539 	a = ctx->attr;
1540 	/* Set up the state. */
1541 	if (unlikely(a->non_resident)) {
1542 		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1543 		goto unm_err_out;
1544 	}
1545 	/* Ensure the attribute name is placed before the value. */
1546 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1547 			le16_to_cpu(a->data.resident.value_offset)))) {
1548 		ntfs_error(vol->sb,
1549 			"$INDEX_ROOT attribute name is placed after the attribute value.");
1550 		goto unm_err_out;
1551 	}
1552 
1553 	ir = (struct index_root *)((u8 *)a + le16_to_cpu(a->data.resident.value_offset));
1554 	ir_end = (u8 *)ir + le32_to_cpu(a->data.resident.value_length);
1555 	if (ir_end > (u8 *)ctx->mrec + vol->mft_record_size) {
1556 		ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1557 		goto unm_err_out;
1558 	}
1559 	index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length);
1560 	if (index_end > ir_end) {
1561 		ntfs_error(vi->i_sb, "Index is corrupt.");
1562 		goto unm_err_out;
1563 	}
1564 
1565 	ni->itype.index.collation_rule = ir->collation_rule;
1566 	ntfs_debug("Index collation rule is 0x%x.",
1567 			le32_to_cpu(ir->collation_rule));
1568 	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1569 	if (!is_power_of_2(ni->itype.index.block_size)) {
1570 		ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
1571 				ni->itype.index.block_size);
1572 		goto unm_err_out;
1573 	}
1574 	if (ni->itype.index.block_size > PAGE_SIZE) {
1575 		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
1576 				ni->itype.index.block_size, PAGE_SIZE);
1577 		err = -EOPNOTSUPP;
1578 		goto unm_err_out;
1579 	}
1580 	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1581 		ntfs_error(vi->i_sb,
1582 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
1583 				ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1584 		err = -EOPNOTSUPP;
1585 		goto unm_err_out;
1586 	}
1587 	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
1588 	/* Determine the size of a vcn in the index. */
1589 	if (vol->cluster_size <= ni->itype.index.block_size) {
1590 		ni->itype.index.vcn_size = vol->cluster_size;
1591 		ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1592 	} else {
1593 		ni->itype.index.vcn_size = vol->sector_size;
1594 		ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1595 	}
1596 
1597 	/* Find index allocation attribute. */
1598 	ntfs_attr_reinit_search_ctx(ctx);
1599 	err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
1600 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1601 	if (unlikely(err)) {
1602 		if (err == -ENOENT) {
1603 			/* No index allocation. */
1604 			vi->i_size = ni->initialized_size = ni->allocated_size = 0;
1605 			/* We are done with the mft record, so we release it. */
1606 			ntfs_attr_put_search_ctx(ctx);
1607 			unmap_mft_record(base_ni);
1608 			m = NULL;
1609 			ctx = NULL;
1610 			goto skip_large_index_stuff;
1611 		} else
1612 			ntfs_error(vi->i_sb, "Failed to lookup $INDEX_ALLOCATION attribute.");
1613 		goto unm_err_out;
1614 	}
1615 	NInoSetIndexAllocPresent(ni);
1616 	NInoSetNonResident(ni);
1617 	ni->type = AT_INDEX_ALLOCATION;
1618 
1619 	a = ctx->attr;
1620 	if (!a->non_resident) {
1621 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is resident.");
1622 		goto unm_err_out;
1623 	}
1624 	/*
1625 	 * Ensure the attribute name is placed before the mapping pairs array.
1626 	 */
1627 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1628 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset)))) {
1629 		ntfs_error(vol->sb,
1630 			"$INDEX_ALLOCATION attribute name is placed after the mapping pairs array.");
1631 		goto unm_err_out;
1632 	}
1633 	if (a->flags & ATTR_IS_ENCRYPTED) {
1634 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is encrypted.");
1635 		goto unm_err_out;
1636 	}
1637 	if (a->flags & ATTR_IS_SPARSE) {
1638 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1639 		goto unm_err_out;
1640 	}
1641 	if (a->flags & ATTR_COMPRESSION_MASK) {
1642 		ntfs_error(vi->i_sb,
1643 			"$INDEX_ALLOCATION attribute is compressed.");
1644 		goto unm_err_out;
1645 	}
1646 	if (a->data.non_resident.lowest_vcn) {
1647 		ntfs_error(vi->i_sb,
1648 			"First extent of $INDEX_ALLOCATION attribute has non zero lowest_vcn.");
1649 		goto unm_err_out;
1650 	}
1651 	vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1652 	ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1653 	ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1654 	/*
1655 	 * We are done with the mft record, so we release it.  Otherwise
1656 	 * we would deadlock in ntfs_attr_iget().
1657 	 */
1658 	ntfs_attr_put_search_ctx(ctx);
1659 	unmap_mft_record(base_ni);
1660 	m = NULL;
1661 	ctx = NULL;
1662 	/* Get the index bitmap attribute inode. */
1663 	bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
1664 	if (IS_ERR(bvi)) {
1665 		ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1666 		err = PTR_ERR(bvi);
1667 		goto unm_err_out;
1668 	}
1669 	bni = NTFS_I(bvi);
1670 	if (NInoCompressed(bni) || NInoEncrypted(bni) ||
1671 			NInoSparse(bni)) {
1672 		ntfs_error(vi->i_sb,
1673 			"$BITMAP attribute is compressed and/or encrypted and/or sparse.");
1674 		goto iput_unm_err_out;
1675 	}
1676 	/* Consistency check bitmap size vs. index allocation size. */
1677 	bvi_size = i_size_read(bvi);
1678 	if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1679 		ntfs_error(vi->i_sb,
1680 			"Index bitmap too small (0x%llx) for index allocation (0x%llx).",
1681 			bvi_size << 3, vi->i_size);
1682 		goto iput_unm_err_out;
1683 	}
1684 	iput(bvi);
1685 skip_large_index_stuff:
1686 	/* Setup the operations for this index inode. */
1687 	ntfs_set_vfs_operations(vi, S_IFDIR, 0);
1688 	vi->i_blocks = ni->allocated_size >> 9;
1689 	/*
1690 	 * Make sure the base inode doesn't go away and attach it to the
1691 	 * index inode.
1692 	 */
1693 	if (!igrab(base_vi))
1694 		goto unm_err_out;
1695 	ni->ext.base_ntfs_ino = base_ni;
1696 	ni->nr_extents = -1;
1697 
1698 	ntfs_debug("Done.");
1699 	return 0;
1700 iput_unm_err_out:
1701 	iput(bvi);
1702 unm_err_out:
1703 	if (!err)
1704 		err = -EIO;
1705 	if (ctx)
1706 		ntfs_attr_put_search_ctx(ctx);
1707 	if (m)
1708 		unmap_mft_record(base_ni);
1709 err_out:
1710 	ntfs_error(vi->i_sb,
1711 		"Failed with error code %i while reading index inode (mft_no 0x%llx, name_len %i.",
1712 		err, ni->mft_no, ni->name_len);
1713 	if (err != -EOPNOTSUPP && err != -ENOMEM)
1714 		NVolSetErrors(vol);
1715 	return err;
1716 }
1717 
1718 /*
1719  * load_attribute_list_mount - load an attribute list into memory
1720  * @vol:		ntfs volume from which to read
1721  * @rl:			runlist of the attribute list
1722  * @al_start:		destination buffer
1723  * @size:		size of the destination buffer in bytes
1724  * @initialized_size:	initialized size of the attribute list
1725  *
1726  * Walk the runlist @rl and load all clusters from it copying them into
1727  * the linear buffer @al. The maximum number of bytes copied to @al is @size
1728  * bytes. Note, @size does not need to be a multiple of the cluster size. If
1729  * @initialized_size is less than @size, the region in @al between
1730  * @initialized_size and @size will be zeroed and not read from disk.
1731  *
1732  * Return 0 on success or -errno on error.
1733  */
1734 static int load_attribute_list_mount(struct ntfs_volume *vol,
1735 		struct runlist_element *rl, u8 *al_start, const s64 size,
1736 		const s64 initialized_size)
1737 {
1738 	s64 lcn;
1739 	u8 *al = al_start;
1740 	u8 *al_end = al + initialized_size;
1741 	struct super_block *sb;
1742 	int err = 0;
1743 	loff_t rl_byte_off, rl_byte_len;
1744 
1745 	ntfs_debug("Entering.");
1746 	if (!vol || !rl || !al || size <= 0 || initialized_size < 0 ||
1747 			initialized_size > size)
1748 		return -EINVAL;
1749 	if (!initialized_size) {
1750 		memset(al, 0, size);
1751 		return 0;
1752 	}
1753 	sb = vol->sb;
1754 
1755 	/* Read all clusters specified by the runlist one run at a time. */
1756 	while (rl->length) {
1757 		lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
1758 		ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
1759 				(unsigned long long)rl->vcn,
1760 				(unsigned long long)lcn);
1761 		/* The attribute list cannot be sparse. */
1762 		if (lcn < 0) {
1763 			ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read attribute list.");
1764 			goto err_out;
1765 		}
1766 
1767 		rl_byte_off = ntfs_cluster_to_bytes(vol, lcn);
1768 		rl_byte_len = ntfs_cluster_to_bytes(vol, rl->length);
1769 
1770 		if (al + rl_byte_len > al_end)
1771 			rl_byte_len = al_end - al;
1772 
1773 		err = ntfs_bdev_read(sb->s_bdev, al, rl_byte_off,
1774 				   round_up(rl_byte_len, SECTOR_SIZE));
1775 		if (err) {
1776 			ntfs_error(sb, "Cannot read attribute list.");
1777 			goto err_out;
1778 		}
1779 
1780 		if (al + rl_byte_len >= al_end) {
1781 			if (initialized_size < size)
1782 				goto initialize;
1783 			goto done;
1784 		}
1785 
1786 		al += rl_byte_len;
1787 		rl++;
1788 	}
1789 	if (initialized_size < size) {
1790 initialize:
1791 		memset(al_start + initialized_size, 0, size - initialized_size);
1792 	}
1793 done:
1794 	return err;
1795 	/* Real overflow! */
1796 	ntfs_error(sb, "Attribute list buffer overflow. Read attribute list is truncated.");
1797 err_out:
1798 	err = -EIO;
1799 	goto done;
1800 }
1801 
1802 /*
1803  * Lock validator classes reserved for the MFT inode.  It has special locking,
1804  * so its locking rules are split off from those of other inodes.  The MFT
1805  * inode can never be accessed from the VFS side (or even internally), only by
1806  * the map_mft functions.  NOTE(review): going by the names, presumably one key
1807  * each for the MFT inode's runlist lock and mrec lock - confirm at the user.
1808  */
1809 static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1810 
1811 /*
1812  * ntfs_read_inode_mount - special read_inode for mount time use only
1813  * @vi:		inode to read
1814  *
1815  * Read inode FILE_MFT at mount time, only called with super_block lock
1816  * held from within the read_super() code path.
1817  *
1818  * This function exists because when it is called the page cache for $MFT/$DATA
1819  * is not initialized and hence we cannot get at the contents of mft records
1820  * by calling map_mft_record*().
1821  *
1822  * Further it needs to cope with the circular references problem, i.e. cannot
1823  * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1824  * we do not know where the other extent mft records are yet and again, because
1825  * we cannot call map_mft_record*() yet.  Obviously this applies only when an
1826  * attribute list is actually present in $MFT inode.
1827  *
1828  * We solve these problems by starting with the $DATA attribute before anything
1829  * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
1830  * extent is found, we ntfs_mapping_pairs_decompress() including the implied
1831  * ntfs_runlists_merge().  Each step of the iteration necessarily provides
1832  * sufficient information for the next step to complete.
1833  *
1834  * This should work but there are two possible pit falls (see inline comments
1835  * below), but only time will tell if they are real pits or just smoke...
1836  */
1837 int ntfs_read_inode_mount(struct inode *vi)
1838 {
1839 	s64 next_vcn, last_vcn, highest_vcn;
1840 	struct super_block *sb = vi->i_sb;
1841 	struct ntfs_volume *vol = NTFS_SB(sb);
1842 	struct ntfs_inode *ni = NTFS_I(vi);
1843 	struct mft_record *m = NULL;
1844 	struct attr_record *a;
1845 	struct ntfs_attr_search_ctx *ctx;
1846 	unsigned int i, nr_blocks;
1847 	int err;
1848 	size_t new_rl_count;
1849 
1850 	ntfs_debug("Entering.");
1851 
1852 	/* Initialize the ntfs specific part of @vi. */
1853 	ntfs_init_big_inode(vi);
1854 
1855 
1856 	/* Setup the data attribute. It is special as it is mst protected. */
1857 	NInoSetNonResident(ni);
1858 	NInoSetMstProtected(ni);
1859 	NInoSetSparseDisabled(ni);
1860 	ni->type = AT_DATA;
1861 	ni->name = AT_UNNAMED;
1862 	ni->name_len = 0;
1863 	/*
1864 	 * This sets up our little cheat allowing us to reuse the async read io
1865 	 * completion handler for directories.
1866 	 */
1867 	ni->itype.index.block_size = vol->mft_record_size;
1868 	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1869 
1870 	/* Very important! Needed to be able to call map_mft_record*(). */
1871 	vol->mft_ino = vi;
1872 
1873 	/* Allocate enough memory to read the first mft record. */
1874 	if (vol->mft_record_size > 64 * 1024) {
1875 		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1876 				vol->mft_record_size);
1877 		goto err_out;
1878 	}
1879 
1880 	i = vol->mft_record_size;
1881 	if (i < sb->s_blocksize)
1882 		i = sb->s_blocksize;
1883 
1884 	m = kzalloc(i, GFP_NOFS);
1885 	if (!m) {
1886 		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1887 		goto err_out;
1888 	}
1889 
1890 	/* Determine the first block of the $MFT/$DATA attribute. */
1891 	nr_blocks = ntfs_bytes_to_sector(vol, vol->mft_record_size);
1892 	if (!nr_blocks)
1893 		nr_blocks = 1;
1894 
1895 	/* Load $MFT/$DATA's first mft record. */
1896 	err = ntfs_bdev_read(sb->s_bdev, (char *)m,
1897 			     ntfs_cluster_to_bytes(vol, vol->mft_lcn), i);
1898 	if (err) {
1899 		ntfs_error(sb, "Device read failed.");
1900 		goto err_out;
1901 	}
1902 
1903 	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
1904 		ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
1905 				le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
1906 		goto err_out;
1907 	}
1908 
1909 	/* Apply the mst fixups. */
1910 	if (post_read_mst_fixup((struct ntfs_record *)m, vol->mft_record_size)) {
1911 		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1912 		goto err_out;
1913 	}
1914 
1915 	if (ntfs_mft_record_check(vol, m, FILE_MFT)) {
1916 		ntfs_error(sb, "ntfs_mft_record_check failed. $MFT is corrupt.");
1917 		goto err_out;
1918 	}
1919 
1920 	/* Need this to sanity check attribute list references to $MFT. */
1921 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1922 
1923 	/* Provides read_folio() for map_mft_record(). */
1924 	vi->i_mapping->a_ops = &ntfs_mft_aops;
1925 
1926 	ctx = ntfs_attr_get_search_ctx(ni, m);
1927 	if (!ctx) {
1928 		err = -ENOMEM;
1929 		goto err_out;
1930 	}
1931 
1932 	/* Find the attribute list attribute if present. */
1933 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
1934 	if (err) {
1935 		if (unlikely(err != -ENOENT)) {
1936 			ntfs_error(sb,
1937 				"Failed to lookup attribute list attribute. You should run chkdsk.");
1938 			goto put_err_out;
1939 		}
1940 	} else /* if (!err) */ {
1941 		struct attr_list_entry *al_entry, *next_al_entry;
1942 		u8 *al_end;
1943 		static const char *es = "  Not allowed.  $MFT is corrupt.  You should run chkdsk.";
1944 
1945 		ntfs_debug("Attribute list attribute found in $MFT.");
1946 		NInoSetAttrList(ni);
1947 		a = ctx->attr;
1948 		if (a->flags & ATTR_COMPRESSION_MASK) {
1949 			ntfs_error(sb,
1950 				"Attribute list attribute is compressed.%s",
1951 				es);
1952 			goto put_err_out;
1953 		}
1954 		if (a->flags & ATTR_IS_ENCRYPTED ||
1955 				a->flags & ATTR_IS_SPARSE) {
1956 			if (a->non_resident) {
1957 				ntfs_error(sb,
1958 					"Non-resident attribute list attribute is encrypted/sparse.%s",
1959 					es);
1960 				goto put_err_out;
1961 			}
1962 			ntfs_warning(sb,
1963 				"Resident attribute list attribute in $MFT system file is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.");
1964 		}
1965 		/* Now allocate memory for the attribute list. */
1966 		ni->attr_list_size = (u32)ntfs_attr_size(a);
1967 		if (!ni->attr_list_size) {
1968 			ntfs_error(sb, "Attr_list_size is zero");
1969 			goto put_err_out;
1970 		}
1971 		ni->attr_list = kvzalloc(round_up(ni->attr_list_size, SECTOR_SIZE),
1972 					 GFP_NOFS);
1973 		if (!ni->attr_list) {
1974 			ntfs_error(sb, "Not enough memory to allocate buffer for attribute list.");
1975 			goto put_err_out;
1976 		}
1977 		if (a->non_resident) {
1978 			struct runlist_element *rl;
1979 			size_t new_rl_count;
1980 
1981 			NInoSetAttrListNonResident(ni);
1982 			if (a->data.non_resident.lowest_vcn) {
1983 				ntfs_error(sb,
1984 					"Attribute list has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
1985 				goto put_err_out;
1986 			}
1987 
1988 			rl = ntfs_mapping_pairs_decompress(vol, a, NULL, &new_rl_count);
1989 			if (IS_ERR(rl)) {
1990 				err = PTR_ERR(rl);
1991 				ntfs_error(sb,
1992 					   "Mapping pairs decompression failed with error code %i.",
1993 					   -err);
1994 				goto put_err_out;
1995 			}
1996 
1997 			err = load_attribute_list_mount(vol, rl, ni->attr_list, ni->attr_list_size,
1998 					le64_to_cpu(a->data.non_resident.initialized_size));
1999 			kvfree(rl);
2000 			if (err) {
2001 				ntfs_error(sb,
2002 					   "Failed to load attribute list with error code %i.",
2003 					   -err);
2004 				goto put_err_out;
2005 			}
2006 		} else /* if (!ctx.attr->non_resident) */ {
2007 			if ((u8 *)a + le16_to_cpu(
2008 					a->data.resident.value_offset) +
2009 					le32_to_cpu(a->data.resident.value_length) >
2010 					(u8 *)ctx->mrec + vol->mft_record_size) {
2011 				ntfs_error(sb, "Corrupt attribute list attribute.");
2012 				goto put_err_out;
2013 			}
2014 			/* Now copy the attribute list. */
2015 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
2016 					a->data.resident.value_offset),
2017 					le32_to_cpu(a->data.resident.value_length));
2018 		}
2019 		/* The attribute list is now setup in memory. */
2020 		al_entry = (struct attr_list_entry *)ni->attr_list;
2021 		al_end = (u8 *)al_entry + ni->attr_list_size;
2022 		for (;; al_entry = next_al_entry) {
2023 			/* Out of bounds check. */
2024 			if ((u8 *)al_entry < ni->attr_list ||
2025 					(u8 *)al_entry > al_end)
2026 				goto em_put_err_out;
2027 			/* Catch the end of the attribute list. */
2028 			if ((u8 *)al_entry == al_end)
2029 				goto em_put_err_out;
2030 			if (!al_entry->length)
2031 				goto em_put_err_out;
2032 			if ((u8 *)al_entry + 6 > al_end ||
2033 			    (u8 *)al_entry + le16_to_cpu(al_entry->length) > al_end)
2034 				goto em_put_err_out;
2035 			next_al_entry = (struct attr_list_entry *)((u8 *)al_entry +
2036 					le16_to_cpu(al_entry->length));
2037 			if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
2038 				goto em_put_err_out;
2039 			if (al_entry->type != AT_DATA)
2040 				continue;
2041 			/* We want an unnamed attribute. */
2042 			if (al_entry->name_length)
2043 				goto em_put_err_out;
2044 			/* Want the first entry, i.e. lowest_vcn == 0. */
2045 			if (al_entry->lowest_vcn)
2046 				goto em_put_err_out;
2047 			/* First entry has to be in the base mft record. */
2048 			if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
2049 				/* MFT references do not match, logic fails. */
2050 				ntfs_error(sb,
2051 					"BUG: The first $DATA extent of $MFT is not in the base mft record.");
2052 				goto put_err_out;
2053 			} else {
2054 				/* Sequence numbers must match. */
2055 				if (MSEQNO_LE(al_entry->mft_reference) !=
2056 						ni->seq_no)
2057 					goto em_put_err_out;
2058 				/* Got it. All is ok. We can stop now. */
2059 				break;
2060 			}
2061 		}
2062 	}
2063 
2064 	ntfs_attr_reinit_search_ctx(ctx);
2065 
2066 	/* Now load all attribute extents. */
2067 	a = NULL;
2068 	next_vcn = last_vcn = highest_vcn = 0;
2069 	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
2070 			ctx))) {
2071 		struct runlist_element *nrl;
2072 
2073 		/* Cache the current attribute. */
2074 		a = ctx->attr;
2075 		/* $MFT must be non-resident. */
2076 		if (!a->non_resident) {
2077 			ntfs_error(sb,
2078 				"$MFT must be non-resident but a resident extent was found. $MFT is corrupt. Run chkdsk.");
2079 			goto put_err_out;
2080 		}
2081 		/* $MFT must be uncompressed and unencrypted. */
2082 		if (a->flags & ATTR_COMPRESSION_MASK ||
2083 				a->flags & ATTR_IS_ENCRYPTED ||
2084 				a->flags & ATTR_IS_SPARSE) {
2085 			ntfs_error(sb,
2086 				"$MFT must be uncompressed, non-sparse, and unencrypted but a compressed/sparse/encrypted extent was found. $MFT is corrupt. Run chkdsk.");
2087 			goto put_err_out;
2088 		}
2089 		/*
2090 		 * Decompress the mapping pairs array of this extent and merge
2091 		 * the result into the existing runlist. No need for locking
2092 		 * as we have exclusive access to the inode at this time and we
2093 		 * are a mount in progress task, too.
2094 		 */
2095 		nrl = ntfs_mapping_pairs_decompress(vol, a, &ni->runlist,
2096 						    &new_rl_count);
2097 		if (IS_ERR(nrl)) {
2098 			ntfs_error(sb,
2099 				"ntfs_mapping_pairs_decompress() failed with error code %ld.",
2100 				PTR_ERR(nrl));
2101 			goto put_err_out;
2102 		}
2103 		ni->runlist.rl = nrl;
2104 		ni->runlist.count = new_rl_count;
2105 
2106 		/* Are we in the first extent? */
2107 		if (!next_vcn) {
2108 			if (a->data.non_resident.lowest_vcn) {
2109 				ntfs_error(sb,
2110 					"First extent of $DATA attribute has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
2111 				goto put_err_out;
2112 			}
2113 			/* Get the last vcn in the $DATA attribute. */
2114 			last_vcn = ntfs_bytes_to_cluster(vol,
2115 					le64_to_cpu(a->data.non_resident.allocated_size));
2116 			/* Fill in the inode size. */
2117 			vi->i_size = le64_to_cpu(a->data.non_resident.data_size);
2118 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
2119 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
2120 			/*
2121 			 * Verify the number of mft records does not exceed
2122 			 * 2^32 - 1.
2123 			 */
2124 			if ((vi->i_size >> vol->mft_record_size_bits) >=
2125 					(1ULL << 32)) {
2126 				ntfs_error(sb, "$MFT is too big! Aborting.");
2127 				goto put_err_out;
2128 			}
2129 			/*
2130 			 * We have got the first extent of the runlist for
2131 			 * $MFT which means it is now relatively safe to call
2132 			 * the normal ntfs_read_inode() function.
2133 			 * Complete reading the inode, this will actually
2134 			 * re-read the mft record for $MFT, this time entering
2135 			 * it into the page cache with which we complete the
2136 			 * kick start of the volume. It should be safe to do
2137 			 * this now as the first extent of $MFT/$DATA is
2138 			 * already known and we would hope that we don't need
2139 			 * further extents in order to find the other
2140 			 * attributes belonging to $MFT. Only time will tell if
2141 			 * this is really the case. If not we will have to play
2142 			 * magic at this point, possibly duplicating a lot of
2143 			 * ntfs_read_inode() at this point. We will need to
2144 			 * ensure we do enough of its work to be able to call
2145 			 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
2146 			 * hope this never happens...
2147 			 */
2148 			err = ntfs_read_locked_inode(vi);
2149 			if (err) {
2150 				ntfs_error(sb, "ntfs_read_inode() of $MFT failed.\n");
2151 				ntfs_attr_put_search_ctx(ctx);
2152 				/* Revert to the safe super operations. */
2153 				kfree(m);
2154 				return -1;
2155 			}
2156 			/*
2157 			 * Re-initialize some specifics about $MFT's inode as
2158 			 * ntfs_read_inode() will have set up the default ones.
2159 			 */
2160 			/* Set uid and gid to root. */
2161 			vi->i_uid = GLOBAL_ROOT_UID;
2162 			vi->i_gid = GLOBAL_ROOT_GID;
2163 			/* Regular file. No access for anyone. */
2164 			vi->i_mode = S_IFREG;
2165 			/* No VFS initiated operations allowed for $MFT. */
2166 			vi->i_op = &ntfs_empty_inode_ops;
2167 			vi->i_fop = &ntfs_empty_file_ops;
2168 		}
2169 
2170 		/* Get the lowest vcn for the next extent. */
2171 		highest_vcn = le64_to_cpu(a->data.non_resident.highest_vcn);
2172 		next_vcn = highest_vcn + 1;
2173 
2174 		/* Only one extent or error, which we catch below. */
2175 		if (next_vcn <= 0)
2176 			break;
2177 
2178 		/* Avoid endless loops due to corruption. */
2179 		if (next_vcn < le64_to_cpu(a->data.non_resident.lowest_vcn)) {
2180 			ntfs_error(sb, "$MFT has corrupt attribute list attribute. Run chkdsk.");
2181 			goto put_err_out;
2182 		}
2183 	}
2184 	if (err != -ENOENT) {
2185 		ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. Run chkdsk.\n");
2186 		goto put_err_out;
2187 	}
2188 	if (!a) {
2189 		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is corrupt. Run chkdsk.");
2190 		goto put_err_out;
2191 	}
2192 	if (highest_vcn && highest_vcn != last_vcn - 1) {
2193 		ntfs_error(sb, "Failed to load the complete runlist for $MFT/$DATA. Run chkdsk.");
2194 		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2195 				(unsigned long long)highest_vcn,
2196 				(unsigned long long)last_vcn - 1);
2197 		goto put_err_out;
2198 	}
2199 	ntfs_attr_put_search_ctx(ctx);
2200 	ntfs_debug("Done.");
2201 	kfree(m);
2202 
2203 	/*
2204 	 * Split the locking rules of the MFT inode from the
2205 	 * locking rules of other inodes:
2206 	 */
2207 	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2208 	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2209 
2210 	return 0;
2211 
2212 em_put_err_out:
2213 	ntfs_error(sb,
2214 		"Couldn't find first extent of $DATA attribute in attribute list. $MFT is corrupt. Run chkdsk.");
2215 put_err_out:
2216 	ntfs_attr_put_search_ctx(ctx);
2217 err_out:
2218 	ntfs_error(sb, "Failed. Marking inode as bad.");
2219 	kfree(m);
2220 	return -1;
2221 }
2222 
2223 static void __ntfs_clear_inode(struct ntfs_inode *ni)
2224 {
2225 	/* Free all alocated memory. */
2226 	if (NInoNonResident(ni) && ni->runlist.rl) {
2227 		kvfree(ni->runlist.rl);
2228 		ni->runlist.rl = NULL;
2229 	}
2230 
2231 	if (ni->attr_list) {
2232 		kvfree(ni->attr_list);
2233 		ni->attr_list = NULL;
2234 	}
2235 
2236 	if (ni->name_len && ni->name != I30 &&
2237 	    ni->name != reparse_index_name &&
2238 	    ni->name != objid_index_name) {
2239 		WARN_ON(!ni->name);
2240 		kfree(ni->name);
2241 	}
2242 }
2243 
2244 void ntfs_clear_extent_inode(struct ntfs_inode *ni)
2245 {
2246 	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);
2247 
2248 	WARN_ON(NInoAttr(ni));
2249 	WARN_ON(ni->nr_extents != -1);
2250 
2251 	__ntfs_clear_inode(ni);
2252 	ntfs_destroy_extent_inode(ni);
2253 }
2254 
2255 static int ntfs_delete_base_inode(struct ntfs_inode *ni)
2256 {
2257 	struct super_block *sb = ni->vol->sb;
2258 	int err;
2259 
2260 	if (NInoAttr(ni) || ni->nr_extents == -1)
2261 		return 0;
2262 
2263 	err = ntfs_non_resident_dealloc_clusters(ni);
2264 
2265 	/*
2266 	 * Deallocate extent mft records and free extent inodes.
2267 	 * No need to lock as no one else has a reference.
2268 	 */
2269 	while (ni->nr_extents) {
2270 		err = ntfs_mft_record_free(ni->vol, *(ni->ext.extent_ntfs_inos));
2271 		if (err)
2272 			ntfs_error(sb,
2273 				"Failed to free extent MFT record. Leaving inconsistent metadata.\n");
2274 		ntfs_inode_close(*(ni->ext.extent_ntfs_inos));
2275 	}
2276 
2277 	/* Deallocate base mft record */
2278 	err = ntfs_mft_record_free(ni->vol, ni);
2279 	if (err)
2280 		ntfs_error(sb, "Failed to free base MFT record. Leaving inconsistent metadata.\n");
2281 	return err;
2282 }
2283 
2284 /*
2285  * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2286  * @vi:		vfs inode pending annihilation
2287  *
2288  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
2289  * is called, which deallocates all memory belonging to the NTFS specific part
2290  * of the inode and returns.
2291  *
2292  * If the MFT record is dirty, we commit it before doing anything else.
2293  */
2294 void ntfs_evict_big_inode(struct inode *vi)
2295 {
2296 	struct ntfs_inode *ni = NTFS_I(vi);
2297 
2298 	truncate_inode_pages_final(&vi->i_data);
2299 
2300 	if (!vi->i_nlink) {
2301 		if (!NInoAttr(ni)) {
2302 			/* Never called with extent inodes */
2303 			WARN_ON(ni->nr_extents == -1);
2304 			ntfs_delete_base_inode(ni);
2305 		}
2306 		goto release;
2307 	}
2308 
2309 	if (NInoDirty(ni)) {
2310 		/* Committing the inode also commits all extent inodes. */
2311 		ntfs_commit_inode(vi);
2312 
2313 		if (NInoDirty(ni)) {
2314 			ntfs_debug("Failed to commit dirty inode 0x%llx.  Losing data!",
2315 				   ni->mft_no);
2316 			NInoClearAttrListDirty(ni);
2317 			NInoClearDirty(ni);
2318 		}
2319 	}
2320 
2321 	/* No need to lock at this stage as no one else has a reference. */
2322 	if (ni->nr_extents > 0) {
2323 		int i;
2324 
2325 		for (i = 0; i < ni->nr_extents; i++) {
2326 			if (ni->ext.extent_ntfs_inos[i])
2327 				ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
2328 		}
2329 		ni->nr_extents = 0;
2330 		kvfree(ni->ext.extent_ntfs_inos);
2331 	}
2332 
2333 release:
2334 	clear_inode(vi);
2335 	__ntfs_clear_inode(ni);
2336 
2337 	if (NInoAttr(ni)) {
2338 		/* Release the base inode if we are holding it. */
2339 		if (ni->nr_extents == -1) {
2340 			iput(VFS_I(ni->ext.base_ntfs_ino));
2341 			ni->nr_extents = 0;
2342 			ni->ext.base_ntfs_ino = NULL;
2343 		}
2344 	}
2345 
2346 	if (!atomic_dec_and_test(&ni->count))
2347 		WARN_ON(1);
2348 	if (ni->folio)
2349 		folio_put(ni->folio);
2350 	kfree(ni->mrec);
2351 	kvfree(ni->target);
2352 }
2353 
2354 /*
2355  * ntfs_show_options - show mount options in /proc/mounts
2356  * @sf:		seq_file in which to write our mount options
2357  * @root:	root of the mounted tree whose mount options to display
2358  *
2359  * Called by the VFS once for each mounted ntfs volume when someone reads
2360  * /proc/mounts in order to display the NTFS specific mount options of each
2361  * mount. The mount options of fs specified by @root are written to the seq file
2362  * @sf and success is returned.
2363  */
2364 int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2365 {
2366 	struct ntfs_volume *vol = NTFS_SB(root->d_sb);
2367 	int i;
2368 
2369 	if (uid_valid(vol->uid))
2370 		seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
2371 	if (gid_valid(vol->gid))
2372 		seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
2373 	if (vol->fmask == vol->dmask)
2374 		seq_printf(sf, ",umask=0%o", vol->fmask);
2375 	else {
2376 		seq_printf(sf, ",fmask=0%o", vol->fmask);
2377 		seq_printf(sf, ",dmask=0%o", vol->dmask);
2378 	}
2379 	seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
2380 	if (NVolCaseSensitive(vol))
2381 		seq_puts(sf, ",case_sensitive");
2382 	else
2383 		seq_puts(sf, ",nocase");
2384 	if (NVolShowSystemFiles(vol))
2385 		seq_puts(sf, ",show_sys_files,showmeta");
2386 	for (i = 0; on_errors_arr[i].val; i++) {
2387 		if (on_errors_arr[i].val == vol->on_errors)
2388 			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
2389 	}
2390 	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2391 	if (NVolSysImmutable(vol))
2392 		seq_puts(sf, ",sys_immutable");
2393 	if (!NVolShowHiddenFiles(vol))
2394 		seq_puts(sf, ",nohidden");
2395 	if (NVolHideDotFiles(vol))
2396 		seq_puts(sf, ",hide_dot_files");
2397 	if (NVolCheckWindowsNames(vol))
2398 		seq_puts(sf, ",windows_names");
2399 	if (NVolDiscard(vol))
2400 		seq_puts(sf, ",discard");
2401 	if (NVolDisableSparse(vol))
2402 		seq_puts(sf, ",disable_sparse");
2403 	if (vol->sb->s_flags & SB_POSIXACL)
2404 		seq_puts(sf, ",acl");
2405 	return 0;
2406 }
2407 
2408 int ntfs_extend_initialized_size(struct inode *vi, const loff_t offset,
2409 				 const loff_t new_size, bool bsync)
2410 {
2411 	struct ntfs_inode *ni = NTFS_I(vi);
2412 	loff_t old_init_size;
2413 	unsigned long flags;
2414 	int err;
2415 
2416 	read_lock_irqsave(&ni->size_lock, flags);
2417 	old_init_size = ni->initialized_size;
2418 	read_unlock_irqrestore(&ni->size_lock, flags);
2419 
2420 	if (!NInoNonResident(ni))
2421 		return -EINVAL;
2422 	if (old_init_size >= new_size)
2423 		return 0;
2424 
2425 	err = ntfs_attr_map_whole_runlist(ni);
2426 	if (err)
2427 		return err;
2428 
2429 	if (!NInoCompressed(ni) && old_init_size < offset) {
2430 		err = iomap_zero_range(vi, old_init_size,
2431 				       offset - old_init_size,
2432 				       NULL, &ntfs_seek_iomap_ops,
2433 				       &ntfs_iomap_folio_ops, NULL);
2434 		if (err)
2435 			return err;
2436 		if (bsync)
2437 			err = filemap_write_and_wait_range(vi->i_mapping,
2438 							   old_init_size,
2439 							   offset - 1);
2440 	}
2441 
2442 
2443 	mutex_lock(&ni->mrec_lock);
2444 	err = ntfs_attr_set_initialized_size(ni, new_size);
2445 	mutex_unlock(&ni->mrec_lock);
2446 	if (err)
2447 		truncate_setsize(vi, old_init_size);
2448 	return err;
2449 }
2450 
2451 int ntfs_truncate_vfs(struct inode *vi, loff_t new_size, loff_t i_size)
2452 {
2453 	struct ntfs_inode *ni = NTFS_I(vi);
2454 	int err;
2455 
2456 	mutex_lock(&ni->mrec_lock);
2457 	err = __ntfs_attr_truncate_vfs(ni, new_size, i_size);
2458 	mutex_unlock(&ni->mrec_lock);
2459 	if (err < 0)
2460 		return err;
2461 
2462 	inode_set_mtime_to_ts(vi, inode_set_ctime_current(vi));
2463 	return 0;
2464 }
2465 
2466 /*
2467  * ntfs_inode_sync_standard_information - update standard information attribute
2468  * @vi:	inode to update standard information
2469  * @m:	mft record
2470  *
2471  * Return 0 on success or -errno on error.
2472  */
2473 static int ntfs_inode_sync_standard_information(struct inode *vi, struct mft_record *m)
2474 {
2475 	struct ntfs_inode *ni = NTFS_I(vi);
2476 	struct ntfs_attr_search_ctx *ctx;
2477 	struct standard_information *si;
2478 	__le64 nt;
2479 	int err = 0;
2480 	bool modified = false;
2481 
2482 	/* Update the access times in the standard information attribute. */
2483 	ctx = ntfs_attr_get_search_ctx(ni, m);
2484 	if (unlikely(!ctx))
2485 		return -ENOMEM;
2486 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
2487 			CASE_SENSITIVE, 0, NULL, 0, ctx);
2488 	if (unlikely(err)) {
2489 		ntfs_attr_put_search_ctx(ctx);
2490 		return err;
2491 	}
2492 	si = (struct standard_information *)((u8 *)ctx->attr +
2493 			le16_to_cpu(ctx->attr->data.resident.value_offset));
2494 	if (si->file_attributes != ni->flags) {
2495 		si->file_attributes = ni->flags;
2496 		modified = true;
2497 	}
2498 
2499 	/* Update the creation times if they have changed. */
2500 	nt = utc2ntfs(ni->i_crtime);
2501 	if (si->creation_time != nt) {
2502 		ntfs_debug("Updating creation time for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2503 				ni->mft_no, le64_to_cpu(si->creation_time),
2504 				le64_to_cpu(nt));
2505 		si->creation_time = nt;
2506 		modified = true;
2507 	}
2508 
2509 	/* Update the access times if they have changed. */
2510 	nt = utc2ntfs(inode_get_mtime(vi));
2511 	if (si->last_data_change_time != nt) {
2512 		ntfs_debug("Updating mtime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2513 				ni->mft_no, le64_to_cpu(si->last_data_change_time),
2514 				le64_to_cpu(nt));
2515 		si->last_data_change_time = nt;
2516 		modified = true;
2517 	}
2518 
2519 	nt = utc2ntfs(inode_get_ctime(vi));
2520 	if (si->last_mft_change_time != nt) {
2521 		ntfs_debug("Updating ctime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2522 				ni->mft_no, le64_to_cpu(si->last_mft_change_time),
2523 				le64_to_cpu(nt));
2524 		si->last_mft_change_time = nt;
2525 		modified = true;
2526 	}
2527 	nt = utc2ntfs(inode_get_atime(vi));
2528 	if (si->last_access_time != nt) {
2529 		ntfs_debug("Updating atime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2530 				ni->mft_no,
2531 				le64_to_cpu(si->last_access_time),
2532 				le64_to_cpu(nt));
2533 		si->last_access_time = nt;
2534 		modified = true;
2535 	}
2536 
2537 	/*
2538 	 * If we just modified the standard information attribute we need to
2539 	 * mark the mft record it is in dirty.  We do this manually so that
2540 	 * mark_inode_dirty() is not called which would redirty the inode and
2541 	 * hence result in an infinite loop of trying to write the inode.
2542 	 * There is no need to mark the base inode nor the base mft record
2543 	 * dirty, since we are going to write this mft record below in any case
2544 	 * and the base mft record may actually not have been modified so it
2545 	 * might not need to be written out.
2546 	 * NOTE: It is not a problem when the inode for $MFT itself is being
2547 	 * written out as ntfs_mft_mark_dirty() will only set I_DIRTY_PAGES
2548 	 * on the $MFT inode and hence ntfs_write_inode() will not be
2549 	 * re-invoked because of it which in turn is ok since the dirtied mft
2550 	 * record will be cleaned and written out to disk below, i.e. before
2551 	 * this function returns.
2552 	 */
2553 	if (modified)
2554 		NInoSetDirty(ctx->ntfs_ino);
2555 	ntfs_attr_put_search_ctx(ctx);
2556 
2557 	return err;
2558 }
2559 
2560 /*
2561  * ntfs_inode_sync_filename - update FILE_NAME attributes
2562  * @ni:	ntfs inode to update FILE_NAME attributes
2563  *
2564  * Update all FILE_NAME attributes for inode @ni in the index.
2565  *
2566  * Return 0 on success or error.
2567  */
2568 int ntfs_inode_sync_filename(struct ntfs_inode *ni)
2569 {
2570 	struct inode *index_vi;
2571 	struct super_block *sb = VFS_I(ni)->i_sb;
2572 	struct ntfs_attr_search_ctx *ctx = NULL;
2573 	struct ntfs_index_context *ictx;
2574 	struct ntfs_inode *index_ni;
2575 	struct file_name_attr *fn;
2576 	struct file_name_attr *fnx;
2577 	struct reparse_point *rpp;
2578 	__le32 reparse_tag;
2579 	int err = 0;
2580 	unsigned long flags;
2581 
2582 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
2583 
2584 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
2585 	if (!ctx)
2586 		return -ENOMEM;
2587 
2588 	/* Collect the reparse tag, if any */
2589 	reparse_tag = cpu_to_le32(0);
2590 	if (ni->flags & FILE_ATTR_REPARSE_POINT) {
2591 		if (!ntfs_attr_lookup(AT_REPARSE_POINT, NULL,
2592 					0, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
2593 			rpp = (struct reparse_point *)((u8 *)ctx->attr +
2594 					le16_to_cpu(ctx->attr->data.resident.value_offset));
2595 			reparse_tag = rpp->reparse_tag;
2596 		}
2597 		ntfs_attr_reinit_search_ctx(ctx);
2598 	}
2599 
2600 	/* Walk through all FILE_NAME attributes and update them. */
2601 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) {
2602 		fn = (struct file_name_attr *)((u8 *)ctx->attr +
2603 				le16_to_cpu(ctx->attr->data.resident.value_offset));
2604 		if (MREF_LE(fn->parent_directory) == ni->mft_no)
2605 			continue;
2606 
2607 		index_vi = ntfs_iget(sb, MREF_LE(fn->parent_directory));
2608 		if (IS_ERR(index_vi)) {
2609 			ntfs_error(sb, "Failed to open inode %lld with index",
2610 					(long long)MREF_LE(fn->parent_directory));
2611 			continue;
2612 		}
2613 
2614 		index_ni = NTFS_I(index_vi);
2615 
2616 		mutex_lock_nested(&index_ni->mrec_lock, NTFS_INODE_MUTEX_PARENT);
2617 		if (NInoBeingDeleted(ni)) {
2618 			iput(index_vi);
2619 			mutex_unlock(&index_ni->mrec_lock);
2620 			continue;
2621 		}
2622 
2623 		ictx = ntfs_index_ctx_get(index_ni, I30, 4);
2624 		if (!ictx) {
2625 			ntfs_error(sb, "Failed to get index ctx, inode %llu",
2626 					index_ni->mft_no);
2627 			iput(index_vi);
2628 			mutex_unlock(&index_ni->mrec_lock);
2629 			continue;
2630 		}
2631 
2632 		err = ntfs_index_lookup(fn, sizeof(struct file_name_attr), ictx);
2633 		if (err) {
2634 			ntfs_debug("Index lookup failed, inode %llu",
2635 					index_ni->mft_no);
2636 			ntfs_index_ctx_put(ictx);
2637 			iput(index_vi);
2638 			mutex_unlock(&index_ni->mrec_lock);
2639 			continue;
2640 		}
2641 		/* Update flags and file size. */
2642 		fnx = (struct file_name_attr *)ictx->data;
2643 		fnx->file_attributes =
2644 			(fnx->file_attributes & ~FILE_ATTR_VALID_FLAGS) |
2645 			(ni->flags & FILE_ATTR_VALID_FLAGS);
2646 		if (ctx->mrec->flags & MFT_RECORD_IS_DIRECTORY)
2647 			fnx->data_size = fnx->allocated_size = 0;
2648 		else {
2649 			read_lock_irqsave(&ni->size_lock, flags);
2650 			if (NInoSparse(ni) || NInoCompressed(ni))
2651 				fnx->allocated_size = cpu_to_le64(ni->itype.compressed.size);
2652 			else
2653 				fnx->allocated_size = cpu_to_le64(ni->allocated_size);
2654 			fnx->data_size = cpu_to_le64(ni->data_size);
2655 
2656 			/*
2657 			 * The file name record has also to be fixed if some
2658 			 * attribute update implied the unnamed data to be
2659 			 * made non-resident
2660 			 */
2661 			fn->allocated_size = fnx->allocated_size;
2662 			fn->data_size = fnx->data_size;
2663 			read_unlock_irqrestore(&ni->size_lock, flags);
2664 		}
2665 
2666 		/* update or clear the reparse tag in the index */
2667 		fnx->type.rp.reparse_point_tag = reparse_tag;
2668 		fnx->creation_time = fn->creation_time;
2669 		fnx->last_data_change_time = fn->last_data_change_time;
2670 		fnx->last_mft_change_time = fn->last_mft_change_time;
2671 		fnx->last_access_time = fn->last_access_time;
2672 		ntfs_index_entry_mark_dirty(ictx);
2673 		ntfs_icx_ib_sync_write(ictx);
2674 		NInoSetDirty(ctx->ntfs_ino);
2675 		ntfs_index_ctx_put(ictx);
2676 		mutex_unlock(&index_ni->mrec_lock);
2677 		iput(index_vi);
2678 	}
2679 	/* Check for real error occurred. */
2680 	if (err != -ENOENT) {
2681 		ntfs_error(sb, "Attribute lookup failed, err : %d, inode %llu", err,
2682 				ni->mft_no);
2683 	} else
2684 		err = 0;
2685 
2686 	ntfs_attr_put_search_ctx(ctx);
2687 	return err;
2688 }
2689 
2690 int ntfs_get_block_mft_record(struct ntfs_inode *mft_ni, struct ntfs_inode *ni)
2691 {
2692 	s64 vcn;
2693 	struct runlist_element *rl;
2694 
2695 	if (ni->mft_lcn[0] != LCN_RL_NOT_MAPPED)
2696 		return 0;
2697 
2698 	vcn = (s64)ni->mft_no << mft_ni->vol->mft_record_size_bits >>
2699 	      mft_ni->vol->cluster_size_bits;
2700 
2701 	rl = mft_ni->runlist.rl;
2702 	if (!rl) {
2703 		ntfs_error(mft_ni->vol->sb, "$MFT runlist is not present");
2704 		return -EIO;
2705 	}
2706 
2707 	/* Seek to element containing target vcn. */
2708 	while (rl->length && rl[1].vcn <= vcn)
2709 		rl++;
2710 	ni->mft_lcn[0] = ntfs_rl_vcn_to_lcn(rl, vcn);
2711 	ni->mft_lcn_count = 1;
2712 
2713 	if (mft_ni->vol->cluster_size < mft_ni->vol->mft_record_size &&
2714 	    (rl->length - (vcn - rl->vcn)) <= 1) {
2715 		rl++;
2716 		ni->mft_lcn[1] = ntfs_rl_vcn_to_lcn(rl, vcn + 1);
2717 		ni->mft_lcn_count++;
2718 	}
2719 	return 0;
2720 }
2721 
2722 /*
2723  * __ntfs_write_inode - write out a dirty inode
2724  * @vi:		inode to write out
2725  * @sync:	if true, write out synchronously
2726  *
2727  * Write out a dirty inode to disk including any extent inodes if present.
2728  *
2729  * If @sync is true, commit the inode to disk and wait for io completion.  This
2730  * is done using write_mft_record().
2731  *
2732  * If @sync is false, just schedule the write to happen but do not wait for i/o
2733  * completion.
2734  *
2735  * Return 0 on success and -errno on error.
2736  */
2737 int __ntfs_write_inode(struct inode *vi, int sync)
2738 {
2739 	struct ntfs_inode *ni = NTFS_I(vi);
2740 	struct ntfs_inode *mft_ni = NTFS_I(ni->vol->mft_ino);
2741 	struct mft_record *m;
2742 	int err = 0;
2743 	bool need_iput = false;
2744 
2745 	ntfs_debug("Entering for %sinode 0x%llx.", NInoAttr(ni) ? "attr " : "",
2746 			ni->mft_no);
2747 
2748 	if (NVolShutdown(ni->vol))
2749 		return -EIO;
2750 
2751 	/*
2752 	 * Dirty attribute inodes are written via their real inodes so just
2753 	 * clean them here.  Access time updates are taken care off when the
2754 	 * real inode is written.
2755 	 */
2756 	if (NInoAttr(ni) || ni->nr_extents == -1) {
2757 		NInoClearDirty(ni);
2758 		ntfs_debug("Done.");
2759 		return 0;
2760 	}
2761 
2762 	/* igrab prevents vi from being evicted while mrec_lock is hold. */
2763 	if (igrab(vi) != NULL)
2764 		need_iput = true;
2765 
2766 	mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL);
2767 	/* Map, pin, and lock the mft record belonging to the inode. */
2768 	m = map_mft_record(ni);
2769 	if (IS_ERR(m)) {
2770 		mutex_unlock(&ni->mrec_lock);
2771 		err = PTR_ERR(m);
2772 		goto err_out;
2773 	}
2774 
2775 	if (NInoNonResident(ni) && NInoRunlistDirty(ni)) {
2776 		down_write(&ni->runlist.lock);
2777 		err = ntfs_attr_update_mapping_pairs(ni, 0);
2778 		if (!err)
2779 			NInoClearRunlistDirty(ni);
2780 		up_write(&ni->runlist.lock);
2781 	}
2782 
2783 	err = ntfs_inode_sync_standard_information(vi, m);
2784 	if (err)
2785 		goto unm_err_out;
2786 
2787 	/*
2788 	 * when being umounted and inodes are evicted, write_inode()
2789 	 * is called with all inodes being marked with I_FREEING.
2790 	 * then ntfs_inode_sync_filename() waits infinitly because
2791 	 * of ntfs_iget. This situation happens only where sync_filesysem()
2792 	 * from umount fails because of a disk unplug and etc.
2793 	 * the absent of SB_ACTIVE means umounting.
2794 	 */
2795 	if ((vi->i_sb->s_flags & SB_ACTIVE) && NInoTestClearFileNameDirty(ni))
2796 		ntfs_inode_sync_filename(ni);
2797 
2798 	/* Now the access times are updated, write the base mft record. */
2799 	if (NInoDirty(ni)) {
2800 		down_read(&mft_ni->runlist.lock);
2801 		err = ntfs_get_block_mft_record(mft_ni, ni);
2802 		up_read(&mft_ni->runlist.lock);
2803 		if (err)
2804 			goto unm_err_out;
2805 
2806 		err = write_mft_record(ni, m, sync);
2807 		if (err)
2808 			ntfs_error(vi->i_sb, "write_mft_record failed, err : %d\n", err);
2809 	}
2810 	unmap_mft_record(ni);
2811 
2812 	/* Map any unmapped extent mft records with LCNs. */
2813 	down_read(&mft_ni->runlist.lock);
2814 	mutex_lock(&ni->extent_lock);
2815 	if (ni->nr_extents > 0) {
2816 		int i;
2817 
2818 		for (i = 0; i < ni->nr_extents; i++) {
2819 			err = ntfs_get_block_mft_record(mft_ni,
2820 						   ni->ext.extent_ntfs_inos[i]);
2821 			if (err) {
2822 				mutex_unlock(&ni->extent_lock);
2823 				up_read(&mft_ni->runlist.lock);
2824 				mutex_unlock(&ni->mrec_lock);
2825 				goto err_out;
2826 			}
2827 		}
2828 	}
2829 	mutex_unlock(&ni->extent_lock);
2830 	up_read(&mft_ni->runlist.lock);
2831 
2832 	/* Write all attached extent mft records. */
2833 	mutex_lock(&ni->extent_lock);
2834 	if (ni->nr_extents > 0) {
2835 		struct ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
2836 		int i;
2837 
2838 		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
2839 		for (i = 0; i < ni->nr_extents; i++) {
2840 			struct ntfs_inode *tni = extent_nis[i];
2841 
2842 			if (NInoDirty(tni)) {
2843 				struct mft_record *tm;
2844 				int ret;
2845 
2846 				mutex_lock(&tni->mrec_lock);
2847 				tm = map_mft_record(tni);
2848 				if (IS_ERR(tm)) {
2849 					mutex_unlock(&tni->mrec_lock);
2850 					if (!err || err == -ENOMEM)
2851 						err = PTR_ERR(tm);
2852 					continue;
2853 				}
2854 
2855 				ret = write_mft_record(tni, tm, sync);
2856 				unmap_mft_record(tni);
2857 				mutex_unlock(&tni->mrec_lock);
2858 
2859 				if (unlikely(ret)) {
2860 					if (!err || err == -ENOMEM)
2861 						err = ret;
2862 				}
2863 			}
2864 		}
2865 	}
2866 	mutex_unlock(&ni->extent_lock);
2867 	mutex_unlock(&ni->mrec_lock);
2868 
2869 	if (unlikely(err))
2870 		goto err_out;
2871 	if (need_iput)
2872 		iput(vi);
2873 	ntfs_debug("Done.");
2874 	return 0;
2875 unm_err_out:
2876 	unmap_mft_record(ni);
2877 	mutex_unlock(&ni->mrec_lock);
2878 err_out:
2879 	if (err == -ENOMEM)
2880 		mark_inode_dirty(vi);
2881 	else {
2882 		ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
2883 		NVolSetErrors(ni->vol);
2884 	}
2885 	if (need_iput)
2886 		iput(vi);
2887 	return err;
2888 }
2889 
2890 /*
2891  * ntfs_extent_inode_open - load an extent inode and attach it to its base
2892  * @base_ni:	base ntfs inode
2893  * @mref:	mft reference of the extent inode to load (in little endian)
2894  *
2895  * First check if the extent inode @mref is already attached to the base ntfs
2896  * inode @base_ni, and if so, return a pointer to the attached extent inode.
2897  *
2898  * If the extent inode is not already attached to the base inode, allocate an
2899  * ntfs_inode structure and initialize it for the given inode @mref. @mref
2900  * specifies the inode number / mft record to read, including the sequence
2901  * number, which can be 0 if no sequence number checking is to be performed.
2902  *
2903  * Then, allocate a buffer for the mft record, read the mft record from the
2904  * volume @base_ni->vol, and attach it to the ntfs_inode structure (->mrec).
2905  * The mft record is mst deprotected and sanity checked for validity and we
2906  * abort if deprotection or checks fail.
2907  *
2908  * Finally attach the ntfs inode to its base inode @base_ni and return a
2909  * pointer to the ntfs_inode structure on success or NULL on error, with errno
2910  * set to the error code.
2911  *
2912  * Note, extent inodes are never closed directly. They are automatically
2913  * disposed off by the closing of the base inode.
2914  */
2915 static struct ntfs_inode *ntfs_extent_inode_open(struct ntfs_inode *base_ni,
2916 		const __le64 mref)
2917 {
2918 	u64 mft_no = MREF_LE(mref);
2919 	struct ntfs_inode *ni = NULL;
2920 	struct ntfs_inode **extent_nis;
2921 	int i;
2922 	struct mft_record *ni_mrec;
2923 	struct super_block *sb;
2924 
2925 	if (!base_ni)
2926 		return NULL;
2927 
2928 	sb = base_ni->vol->sb;
2929 	ntfs_debug("Opening extent inode %llu (base mft record %llu).\n",
2930 			mft_no, base_ni->mft_no);
2931 
2932 	/* Is the extent inode already open and attached to the base inode? */
2933 	if (base_ni->nr_extents > 0) {
2934 		extent_nis = base_ni->ext.extent_ntfs_inos;
2935 		for (i = 0; i < base_ni->nr_extents; i++) {
2936 			u16 seq_no;
2937 
2938 			ni = extent_nis[i];
2939 			if (mft_no != ni->mft_no)
2940 				continue;
2941 			ni_mrec = map_mft_record(ni);
2942 			if (IS_ERR(ni_mrec)) {
2943 				ntfs_error(sb, "failed to map mft record for %llu",
2944 						ni->mft_no);
2945 				goto out;
2946 			}
2947 			/* Verify the sequence number if given. */
2948 			seq_no = MSEQNO_LE(mref);
2949 			if (seq_no &&
2950 			    seq_no != le16_to_cpu(ni_mrec->sequence_number)) {
2951 				ntfs_error(sb, "Found stale extent mft reference mft=%llu",
2952 						ni->mft_no);
2953 				unmap_mft_record(ni);
2954 				goto out;
2955 			}
2956 			unmap_mft_record(ni);
2957 			goto out;
2958 		}
2959 	}
2960 	/* Wasn't there, we need to load the extent inode. */
2961 	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
2962 	if (!ni)
2963 		goto out;
2964 
2965 	ni->seq_no = (u16)MSEQNO_LE(mref);
2966 	ni->nr_extents = -1;
2967 	ni->ext.base_ntfs_ino = base_ni;
2968 	/* Attach extent inode to base inode, reallocating memory if needed. */
2969 	if (!(base_ni->nr_extents & 3)) {
2970 		i = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *);
2971 
2972 		extent_nis = kvzalloc(i, GFP_NOFS);
2973 		if (!extent_nis)
2974 			goto err_out;
2975 		if (base_ni->nr_extents) {
2976 			memcpy(extent_nis, base_ni->ext.extent_ntfs_inos,
2977 					i - 4 * sizeof(struct ntfs_inode *));
2978 			kvfree(base_ni->ext.extent_ntfs_inos);
2979 		}
2980 		base_ni->ext.extent_ntfs_inos = extent_nis;
2981 	}
2982 	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
2983 
2984 out:
2985 	ntfs_debug("\n");
2986 	return ni;
2987 err_out:
2988 	ntfs_destroy_ext_inode(ni);
2989 	ni = NULL;
2990 	goto out;
2991 }
2992 
2993 /*
2994  * ntfs_inode_attach_all_extents - attach all extents for target inode
2995  * @ni:		opened ntfs inode for which perform attach
2996  *
2997  * Return 0 on success and error.
2998  */
2999 int ntfs_inode_attach_all_extents(struct ntfs_inode *ni)
3000 {
3001 	struct attr_list_entry *ale;
3002 	u64 prev_attached = 0;
3003 
3004 	if (!ni) {
3005 		ntfs_debug("Invalid arguments.\n");
3006 		return -EINVAL;
3007 	}
3008 
3009 	if (NInoAttr(ni))
3010 		ni = ni->ext.base_ntfs_ino;
3011 
3012 	ntfs_debug("Entering for inode 0x%llx.\n", ni->mft_no);
3013 
3014 	/* Inode haven't got attribute list, thus nothing to attach. */
3015 	if (!NInoAttrList(ni))
3016 		return 0;
3017 
3018 	if (!ni->attr_list) {
3019 		ntfs_debug("Corrupt in-memory struct.\n");
3020 		return -EINVAL;
3021 	}
3022 
3023 	/* Walk through attribute list and attach all extents. */
3024 	ale = (struct attr_list_entry *)ni->attr_list;
3025 	while ((u8 *)ale < ni->attr_list + ni->attr_list_size) {
3026 		if (ni->mft_no != MREF_LE(ale->mft_reference) &&
3027 				prev_attached != MREF_LE(ale->mft_reference)) {
3028 			if (!ntfs_extent_inode_open(ni, ale->mft_reference)) {
3029 				ntfs_debug("Couldn't attach extent inode.\n");
3030 				return -1;
3031 			}
3032 			prev_attached = MREF_LE(ale->mft_reference);
3033 		}
3034 		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
3035 	}
3036 	return 0;
3037 }
3038 
/*
 * ntfs_inode_add_attrlist - add attribute list to inode and fill it
 * @ni: opened ntfs inode to which add attribute list
 *
 * Build an in-memory attribute list with one entry for every attribute found
 * in the mft record of @ni, install it as @ni->attr_list, make room in the mft
 * record if needed, add a resident $ATTRIBUTE_LIST attribute and finally call
 * ntfs_attrlist_update().
 *
 * On success the list buffer stays attached to @ni (it is not freed here).
 * On failure the steps already taken are rolled back as far as possible and
 * the buffer is freed.
 *
 * Return 0 on success or error:
 *	-EINVAL	@ni is NULL
 *	-EEXIST	@ni already has an attribute list or a non-zero nr_extents
 *	-EIO	the mft record could not be mapped, or an $ATTRIBUTE_LIST
 *		attribute is already present in the record
 *	-ENOMEM	out of memory
 *	-ENOSPC	not enough space could be freed in the mft record
 *	or the error returned by the attribute lookup / add / update helpers.
 */
int ntfs_inode_add_attrlist(struct ntfs_inode *ni)
{
	int err;
	struct ntfs_attr_search_ctx *ctx;
	u8 *al = NULL, *aln;
	int al_len = 0;
	struct attr_list_entry *ale = NULL;
	struct mft_record *ni_mrec;
	u32 attr_al_len;

	if (!ni)
		return -EINVAL;

	ntfs_debug("inode %llu\n", ni->mft_no);

	if (NInoAttrList(ni) || ni->nr_extents) {
		ntfs_error(ni->vol->sb, "Inode already has attribute list");
		return -EEXIST;
	}

	ni_mrec = map_mft_record(ni);
	if (IS_ERR(ni_mrec))
		return -EIO;

	/* Form attribute list. */
	ctx = ntfs_attr_get_search_ctx(ni, ni_mrec);
	if (!ctx) {
		err = -ENOMEM;
		goto err_out;
	}

	/* Walk through all attributes. */
	while (!(err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, 0, 0, NULL, 0, ctx))) {
		int ale_size;

		if (ctx->attr->type == AT_ATTRIBUTE_LIST) {
			err = -EIO;
			ntfs_error(ni->vol->sb, "Attribute list already present");
			goto put_err_out;
		}

		/* Entry header plus name, rounded up to a multiple of 8 bytes. */
		ale_size = (sizeof(struct attr_list_entry) + sizeof(__le16) *
				ctx->attr->name_length + 7) & ~7;
		al_len += ale_size;

		/* Grow the list; @al is only replaced once this succeeded. */
		aln = kvrealloc(al, al_len, GFP_NOFS);
		if (!aln) {
			err = -ENOMEM;
			ntfs_error(ni->vol->sb, "Failed to realloc %d bytes", al_len);
			goto put_err_out;
		}
		/*
		 * The buffer may have moved: place the write cursor at the
		 * same byte offset it had in the old buffer.
		 */
		ale = (struct attr_list_entry *)(aln + ((u8 *)ale - al));
		al = aln;

		memset(ale, 0, ale_size);

		/* Add attribute to attribute list. */
		ale->type = ctx->attr->type;
		ale->length = cpu_to_le16((sizeof(struct attr_list_entry) +
					sizeof(__le16) * ctx->attr->name_length + 7) & ~7);
		ale->name_length = ctx->attr->name_length;
		ale->name_offset = (u8 *)ale->name - (u8 *)ale;
		if (ctx->attr->non_resident)
			ale->lowest_vcn =
				ctx->attr->data.non_resident.lowest_vcn;
		else
			ale->lowest_vcn = 0;
		/* Every attribute found here lives in the record of @ni. */
		ale->mft_reference = MK_LE_MREF(ni->mft_no,
				le16_to_cpu(ni_mrec->sequence_number));
		ale->instance = ctx->attr->instance;
		memcpy(ale->name, (u8 *)ctx->attr +
				le16_to_cpu(ctx->attr->name_offset),
				ctx->attr->name_length * sizeof(__le16));
		/* Next entry goes to the current end of the list. */
		ale = (struct attr_list_entry *)(al + al_len);
	}

	/* -ENOENT ends the enumeration; anything else is a real error. */
	if (err != -ENOENT) {
		ntfs_error(ni->vol->sb, "%s: Attribute lookup failed, inode %llu",
				__func__, ni->mft_no);
		goto put_err_out;
	}

	/* Set in-memory attribute list. */
	ni->attr_list = al;
	ni->attr_list_size = al_len;
	NInoSetAttrList(ni);

	/* Space requested for the $ATTRIBUTE_LIST record in the mft record. */
	attr_al_len = offsetof(struct attr_record, data.resident.reserved) + 1 +
		((al_len + 7) & ~7);
	/* Free space if there is not enough of it for $ATTRIBUTE_LIST. */
	if (le32_to_cpu(ni_mrec->bytes_allocated) -
			le32_to_cpu(ni_mrec->bytes_in_use) < attr_al_len) {
		if (ntfs_inode_free_space(ni, (int)attr_al_len)) {
			/* Failed to free space. */
			err = -ENOSPC;
			ntfs_error(ni->vol->sb, "Failed to free space for attrlist");
			goto rollback;
		}
	}

	/* Add $ATTRIBUTE_LIST to mft record. */
	err = ntfs_resident_attr_record_add(ni, AT_ATTRIBUTE_LIST, AT_UNNAMED, 0,
					    NULL, al_len, 0);
	if (err < 0) {
		ntfs_error(ni->vol->sb, "Couldn't add $ATTRIBUTE_LIST to MFT");
		goto rollback;
	}

	err = ntfs_attrlist_update(ni);
	if (err < 0)
		goto remove_attrlist_record;

	/* Success: @al remains attached to @ni and must not be freed here. */
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(ni);
	return 0;

remove_attrlist_record:
	/* Prevent ntfs_attr_record_rm from freeing attribute list. */
	ni->attr_list = NULL;
	NInoClearAttrList(ni);
	/* Remove $ATTRIBUTE_LIST record. */
	ntfs_attr_reinit_search_ctx(ctx);
	if (!ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0,
				CASE_SENSITIVE, 0, NULL, 0, ctx)) {
		if (ntfs_attr_record_rm(ctx))
			ntfs_error(ni->vol->sb, "Rollback failed to remove attrlist");
	} else {
		ntfs_error(ni->vol->sb, "Rollback failed to find attrlist");
	}

	/* Restore the in-memory attribute list for the rollback below. */
	ni->attr_list = al;
	ni->attr_list_size = al_len;
	NInoSetAttrList(ni);
rollback:
	/*
	 * Scan the attribute list for attributes that are not placed in the
	 * base MFT record and move them back to it.
	 */
	ntfs_attr_reinit_search_ctx(ctx);
	ale = (struct attr_list_entry *)al;
	while ((u8 *)ale < al + al_len) {
		if (MREF_LE(ale->mft_reference) != ni->mft_no) {
			if (!ntfs_attr_lookup(ale->type, ale->name,
						ale->name_length,
						CASE_SENSITIVE,
						le64_to_cpu(ale->lowest_vcn),
						NULL, 0, ctx)) {
				if (ntfs_attr_record_move_to(ctx, ni))
					ntfs_error(ni->vol->sb,
							"Rollback failed to move attribute");
			} else {
				ntfs_error(ni->vol->sb, "Rollback failed to find attr");
			}
			ntfs_attr_reinit_search_ctx(ctx);
		}
		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
	}

	/* Remove in-memory attribute list. */
	ni->attr_list = NULL;
	ni->attr_list_size = 0;
	NInoClearAttrList(ni);
	NInoClearAttrListDirty(ni);
put_err_out:
	ntfs_attr_put_search_ctx(ctx);
err_out:
	/* @al is no longer referenced by @ni on any path reaching here. */
	kvfree(al);
	unmap_mft_record(ni);
	return err;
}
3217 
3218 /*
3219  * ntfs_inode_close - close an ntfs inode and free all associated memory
3220  * @ni:		ntfs inode to close
3221  *
3222  * Make sure the ntfs inode @ni is clean.
3223  *
3224  * If the ntfs inode @ni is a base inode, close all associated extent inodes,
3225  * then deallocate all memory attached to it, and finally free the ntfs inode
3226  * structure itself.
3227  *
3228  * If it is an extent inode, we disconnect it from its base inode before we
3229  * destroy it.
3230  *
3231  * It is OK to pass NULL to this function, it is just noop in this case.
3232  *
3233  * Return 0 on success or error.
3234  */
3235 int ntfs_inode_close(struct ntfs_inode *ni)
3236 {
3237 	int err = -1;
3238 	struct ntfs_inode **tmp_nis;
3239 	struct ntfs_inode *base_ni;
3240 	s32 i;
3241 
3242 	if (!ni)
3243 		return 0;
3244 
3245 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
3246 
3247 	/* Is this a base inode with mapped extent inodes? */
3248 	/*
3249 	 * If the inode is an extent inode, disconnect it from the
3250 	 * base inode before destroying it.
3251 	 */
3252 	base_ni = ni->ext.base_ntfs_ino;
3253 	for (i = 0; i < base_ni->nr_extents; ++i) {
3254 		tmp_nis = base_ni->ext.extent_ntfs_inos;
3255 		if (tmp_nis[i] != ni)
3256 			continue;
3257 		/* Found it. Disconnect. */
3258 		memmove(tmp_nis + i, tmp_nis + i + 1,
3259 				(base_ni->nr_extents - i - 1) *
3260 				sizeof(struct ntfs_inode *));
3261 		/* Buffer should be for multiple of four extents. */
3262 		if ((--base_ni->nr_extents) & 3)
3263 			break;
3264 		/*
3265 		 * ElectricFence is unhappy with realloc(x,0) as free(x)
3266 		 * thus we explicitly separate these two cases.
3267 		 */
3268 		if (base_ni->nr_extents) {
3269 			/* Resize the memory buffer. */
3270 			tmp_nis = kvrealloc(tmp_nis, base_ni->nr_extents *
3271 					sizeof(struct ntfs_inode *), GFP_NOFS);
3272 			/* Ignore errors, they don't really matter. */
3273 			if (tmp_nis)
3274 				base_ni->ext.extent_ntfs_inos = tmp_nis;
3275 		} else if (tmp_nis) {
3276 			kvfree(tmp_nis);
3277 			base_ni->ext.extent_ntfs_inos = NULL;
3278 		}
3279 		break;
3280 	}
3281 
3282 	if (NInoDirty(ni))
3283 		ntfs_error(ni->vol->sb, "Releasing dirty inode %llu!\n",
3284 				ni->mft_no);
3285 	if (NInoAttrList(ni) && ni->attr_list)
3286 		kvfree(ni->attr_list);
3287 	ntfs_destroy_ext_inode(ni);
3288 	err = 0;
3289 	ntfs_debug("\n");
3290 	return err;
3291 }
3292 
3293 void ntfs_destroy_ext_inode(struct ntfs_inode *ni)
3294 {
3295 	ntfs_debug("Entering.");
3296 	if (ni == NULL)
3297 		return;
3298 
3299 	ntfs_attr_close(ni);
3300 
3301 	if (NInoDirty(ni))
3302 		ntfs_error(ni->vol->sb, "Releasing dirty ext inode %llu!\n",
3303 				ni->mft_no);
3304 	if (NInoAttrList(ni) && ni->attr_list)
3305 		kvfree(ni->attr_list);
3306 	kfree(ni->mrec);
3307 	kmem_cache_free(ntfs_inode_cache, ni);
3308 }
3309 
3310 static struct ntfs_inode *ntfs_inode_base(struct ntfs_inode *ni)
3311 {
3312 	if (ni->nr_extents == -1)
3313 		return ni->ext.base_ntfs_ino;
3314 	return ni;
3315 }
3316 
3317 static int ntfs_attr_position(__le32 type, struct ntfs_attr_search_ctx *ctx)
3318 {
3319 	int err;
3320 
3321 	err = ntfs_attr_lookup(type, NULL, 0, CASE_SENSITIVE, 0, NULL,
3322 				0, ctx);
3323 	if (err) {
3324 		__le32 atype;
3325 
3326 		if (err != -ENOENT)
3327 			return err;
3328 
3329 		atype = ctx->attr->type;
3330 		if (atype == AT_END)
3331 			return -ENOSPC;
3332 
3333 		/*
3334 		 * if ntfs_external_attr_lookup return -ENOENT, ctx->al_entry
3335 		 * could point to an attribute in an extent mft record, but
3336 		 * ctx->attr and ctx->ntfs_ino always points to an attibute in
3337 		 * a base mft record.
3338 		 */
3339 		if (ctx->al_entry &&
3340 		    MREF_LE(ctx->al_entry->mft_reference) != ctx->ntfs_ino->mft_no) {
3341 			ntfs_attr_reinit_search_ctx(ctx);
3342 			err = ntfs_attr_lookup(atype, NULL, 0, CASE_SENSITIVE, 0, NULL,
3343 					       0, ctx);
3344 			if (err)
3345 				return err;
3346 		}
3347 	}
3348 	return 0;
3349 }
3350 
/*
 * ntfs_inode_free_space - free space in the MFT record of inode
 * @ni:		ntfs inode in which MFT record free space
 * @size:	amount of space needed to free
 *
 * If the mft record of @ni has fewer than @size unused bytes, attributes are
 * moved out of the record one at a time with ntfs_attr_record_move_away()
 * until the unused space plus the size of the moved records reaches @size.
 * Candidates are searched starting at the $FILE_NAME position; the $DATA
 * attribute of $MFT and $INDEX_ROOT attributes are never moved.
 *
 * Return 0 on success or error:
 *	-EINVAL	@ni is NULL or @size is negative
 *	-EIO	the mft record could not be mapped
 *	-ENOMEM	no search context could be allocated
 *	-ENOSPC	the end of the attributes was reached before enough space
 *		had been freed
 *	or the error returned by the attribute lookup / move helpers.
 */
int ntfs_inode_free_space(struct ntfs_inode *ni, int size)
{
	struct ntfs_attr_search_ctx *ctx;
	int freed, err;
	struct mft_record *ni_mrec;
	struct super_block *sb;

	if (!ni || size < 0)
		return -EINVAL;
	ntfs_debug("Entering for inode %llu, size %d\n", ni->mft_no, size);

	sb = ni->vol->sb;
	ni_mrec = map_mft_record(ni);
	if (IS_ERR(ni_mrec))
		return -EIO;

	/* Start from the space that is already unused in the record. */
	freed = (le32_to_cpu(ni_mrec->bytes_allocated) -
			le32_to_cpu(ni_mrec->bytes_in_use));

	unmap_mft_record(ni);

	if (size <= freed)
		return 0;

	ctx = ntfs_attr_get_search_ctx(ni, NULL);
	if (!ctx) {
		ntfs_error(sb, "%s, Failed to get search context", __func__);
		return -ENOMEM;
	}

	/*
	 * Chkdsk complains if $STANDARD_INFORMATION is not in the base MFT
	 * record.
	 *
	 * Also we can't move $ATTRIBUTE_LIST from base MFT_RECORD, so position
	 * search context on first attribute after $STANDARD_INFORMATION and
	 * $ATTRIBUTE_LIST.
	 *
	 * Why do we reposition instead of simply skipping these attributes
	 * during enumeration? Because in case we have got only an in-memory
	 * attribute list ntfs_attr_lookup will fail when it tries to find
	 * $ATTRIBUTE_LIST.
	 */
	err = ntfs_attr_position(AT_FILE_NAME, ctx);
	if (err)
		goto put_err_out;

	while (1) {
		int record_size;

		/*
		 * Check whether attribute is from different MFT record. If so,
		 * find next, because we don't need such.
		 */
		while (ctx->ntfs_ino->mft_no != ni->mft_no) {
			/*
			 * Also entered from below to step past an attribute
			 * that must stay in the record; the loop condition is
			 * evaluated again after each lookup.
			 */
retry:
			err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, CASE_SENSITIVE,
						0, NULL, 0, ctx);
			if (err) {
				/*
				 * -ENOENT on the AT_END marker means nothing
				 * is left to move; -ENOENT anywhere else is
				 * not treated as an error.
				 */
				if (err != -ENOENT)
					ntfs_error(sb, "Attr lookup failed #2");
				else if (ctx->attr->type == AT_END)
					err = -ENOSPC;
				else
					err = 0;

				if (err)
					goto put_err_out;
			}
		}

		/* Never move the $DATA attribute of $MFT itself. */
		if (ntfs_inode_base(ctx->ntfs_ino)->mft_no == FILE_MFT &&
				ctx->attr->type == AT_DATA)
			goto retry;

		/* $INDEX_ROOT is never moved either. */
		if (ctx->attr->type == AT_INDEX_ROOT)
			goto retry;

		/* Remember the size now, the record is about to be moved. */
		record_size = le32_to_cpu(ctx->attr->length);

		/* Move away attribute. */
		err = ntfs_attr_record_move_away(ctx, 0);
		if (err) {
			ntfs_error(sb, "Failed to move out attribute #2");
			break;
		}
		freed += record_size;

		/* Check whether we are done. */
		if (size <= freed) {
			ntfs_attr_put_search_ctx(ctx);
			return 0;
		}

		/*
		 * Reposition to first attribute after $STANDARD_INFORMATION and
		 * $ATTRIBUTE_LIST (see comments upwards).
		 */
		ntfs_attr_reinit_search_ctx(ctx);
		err = ntfs_attr_position(AT_FILE_NAME, ctx);
		if (err)
			break;
	}
put_err_out:
	ntfs_attr_put_search_ctx(ctx);
	if (err == -ENOSPC)
		ntfs_debug("No attributes left that can be moved out.\n");
	return err;
}
3467 
3468 s64 ntfs_inode_attr_pread(struct inode *vi, s64 pos, s64 count, u8 *buf)
3469 {
3470 	struct address_space *mapping = vi->i_mapping;
3471 	struct folio *folio;
3472 	struct ntfs_inode *ni = NTFS_I(vi);
3473 	s64 isize;
3474 	u32 attr_len, total = 0, offset;
3475 	pgoff_t index;
3476 	int err = 0;
3477 
3478 	WARN_ON(!NInoAttr(ni));
3479 	if (!count)
3480 		return 0;
3481 
3482 	mutex_lock(&ni->mrec_lock);
3483 	isize = i_size_read(vi);
3484 	if (pos > isize) {
3485 		mutex_unlock(&ni->mrec_lock);
3486 		return -EINVAL;
3487 	}
3488 	if (pos + count > isize)
3489 		count = isize - pos;
3490 
3491 	if (!NInoNonResident(ni)) {
3492 		struct ntfs_attr_search_ctx *ctx;
3493 		u8 *attr;
3494 
3495 		ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3496 		if (!ctx) {
3497 			ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3498 			err = -ENOMEM;
3499 			mutex_unlock(&ni->mrec_lock);
3500 			goto out;
3501 		}
3502 
3503 		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3504 				       0, NULL, 0, ctx);
3505 		if (err) {
3506 			ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3507 			ntfs_attr_put_search_ctx(ctx);
3508 			mutex_unlock(&ni->mrec_lock);
3509 			goto out;
3510 		}
3511 
3512 		attr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3513 		memcpy(buf, (u8 *)attr + pos, count);
3514 		ntfs_attr_put_search_ctx(ctx);
3515 		mutex_unlock(&ni->mrec_lock);
3516 		return count;
3517 	}
3518 	mutex_unlock(&ni->mrec_lock);
3519 
3520 	index = pos >> PAGE_SHIFT;
3521 	do {
3522 		/* Update @index and get the next folio. */
3523 		folio = read_mapping_folio(mapping, index, NULL);
3524 		if (IS_ERR(folio))
3525 			break;
3526 
3527 		offset = offset_in_folio(folio, pos);
3528 		attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3529 
3530 		folio_lock(folio);
3531 		memcpy_from_folio(buf, folio, offset, attr_len);
3532 		folio_unlock(folio);
3533 		folio_put(folio);
3534 
3535 		total += attr_len;
3536 		buf += attr_len;
3537 		pos += attr_len;
3538 		count -= attr_len;
3539 		index++;
3540 	} while (count);
3541 out:
3542 	return err ? (s64)err : total;
3543 }
3544 
3545 static inline int ntfs_enlarge_attribute(struct inode *vi, s64 pos, s64 count,
3546 					 struct ntfs_attr_search_ctx *ctx)
3547 {
3548 	struct ntfs_inode *ni = NTFS_I(vi);
3549 	struct super_block *sb = vi->i_sb;
3550 	int ret;
3551 
3552 	if (pos + count <= ni->initialized_size)
3553 		return 0;
3554 
3555 	if (NInoEncrypted(ni) && NInoNonResident(ni))
3556 		return -EACCES;
3557 
3558 	if (NInoCompressed(ni))
3559 		return -EOPNOTSUPP;
3560 
3561 	if (pos + count > ni->data_size) {
3562 		if (ntfs_attr_truncate(ni, pos + count)) {
3563 			ntfs_debug("Failed to truncate attribute");
3564 			return -1;
3565 		}
3566 
3567 		ntfs_attr_reinit_search_ctx(ctx);
3568 		ret = ntfs_attr_lookup(ni->type,
3569 				       ni->name, ni->name_len, CASE_SENSITIVE,
3570 				       0, NULL, 0, ctx);
3571 		if (ret) {
3572 			ntfs_error(sb, "Failed to look up attr %#x", ni->type);
3573 			return ret;
3574 		}
3575 	}
3576 
3577 	if (!NInoNonResident(ni)) {
3578 		if (likely(i_size_read(vi) < ni->data_size))
3579 			i_size_write(vi, ni->data_size);
3580 		return 0;
3581 	}
3582 
3583 	if (pos + count > ni->initialized_size) {
3584 		ctx->attr->data.non_resident.initialized_size = cpu_to_le64(pos + count);
3585 		mark_mft_record_dirty(ctx->ntfs_ino);
3586 		ni->initialized_size = pos + count;
3587 		if (i_size_read(vi) < ni->initialized_size)
3588 			i_size_write(vi, ni->initialized_size);
3589 	}
3590 	return 0;
3591 }
3592 
3593 static s64 __ntfs_inode_resident_attr_pwrite(struct inode *vi,
3594 					     s64 pos, s64 count, u8 *buf,
3595 					     struct ntfs_attr_search_ctx *ctx)
3596 {
3597 	struct ntfs_inode *ni = NTFS_I(vi);
3598 	struct folio *folio;
3599 	struct address_space *mapping = vi->i_mapping;
3600 	u8 *addr;
3601 	int err = 0;
3602 
3603 	WARN_ON(NInoNonResident(ni));
3604 	if (pos + count > PAGE_SIZE) {
3605 		ntfs_error(vi->i_sb, "Out of write into resident attr %#x", ni->type);
3606 		return -EINVAL;
3607 	}
3608 
3609 	/* Copy to mft record page */
3610 	addr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3611 	memcpy(addr + pos, buf, count);
3612 	mark_mft_record_dirty(ctx->ntfs_ino);
3613 
3614 	/* Keep the first page clean and uptodate */
3615 	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
3616 				   mapping_gfp_mask(mapping));
3617 	if (IS_ERR(folio)) {
3618 		err = PTR_ERR(folio);
3619 		ntfs_error(vi->i_sb, "Failed to read a page 0 for attr %#x: %d",
3620 			   ni->type, err);
3621 		goto out;
3622 	}
3623 	if (!folio_test_uptodate(folio))
3624 		folio_fill_tail(folio, 0, addr,
3625 				le32_to_cpu(ctx->attr->data.resident.value_length));
3626 	else
3627 		memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, count);
3628 	folio_mark_uptodate(folio);
3629 	folio_unlock(folio);
3630 	folio_put(folio);
3631 out:
3632 	return err ? err : count;
3633 }
3634 
3635 static s64 __ntfs_inode_non_resident_attr_pwrite(struct inode *vi,
3636 						 s64 pos, s64 count, u8 *buf,
3637 						 struct ntfs_attr_search_ctx *ctx,
3638 						 bool sync)
3639 {
3640 	struct ntfs_inode *ni = NTFS_I(vi);
3641 	struct address_space *mapping = vi->i_mapping;
3642 	struct folio *folio;
3643 	pgoff_t index;
3644 	unsigned long offset, length;
3645 	size_t attr_len;
3646 	s64 ret = 0, written = 0;
3647 
3648 	WARN_ON(!NInoNonResident(ni));
3649 
3650 	index = pos >> PAGE_SHIFT;
3651 	while (count) {
3652 		if (count == PAGE_SIZE) {
3653 			folio = __filemap_get_folio(vi->i_mapping, index,
3654 					FGP_CREAT | FGP_LOCK,
3655 					mapping_gfp_mask(mapping));
3656 			if (IS_ERR(folio)) {
3657 				ret = -ENOMEM;
3658 				break;
3659 			}
3660 		} else {
3661 			folio = read_mapping_folio(mapping, index, NULL);
3662 			if (IS_ERR(folio)) {
3663 				ret = PTR_ERR(folio);
3664 				ntfs_error(vi->i_sb, "Failed to read a page %lu for attr %#x: %ld",
3665 						index, ni->type, PTR_ERR(folio));
3666 				break;
3667 			}
3668 
3669 			folio_lock(folio);
3670 		}
3671 
3672 		if (count == PAGE_SIZE) {
3673 			offset = 0;
3674 			attr_len = count;
3675 		} else {
3676 			offset = offset_in_folio(folio, pos);
3677 			attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3678 		}
3679 		memcpy_to_folio(folio, offset, buf, attr_len);
3680 
3681 		if (sync) {
3682 			struct ntfs_volume *vol = ni->vol;
3683 			s64 lcn, lcn_count;
3684 			unsigned int lcn_folio_off = 0;
3685 			struct bio *bio;
3686 			u64 rl_length = 0;
3687 			s64 vcn;
3688 			struct runlist_element *rl;
3689 
3690 			lcn_count = max_t(s64, 1, ntfs_bytes_to_cluster(vol, attr_len));
3691 			vcn = ntfs_pidx_to_cluster(vol, folio->index);
3692 
3693 			do {
3694 				down_write(&ni->runlist.lock);
3695 				rl = ntfs_attr_vcn_to_rl(ni, vcn, &lcn);
3696 				if (IS_ERR(rl)) {
3697 					ret = PTR_ERR(rl);
3698 					up_write(&ni->runlist.lock);
3699 					goto err_unlock_folio;
3700 				}
3701 
3702 				rl_length = rl->length - (vcn - rl->vcn);
3703 				if (rl_length < lcn_count) {
3704 					lcn_count -= rl_length;
3705 				} else {
3706 					rl_length = lcn_count;
3707 					lcn_count = 0;
3708 				}
3709 				up_write(&ni->runlist.lock);
3710 
3711 				if (vol->cluster_size_bits > PAGE_SHIFT) {
3712 					lcn_folio_off = folio->index << PAGE_SHIFT;
3713 					lcn_folio_off &= vol->cluster_size_mask;
3714 				}
3715 
3716 				bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
3717 						GFP_NOIO);
3718 				bio->bi_iter.bi_sector =
3719 					ntfs_bytes_to_sector(vol,
3720 							ntfs_cluster_to_bytes(vol, lcn) +
3721 							lcn_folio_off);
3722 
3723 				length = min_t(unsigned long,
3724 					       ntfs_cluster_to_bytes(vol, rl_length),
3725 					       folio_size(folio));
3726 				if (!bio_add_folio(bio, folio, length, offset)) {
3727 					ret = -EIO;
3728 					bio_put(bio);
3729 					goto err_unlock_folio;
3730 				}
3731 
3732 				submit_bio_wait(bio);
3733 				bio_put(bio);
3734 				vcn += rl_length;
3735 				offset += length;
3736 			} while (lcn_count != 0);
3737 
3738 			folio_mark_uptodate(folio);
3739 		} else {
3740 			folio_mark_uptodate(folio);
3741 			folio_mark_dirty(folio);
3742 		}
3743 err_unlock_folio:
3744 		folio_unlock(folio);
3745 		folio_put(folio);
3746 
3747 		if (ret)
3748 			break;
3749 
3750 		written += attr_len;
3751 		buf += attr_len;
3752 		pos += attr_len;
3753 		count -= attr_len;
3754 		index++;
3755 
3756 		cond_resched();
3757 	}
3758 
3759 	return ret ? ret : written;
3760 }
3761 
3762 s64 ntfs_inode_attr_pwrite(struct inode *vi, s64 pos, s64 count, u8 *buf, bool sync)
3763 {
3764 	struct ntfs_inode *ni = NTFS_I(vi);
3765 	struct ntfs_attr_search_ctx *ctx;
3766 	s64 ret;
3767 
3768 	WARN_ON(!NInoAttr(ni));
3769 
3770 	ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3771 	if (!ctx) {
3772 		ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3773 		return -ENOMEM;
3774 	}
3775 
3776 	ret = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3777 			       0, NULL, 0, ctx);
3778 	if (ret) {
3779 		ntfs_attr_put_search_ctx(ctx);
3780 		ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3781 		return ret;
3782 	}
3783 
3784 	mutex_lock(&ni->mrec_lock);
3785 	ret = ntfs_enlarge_attribute(vi, pos, count, ctx);
3786 	mutex_unlock(&ni->mrec_lock);
3787 	if (ret)
3788 		goto out;
3789 
3790 	if (NInoNonResident(ni))
3791 		ret = __ntfs_inode_non_resident_attr_pwrite(vi, pos, count, buf, ctx, sync);
3792 	else
3793 		ret = __ntfs_inode_resident_attr_pwrite(vi, pos, count, buf, ctx);
3794 out:
3795 	ntfs_attr_put_search_ctx(ctx);
3796 	return ret;
3797 }
3798 
3799 struct folio *ntfs_get_locked_folio(struct address_space *mapping,
3800 		pgoff_t index, pgoff_t end_index, struct file_ra_state *ra)
3801 {
3802 	struct folio *folio;
3803 
3804 	folio = filemap_lock_folio(mapping, index);
3805 	if (IS_ERR(folio)) {
3806 		if (PTR_ERR(folio) != -ENOENT)
3807 			return folio;
3808 
3809 		page_cache_sync_readahead(mapping, ra, NULL, index,
3810 				end_index - index);
3811 		folio = read_mapping_folio(mapping, index, NULL);
3812 		if (!IS_ERR(folio))
3813 			folio_lock(folio);
3814 	}
3815 
3816 	return folio;
3817 }
3818