xref: /linux/fs/ntfs/inode.c (revision cdd4dc3aebeab43a72ce0bc2b5bab6f0a80b97a5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NTFS kernel inode handling.
4  *
5  * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
6  * Copyright (c) 2025 LG Electronics Co., Ltd.
7  */
8 
9 #include <linux/writeback.h>
10 #include <linux/seq_file.h>
11 
12 #include "lcnalloc.h"
13 #include "time.h"
14 #include "ntfs.h"
15 #include "index.h"
16 #include "attrlist.h"
17 #include "reparse.h"
18 #include "ea.h"
19 #include "attrib.h"
20 #include "iomap.h"
21 #include "object_id.h"
22 
23 /*
24  * ntfs_test_inode - compare two (possibly fake) inodes for equality
25  * @vi:		vfs inode which to test
26  * @data:	data which is being tested with
27  *
28  * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
29  * inode @vi for equality with the ntfs attribute @data.
30  *
31  * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
32  * @na->name and @na->name_len are then ignored.
33  *
34  * Return 1 if the attributes match and 0 if not.
35  *
36  * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
37  * allowed to sleep.
38  */
39 int ntfs_test_inode(struct inode *vi, void *data)
40 {
41 	struct ntfs_attr *na = data;
42 	struct ntfs_inode *ni = NTFS_I(vi);
43 
44 	if (vi->i_ino != na->mft_no)
45 		return 0;
46 
47 	/* If !NInoAttr(ni), @vi is a normal file or directory inode. */
48 	if (likely(!NInoAttr(ni))) {
49 		/* If not looking for a normal inode this is a mismatch. */
50 		if (unlikely(na->type != AT_UNUSED))
51 			return 0;
52 	} else {
53 		/* A fake inode describing an attribute. */
54 		if (ni->type != na->type)
55 			return 0;
56 		if (ni->name_len != na->name_len)
57 			return 0;
58 		if (na->name_len && memcmp(ni->name, na->name,
59 				na->name_len * sizeof(__le16)))
60 			return 0;
61 		if (!ni->ext.base_ntfs_ino)
62 			return 0;
63 	}
64 
65 	/* Match! */
66 	return 1;
67 }
68 
69 /*
70  * ntfs_init_locked_inode - initialize an inode
71  * @vi:		vfs inode to initialize
72  * @data:	data which to initialize @vi to
73  *
74  * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
75  * order to enable ntfs_test_inode() to do its work.
76  *
77  * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
78  * In that case, @na->name and @na->name_len should be set to NULL and 0,
79  * respectively. Although that is not strictly necessary as
80  * ntfs_read_locked_inode() will fill them in later.
81  *
82  * Return 0 on success and error.
83  *
84  * NOTE: This function runs with the inode->i_lock spin lock held so it is not
85  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
86  */
87 static int ntfs_init_locked_inode(struct inode *vi, void *data)
88 {
89 	struct ntfs_attr *na = data;
90 	struct ntfs_inode *ni = NTFS_I(vi);
91 
92 	vi->i_ino = (unsigned long)na->mft_no;
93 
94 	if (na->type == AT_INDEX_ALLOCATION)
95 		NInoSetMstProtected(ni);
96 	else
97 		ni->type = na->type;
98 
99 	ni->name = na->name;
100 	ni->name_len = na->name_len;
101 	ni->folio = NULL;
102 	atomic_set(&ni->count, 1);
103 
104 	/* If initializing a normal inode, we are done. */
105 	if (likely(na->type == AT_UNUSED))
106 		return 0;
107 
108 	/* It is a fake inode. */
109 	NInoSetAttr(ni);
110 
111 	/*
112 	 * We have I30 global constant as an optimization as it is the name
113 	 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
114 	 * allocation but that is ok. And most attributes are unnamed anyway,
115 	 * thus the fraction of named attributes with name != I30 is actually
116 	 * absolutely tiny.
117 	 */
118 	if (na->name_len && na->name != I30) {
119 		unsigned int i;
120 
121 		i = na->name_len * sizeof(__le16);
122 		ni->name = kmalloc(i + sizeof(__le16), GFP_ATOMIC);
123 		if (!ni->name)
124 			return -ENOMEM;
125 		memcpy(ni->name, na->name, i);
126 		ni->name[na->name_len] = 0;
127 	}
128 	return 0;
129 }
130 
131 static int ntfs_read_locked_inode(struct inode *vi);
132 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
133 static int ntfs_read_locked_index_inode(struct inode *base_vi,
134 		struct inode *vi);
135 
136 /*
137  * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
138  * @sb:		super block of mounted volume
139  * @mft_no:	mft record number / inode number to obtain
140  *
141  * Obtain the struct inode corresponding to a specific normal inode (i.e. a
142  * file or directory).
143  *
144  * If the inode is in the cache, it is just returned with an increased
145  * reference count. Otherwise, a new struct inode is allocated and initialized,
146  * and finally ntfs_read_locked_inode() is called to read in the inode and
147  * fill in the remainder of the inode structure.
148  *
149  * Return the struct inode on success. Check the return value with IS_ERR() and
150  * if true, the function failed and the error code is obtained from PTR_ERR().
151  */
152 struct inode *ntfs_iget(struct super_block *sb, u64 mft_no)
153 {
154 	struct inode *vi;
155 	int err;
156 	struct ntfs_attr na;
157 
158 	na.mft_no = mft_no;
159 	na.type = AT_UNUSED;
160 	na.name = NULL;
161 	na.name_len = 0;
162 
163 	vi = iget5_locked(sb, mft_no, ntfs_test_inode,
164 			ntfs_init_locked_inode, &na);
165 	if (unlikely(!vi))
166 		return ERR_PTR(-ENOMEM);
167 
168 	err = 0;
169 
170 	/* If this is a freshly allocated inode, need to read it now. */
171 	if (inode_state_read_once(vi) & I_NEW) {
172 		err = ntfs_read_locked_inode(vi);
173 		unlock_new_inode(vi);
174 	}
175 	/*
176 	 * There is no point in keeping bad inodes around if the failure was
177 	 * due to ENOMEM. We want to be able to retry again later.
178 	 */
179 	if (unlikely(err == -ENOMEM)) {
180 		iput(vi);
181 		vi = ERR_PTR(err);
182 	}
183 	return vi;
184 }
185 
186 /*
187  * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
188  * @base_vi:	vfs base inode containing the attribute
189  * @type:	attribute type
190  * @name:	Unicode name of the attribute (NULL if unnamed)
191  * @name_len:	length of @name in Unicode characters (0 if unnamed)
192  *
193  * Obtain the (fake) struct inode corresponding to the attribute specified by
194  * @type, @name, and @name_len, which is present in the base mft record
195  * specified by the vfs inode @base_vi.
196  *
197  * If the attribute inode is in the cache, it is just returned with an
198  * increased reference count. Otherwise, a new struct inode is allocated and
199  * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
200  * attribute and fill in the inode structure.
201  *
202  * Note, for index allocation attributes, you need to use ntfs_index_iget()
203  * instead of ntfs_attr_iget() as working with indices is a lot more complex.
204  *
205  * Return the struct inode of the attribute inode on success. Check the return
206  * value with IS_ERR() and if true, the function failed and the error code is
207  * obtained from PTR_ERR().
208  */
209 struct inode *ntfs_attr_iget(struct inode *base_vi, __le32 type,
210 		__le16 *name, u32 name_len)
211 {
212 	struct inode *vi;
213 	int err;
214 	struct ntfs_attr na;
215 
216 	/* Make sure no one calls ntfs_attr_iget() for indices. */
217 	WARN_ON(type == AT_INDEX_ALLOCATION);
218 
219 	na.mft_no = base_vi->i_ino;
220 	na.type = type;
221 	na.name = name;
222 	na.name_len = name_len;
223 
224 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
225 			ntfs_init_locked_inode, &na);
226 	if (unlikely(!vi))
227 		return ERR_PTR(-ENOMEM);
228 	err = 0;
229 
230 	/* If this is a freshly allocated inode, need to read it now. */
231 	if (inode_state_read_once(vi) & I_NEW) {
232 		err = ntfs_read_locked_attr_inode(base_vi, vi);
233 		unlock_new_inode(vi);
234 	}
235 	/*
236 	 * There is no point in keeping bad attribute inodes around. This also
237 	 * simplifies things in that we never need to check for bad attribute
238 	 * inodes elsewhere.
239 	 */
240 	if (unlikely(err)) {
241 		iput(vi);
242 		vi = ERR_PTR(err);
243 	}
244 	return vi;
245 }
246 
247 /*
248  * ntfs_index_iget - obtain a struct inode corresponding to an index
249  * @base_vi:	vfs base inode containing the index related attributes
250  * @name:	Unicode name of the index
251  * @name_len:	length of @name in Unicode characters
252  *
253  * Obtain the (fake) struct inode corresponding to the index specified by @name
254  * and @name_len, which is present in the base mft record specified by the vfs
255  * inode @base_vi.
256  *
257  * If the index inode is in the cache, it is just returned with an increased
258  * reference count.  Otherwise, a new struct inode is allocated and
259  * initialized, and finally ntfs_read_locked_index_inode() is called to read
260  * the index related attributes and fill in the inode structure.
261  *
262  * Return the struct inode of the index inode on success. Check the return
263  * value with IS_ERR() and if true, the function failed and the error code is
264  * obtained from PTR_ERR().
265  */
266 struct inode *ntfs_index_iget(struct inode *base_vi, __le16 *name,
267 		u32 name_len)
268 {
269 	struct inode *vi;
270 	int err;
271 	struct ntfs_attr na;
272 
273 	na.mft_no = base_vi->i_ino;
274 	na.type = AT_INDEX_ALLOCATION;
275 	na.name = name;
276 	na.name_len = name_len;
277 
278 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
279 			ntfs_init_locked_inode, &na);
280 	if (unlikely(!vi))
281 		return ERR_PTR(-ENOMEM);
282 
283 	err = 0;
284 
285 	/* If this is a freshly allocated inode, need to read it now. */
286 	if (inode_state_read_once(vi) & I_NEW) {
287 		err = ntfs_read_locked_index_inode(base_vi, vi);
288 		unlock_new_inode(vi);
289 	}
290 	/*
291 	 * There is no point in keeping bad index inodes around.  This also
292 	 * simplifies things in that we never need to check for bad index
293 	 * inodes elsewhere.
294 	 */
295 	if (unlikely(err)) {
296 		iput(vi);
297 		vi = ERR_PTR(err);
298 	}
299 	return vi;
300 }
301 
302 struct inode *ntfs_alloc_big_inode(struct super_block *sb)
303 {
304 	struct ntfs_inode *ni;
305 
306 	ntfs_debug("Entering.");
307 	ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
308 	if (likely(ni != NULL)) {
309 		ni->state = 0;
310 		ni->type = 0;
311 		ni->mft_no = 0;
312 		return VFS_I(ni);
313 	}
314 	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
315 	return NULL;
316 }
317 
318 void ntfs_free_big_inode(struct inode *inode)
319 {
320 	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
321 }
322 
323 static int ntfs_non_resident_dealloc_clusters(struct ntfs_inode *ni)
324 {
325 	struct super_block *sb = ni->vol->sb;
326 	struct ntfs_attr_search_ctx *actx;
327 	int err = 0;
328 
329 	actx = ntfs_attr_get_search_ctx(ni, NULL);
330 	if (!actx)
331 		return -ENOMEM;
332 	WARN_ON(actx->mrec->link_count != 0);
333 
334 	/**
335 	 * ntfs_truncate_vfs cannot be called in evict() context due
336 	 * to some limitations, which are the @ni vfs inode is marked
337 	 * with I_FREEING, and etc.
338 	 */
339 	if (NInoRunlistDirty(ni)) {
340 		err = ntfs_cluster_free_from_rl(ni->vol, ni->runlist.rl);
341 		if (err)
342 			ntfs_error(sb,
343 					"Failed to free clusters. Leaving inconsistent metadata.\n");
344 	}
345 
346 	while ((err = ntfs_attrs_walk(actx)) == 0) {
347 		if (actx->attr->non_resident &&
348 				(!NInoRunlistDirty(ni) || actx->attr->type != AT_DATA)) {
349 			struct runlist_element *rl;
350 			size_t new_rl_count;
351 
352 			rl = ntfs_mapping_pairs_decompress(ni->vol, actx->attr, NULL,
353 					&new_rl_count);
354 			if (IS_ERR(rl)) {
355 				err = PTR_ERR(rl);
356 				ntfs_error(sb,
357 					   "Failed to decompress runlist. Leaving inconsistent metadata.\n");
358 				continue;
359 			}
360 
361 			err = ntfs_cluster_free_from_rl(ni->vol, rl);
362 			if (err)
363 				ntfs_error(sb,
364 					   "Failed to free attribute clusters. Leaving inconsistent metadata.\n");
365 			kvfree(rl);
366 		}
367 	}
368 
369 	ntfs_release_dirty_clusters(ni->vol, ni->i_dealloc_clusters);
370 	ntfs_attr_put_search_ctx(actx);
371 	return err;
372 }
373 
374 int ntfs_drop_big_inode(struct inode *inode)
375 {
376 	struct ntfs_inode *ni = NTFS_I(inode);
377 
378 	if (!inode_unhashed(inode) && inode_state_read_once(inode) & I_SYNC) {
379 		if (ni->type == AT_DATA || ni->type == AT_INDEX_ALLOCATION) {
380 			if (!inode->i_nlink) {
381 				struct ntfs_inode *ni = NTFS_I(inode);
382 
383 				if (ni->data_size == 0)
384 					return 0;
385 
386 				/* To avoid evict_inode call simultaneously */
387 				atomic_inc(&inode->i_count);
388 				spin_unlock(&inode->i_lock);
389 
390 				truncate_setsize(VFS_I(ni), 0);
391 				ntfs_truncate_vfs(VFS_I(ni), 0, 1);
392 
393 				sb_start_intwrite(inode->i_sb);
394 				i_size_write(inode, 0);
395 				ni->allocated_size = ni->initialized_size = ni->data_size = 0;
396 
397 				truncate_inode_pages_final(inode->i_mapping);
398 				sb_end_intwrite(inode->i_sb);
399 
400 				spin_lock(&inode->i_lock);
401 				atomic_dec(&inode->i_count);
402 			}
403 		}
404 		return 0;
405 	}
406 
407 	return inode_generic_drop(inode);
408 }
409 
410 static inline struct ntfs_inode *ntfs_alloc_extent_inode(void)
411 {
412 	struct ntfs_inode *ni;
413 
414 	ntfs_debug("Entering.");
415 	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
416 	if (likely(ni != NULL)) {
417 		ni->state = 0;
418 		return ni;
419 	}
420 	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
421 	return NULL;
422 }
423 
424 static void ntfs_destroy_extent_inode(struct ntfs_inode *ni)
425 {
426 	ntfs_debug("Entering.");
427 
428 	if (!atomic_dec_and_test(&ni->count))
429 		WARN_ON(1);
430 	if (ni->folio)
431 		folio_put(ni->folio);
432 	kfree(ni->mrec);
433 	kmem_cache_free(ntfs_inode_cache, ni);
434 }
435 
436 static struct lock_class_key attr_inode_mrec_lock_class;
437 static struct lock_class_key attr_list_inode_mrec_lock_class;
438 
439 /*
440  * The attribute runlist lock has separate locking rules from the
441  * normal runlist lock, so split the two lock-classes:
442  */
443 static struct lock_class_key attr_list_rl_lock_class;
444 
445 /*
446  * __ntfs_init_inode - initialize ntfs specific part of an inode
447  * @sb:		super block of mounted volume
448  * @ni:		freshly allocated ntfs inode which to initialize
449  *
450  * Initialize an ntfs inode to defaults.
451  *
452  * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
453  * untouched. Make sure to initialize them elsewhere.
454  */
455 void __ntfs_init_inode(struct super_block *sb, struct ntfs_inode *ni)
456 {
457 	ntfs_debug("Entering.");
458 	rwlock_init(&ni->size_lock);
459 	ni->initialized_size = ni->allocated_size = 0;
460 	ni->seq_no = 0;
461 	atomic_set(&ni->count, 1);
462 	ni->vol = NTFS_SB(sb);
463 	ntfs_init_runlist(&ni->runlist);
464 	mutex_init(&ni->mrec_lock);
465 	if (ni->type == AT_ATTRIBUTE_LIST) {
466 		lockdep_set_class(&ni->mrec_lock,
467 				  &attr_list_inode_mrec_lock_class);
468 		lockdep_set_class(&ni->runlist.lock,
469 				  &attr_list_rl_lock_class);
470 	} else if (NInoAttr(ni)) {
471 		lockdep_set_class(&ni->mrec_lock,
472 				  &attr_inode_mrec_lock_class);
473 	}
474 
475 	ni->folio = NULL;
476 	ni->folio_ofs = 0;
477 	ni->mrec = NULL;
478 	ni->attr_list_size = 0;
479 	ni->attr_list = NULL;
480 	ni->itype.index.block_size = 0;
481 	ni->itype.index.vcn_size = 0;
482 	ni->itype.index.collation_rule = 0;
483 	ni->itype.index.block_size_bits = 0;
484 	ni->itype.index.vcn_size_bits = 0;
485 	mutex_init(&ni->extent_lock);
486 	ni->nr_extents = 0;
487 	ni->ext.base_ntfs_ino = NULL;
488 	ni->flags = 0;
489 	ni->mft_lcn[0] = LCN_RL_NOT_MAPPED;
490 	ni->mft_lcn_count = 0;
491 	ni->target = NULL;
492 	ni->i_dealloc_clusters = 0;
493 }
494 
495 /*
496  * Extent inodes get MFT-mapped in a nested way, while the base inode
497  * is still mapped. Teach this nesting to the lock validator by creating
498  * a separate class for nested inode's mrec_lock's:
499  */
500 static struct lock_class_key extent_inode_mrec_lock_key;
501 
502 inline struct ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
503 		u64 mft_no)
504 {
505 	struct ntfs_inode *ni = ntfs_alloc_extent_inode();
506 
507 	ntfs_debug("Entering.");
508 	if (likely(ni != NULL)) {
509 		__ntfs_init_inode(sb, ni);
510 		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
511 		ni->mft_no = mft_no;
512 		ni->type = AT_UNUSED;
513 		ni->name = NULL;
514 		ni->name_len = 0;
515 	}
516 	return ni;
517 }
518 
519 /*
520  * ntfs_is_extended_system_file - check if a file is in the $Extend directory
521  * @ctx:	initialized attribute search context
522  *
523  * Search all file name attributes in the inode described by the attribute
524  * search context @ctx and check if any of the names are in the $Extend system
525  * directory.
526  *
527  * Return values:
528  *	   3: file is $ObjId in $Extend directory
529  *	   2: file is $Reparse in $Extend directory
530  *	   1: file is in $Extend directory
531  *	   0: file is not in $Extend directory
532  *    -errno: failed to determine if the file is in the $Extend directory
533  */
534 static int ntfs_is_extended_system_file(struct ntfs_attr_search_ctx *ctx)
535 {
536 	int nr_links, err;
537 
538 	/* Restart search. */
539 	ntfs_attr_reinit_search_ctx(ctx);
540 
541 	/* Get number of hard links. */
542 	nr_links = le16_to_cpu(ctx->mrec->link_count);
543 
544 	/* Loop through all hard links. */
545 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
546 			ctx))) {
547 		struct file_name_attr *file_name_attr;
548 		struct attr_record *attr = ctx->attr;
549 		u8 *p, *p2;
550 
551 		nr_links--;
552 		/*
553 		 * Maximum sanity checking as we are called on an inode that
554 		 * we suspect might be corrupt.
555 		 */
556 		p = (u8 *)attr + le32_to_cpu(attr->length);
557 		if (p < (u8 *)ctx->mrec || (u8 *)p > (u8 *)ctx->mrec +
558 				le32_to_cpu(ctx->mrec->bytes_in_use)) {
559 err_corrupt_attr:
560 			ntfs_error(ctx->ntfs_ino->vol->sb,
561 					"Corrupt file name attribute. You should run chkdsk.");
562 			return -EIO;
563 		}
564 		if (attr->non_resident) {
565 			ntfs_error(ctx->ntfs_ino->vol->sb,
566 					"Non-resident file name. You should run chkdsk.");
567 			return -EIO;
568 		}
569 		if (attr->flags) {
570 			ntfs_error(ctx->ntfs_ino->vol->sb,
571 					"File name with invalid flags. You should run chkdsk.");
572 			return -EIO;
573 		}
574 		if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
575 			ntfs_error(ctx->ntfs_ino->vol->sb,
576 					"Unindexed file name. You should run chkdsk.");
577 			return -EIO;
578 		}
579 		file_name_attr = (struct file_name_attr *)((u8 *)attr +
580 				le16_to_cpu(attr->data.resident.value_offset));
581 		p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
582 		if (p2 < (u8 *)attr || p2 > p)
583 			goto err_corrupt_attr;
584 		/* This attribute is ok, but is it in the $Extend directory? */
585 		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) {
586 			unsigned char *s;
587 
588 			s = ntfs_attr_name_get(ctx->ntfs_ino->vol,
589 					file_name_attr->file_name,
590 					file_name_attr->file_name_length);
591 			if (!s)
592 				return 1;
593 			if (!strcmp("$Reparse", s)) {
594 				ntfs_attr_name_free(&s);
595 				return 2; /* it's reparse point file */
596 			}
597 			if (!strcmp("$ObjId", s)) {
598 				ntfs_attr_name_free(&s);
599 				return 3; /* it's object id file */
600 			}
601 			ntfs_attr_name_free(&s);
602 			return 1;	/* YES, it's an extended system file. */
603 		}
604 	}
605 	if (unlikely(err != -ENOENT))
606 		return err;
607 	if (unlikely(nr_links)) {
608 		ntfs_error(ctx->ntfs_ino->vol->sb,
609 			"Inode hard link count doesn't match number of name attributes. You should run chkdsk.");
610 		return -EIO;
611 	}
612 	return 0;	/* NO, it is not an extended system file. */
613 }
614 
615 static struct lock_class_key ntfs_dir_inval_lock_key;
616 
617 void ntfs_set_vfs_operations(struct inode *inode, mode_t mode, dev_t dev)
618 {
619 	if (S_ISDIR(mode)) {
620 		if (!NInoAttr(NTFS_I(inode))) {
621 			inode->i_op = &ntfs_dir_inode_ops;
622 			inode->i_fop = &ntfs_dir_ops;
623 		}
624 		inode->i_mapping->a_ops = &ntfs_aops;
625 		lockdep_set_class(&inode->i_mapping->invalidate_lock,
626 				  &ntfs_dir_inval_lock_key);
627 	} else if (S_ISLNK(mode)) {
628 		inode->i_op = &ntfs_symlink_inode_operations;
629 		inode->i_mapping->a_ops = &ntfs_aops;
630 	} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
631 		inode->i_op = &ntfs_special_inode_operations;
632 		init_special_inode(inode, inode->i_mode, dev);
633 	} else {
634 		if (!NInoAttr(NTFS_I(inode))) {
635 			inode->i_op = &ntfs_file_inode_ops;
636 			inode->i_fop = &ntfs_file_ops;
637 		}
638 		if (inode->i_ino == FILE_MFT)
639 			inode->i_mapping->a_ops = &ntfs_mft_aops;
640 		else
641 			inode->i_mapping->a_ops = &ntfs_aops;
642 	}
643 }
644 
645 /*
646  * ntfs_read_locked_inode - read an inode from its device
647  * @vi:		inode to read
648  *
649  * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
650  * described by @vi into memory from the device.
651  *
652  * The only fields in @vi that we need to/can look at when the function is
653  * called are i_sb, pointing to the mounted device's super block, and i_ino,
654  * the number of the inode to load.
655  *
656  * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
657  * for reading and sets up the necessary @vi fields as well as initializing
658  * the ntfs inode.
659  *
660  * Q: What locks are held when the function is called?
661  * A: i_state has I_NEW set, hence the inode is locked, also
662  *    i_count is set to 1, so it is not going to go away
663  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
664  *    is allowed to write to them. We should of course be honouring them but
665  *    we need to do that using the IS_* macros defined in include/linux/fs.h.
666  *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
667  *
668  * Return 0 on success and -errno on error.
669  */
670 static int ntfs_read_locked_inode(struct inode *vi)
671 {
672 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
673 	struct ntfs_inode *ni = NTFS_I(vi);
674 	struct mft_record *m;
675 	struct attr_record *a;
676 	struct standard_information *si;
677 	struct ntfs_attr_search_ctx *ctx;
678 	int err = 0;
679 	__le16 *name = I30;
680 	unsigned int name_len = 4, flags = 0;
681 	int extend_sys = 0;
682 	dev_t dev = 0;
683 	bool vol_err = true;
684 
685 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
686 
687 	if (uid_valid(vol->uid)) {
688 		vi->i_uid = vol->uid;
689 		flags |= NTFS_VOL_UID;
690 	} else
691 		vi->i_uid = GLOBAL_ROOT_UID;
692 
693 	if (gid_valid(vol->gid)) {
694 		vi->i_gid = vol->gid;
695 		flags |= NTFS_VOL_GID;
696 	} else
697 		vi->i_gid = GLOBAL_ROOT_GID;
698 
699 	vi->i_mode = 0777;
700 
701 	/*
702 	 * Initialize the ntfs specific part of @vi special casing
703 	 * FILE_MFT which we need to do at mount time.
704 	 */
705 	if (vi->i_ino != FILE_MFT)
706 		ntfs_init_big_inode(vi);
707 
708 	m = map_mft_record(ni);
709 	if (IS_ERR(m)) {
710 		err = PTR_ERR(m);
711 		goto err_out;
712 	}
713 
714 	ctx = ntfs_attr_get_search_ctx(ni, m);
715 	if (!ctx) {
716 		err = -ENOMEM;
717 		goto unm_err_out;
718 	}
719 
720 	if (!(m->flags & MFT_RECORD_IN_USE)) {
721 		err = -ENOENT;
722 		vol_err = false;
723 		goto unm_err_out;
724 	}
725 
726 	if (m->base_mft_record) {
727 		ntfs_error(vi->i_sb, "Inode is an extent inode!");
728 		goto unm_err_out;
729 	}
730 
731 	/* Transfer information from mft record into vfs and ntfs inodes. */
732 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
733 
734 	if (le16_to_cpu(m->link_count) < 1) {
735 		ntfs_error(vi->i_sb, "Inode link count is 0!");
736 		goto unm_err_out;
737 	}
738 	set_nlink(vi, le16_to_cpu(m->link_count));
739 
740 	/* If read-only, no one gets write permissions. */
741 	if (IS_RDONLY(vi))
742 		vi->i_mode &= ~0222;
743 
744 	/*
745 	 * Find the standard information attribute in the mft record. At this
746 	 * stage we haven't setup the attribute list stuff yet, so this could
747 	 * in fact fail if the standard information is in an extent record, but
748 	 * I don't think this actually ever happens.
749 	 */
750 	ntfs_attr_reinit_search_ctx(ctx);
751 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
752 			ctx);
753 	if (unlikely(err)) {
754 		if (err == -ENOENT)
755 			ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is missing.");
756 		goto unm_err_out;
757 	}
758 	a = ctx->attr;
759 	/* Get the standard information attribute value. */
760 	si = (struct standard_information *)((u8 *)a +
761 			le16_to_cpu(a->data.resident.value_offset));
762 
763 	/* Transfer information from the standard information into vi. */
764 	/*
765 	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
766 	 * but they are close enough, and in the end it doesn't really matter
767 	 * that much...
768 	 */
769 	/*
770 	 * mtime is the last change of the data within the file. Not changed
771 	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
772 	 */
773 	ni->i_crtime = ntfs2utc(si->creation_time);
774 
775 	inode_set_mtime_to_ts(vi, ntfs2utc(si->last_data_change_time));
776 	/*
777 	 * ctime is the last change of the metadata of the file. This obviously
778 	 * always changes, when mtime is changed. ctime can be changed on its
779 	 * own, mtime is then not changed, e.g. when a file is renamed.
780 	 */
781 	inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
782 	/*
783 	 * Last access to the data within the file. Not changed during a rename
784 	 * for example but changed whenever the file is written to.
785 	 */
786 	inode_set_atime_to_ts(vi, ntfs2utc(si->last_access_time));
787 	ni->flags = si->file_attributes;
788 
789 	/* Find the attribute list attribute if present. */
790 	ntfs_attr_reinit_search_ctx(ctx);
791 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
792 	if (err) {
793 		if (unlikely(err != -ENOENT)) {
794 			ntfs_error(vi->i_sb, "Failed to lookup attribute list attribute.");
795 			goto unm_err_out;
796 		}
797 	} else {
798 		if (vi->i_ino == FILE_MFT)
799 			goto skip_attr_list_load;
800 		ntfs_debug("Attribute list found in inode 0x%llx.", ni->mft_no);
801 		NInoSetAttrList(ni);
802 		a = ctx->attr;
803 		if (a->flags & ATTR_COMPRESSION_MASK) {
804 			ntfs_error(vi->i_sb,
805 				"Attribute list attribute is compressed.");
806 			goto unm_err_out;
807 		}
808 		if (a->flags & ATTR_IS_ENCRYPTED ||
809 				a->flags & ATTR_IS_SPARSE) {
810 			if (a->non_resident) {
811 				ntfs_error(vi->i_sb,
812 					"Non-resident attribute list attribute is encrypted/sparse.");
813 				goto unm_err_out;
814 			}
815 			ntfs_warning(vi->i_sb,
816 				"Resident attribute list attribute in inode 0x%llx is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.",
817 				ni->mft_no);
818 		}
819 		/* Now allocate memory for the attribute list. */
820 		ni->attr_list_size = (u32)ntfs_attr_size(a);
821 		if (!ni->attr_list_size) {
822 			ntfs_error(vi->i_sb, "Attr_list_size is zero");
823 			goto unm_err_out;
824 		}
825 		ni->attr_list = kvzalloc(ni->attr_list_size, GFP_NOFS);
826 		if (!ni->attr_list) {
827 			ntfs_error(vi->i_sb,
828 				"Not enough memory to allocate buffer for attribute list.");
829 			err = -ENOMEM;
830 			goto unm_err_out;
831 		}
832 		if (a->non_resident) {
833 			NInoSetAttrListNonResident(ni);
834 			if (a->data.non_resident.lowest_vcn) {
835 				ntfs_error(vi->i_sb, "Attribute list has non zero lowest_vcn.");
836 				goto unm_err_out;
837 			}
838 
839 			/* Now load the attribute list. */
840 			err = load_attribute_list(ni, ni->attr_list, ni->attr_list_size);
841 			if (err) {
842 				ntfs_error(vi->i_sb, "Failed to load attribute list attribute.");
843 				goto unm_err_out;
844 			}
845 		} else /* if (!a->non_resident) */ {
846 			/* Now copy the attribute list. */
847 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
848 					a->data.resident.value_offset),
849 					le32_to_cpu(
850 					a->data.resident.value_length));
851 		}
852 	}
853 skip_attr_list_load:
854 	err = ntfs_attr_lookup(AT_EA_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx);
855 	if (!err) {
856 		NInoSetHasEA(ni);
857 		ntfs_ea_get_wsl_inode(vi, &dev, flags);
858 	}
859 
860 	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
861 		vi->i_mode |= S_IFDIR;
862 		/*
863 		 * Apply the directory permissions mask set in the mount
864 		 * options.
865 		 */
866 		vi->i_mode &= ~vol->dmask;
867 		/* Things break without this kludge! */
868 		if (vi->i_nlink > 1)
869 			set_nlink(vi, 1);
870 	} else {
871 		if (ni->flags & FILE_ATTR_REPARSE_POINT) {
872 			unsigned int mode;
873 
874 			mode = ntfs_make_symlink(ni);
875 			if (mode)
876 				vi->i_mode |= mode;
877 			else {
878 				vi->i_mode &= ~S_IFLNK;
879 				vi->i_mode |= S_IFREG;
880 			}
881 		} else
882 			vi->i_mode |= S_IFREG;
883 		/* Apply the file permissions mask set in the mount options. */
884 		vi->i_mode &= ~vol->fmask;
885 	}
886 
887 	/*
888 	 * If an attribute list is present we now have the attribute list value
889 	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
890 	 */
891 	if (S_ISDIR(vi->i_mode)) {
892 		struct index_root *ir;
893 		u8 *ir_end, *index_end;
894 
895 view_index_meta:
896 		/* It is a directory, find index root attribute. */
897 		ntfs_attr_reinit_search_ctx(ctx);
898 		err = ntfs_attr_lookup(AT_INDEX_ROOT, name, name_len, CASE_SENSITIVE,
899 				0, NULL, 0, ctx);
900 		if (unlikely(err)) {
901 			if (err == -ENOENT)
902 				ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
903 			goto unm_err_out;
904 		}
905 		a = ctx->attr;
906 		/* Set up the state. */
907 		if (unlikely(a->non_resident)) {
908 			ntfs_error(vol->sb,
909 				"$INDEX_ROOT attribute is not resident.");
910 			goto unm_err_out;
911 		}
912 		/* Ensure the attribute name is placed before the value. */
913 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
914 				le16_to_cpu(a->data.resident.value_offset)))) {
915 			ntfs_error(vol->sb,
916 				"$INDEX_ROOT attribute name is placed after the attribute value.");
917 			goto unm_err_out;
918 		}
919 		/*
920 		 * Compressed/encrypted index root just means that the newly
921 		 * created files in that directory should be created compressed/
922 		 * encrypted. However index root cannot be both compressed and
923 		 * encrypted.
924 		 */
925 		if (a->flags & ATTR_COMPRESSION_MASK) {
926 			NInoSetCompressed(ni);
927 			ni->flags |= FILE_ATTR_COMPRESSED;
928 		}
929 		if (a->flags & ATTR_IS_ENCRYPTED) {
930 			if (a->flags & ATTR_COMPRESSION_MASK) {
931 				ntfs_error(vi->i_sb, "Found encrypted and compressed attribute.");
932 				goto unm_err_out;
933 			}
934 			NInoSetEncrypted(ni);
935 			ni->flags |= FILE_ATTR_ENCRYPTED;
936 		}
937 		if (a->flags & ATTR_IS_SPARSE) {
938 			NInoSetSparse(ni);
939 			ni->flags |= FILE_ATTR_SPARSE_FILE;
940 		}
941 		ir = (struct index_root *)((u8 *)a +
942 				le16_to_cpu(a->data.resident.value_offset));
943 		ir_end = (u8 *)ir + le32_to_cpu(a->data.resident.value_length);
944 		index_end = (u8 *)&ir->index +
945 				le32_to_cpu(ir->index.index_length);
946 		if (index_end > ir_end) {
947 			ntfs_error(vi->i_sb, "Directory index is corrupt.");
948 			goto unm_err_out;
949 		}
950 
951 		if (extend_sys) {
952 			if (ir->type) {
953 				ntfs_error(vi->i_sb, "Indexed attribute is not zero.");
954 				goto unm_err_out;
955 			}
956 		} else {
957 			if (ir->type != AT_FILE_NAME) {
958 				ntfs_error(vi->i_sb, "Indexed attribute is not $FILE_NAME.");
959 				goto unm_err_out;
960 			}
961 
962 			if (ir->collation_rule != COLLATION_FILE_NAME) {
963 				ntfs_error(vi->i_sb,
964 					"Index collation rule is not COLLATION_FILE_NAME.");
965 				goto unm_err_out;
966 			}
967 		}
968 
969 		ni->itype.index.collation_rule = ir->collation_rule;
970 		ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
971 		if (ni->itype.index.block_size &
972 				(ni->itype.index.block_size - 1)) {
973 			ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
974 					ni->itype.index.block_size);
975 			goto unm_err_out;
976 		}
977 		if (ni->itype.index.block_size > PAGE_SIZE) {
978 			ntfs_error(vi->i_sb,
979 				"Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
980 				ni->itype.index.block_size,
981 				PAGE_SIZE);
982 			err = -EOPNOTSUPP;
983 			goto unm_err_out;
984 		}
985 		if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
986 			ntfs_error(vi->i_sb,
987 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
988 				ni->itype.index.block_size,
989 				NTFS_BLOCK_SIZE);
990 			err = -EOPNOTSUPP;
991 			goto unm_err_out;
992 		}
993 		ni->itype.index.block_size_bits =
994 				ffs(ni->itype.index.block_size) - 1;
995 		/* Determine the size of a vcn in the directory index. */
996 		if (vol->cluster_size <= ni->itype.index.block_size) {
997 			ni->itype.index.vcn_size = vol->cluster_size;
998 			ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
999 		} else {
1000 			ni->itype.index.vcn_size = vol->sector_size;
1001 			ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1002 		}
1003 
1004 		/* Setup the index allocation attribute, even if not present. */
1005 		ni->type = AT_INDEX_ROOT;
1006 		ni->name = name;
1007 		ni->name_len = name_len;
1008 		vi->i_size = ni->initialized_size = ni->data_size =
1009 			le32_to_cpu(a->data.resident.value_length);
1010 		ni->allocated_size = (ni->data_size + 7) & ~7;
1011 		/* We are done with the mft record, so we release it. */
1012 		ntfs_attr_put_search_ctx(ctx);
1013 		unmap_mft_record(ni);
1014 		m = NULL;
1015 		ctx = NULL;
1016 		/* Setup the operations for this inode. */
1017 		ntfs_set_vfs_operations(vi, S_IFDIR, 0);
1018 		if (ir->index.flags & LARGE_INDEX)
1019 			NInoSetIndexAllocPresent(ni);
1020 	} else {
1021 		/* It is a file. */
1022 		ntfs_attr_reinit_search_ctx(ctx);
1023 
1024 		/* Setup the data attribute, even if not present. */
1025 		ni->type = AT_DATA;
1026 		ni->name = AT_UNNAMED;
1027 		ni->name_len = 0;
1028 
1029 		/* Find first extent of the unnamed data attribute. */
1030 		err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
1031 		if (unlikely(err)) {
1032 			vi->i_size = ni->initialized_size =
1033 					ni->allocated_size = 0;
1034 			if (err != -ENOENT) {
1035 				ntfs_error(vi->i_sb, "Failed to lookup $DATA attribute.");
1036 				goto unm_err_out;
1037 			}
1038 			/*
1039 			 * FILE_Secure does not have an unnamed $DATA
1040 			 * attribute, so we special case it here.
1041 			 */
1042 			if (vi->i_ino == FILE_Secure)
1043 				goto no_data_attr_special_case;
1044 			/*
1045 			 * Most if not all the system files in the $Extend
1046 			 * system directory do not have unnamed data
1047 			 * attributes so we need to check if the parent
1048 			 * directory of the file is FILE_Extend and if it is
1049 			 * ignore this error. To do this we need to get the
1050 			 * name of this inode from the mft record as the name
1051 			 * contains the back reference to the parent directory.
1052 			 */
1053 			extend_sys = ntfs_is_extended_system_file(ctx);
1054 			if (extend_sys > 0) {
1055 				if (m->flags & MFT_RECORD_IS_VIEW_INDEX) {
1056 					if (extend_sys == 2) {
1057 						name = reparse_index_name;
1058 						name_len = 2;
1059 						goto view_index_meta;
1060 					} else if (extend_sys == 3) {
1061 						name = objid_index_name;
1062 						name_len = 2;
1063 						goto view_index_meta;
1064 					}
1065 				}
1066 				goto no_data_attr_special_case;
1067 			}
1068 
1069 			err = extend_sys;
1070 			ntfs_error(vi->i_sb, "$DATA attribute is missing, err : %d", err);
1071 			goto unm_err_out;
1072 		}
1073 		a = ctx->attr;
1074 		/* Setup the state. */
1075 		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1076 			if (a->flags & ATTR_COMPRESSION_MASK) {
1077 				NInoSetCompressed(ni);
1078 				ni->flags |= FILE_ATTR_COMPRESSED;
1079 				if (vol->cluster_size > 4096) {
1080 					ntfs_error(vi->i_sb,
1081 						"Found compressed data but compression is disabled due to cluster size (%i) > 4kiB.",
1082 						vol->cluster_size);
1083 					goto unm_err_out;
1084 				}
1085 				if ((a->flags & ATTR_COMPRESSION_MASK)
1086 						!= ATTR_IS_COMPRESSED) {
1087 					ntfs_error(vi->i_sb,
1088 						"Found unknown compression method or corrupt file.");
1089 					goto unm_err_out;
1090 				}
1091 			}
1092 			if (a->flags & ATTR_IS_SPARSE) {
1093 				NInoSetSparse(ni);
1094 				ni->flags |= FILE_ATTR_SPARSE_FILE;
1095 			}
1096 		}
1097 		if (a->flags & ATTR_IS_ENCRYPTED) {
1098 			if (NInoCompressed(ni)) {
1099 				ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1100 				goto unm_err_out;
1101 			}
1102 			NInoSetEncrypted(ni);
1103 			ni->flags |= FILE_ATTR_ENCRYPTED;
1104 		}
1105 		if (a->non_resident) {
1106 			NInoSetNonResident(ni);
1107 			if (NInoCompressed(ni) || NInoSparse(ni)) {
1108 				if (NInoCompressed(ni) &&
1109 				    a->data.non_resident.compression_unit != 4) {
1110 					ntfs_error(vi->i_sb,
1111 						"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1112 						a->data.non_resident.compression_unit);
1113 					err = -EOPNOTSUPP;
1114 					goto unm_err_out;
1115 				}
1116 
1117 				if (NInoSparse(ni) &&
1118 				    a->data.non_resident.compression_unit &&
1119 				    a->data.non_resident.compression_unit !=
1120 				     vol->sparse_compression_unit) {
1121 					ntfs_error(vi->i_sb,
1122 						   "Found non-standard compression unit (%u instead of 0 or %d).  Cannot handle this.",
1123 						   a->data.non_resident.compression_unit,
1124 						   vol->sparse_compression_unit);
1125 					err = -EOPNOTSUPP;
1126 					goto unm_err_out;
1127 				}
1128 
1129 
1130 				if (a->data.non_resident.compression_unit) {
1131 					ni->itype.compressed.block_size = 1U <<
1132 							(a->data.non_resident.compression_unit +
1133 							vol->cluster_size_bits);
1134 					ni->itype.compressed.block_size_bits =
1135 							ffs(ni->itype.compressed.block_size) - 1;
1136 					ni->itype.compressed.block_clusters =
1137 							1U << a->data.non_resident.compression_unit;
1138 				} else {
1139 					ni->itype.compressed.block_size = 0;
1140 					ni->itype.compressed.block_size_bits =
1141 							0;
1142 					ni->itype.compressed.block_clusters =
1143 							0;
1144 				}
1145 				ni->itype.compressed.size = le64_to_cpu(
1146 						a->data.non_resident.compressed_size);
1147 			}
1148 			if (a->data.non_resident.lowest_vcn) {
1149 				ntfs_error(vi->i_sb,
1150 					"First extent of $DATA attribute has non zero lowest_vcn.");
1151 				goto unm_err_out;
1152 			}
1153 			vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1154 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1155 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1156 		} else { /* Resident attribute. */
1157 			vi->i_size = ni->data_size = ni->initialized_size = le32_to_cpu(
1158 					a->data.resident.value_length);
1159 			ni->allocated_size = le32_to_cpu(a->length) -
1160 					le16_to_cpu(
1161 					a->data.resident.value_offset);
1162 			if (vi->i_size > ni->allocated_size) {
1163 				ntfs_error(vi->i_sb,
1164 					"Resident data attribute is corrupt (size exceeds allocation).");
1165 				goto unm_err_out;
1166 			}
1167 		}
1168 no_data_attr_special_case:
1169 		/* We are done with the mft record, so we release it. */
1170 		ntfs_attr_put_search_ctx(ctx);
1171 		unmap_mft_record(ni);
1172 		m = NULL;
1173 		ctx = NULL;
1174 		/* Setup the operations for this inode. */
1175 		ntfs_set_vfs_operations(vi, vi->i_mode, dev);
1176 	}
1177 
1178 	if (NVolSysImmutable(vol) && (ni->flags & FILE_ATTR_SYSTEM) &&
1179 	    !S_ISFIFO(vi->i_mode) && !S_ISSOCK(vi->i_mode) && !S_ISLNK(vi->i_mode))
1180 		vi->i_flags |= S_IMMUTABLE;
1181 
1182 	/*
1183 	 * The number of 512-byte blocks used on disk (for stat). This is in so
1184 	 * far inaccurate as it doesn't account for any named streams or other
1185 	 * special non-resident attributes, but that is how Windows works, too,
1186 	 * so we are at least consistent with Windows, if not entirely
1187 	 * consistent with the Linux Way. Doing it the Linux Way would cause a
1188 	 * significant slowdown as it would involve iterating over all
1189 	 * attributes in the mft record and adding the allocated/compressed
1190 	 * sizes of all non-resident attributes present to give us the Linux
1191 	 * correct size that should go into i_blocks (after division by 512).
1192 	 */
1193 	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1194 		vi->i_blocks = ni->itype.compressed.size >> 9;
1195 	else
1196 		vi->i_blocks = ni->allocated_size >> 9;
1197 
1198 	ntfs_debug("Done.");
1199 	return 0;
1200 unm_err_out:
1201 	if (!err)
1202 		err = -EIO;
1203 	if (ctx)
1204 		ntfs_attr_put_search_ctx(ctx);
1205 	if (m)
1206 		unmap_mft_record(ni);
1207 err_out:
1208 	if (err != -EOPNOTSUPP && err != -ENOMEM && vol_err == true) {
1209 		ntfs_error(vol->sb,
1210 			"Failed with error code %i.  Marking corrupt inode 0x%llx as bad.  Run chkdsk.",
1211 			err, ni->mft_no);
1212 		NVolSetErrors(vol);
1213 	}
1214 	return err;
1215 }
1216 
1217 /*
1218  * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1219  * @base_vi:	base inode
1220  * @vi:		attribute inode to read
1221  *
1222  * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1223  * attribute inode described by @vi into memory from the base mft record
1224  * described by @base_ni.
1225  *
1226  * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1227  * reading and looks up the attribute described by @vi before setting up the
1228  * necessary fields in @vi as well as initializing the ntfs inode.
1229  *
1230  * Q: What locks are held when the function is called?
1231  * A: i_state has I_NEW set, hence the inode is locked, also
1232  *    i_count is set to 1, so it is not going to go away
1233  *
1234  * Return 0 on success and -errno on error.
1235  *
1236  * Note this cannot be called for AT_INDEX_ALLOCATION.
1237  */
1238 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1239 {
1240 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1241 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi);
1242 	struct mft_record *m;
1243 	struct attr_record *a;
1244 	struct ntfs_attr_search_ctx *ctx;
1245 	int err = 0;
1246 
1247 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1248 
1249 	ntfs_init_big_inode(vi);
1250 
1251 	/* Just mirror the values from the base inode. */
1252 	vi->i_uid	= base_vi->i_uid;
1253 	vi->i_gid	= base_vi->i_gid;
1254 	set_nlink(vi, base_vi->i_nlink);
1255 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1256 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1257 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1258 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1259 
1260 	/* Set inode type to zero but preserve permissions. */
1261 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1262 
1263 	m = map_mft_record(base_ni);
1264 	if (IS_ERR(m)) {
1265 		err = PTR_ERR(m);
1266 		goto err_out;
1267 	}
1268 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1269 	if (!ctx) {
1270 		err = -ENOMEM;
1271 		goto unm_err_out;
1272 	}
1273 	/* Find the attribute. */
1274 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1275 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1276 	if (unlikely(err))
1277 		goto unm_err_out;
1278 	a = ctx->attr;
1279 	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1280 		if (a->flags & ATTR_COMPRESSION_MASK) {
1281 			NInoSetCompressed(ni);
1282 			ni->flags |= FILE_ATTR_COMPRESSED;
1283 			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1284 					ni->name_len)) {
1285 				ntfs_error(vi->i_sb,
1286 					   "Found compressed non-data or named data attribute.");
1287 				goto unm_err_out;
1288 			}
1289 			if (vol->cluster_size > 4096) {
1290 				ntfs_error(vi->i_sb,
1291 					"Found compressed attribute but compression is disabled due to cluster size (%i) > 4kiB.",
1292 					vol->cluster_size);
1293 				goto unm_err_out;
1294 			}
1295 			if ((a->flags & ATTR_COMPRESSION_MASK) !=
1296 					ATTR_IS_COMPRESSED) {
1297 				ntfs_error(vi->i_sb, "Found unknown compression method.");
1298 				goto unm_err_out;
1299 			}
1300 		}
1301 		/*
1302 		 * The compressed/sparse flag set in an index root just means
1303 		 * to compress all files.
1304 		 */
1305 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1306 			ntfs_error(vi->i_sb,
1307 				"Found mst protected attribute but the attribute is %s.",
1308 				NInoCompressed(ni) ? "compressed" : "sparse");
1309 			goto unm_err_out;
1310 		}
1311 		if (a->flags & ATTR_IS_SPARSE) {
1312 			NInoSetSparse(ni);
1313 			ni->flags |= FILE_ATTR_SPARSE_FILE;
1314 		}
1315 	}
1316 	if (a->flags & ATTR_IS_ENCRYPTED) {
1317 		if (NInoCompressed(ni)) {
1318 			ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1319 			goto unm_err_out;
1320 		}
1321 		/*
1322 		 * The encryption flag set in an index root just means to
1323 		 * encrypt all files.
1324 		 */
1325 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1326 			ntfs_error(vi->i_sb,
1327 				"Found mst protected attribute but the attribute is encrypted.");
1328 			goto unm_err_out;
1329 		}
1330 		if (ni->type != AT_DATA) {
1331 			ntfs_error(vi->i_sb,
1332 				"Found encrypted non-data attribute.");
1333 			goto unm_err_out;
1334 		}
1335 		NInoSetEncrypted(ni);
1336 		ni->flags |= FILE_ATTR_ENCRYPTED;
1337 	}
1338 	if (!a->non_resident) {
1339 		/* Ensure the attribute name is placed before the value. */
1340 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1341 				le16_to_cpu(a->data.resident.value_offset)))) {
1342 			ntfs_error(vol->sb,
1343 				"Attribute name is placed after the attribute value.");
1344 			goto unm_err_out;
1345 		}
1346 		if (NInoMstProtected(ni)) {
1347 			ntfs_error(vi->i_sb,
1348 				"Found mst protected attribute but the attribute is resident.");
1349 			goto unm_err_out;
1350 		}
1351 		vi->i_size = ni->initialized_size = ni->data_size = le32_to_cpu(
1352 				a->data.resident.value_length);
1353 		ni->allocated_size = le32_to_cpu(a->length) -
1354 				le16_to_cpu(a->data.resident.value_offset);
1355 		if (vi->i_size > ni->allocated_size) {
1356 			ntfs_error(vi->i_sb,
1357 				"Resident attribute is corrupt (size exceeds allocation).");
1358 			goto unm_err_out;
1359 		}
1360 	} else {
1361 		NInoSetNonResident(ni);
1362 		/*
1363 		 * Ensure the attribute name is placed before the mapping pairs
1364 		 * array.
1365 		 */
1366 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1367 				le16_to_cpu(
1368 				a->data.non_resident.mapping_pairs_offset)))) {
1369 			ntfs_error(vol->sb,
1370 				"Attribute name is placed after the mapping pairs array.");
1371 			goto unm_err_out;
1372 		}
1373 		if (NInoCompressed(ni) || NInoSparse(ni)) {
1374 			if (NInoCompressed(ni) && a->data.non_resident.compression_unit != 4) {
1375 				ntfs_error(vi->i_sb,
1376 					"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1377 					a->data.non_resident.compression_unit);
1378 				err = -EOPNOTSUPP;
1379 				goto unm_err_out;
1380 			}
1381 			if (a->data.non_resident.compression_unit) {
1382 				ni->itype.compressed.block_size = 1U <<
1383 						(a->data.non_resident.compression_unit +
1384 						vol->cluster_size_bits);
1385 				ni->itype.compressed.block_size_bits =
1386 						ffs(ni->itype.compressed.block_size) - 1;
1387 				ni->itype.compressed.block_clusters = 1U <<
1388 						a->data.non_resident.compression_unit;
1389 			} else {
1390 				ni->itype.compressed.block_size = 0;
1391 				ni->itype.compressed.block_size_bits = 0;
1392 				ni->itype.compressed.block_clusters = 0;
1393 			}
1394 			ni->itype.compressed.size = le64_to_cpu(
1395 					a->data.non_resident.compressed_size);
1396 		}
1397 		if (a->data.non_resident.lowest_vcn) {
1398 			ntfs_error(vi->i_sb, "First extent of attribute has non-zero lowest_vcn.");
1399 			goto unm_err_out;
1400 		}
1401 		vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1402 		ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1403 		ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1404 	}
1405 	vi->i_mapping->a_ops = &ntfs_aops;
1406 	if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1407 		vi->i_blocks = ni->itype.compressed.size >> 9;
1408 	else
1409 		vi->i_blocks = ni->allocated_size >> 9;
1410 	/*
1411 	 * Make sure the base inode does not go away and attach it to the
1412 	 * attribute inode.
1413 	 */
1414 	if (!igrab(base_vi)) {
1415 		err = -ENOENT;
1416 		goto unm_err_out;
1417 	}
1418 	ni->ext.base_ntfs_ino = base_ni;
1419 	ni->nr_extents = -1;
1420 
1421 	ntfs_attr_put_search_ctx(ctx);
1422 	unmap_mft_record(base_ni);
1423 
1424 	ntfs_debug("Done.");
1425 	return 0;
1426 
1427 unm_err_out:
1428 	if (!err)
1429 		err = -EIO;
1430 	if (ctx)
1431 		ntfs_attr_put_search_ctx(ctx);
1432 	unmap_mft_record(base_ni);
1433 err_out:
1434 	if (err != -ENOENT)
1435 		ntfs_error(vol->sb,
1436 			"Failed with error code %i while reading attribute inode (mft_no 0x%llx, type 0x%x, name_len %i).  Marking corrupt inode and base inode 0x%llx as bad.  Run chkdsk.",
1437 			err, ni->mft_no, ni->type, ni->name_len,
1438 			base_ni->mft_no);
1439 	if (err != -ENOENT && err != -ENOMEM)
1440 		NVolSetErrors(vol);
1441 	return err;
1442 }
1443 
1444 /*
1445  * ntfs_read_locked_index_inode - read an index inode from its base inode
1446  * @base_vi:	base inode
1447  * @vi:		index inode to read
1448  *
1449  * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1450  * index inode described by @vi into memory from the base mft record described
1451  * by @base_ni.
1452  *
1453  * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1454  * reading and looks up the attributes relating to the index described by @vi
1455  * before setting up the necessary fields in @vi as well as initializing the
1456  * ntfs inode.
1457  *
1458  * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1459  * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
1460  * are setup like directory inodes since directories are a special case of
1461  * indices ao they need to be treated in much the same way.  Most importantly,
1462  * for small indices the index allocation attribute might not actually exist.
1463  * However, the index root attribute always exists but this does not need to
1464  * have an inode associated with it and this is why we define a new inode type
1465  * index.  Also, like for directories, we need to have an attribute inode for
1466  * the bitmap attribute corresponding to the index allocation attribute and we
1467  * can store this in the appropriate field of the inode, just like we do for
1468  * normal directory inodes.
1469  *
1470  * Q: What locks are held when the function is called?
1471  * A: i_state has I_NEW set, hence the inode is locked, also
1472  *    i_count is set to 1, so it is not going to go away
1473  *
1474  * Return 0 on success and -errno on error.
1475  */
1476 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1477 {
1478 	loff_t bvi_size;
1479 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1480 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi), *bni;
1481 	struct inode *bvi;
1482 	struct mft_record *m;
1483 	struct attr_record *a;
1484 	struct ntfs_attr_search_ctx *ctx;
1485 	struct index_root *ir;
1486 	u8 *ir_end, *index_end;
1487 	int err = 0;
1488 
1489 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1490 	lockdep_assert_held(&base_ni->mrec_lock);
1491 
1492 	ntfs_init_big_inode(vi);
1493 	/* Just mirror the values from the base inode. */
1494 	vi->i_uid	= base_vi->i_uid;
1495 	vi->i_gid	= base_vi->i_gid;
1496 	set_nlink(vi, base_vi->i_nlink);
1497 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1498 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1499 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1500 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1501 	/* Set inode type to zero but preserve permissions. */
1502 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1503 	/* Map the mft record for the base inode. */
1504 	m = map_mft_record(base_ni);
1505 	if (IS_ERR(m)) {
1506 		err = PTR_ERR(m);
1507 		goto err_out;
1508 	}
1509 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1510 	if (!ctx) {
1511 		err = -ENOMEM;
1512 		goto unm_err_out;
1513 	}
1514 	/* Find the index root attribute. */
1515 	err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
1516 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1517 	if (unlikely(err)) {
1518 		if (err == -ENOENT)
1519 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
1520 		goto unm_err_out;
1521 	}
1522 	a = ctx->attr;
1523 	/* Set up the state. */
1524 	if (unlikely(a->non_resident)) {
1525 		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1526 		goto unm_err_out;
1527 	}
1528 	/* Ensure the attribute name is placed before the value. */
1529 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1530 			le16_to_cpu(a->data.resident.value_offset)))) {
1531 		ntfs_error(vol->sb,
1532 			"$INDEX_ROOT attribute name is placed after the attribute value.");
1533 		goto unm_err_out;
1534 	}
1535 
1536 	ir = (struct index_root *)((u8 *)a + le16_to_cpu(a->data.resident.value_offset));
1537 	ir_end = (u8 *)ir + le32_to_cpu(a->data.resident.value_length);
1538 	index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length);
1539 	if (index_end > ir_end) {
1540 		ntfs_error(vi->i_sb, "Index is corrupt.");
1541 		goto unm_err_out;
1542 	}
1543 
1544 	ni->itype.index.collation_rule = ir->collation_rule;
1545 	ntfs_debug("Index collation rule is 0x%x.",
1546 			le32_to_cpu(ir->collation_rule));
1547 	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1548 	if (!is_power_of_2(ni->itype.index.block_size)) {
1549 		ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
1550 				ni->itype.index.block_size);
1551 		goto unm_err_out;
1552 	}
1553 	if (ni->itype.index.block_size > PAGE_SIZE) {
1554 		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
1555 				ni->itype.index.block_size, PAGE_SIZE);
1556 		err = -EOPNOTSUPP;
1557 		goto unm_err_out;
1558 	}
1559 	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1560 		ntfs_error(vi->i_sb,
1561 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
1562 				ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1563 		err = -EOPNOTSUPP;
1564 		goto unm_err_out;
1565 	}
1566 	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
1567 	/* Determine the size of a vcn in the index. */
1568 	if (vol->cluster_size <= ni->itype.index.block_size) {
1569 		ni->itype.index.vcn_size = vol->cluster_size;
1570 		ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1571 	} else {
1572 		ni->itype.index.vcn_size = vol->sector_size;
1573 		ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1574 	}
1575 
1576 	/* Find index allocation attribute. */
1577 	ntfs_attr_reinit_search_ctx(ctx);
1578 	err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
1579 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1580 	if (unlikely(err)) {
1581 		if (err == -ENOENT) {
1582 			/* No index allocation. */
1583 			vi->i_size = ni->initialized_size = ni->allocated_size = 0;
1584 			/* We are done with the mft record, so we release it. */
1585 			ntfs_attr_put_search_ctx(ctx);
1586 			unmap_mft_record(base_ni);
1587 			m = NULL;
1588 			ctx = NULL;
1589 			goto skip_large_index_stuff;
1590 		} else
1591 			ntfs_error(vi->i_sb, "Failed to lookup $INDEX_ALLOCATION attribute.");
1592 		goto unm_err_out;
1593 	}
1594 	NInoSetIndexAllocPresent(ni);
1595 	NInoSetNonResident(ni);
1596 	ni->type = AT_INDEX_ALLOCATION;
1597 
1598 	a = ctx->attr;
1599 	if (!a->non_resident) {
1600 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is resident.");
1601 		goto unm_err_out;
1602 	}
1603 	/*
1604 	 * Ensure the attribute name is placed before the mapping pairs array.
1605 	 */
1606 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1607 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset)))) {
1608 		ntfs_error(vol->sb,
1609 			"$INDEX_ALLOCATION attribute name is placed after the mapping pairs array.");
1610 		goto unm_err_out;
1611 	}
1612 	if (a->flags & ATTR_IS_ENCRYPTED) {
1613 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is encrypted.");
1614 		goto unm_err_out;
1615 	}
1616 	if (a->flags & ATTR_IS_SPARSE) {
1617 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1618 		goto unm_err_out;
1619 	}
1620 	if (a->flags & ATTR_COMPRESSION_MASK) {
1621 		ntfs_error(vi->i_sb,
1622 			"$INDEX_ALLOCATION attribute is compressed.");
1623 		goto unm_err_out;
1624 	}
1625 	if (a->data.non_resident.lowest_vcn) {
1626 		ntfs_error(vi->i_sb,
1627 			"First extent of $INDEX_ALLOCATION attribute has non zero lowest_vcn.");
1628 		goto unm_err_out;
1629 	}
1630 	vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1631 	ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1632 	ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1633 	/*
1634 	 * We are done with the mft record, so we release it.  Otherwise
1635 	 * we would deadlock in ntfs_attr_iget().
1636 	 */
1637 	ntfs_attr_put_search_ctx(ctx);
1638 	unmap_mft_record(base_ni);
1639 	m = NULL;
1640 	ctx = NULL;
1641 	/* Get the index bitmap attribute inode. */
1642 	bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
1643 	if (IS_ERR(bvi)) {
1644 		ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1645 		err = PTR_ERR(bvi);
1646 		goto unm_err_out;
1647 	}
1648 	bni = NTFS_I(bvi);
1649 	if (NInoCompressed(bni) || NInoEncrypted(bni) ||
1650 			NInoSparse(bni)) {
1651 		ntfs_error(vi->i_sb,
1652 			"$BITMAP attribute is compressed and/or encrypted and/or sparse.");
1653 		goto iput_unm_err_out;
1654 	}
1655 	/* Consistency check bitmap size vs. index allocation size. */
1656 	bvi_size = i_size_read(bvi);
1657 	if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1658 		ntfs_error(vi->i_sb,
1659 			"Index bitmap too small (0x%llx) for index allocation (0x%llx).",
1660 			bvi_size << 3, vi->i_size);
1661 		goto iput_unm_err_out;
1662 	}
1663 	iput(bvi);
1664 skip_large_index_stuff:
1665 	/* Setup the operations for this index inode. */
1666 	ntfs_set_vfs_operations(vi, S_IFDIR, 0);
1667 	vi->i_blocks = ni->allocated_size >> 9;
1668 	/*
1669 	 * Make sure the base inode doesn't go away and attach it to the
1670 	 * index inode.
1671 	 */
1672 	if (!igrab(base_vi))
1673 		goto unm_err_out;
1674 	ni->ext.base_ntfs_ino = base_ni;
1675 	ni->nr_extents = -1;
1676 
1677 	ntfs_debug("Done.");
1678 	return 0;
1679 iput_unm_err_out:
1680 	iput(bvi);
1681 unm_err_out:
1682 	if (!err)
1683 		err = -EIO;
1684 	if (ctx)
1685 		ntfs_attr_put_search_ctx(ctx);
1686 	if (m)
1687 		unmap_mft_record(base_ni);
1688 err_out:
1689 	ntfs_error(vi->i_sb,
1690 		"Failed with error code %i while reading index inode (mft_no 0x%llx, name_len %i.",
1691 		err, ni->mft_no, ni->name_len);
1692 	if (err != -EOPNOTSUPP && err != -ENOMEM)
1693 		NVolSetErrors(vol);
1694 	return err;
1695 }
1696 
1697 /*
1698  * load_attribute_list_mount - load an attribute list into memory
1699  * @vol:		ntfs volume from which to read
1700  * @rl:			runlist of the attribute list
1701  * @al_start:		destination buffer
1702  * @size:		size of the destination buffer in bytes
1703  * @initialized_size:	initialized size of the attribute list
1704  *
1705  * Walk the runlist @rl and load all clusters from it copying them into
1706  * the linear buffer @al. The maximum number of bytes copied to @al is @size
1707  * bytes. Note, @size does not need to be a multiple of the cluster size. If
1708  * @initialized_size is less than @size, the region in @al between
1709  * @initialized_size and @size will be zeroed and not read from disk.
1710  *
1711  * Return 0 on success or -errno on error.
1712  */
1713 static int load_attribute_list_mount(struct ntfs_volume *vol,
1714 		struct runlist_element *rl, u8 *al_start, const s64 size,
1715 		const s64 initialized_size)
1716 {
1717 	s64 lcn;
1718 	u8 *al = al_start;
1719 	u8 *al_end = al + initialized_size;
1720 	struct super_block *sb;
1721 	int err = 0;
1722 	loff_t rl_byte_off, rl_byte_len;
1723 
1724 	ntfs_debug("Entering.");
1725 	if (!vol || !rl || !al || size <= 0 || initialized_size < 0 ||
1726 			initialized_size > size)
1727 		return -EINVAL;
1728 	if (!initialized_size) {
1729 		memset(al, 0, size);
1730 		return 0;
1731 	}
1732 	sb = vol->sb;
1733 
1734 	/* Read all clusters specified by the runlist one run at a time. */
1735 	while (rl->length) {
1736 		lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
1737 		ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
1738 				(unsigned long long)rl->vcn,
1739 				(unsigned long long)lcn);
1740 		/* The attribute list cannot be sparse. */
1741 		if (lcn < 0) {
1742 			ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read attribute list.");
1743 			return -EIO;
1744 		}
1745 
1746 		rl_byte_off = ntfs_cluster_to_bytes(vol, lcn);
1747 		rl_byte_len = ntfs_cluster_to_bytes(vol, rl->length);
1748 
1749 		if (al + rl_byte_len > al_end)
1750 			rl_byte_len = al_end - al;
1751 
1752 		err = ntfs_bdev_read(sb->s_bdev, al, rl_byte_off,
1753 				   round_up(rl_byte_len, SECTOR_SIZE));
1754 		if (err) {
1755 			ntfs_error(sb, "Cannot read attribute list.");
1756 			return -EIO;
1757 		}
1758 
1759 		if (al + rl_byte_len >= al_end) {
1760 			if (initialized_size < size)
1761 				goto initialize;
1762 			goto done;
1763 		}
1764 
1765 		al += rl_byte_len;
1766 		rl++;
1767 	}
1768 	if (initialized_size < size) {
1769 initialize:
1770 		memset(al_start + initialized_size, 0, size - initialized_size);
1771 	}
1772 done:
1773 	return err;
1774 }
1775 
1776 /*
1777  * The MFT inode has special locking, so teach the lock validator
1778  * about this by splitting off the locking rules of the MFT from
1779  * the locking rules of other inodes. The MFT inode can never be
1780  * accessed from the VFS side (or even internally), only by the
1781  * map_mft functions.
1782  */
1783 static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1784 
1785 /*
1786  * ntfs_read_inode_mount - special read_inode for mount time use only
1787  * @vi:		inode to read
1788  *
1789  * Read inode FILE_MFT at mount time, only called with super_block lock
1790  * held from within the read_super() code path.
1791  *
1792  * This function exists because when it is called the page cache for $MFT/$DATA
1793  * is not initialized and hence we cannot get at the contents of mft records
1794  * by calling map_mft_record*().
1795  *
1796  * Further it needs to cope with the circular references problem, i.e. cannot
1797  * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1798  * we do not know where the other extent mft records are yet and again, because
1799  * we cannot call map_mft_record*() yet.  Obviously this applies only when an
1800  * attribute list is actually present in $MFT inode.
1801  *
1802  * We solve these problems by starting with the $DATA attribute before anything
1803  * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
1804  * extent is found, we ntfs_mapping_pairs_decompress() including the implied
1805  * ntfs_runlists_merge().  Each step of the iteration necessarily provides
1806  * sufficient information for the next step to complete.
1807  *
1808  * This should work but there are two possible pit falls (see inline comments
1809  * below), but only time will tell if they are real pits or just smoke...
1810  */
1811 int ntfs_read_inode_mount(struct inode *vi)
1812 {
1813 	s64 next_vcn, last_vcn, highest_vcn;
1814 	struct super_block *sb = vi->i_sb;
1815 	struct ntfs_volume *vol = NTFS_SB(sb);
1816 	struct ntfs_inode *ni = NTFS_I(vi);
1817 	struct mft_record *m = NULL;
1818 	struct attr_record *a;
1819 	struct ntfs_attr_search_ctx *ctx;
1820 	unsigned int i, nr_blocks;
1821 	int err;
1822 	size_t new_rl_count;
1823 
1824 	ntfs_debug("Entering.");
1825 
1826 	/* Initialize the ntfs specific part of @vi. */
1827 	ntfs_init_big_inode(vi);
1828 
1829 
1830 	/* Setup the data attribute. It is special as it is mst protected. */
1831 	NInoSetNonResident(ni);
1832 	NInoSetMstProtected(ni);
1833 	NInoSetSparseDisabled(ni);
1834 	ni->type = AT_DATA;
1835 	ni->name = AT_UNNAMED;
1836 	ni->name_len = 0;
1837 	/*
1838 	 * This sets up our little cheat allowing us to reuse the async read io
1839 	 * completion handler for directories.
1840 	 */
1841 	ni->itype.index.block_size = vol->mft_record_size;
1842 	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1843 
1844 	/* Very important! Needed to be able to call map_mft_record*(). */
1845 	vol->mft_ino = vi;
1846 
1847 	/* Allocate enough memory to read the first mft record. */
1848 	if (vol->mft_record_size > 64 * 1024) {
1849 		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1850 				vol->mft_record_size);
1851 		goto err_out;
1852 	}
1853 
1854 	i = vol->mft_record_size;
1855 	if (i < sb->s_blocksize)
1856 		i = sb->s_blocksize;
1857 
1858 	m = kzalloc(i, GFP_NOFS);
1859 	if (!m) {
1860 		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1861 		goto err_out;
1862 	}
1863 
1864 	/* Determine the first block of the $MFT/$DATA attribute. */
1865 	nr_blocks = ntfs_bytes_to_sector(vol, vol->mft_record_size);
1866 	if (!nr_blocks)
1867 		nr_blocks = 1;
1868 
1869 	/* Load $MFT/$DATA's first mft record. */
1870 	err = ntfs_bdev_read(sb->s_bdev, (char *)m,
1871 			     ntfs_cluster_to_bytes(vol, vol->mft_lcn), i);
1872 	if (err) {
1873 		ntfs_error(sb, "Device read failed.");
1874 		goto err_out;
1875 	}
1876 
1877 	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
1878 		ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
1879 				le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
1880 		goto err_out;
1881 	}
1882 
1883 	/* Apply the mst fixups. */
1884 	if (post_read_mst_fixup((struct ntfs_record *)m, vol->mft_record_size)) {
1885 		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1886 		goto err_out;
1887 	}
1888 
1889 	if (ntfs_mft_record_check(vol, m, FILE_MFT)) {
1890 		ntfs_error(sb, "ntfs_mft_record_check failed. $MFT is corrupt.");
1891 		goto err_out;
1892 	}
1893 
1894 	/* Need this to sanity check attribute list references to $MFT. */
1895 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1896 
1897 	/* Provides read_folio() for map_mft_record(). */
1898 	vi->i_mapping->a_ops = &ntfs_mft_aops;
1899 
1900 	ctx = ntfs_attr_get_search_ctx(ni, m);
1901 	if (!ctx) {
1902 		err = -ENOMEM;
1903 		goto err_out;
1904 	}
1905 
1906 	/* Find the attribute list attribute if present. */
1907 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
1908 	if (err) {
1909 		if (unlikely(err != -ENOENT)) {
1910 			ntfs_error(sb,
1911 				"Failed to lookup attribute list attribute. You should run chkdsk.");
1912 			goto put_err_out;
1913 		}
1914 	} else /* if (!err) */ {
1915 		struct attr_list_entry *al_entry, *next_al_entry;
1916 		u8 *al_end;
1917 		static const char *es = "  Not allowed.  $MFT is corrupt.  You should run chkdsk.";
1918 
1919 		ntfs_debug("Attribute list attribute found in $MFT.");
1920 		NInoSetAttrList(ni);
1921 		a = ctx->attr;
1922 		if (a->flags & ATTR_COMPRESSION_MASK) {
1923 			ntfs_error(sb,
1924 				"Attribute list attribute is compressed.%s",
1925 				es);
1926 			goto put_err_out;
1927 		}
1928 		if (a->flags & ATTR_IS_ENCRYPTED ||
1929 				a->flags & ATTR_IS_SPARSE) {
1930 			if (a->non_resident) {
1931 				ntfs_error(sb,
1932 					"Non-resident attribute list attribute is encrypted/sparse.%s",
1933 					es);
1934 				goto put_err_out;
1935 			}
1936 			ntfs_warning(sb,
1937 				"Resident attribute list attribute in $MFT system file is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.");
1938 		}
1939 		/* Now allocate memory for the attribute list. */
1940 		ni->attr_list_size = (u32)ntfs_attr_size(a);
1941 		if (!ni->attr_list_size) {
1942 			ntfs_error(sb, "Attr_list_size is zero");
1943 			goto put_err_out;
1944 		}
1945 		ni->attr_list = kvzalloc(round_up(ni->attr_list_size, SECTOR_SIZE),
1946 					 GFP_NOFS);
1947 		if (!ni->attr_list) {
1948 			ntfs_error(sb, "Not enough memory to allocate buffer for attribute list.");
1949 			goto put_err_out;
1950 		}
1951 		if (a->non_resident) {
1952 			struct runlist_element *rl;
1953 			size_t new_rl_count;
1954 
1955 			NInoSetAttrListNonResident(ni);
1956 			if (a->data.non_resident.lowest_vcn) {
1957 				ntfs_error(sb,
1958 					"Attribute list has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
1959 				goto put_err_out;
1960 			}
1961 
1962 			rl = ntfs_mapping_pairs_decompress(vol, a, NULL, &new_rl_count);
1963 			if (IS_ERR(rl)) {
1964 				err = PTR_ERR(rl);
1965 				ntfs_error(sb,
1966 					   "Mapping pairs decompression failed with error code %i.",
1967 					   -err);
1968 				goto put_err_out;
1969 			}
1970 
1971 			err = load_attribute_list_mount(vol, rl, ni->attr_list, ni->attr_list_size,
1972 					le64_to_cpu(a->data.non_resident.initialized_size));
1973 			kvfree(rl);
1974 			if (err) {
1975 				ntfs_error(sb,
1976 					   "Failed to load attribute list with error code %i.",
1977 					   -err);
1978 				goto put_err_out;
1979 			}
1980 		} else /* if (!ctx.attr->non_resident) */ {
1981 			/* Now copy the attribute list. */
1982 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
1983 					a->data.resident.value_offset),
1984 					le32_to_cpu(a->data.resident.value_length));
1985 		}
1986 		/* The attribute list is now setup in memory. */
1987 		al_entry = (struct attr_list_entry *)ni->attr_list;
1988 		al_end = (u8 *)al_entry + ni->attr_list_size;
1989 		for (;; al_entry = next_al_entry) {
1990 			/* Out of bounds check. */
1991 			if ((u8 *)al_entry < ni->attr_list ||
1992 					(u8 *)al_entry > al_end)
1993 				goto em_put_err_out;
1994 			/* Catch the end of the attribute list. */
1995 			if ((u8 *)al_entry == al_end)
1996 				goto em_put_err_out;
1997 			if (!al_entry->length)
1998 				goto em_put_err_out;
1999 			if ((u8 *)al_entry + 6 > al_end ||
2000 			    (u8 *)al_entry + le16_to_cpu(al_entry->length) > al_end)
2001 				goto em_put_err_out;
2002 			next_al_entry = (struct attr_list_entry *)((u8 *)al_entry +
2003 					le16_to_cpu(al_entry->length));
2004 			if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
2005 				goto em_put_err_out;
2006 			if (al_entry->type != AT_DATA)
2007 				continue;
2008 			/* We want an unnamed attribute. */
2009 			if (al_entry->name_length)
2010 				goto em_put_err_out;
2011 			/* Want the first entry, i.e. lowest_vcn == 0. */
2012 			if (al_entry->lowest_vcn)
2013 				goto em_put_err_out;
2014 			/* First entry has to be in the base mft record. */
2015 			if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
2016 				/* MFT references do not match, logic fails. */
2017 				ntfs_error(sb,
2018 					"BUG: The first $DATA extent of $MFT is not in the base mft record.");
2019 				goto put_err_out;
2020 			} else {
2021 				/* Sequence numbers must match. */
2022 				if (MSEQNO_LE(al_entry->mft_reference) !=
2023 						ni->seq_no)
2024 					goto em_put_err_out;
2025 				/* Got it. All is ok. We can stop now. */
2026 				break;
2027 			}
2028 		}
2029 	}
2030 
2031 	ntfs_attr_reinit_search_ctx(ctx);
2032 
2033 	/* Now load all attribute extents. */
2034 	a = NULL;
2035 	next_vcn = last_vcn = highest_vcn = 0;
2036 	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
2037 			ctx))) {
2038 		struct runlist_element *nrl;
2039 
2040 		/* Cache the current attribute. */
2041 		a = ctx->attr;
2042 		/* $MFT must be non-resident. */
2043 		if (!a->non_resident) {
2044 			ntfs_error(sb,
2045 				"$MFT must be non-resident but a resident extent was found. $MFT is corrupt. Run chkdsk.");
2046 			goto put_err_out;
2047 		}
2048 		/* $MFT must be uncompressed and unencrypted. */
2049 		if (a->flags & ATTR_COMPRESSION_MASK ||
2050 				a->flags & ATTR_IS_ENCRYPTED ||
2051 				a->flags & ATTR_IS_SPARSE) {
2052 			ntfs_error(sb,
2053 				"$MFT must be uncompressed, non-sparse, and unencrypted but a compressed/sparse/encrypted extent was found. $MFT is corrupt. Run chkdsk.");
2054 			goto put_err_out;
2055 		}
2056 		/*
2057 		 * Decompress the mapping pairs array of this extent and merge
2058 		 * the result into the existing runlist. No need for locking
2059 		 * as we have exclusive access to the inode at this time and we
2060 		 * are a mount in progress task, too.
2061 		 */
2062 		nrl = ntfs_mapping_pairs_decompress(vol, a, &ni->runlist,
2063 						    &new_rl_count);
2064 		if (IS_ERR(nrl)) {
2065 			ntfs_error(sb,
2066 				"ntfs_mapping_pairs_decompress() failed with error code %ld.",
2067 				PTR_ERR(nrl));
2068 			goto put_err_out;
2069 		}
2070 		ni->runlist.rl = nrl;
2071 		ni->runlist.count = new_rl_count;
2072 
2073 		/* Are we in the first extent? */
2074 		if (!next_vcn) {
2075 			if (a->data.non_resident.lowest_vcn) {
2076 				ntfs_error(sb,
2077 					"First extent of $DATA attribute has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
2078 				goto put_err_out;
2079 			}
2080 			/* Get the last vcn in the $DATA attribute. */
2081 			last_vcn = ntfs_bytes_to_cluster(vol,
2082 					le64_to_cpu(a->data.non_resident.allocated_size));
2083 			/* Fill in the inode size. */
2084 			vi->i_size = le64_to_cpu(a->data.non_resident.data_size);
2085 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
2086 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
2087 			/*
2088 			 * Verify the number of mft records does not exceed
2089 			 * 2^32 - 1.
2090 			 */
2091 			if ((vi->i_size >> vol->mft_record_size_bits) >=
2092 					(1ULL << 32)) {
2093 				ntfs_error(sb, "$MFT is too big! Aborting.");
2094 				goto put_err_out;
2095 			}
2096 			/*
2097 			 * We have got the first extent of the runlist for
2098 			 * $MFT which means it is now relatively safe to call
2099 			 * the normal ntfs_read_inode() function.
2100 			 * Complete reading the inode, this will actually
2101 			 * re-read the mft record for $MFT, this time entering
2102 			 * it into the page cache with which we complete the
2103 			 * kick start of the volume. It should be safe to do
2104 			 * this now as the first extent of $MFT/$DATA is
2105 			 * already known and we would hope that we don't need
2106 			 * further extents in order to find the other
2107 			 * attributes belonging to $MFT. Only time will tell if
2108 			 * this is really the case. If not we will have to play
2109 			 * magic at this point, possibly duplicating a lot of
2110 			 * ntfs_read_inode() at this point. We will need to
2111 			 * ensure we do enough of its work to be able to call
2112 			 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
2113 			 * hope this never happens...
2114 			 */
2115 			err = ntfs_read_locked_inode(vi);
2116 			if (err) {
2117 				ntfs_error(sb, "ntfs_read_inode() of $MFT failed.\n");
2118 				ntfs_attr_put_search_ctx(ctx);
2119 				/* Revert to the safe super operations. */
2120 				kfree(m);
2121 				return -1;
2122 			}
2123 			/*
2124 			 * Re-initialize some specifics about $MFT's inode as
2125 			 * ntfs_read_inode() will have set up the default ones.
2126 			 */
2127 			/* Set uid and gid to root. */
2128 			vi->i_uid = GLOBAL_ROOT_UID;
2129 			vi->i_gid = GLOBAL_ROOT_GID;
2130 			/* Regular file. No access for anyone. */
2131 			vi->i_mode = S_IFREG;
2132 			/* No VFS initiated operations allowed for $MFT. */
2133 			vi->i_op = &ntfs_empty_inode_ops;
2134 			vi->i_fop = &ntfs_empty_file_ops;
2135 		}
2136 
2137 		/* Get the lowest vcn for the next extent. */
2138 		highest_vcn = le64_to_cpu(a->data.non_resident.highest_vcn);
2139 		next_vcn = highest_vcn + 1;
2140 
2141 		/* Only one extent or error, which we catch below. */
2142 		if (next_vcn <= 0)
2143 			break;
2144 
2145 		/* Avoid endless loops due to corruption. */
2146 		if (next_vcn < le64_to_cpu(a->data.non_resident.lowest_vcn)) {
2147 			ntfs_error(sb, "$MFT has corrupt attribute list attribute. Run chkdsk.");
2148 			goto put_err_out;
2149 		}
2150 	}
2151 	if (err != -ENOENT) {
2152 		ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. Run chkdsk.\n");
2153 		goto put_err_out;
2154 	}
2155 	if (!a) {
2156 		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is corrupt. Run chkdsk.");
2157 		goto put_err_out;
2158 	}
2159 	if (highest_vcn && highest_vcn != last_vcn - 1) {
2160 		ntfs_error(sb, "Failed to load the complete runlist for $MFT/$DATA. Run chkdsk.");
2161 		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2162 				(unsigned long long)highest_vcn,
2163 				(unsigned long long)last_vcn - 1);
2164 		goto put_err_out;
2165 	}
2166 	ntfs_attr_put_search_ctx(ctx);
2167 	ntfs_debug("Done.");
2168 	kfree(m);
2169 
2170 	/*
2171 	 * Split the locking rules of the MFT inode from the
2172 	 * locking rules of other inodes:
2173 	 */
2174 	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2175 	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2176 
2177 	return 0;
2178 
2179 em_put_err_out:
2180 	ntfs_error(sb,
2181 		"Couldn't find first extent of $DATA attribute in attribute list. $MFT is corrupt. Run chkdsk.");
2182 put_err_out:
2183 	ntfs_attr_put_search_ctx(ctx);
2184 err_out:
2185 	ntfs_error(sb, "Failed. Marking inode as bad.");
2186 	kfree(m);
2187 	return -1;
2188 }
2189 
2190 static void __ntfs_clear_inode(struct ntfs_inode *ni)
2191 {
2192 	/* Free all alocated memory. */
2193 	if (NInoNonResident(ni) && ni->runlist.rl) {
2194 		kvfree(ni->runlist.rl);
2195 		ni->runlist.rl = NULL;
2196 	}
2197 
2198 	if (ni->attr_list) {
2199 		kvfree(ni->attr_list);
2200 		ni->attr_list = NULL;
2201 	}
2202 
2203 	if (ni->name_len && ni->name != I30 &&
2204 	    ni->name != reparse_index_name &&
2205 	    ni->name != objid_index_name) {
2206 		WARN_ON(!ni->name);
2207 		kfree(ni->name);
2208 	}
2209 }
2210 
/*
 * ntfs_clear_extent_inode - free an extent ntfs inode
 * @ni:		extent ntfs inode to dispose of
 *
 * Releases the buffers attached to @ni via __ntfs_clear_inode() and then
 * hands @ni to ntfs_destroy_extent_inode().
 */
void ntfs_clear_extent_inode(struct ntfs_inode *ni)
{
	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);

	/* Sanity: must not be an attribute inode and must have nr_extents == -1. */
	WARN_ON(NInoAttr(ni));
	WARN_ON(ni->nr_extents != -1);

	__ntfs_clear_inode(ni);
	ntfs_destroy_extent_inode(ni);
}
2221 
2222 static int ntfs_delete_base_inode(struct ntfs_inode *ni)
2223 {
2224 	struct super_block *sb = ni->vol->sb;
2225 	int err;
2226 
2227 	if (NInoAttr(ni) || ni->nr_extents == -1)
2228 		return 0;
2229 
2230 	err = ntfs_non_resident_dealloc_clusters(ni);
2231 
2232 	/*
2233 	 * Deallocate extent mft records and free extent inodes.
2234 	 * No need to lock as no one else has a reference.
2235 	 */
2236 	while (ni->nr_extents) {
2237 		err = ntfs_mft_record_free(ni->vol, *(ni->ext.extent_ntfs_inos));
2238 		if (err)
2239 			ntfs_error(sb,
2240 				"Failed to free extent MFT record. Leaving inconsistent metadata.\n");
2241 		ntfs_inode_close(*(ni->ext.extent_ntfs_inos));
2242 	}
2243 
2244 	/* Deallocate base mft record */
2245 	err = ntfs_mft_record_free(ni->vol, ni);
2246 	if (err)
2247 		ntfs_error(sb, "Failed to free base MFT record. Leaving inconsistent metadata.\n");
2248 	return err;
2249 }
2250 
2251 /*
2252  * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2253  * @vi:		vfs inode pending annihilation
2254  *
2255  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
2256  * is called, which deallocates all memory belonging to the NTFS specific part
2257  * of the inode and returns.
2258  *
2259  * If the MFT record is dirty, we commit it before doing anything else.
2260  */
2261 void ntfs_evict_big_inode(struct inode *vi)
2262 {
2263 	struct ntfs_inode *ni = NTFS_I(vi);
2264 
2265 	truncate_inode_pages_final(&vi->i_data);
2266 
2267 	if (!vi->i_nlink) {
2268 		if (!NInoAttr(ni)) {
2269 			/* Never called with extent inodes */
2270 			WARN_ON(ni->nr_extents == -1);
2271 			ntfs_delete_base_inode(ni);
2272 		}
2273 		goto release;
2274 	}
2275 
2276 	if (NInoDirty(ni)) {
2277 		/* Committing the inode also commits all extent inodes. */
2278 		ntfs_commit_inode(vi);
2279 
2280 		if (NInoDirty(ni)) {
2281 			ntfs_debug("Failed to commit dirty inode 0x%llx.  Losing data!",
2282 				   ni->mft_no);
2283 			NInoClearAttrListDirty(ni);
2284 			NInoClearDirty(ni);
2285 		}
2286 	}
2287 
2288 	/* No need to lock at this stage as no one else has a reference. */
2289 	if (ni->nr_extents > 0) {
2290 		int i;
2291 
2292 		for (i = 0; i < ni->nr_extents; i++) {
2293 			if (ni->ext.extent_ntfs_inos[i])
2294 				ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
2295 		}
2296 		ni->nr_extents = 0;
2297 		kvfree(ni->ext.extent_ntfs_inos);
2298 	}
2299 
2300 release:
2301 	clear_inode(vi);
2302 	__ntfs_clear_inode(ni);
2303 
2304 	if (NInoAttr(ni)) {
2305 		/* Release the base inode if we are holding it. */
2306 		if (ni->nr_extents == -1) {
2307 			iput(VFS_I(ni->ext.base_ntfs_ino));
2308 			ni->nr_extents = 0;
2309 			ni->ext.base_ntfs_ino = NULL;
2310 		}
2311 	}
2312 
2313 	if (!atomic_dec_and_test(&ni->count))
2314 		WARN_ON(1);
2315 	if (ni->folio)
2316 		folio_put(ni->folio);
2317 	kfree(ni->mrec);
2318 	kvfree(ni->target);
2319 }
2320 
2321 /*
2322  * ntfs_show_options - show mount options in /proc/mounts
2323  * @sf:		seq_file in which to write our mount options
2324  * @root:	root of the mounted tree whose mount options to display
2325  *
2326  * Called by the VFS once for each mounted ntfs volume when someone reads
2327  * /proc/mounts in order to display the NTFS specific mount options of each
2328  * mount. The mount options of fs specified by @root are written to the seq file
2329  * @sf and success is returned.
2330  */
2331 int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2332 {
2333 	struct ntfs_volume *vol = NTFS_SB(root->d_sb);
2334 	int i;
2335 
2336 	if (uid_valid(vol->uid))
2337 		seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
2338 	if (gid_valid(vol->gid))
2339 		seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
2340 	if (vol->fmask == vol->dmask)
2341 		seq_printf(sf, ",umask=0%o", vol->fmask);
2342 	else {
2343 		seq_printf(sf, ",fmask=0%o", vol->fmask);
2344 		seq_printf(sf, ",dmask=0%o", vol->dmask);
2345 	}
2346 	seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
2347 	if (NVolCaseSensitive(vol))
2348 		seq_puts(sf, ",case_sensitive");
2349 	else
2350 		seq_puts(sf, ",nocase");
2351 	if (NVolShowSystemFiles(vol))
2352 		seq_puts(sf, ",show_sys_files,showmeta");
2353 	for (i = 0; on_errors_arr[i].val; i++) {
2354 		if (on_errors_arr[i].val == vol->on_errors)
2355 			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
2356 	}
2357 	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2358 	if (NVolSysImmutable(vol))
2359 		seq_puts(sf, ",sys_immutable");
2360 	if (!NVolShowHiddenFiles(vol))
2361 		seq_puts(sf, ",nohidden");
2362 	if (NVolHideDotFiles(vol))
2363 		seq_puts(sf, ",hide_dot_files");
2364 	if (NVolCheckWindowsNames(vol))
2365 		seq_puts(sf, ",windows_names");
2366 	if (NVolDiscard(vol))
2367 		seq_puts(sf, ",discard");
2368 	if (NVolDisableSparse(vol))
2369 		seq_puts(sf, ",disable_sparse");
2370 	if (vol->sb->s_flags & SB_POSIXACL)
2371 		seq_puts(sf, ",acl");
2372 	return 0;
2373 }
2374 
2375 int ntfs_extend_initialized_size(struct inode *vi, const loff_t offset,
2376 				 const loff_t new_size, bool bsync)
2377 {
2378 	struct ntfs_inode *ni = NTFS_I(vi);
2379 	loff_t old_init_size;
2380 	unsigned long flags;
2381 	int err;
2382 
2383 	read_lock_irqsave(&ni->size_lock, flags);
2384 	old_init_size = ni->initialized_size;
2385 	read_unlock_irqrestore(&ni->size_lock, flags);
2386 
2387 	if (!NInoNonResident(ni))
2388 		return -EINVAL;
2389 	if (old_init_size >= new_size)
2390 		return 0;
2391 
2392 	err = ntfs_attr_map_whole_runlist(ni);
2393 	if (err)
2394 		return err;
2395 
2396 	if (!NInoCompressed(ni) && old_init_size < offset) {
2397 		err = iomap_zero_range(vi, old_init_size,
2398 				       offset - old_init_size,
2399 				       NULL, &ntfs_seek_iomap_ops,
2400 				       &ntfs_iomap_folio_ops, NULL);
2401 		if (err)
2402 			return err;
2403 		if (bsync)
2404 			err = filemap_write_and_wait_range(vi->i_mapping,
2405 							   old_init_size,
2406 							   offset - 1);
2407 	}
2408 
2409 
2410 	mutex_lock(&ni->mrec_lock);
2411 	err = ntfs_attr_set_initialized_size(ni, new_size);
2412 	mutex_unlock(&ni->mrec_lock);
2413 	if (err)
2414 		truncate_setsize(vi, old_init_size);
2415 	return err;
2416 }
2417 
2418 int ntfs_truncate_vfs(struct inode *vi, loff_t new_size, loff_t i_size)
2419 {
2420 	struct ntfs_inode *ni = NTFS_I(vi);
2421 	int err;
2422 
2423 	mutex_lock(&ni->mrec_lock);
2424 	err = __ntfs_attr_truncate_vfs(ni, new_size, i_size);
2425 	mutex_unlock(&ni->mrec_lock);
2426 	if (err < 0)
2427 		return err;
2428 
2429 	inode_set_mtime_to_ts(vi, inode_set_ctime_current(vi));
2430 	return 0;
2431 }
2432 
2433 /*
2434  * ntfs_inode_sync_standard_information - update standard information attribute
2435  * @vi:	inode to update standard information
2436  * @m:	mft record
2437  *
2438  * Return 0 on success or -errno on error.
2439  */
2440 static int ntfs_inode_sync_standard_information(struct inode *vi, struct mft_record *m)
2441 {
2442 	struct ntfs_inode *ni = NTFS_I(vi);
2443 	struct ntfs_attr_search_ctx *ctx;
2444 	struct standard_information *si;
2445 	__le64 nt;
2446 	int err = 0;
2447 	bool modified = false;
2448 
2449 	/* Update the access times in the standard information attribute. */
2450 	ctx = ntfs_attr_get_search_ctx(ni, m);
2451 	if (unlikely(!ctx))
2452 		return -ENOMEM;
2453 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
2454 			CASE_SENSITIVE, 0, NULL, 0, ctx);
2455 	if (unlikely(err)) {
2456 		ntfs_attr_put_search_ctx(ctx);
2457 		return err;
2458 	}
2459 	si = (struct standard_information *)((u8 *)ctx->attr +
2460 			le16_to_cpu(ctx->attr->data.resident.value_offset));
2461 	if (si->file_attributes != ni->flags) {
2462 		si->file_attributes = ni->flags;
2463 		modified = true;
2464 	}
2465 
2466 	/* Update the creation times if they have changed. */
2467 	nt = utc2ntfs(ni->i_crtime);
2468 	if (si->creation_time != nt) {
2469 		ntfs_debug("Updating creation time for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2470 				ni->mft_no, le64_to_cpu(si->creation_time),
2471 				le64_to_cpu(nt));
2472 		si->creation_time = nt;
2473 		modified = true;
2474 	}
2475 
2476 	/* Update the access times if they have changed. */
2477 	nt = utc2ntfs(inode_get_mtime(vi));
2478 	if (si->last_data_change_time != nt) {
2479 		ntfs_debug("Updating mtime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2480 				ni->mft_no, le64_to_cpu(si->last_data_change_time),
2481 				le64_to_cpu(nt));
2482 		si->last_data_change_time = nt;
2483 		modified = true;
2484 	}
2485 
2486 	nt = utc2ntfs(inode_get_ctime(vi));
2487 	if (si->last_mft_change_time != nt) {
2488 		ntfs_debug("Updating ctime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2489 				ni->mft_no, le64_to_cpu(si->last_mft_change_time),
2490 				le64_to_cpu(nt));
2491 		si->last_mft_change_time = nt;
2492 		modified = true;
2493 	}
2494 	nt = utc2ntfs(inode_get_atime(vi));
2495 	if (si->last_access_time != nt) {
2496 		ntfs_debug("Updating atime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2497 				ni->mft_no,
2498 				le64_to_cpu(si->last_access_time),
2499 				le64_to_cpu(nt));
2500 		si->last_access_time = nt;
2501 		modified = true;
2502 	}
2503 
2504 	/*
2505 	 * If we just modified the standard information attribute we need to
2506 	 * mark the mft record it is in dirty.  We do this manually so that
2507 	 * mark_inode_dirty() is not called which would redirty the inode and
2508 	 * hence result in an infinite loop of trying to write the inode.
2509 	 * There is no need to mark the base inode nor the base mft record
2510 	 * dirty, since we are going to write this mft record below in any case
2511 	 * and the base mft record may actually not have been modified so it
2512 	 * might not need to be written out.
2513 	 * NOTE: It is not a problem when the inode for $MFT itself is being
2514 	 * written out as ntfs_mft_mark_dirty() will only set I_DIRTY_PAGES
2515 	 * on the $MFT inode and hence ntfs_write_inode() will not be
2516 	 * re-invoked because of it which in turn is ok since the dirtied mft
2517 	 * record will be cleaned and written out to disk below, i.e. before
2518 	 * this function returns.
2519 	 */
2520 	if (modified)
2521 		NInoSetDirty(ctx->ntfs_ino);
2522 	ntfs_attr_put_search_ctx(ctx);
2523 
2524 	return err;
2525 }
2526 
2527 /*
2528  * ntfs_inode_sync_filename - update FILE_NAME attributes
2529  * @ni:	ntfs inode to update FILE_NAME attributes
2530  *
2531  * Update all FILE_NAME attributes for inode @ni in the index.
2532  *
2533  * Return 0 on success or error.
2534  */
2535 int ntfs_inode_sync_filename(struct ntfs_inode *ni)
2536 {
2537 	struct inode *index_vi;
2538 	struct super_block *sb = VFS_I(ni)->i_sb;
2539 	struct ntfs_attr_search_ctx *ctx = NULL;
2540 	struct ntfs_index_context *ictx;
2541 	struct ntfs_inode *index_ni;
2542 	struct file_name_attr *fn;
2543 	struct file_name_attr *fnx;
2544 	struct reparse_point *rpp;
2545 	__le32 reparse_tag;
2546 	int err = 0;
2547 	unsigned long flags;
2548 
2549 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
2550 
2551 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
2552 	if (!ctx)
2553 		return -ENOMEM;
2554 
2555 	/* Collect the reparse tag, if any */
2556 	reparse_tag = cpu_to_le32(0);
2557 	if (ni->flags & FILE_ATTR_REPARSE_POINT) {
2558 		if (!ntfs_attr_lookup(AT_REPARSE_POINT, NULL,
2559 					0, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
2560 			rpp = (struct reparse_point *)((u8 *)ctx->attr +
2561 					le16_to_cpu(ctx->attr->data.resident.value_offset));
2562 			reparse_tag = rpp->reparse_tag;
2563 		}
2564 		ntfs_attr_reinit_search_ctx(ctx);
2565 	}
2566 
2567 	/* Walk through all FILE_NAME attributes and update them. */
2568 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) {
2569 		fn = (struct file_name_attr *)((u8 *)ctx->attr +
2570 				le16_to_cpu(ctx->attr->data.resident.value_offset));
2571 		if (MREF_LE(fn->parent_directory) == ni->mft_no)
2572 			continue;
2573 
2574 		index_vi = ntfs_iget(sb, MREF_LE(fn->parent_directory));
2575 		if (IS_ERR(index_vi)) {
2576 			ntfs_error(sb, "Failed to open inode %lld with index",
2577 					(long long)MREF_LE(fn->parent_directory));
2578 			continue;
2579 		}
2580 
2581 		index_ni = NTFS_I(index_vi);
2582 
2583 		mutex_lock_nested(&index_ni->mrec_lock, NTFS_INODE_MUTEX_PARENT);
2584 		if (NInoBeingDeleted(ni)) {
2585 			iput(index_vi);
2586 			mutex_unlock(&index_ni->mrec_lock);
2587 			continue;
2588 		}
2589 
2590 		ictx = ntfs_index_ctx_get(index_ni, I30, 4);
2591 		if (!ictx) {
2592 			ntfs_error(sb, "Failed to get index ctx, inode %llu",
2593 					index_ni->mft_no);
2594 			iput(index_vi);
2595 			mutex_unlock(&index_ni->mrec_lock);
2596 			continue;
2597 		}
2598 
2599 		err = ntfs_index_lookup(fn, sizeof(struct file_name_attr), ictx);
2600 		if (err) {
2601 			ntfs_debug("Index lookup failed, inode %llu",
2602 					index_ni->mft_no);
2603 			ntfs_index_ctx_put(ictx);
2604 			iput(index_vi);
2605 			mutex_unlock(&index_ni->mrec_lock);
2606 			continue;
2607 		}
2608 		/* Update flags and file size. */
2609 		fnx = (struct file_name_attr *)ictx->data;
2610 		fnx->file_attributes =
2611 			(fnx->file_attributes & ~FILE_ATTR_VALID_FLAGS) |
2612 			(ni->flags & FILE_ATTR_VALID_FLAGS);
2613 		if (ctx->mrec->flags & MFT_RECORD_IS_DIRECTORY)
2614 			fnx->data_size = fnx->allocated_size = 0;
2615 		else {
2616 			read_lock_irqsave(&ni->size_lock, flags);
2617 			if (NInoSparse(ni) || NInoCompressed(ni))
2618 				fnx->allocated_size = cpu_to_le64(ni->itype.compressed.size);
2619 			else
2620 				fnx->allocated_size = cpu_to_le64(ni->allocated_size);
2621 			fnx->data_size = cpu_to_le64(ni->data_size);
2622 
2623 			/*
2624 			 * The file name record has also to be fixed if some
2625 			 * attribute update implied the unnamed data to be
2626 			 * made non-resident
2627 			 */
2628 			fn->allocated_size = fnx->allocated_size;
2629 			fn->data_size = fnx->data_size;
2630 			read_unlock_irqrestore(&ni->size_lock, flags);
2631 		}
2632 
2633 		/* update or clear the reparse tag in the index */
2634 		fnx->type.rp.reparse_point_tag = reparse_tag;
2635 		fnx->creation_time = fn->creation_time;
2636 		fnx->last_data_change_time = fn->last_data_change_time;
2637 		fnx->last_mft_change_time = fn->last_mft_change_time;
2638 		fnx->last_access_time = fn->last_access_time;
2639 		ntfs_index_entry_mark_dirty(ictx);
2640 		ntfs_icx_ib_sync_write(ictx);
2641 		NInoSetDirty(ctx->ntfs_ino);
2642 		ntfs_index_ctx_put(ictx);
2643 		mutex_unlock(&index_ni->mrec_lock);
2644 		iput(index_vi);
2645 	}
2646 	/* Check for real error occurred. */
2647 	if (err != -ENOENT) {
2648 		ntfs_error(sb, "Attribute lookup failed, err : %d, inode %llu", err,
2649 				ni->mft_no);
2650 	} else
2651 		err = 0;
2652 
2653 	ntfs_attr_put_search_ctx(ctx);
2654 	return err;
2655 }
2656 
2657 int ntfs_get_block_mft_record(struct ntfs_inode *mft_ni, struct ntfs_inode *ni)
2658 {
2659 	s64 vcn;
2660 	struct runlist_element *rl;
2661 
2662 	if (ni->mft_lcn[0] != LCN_RL_NOT_MAPPED)
2663 		return 0;
2664 
2665 	vcn = (s64)ni->mft_no << mft_ni->vol->mft_record_size_bits >>
2666 	      mft_ni->vol->cluster_size_bits;
2667 
2668 	rl = mft_ni->runlist.rl;
2669 	if (!rl) {
2670 		ntfs_error(mft_ni->vol->sb, "$MFT runlist is not present");
2671 		return -EIO;
2672 	}
2673 
2674 	/* Seek to element containing target vcn. */
2675 	while (rl->length && rl[1].vcn <= vcn)
2676 		rl++;
2677 	ni->mft_lcn[0] = ntfs_rl_vcn_to_lcn(rl, vcn);
2678 	ni->mft_lcn_count = 1;
2679 
2680 	if (mft_ni->vol->cluster_size < mft_ni->vol->mft_record_size &&
2681 	    (rl->length - (vcn - rl->vcn)) <= 1) {
2682 		rl++;
2683 		ni->mft_lcn[1] = ntfs_rl_vcn_to_lcn(rl, vcn + 1);
2684 		ni->mft_lcn_count++;
2685 	}
2686 	return 0;
2687 }
2688 
2689 /*
2690  * __ntfs_write_inode - write out a dirty inode
2691  * @vi:		inode to write out
2692  * @sync:	if true, write out synchronously
2693  *
2694  * Write out a dirty inode to disk including any extent inodes if present.
2695  *
2696  * If @sync is true, commit the inode to disk and wait for io completion.  This
2697  * is done using write_mft_record().
2698  *
2699  * If @sync is false, just schedule the write to happen but do not wait for i/o
2700  * completion.
2701  *
2702  * Return 0 on success and -errno on error.
2703  */
2704 int __ntfs_write_inode(struct inode *vi, int sync)
2705 {
2706 	struct ntfs_inode *ni = NTFS_I(vi);
2707 	struct ntfs_inode *mft_ni = NTFS_I(ni->vol->mft_ino);
2708 	struct mft_record *m;
2709 	int err = 0;
2710 	bool need_iput = false;
2711 
2712 	ntfs_debug("Entering for %sinode 0x%llx.", NInoAttr(ni) ? "attr " : "",
2713 			ni->mft_no);
2714 
2715 	if (NVolShutdown(ni->vol))
2716 		return -EIO;
2717 
2718 	/*
2719 	 * Dirty attribute inodes are written via their real inodes so just
2720 	 * clean them here.  Access time updates are taken care off when the
2721 	 * real inode is written.
2722 	 */
2723 	if (NInoAttr(ni) || ni->nr_extents == -1) {
2724 		NInoClearDirty(ni);
2725 		ntfs_debug("Done.");
2726 		return 0;
2727 	}
2728 
2729 	/* igrab prevents vi from being evicted while mrec_lock is hold. */
2730 	if (igrab(vi) != NULL)
2731 		need_iput = true;
2732 
2733 	mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL);
2734 	/* Map, pin, and lock the mft record belonging to the inode. */
2735 	m = map_mft_record(ni);
2736 	if (IS_ERR(m)) {
2737 		mutex_unlock(&ni->mrec_lock);
2738 		err = PTR_ERR(m);
2739 		goto err_out;
2740 	}
2741 
2742 	if (NInoNonResident(ni) && NInoRunlistDirty(ni)) {
2743 		down_write(&ni->runlist.lock);
2744 		err = ntfs_attr_update_mapping_pairs(ni, 0);
2745 		if (!err)
2746 			NInoClearRunlistDirty(ni);
2747 		up_write(&ni->runlist.lock);
2748 	}
2749 
2750 	err = ntfs_inode_sync_standard_information(vi, m);
2751 	if (err)
2752 		goto unm_err_out;
2753 
2754 	/*
2755 	 * when being umounted and inodes are evicted, write_inode()
2756 	 * is called with all inodes being marked with I_FREEING.
2757 	 * then ntfs_inode_sync_filename() waits infinitly because
2758 	 * of ntfs_iget. This situation happens only where sync_filesysem()
2759 	 * from umount fails because of a disk unplug and etc.
2760 	 * the absent of SB_ACTIVE means umounting.
2761 	 */
2762 	if ((vi->i_sb->s_flags & SB_ACTIVE) && NInoTestClearFileNameDirty(ni))
2763 		ntfs_inode_sync_filename(ni);
2764 
2765 	/* Now the access times are updated, write the base mft record. */
2766 	if (NInoDirty(ni)) {
2767 		down_read(&mft_ni->runlist.lock);
2768 		err = ntfs_get_block_mft_record(mft_ni, ni);
2769 		up_read(&mft_ni->runlist.lock);
2770 		if (err)
2771 			goto unm_err_out;
2772 
2773 		err = write_mft_record(ni, m, sync);
2774 		if (err)
2775 			ntfs_error(vi->i_sb, "write_mft_record failed, err : %d\n", err);
2776 	}
2777 	unmap_mft_record(ni);
2778 
2779 	/* Map any unmapped extent mft records with LCNs. */
2780 	down_read(&mft_ni->runlist.lock);
2781 	mutex_lock(&ni->extent_lock);
2782 	if (ni->nr_extents > 0) {
2783 		int i;
2784 
2785 		for (i = 0; i < ni->nr_extents; i++) {
2786 			err = ntfs_get_block_mft_record(mft_ni,
2787 						   ni->ext.extent_ntfs_inos[i]);
2788 			if (err) {
2789 				mutex_unlock(&ni->extent_lock);
2790 				up_read(&mft_ni->runlist.lock);
2791 				mutex_unlock(&ni->mrec_lock);
2792 				goto err_out;
2793 			}
2794 		}
2795 	}
2796 	mutex_unlock(&ni->extent_lock);
2797 	up_read(&mft_ni->runlist.lock);
2798 
2799 	/* Write all attached extent mft records. */
2800 	mutex_lock(&ni->extent_lock);
2801 	if (ni->nr_extents > 0) {
2802 		struct ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
2803 		int i;
2804 
2805 		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
2806 		for (i = 0; i < ni->nr_extents; i++) {
2807 			struct ntfs_inode *tni = extent_nis[i];
2808 
2809 			if (NInoDirty(tni)) {
2810 				struct mft_record *tm;
2811 				int ret;
2812 
2813 				mutex_lock(&tni->mrec_lock);
2814 				tm = map_mft_record(tni);
2815 				if (IS_ERR(tm)) {
2816 					mutex_unlock(&tni->mrec_lock);
2817 					if (!err || err == -ENOMEM)
2818 						err = PTR_ERR(tm);
2819 					continue;
2820 				}
2821 
2822 				ret = write_mft_record(tni, tm, sync);
2823 				unmap_mft_record(tni);
2824 				mutex_unlock(&tni->mrec_lock);
2825 
2826 				if (unlikely(ret)) {
2827 					if (!err || err == -ENOMEM)
2828 						err = ret;
2829 				}
2830 			}
2831 		}
2832 	}
2833 	mutex_unlock(&ni->extent_lock);
2834 	mutex_unlock(&ni->mrec_lock);
2835 
2836 	if (unlikely(err))
2837 		goto err_out;
2838 	if (need_iput)
2839 		iput(vi);
2840 	ntfs_debug("Done.");
2841 	return 0;
2842 unm_err_out:
2843 	unmap_mft_record(ni);
2844 	mutex_unlock(&ni->mrec_lock);
2845 err_out:
2846 	if (err == -ENOMEM)
2847 		mark_inode_dirty(vi);
2848 	else {
2849 		ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
2850 		NVolSetErrors(ni->vol);
2851 	}
2852 	if (need_iput)
2853 		iput(vi);
2854 	return err;
2855 }
2856 
2857 /*
2858  * ntfs_extent_inode_open - load an extent inode and attach it to its base
2859  * @base_ni:	base ntfs inode
2860  * @mref:	mft reference of the extent inode to load (in little endian)
2861  *
2862  * First check if the extent inode @mref is already attached to the base ntfs
2863  * inode @base_ni, and if so, return a pointer to the attached extent inode.
2864  *
2865  * If the extent inode is not already attached to the base inode, allocate an
2866  * ntfs_inode structure and initialize it for the given inode @mref. @mref
2867  * specifies the inode number / mft record to read, including the sequence
2868  * number, which can be 0 if no sequence number checking is to be performed.
2869  *
2870  * Then, allocate a buffer for the mft record, read the mft record from the
2871  * volume @base_ni->vol, and attach it to the ntfs_inode structure (->mrec).
2872  * The mft record is mst deprotected and sanity checked for validity and we
2873  * abort if deprotection or checks fail.
2874  *
2875  * Finally attach the ntfs inode to its base inode @base_ni and return a
2876  * pointer to the ntfs_inode structure on success or NULL on error, with errno
2877  * set to the error code.
2878  *
2879  * Note, extent inodes are never closed directly. They are automatically
2880  * disposed off by the closing of the base inode.
2881  */
2882 static struct ntfs_inode *ntfs_extent_inode_open(struct ntfs_inode *base_ni,
2883 		const __le64 mref)
2884 {
2885 	u64 mft_no = MREF_LE(mref);
2886 	struct ntfs_inode *ni = NULL;
2887 	struct ntfs_inode **extent_nis;
2888 	int i;
2889 	struct mft_record *ni_mrec;
2890 	struct super_block *sb;
2891 
2892 	if (!base_ni)
2893 		return NULL;
2894 
2895 	sb = base_ni->vol->sb;
2896 	ntfs_debug("Opening extent inode %llu (base mft record %llu).\n",
2897 			mft_no, base_ni->mft_no);
2898 
2899 	/* Is the extent inode already open and attached to the base inode? */
2900 	if (base_ni->nr_extents > 0) {
2901 		extent_nis = base_ni->ext.extent_ntfs_inos;
2902 		for (i = 0; i < base_ni->nr_extents; i++) {
2903 			u16 seq_no;
2904 
2905 			ni = extent_nis[i];
2906 			if (mft_no != ni->mft_no)
2907 				continue;
2908 			ni_mrec = map_mft_record(ni);
2909 			if (IS_ERR(ni_mrec)) {
2910 				ntfs_error(sb, "failed to map mft record for %llu",
2911 						ni->mft_no);
2912 				goto out;
2913 			}
2914 			/* Verify the sequence number if given. */
2915 			seq_no = MSEQNO_LE(mref);
2916 			if (seq_no &&
2917 			    seq_no != le16_to_cpu(ni_mrec->sequence_number)) {
2918 				ntfs_error(sb, "Found stale extent mft reference mft=%llu",
2919 						ni->mft_no);
2920 				unmap_mft_record(ni);
2921 				goto out;
2922 			}
2923 			unmap_mft_record(ni);
2924 			goto out;
2925 		}
2926 	}
2927 	/* Wasn't there, we need to load the extent inode. */
2928 	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
2929 	if (!ni)
2930 		goto out;
2931 
2932 	ni->seq_no = (u16)MSEQNO_LE(mref);
2933 	ni->nr_extents = -1;
2934 	ni->ext.base_ntfs_ino = base_ni;
2935 	/* Attach extent inode to base inode, reallocating memory if needed. */
2936 	if (!(base_ni->nr_extents & 3)) {
2937 		i = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *);
2938 
2939 		extent_nis = kvzalloc(i, GFP_NOFS);
2940 		if (!extent_nis)
2941 			goto err_out;
2942 		if (base_ni->nr_extents) {
2943 			memcpy(extent_nis, base_ni->ext.extent_ntfs_inos,
2944 					i - 4 * sizeof(struct ntfs_inode *));
2945 			kvfree(base_ni->ext.extent_ntfs_inos);
2946 		}
2947 		base_ni->ext.extent_ntfs_inos = extent_nis;
2948 	}
2949 	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
2950 
2951 out:
2952 	ntfs_debug("\n");
2953 	return ni;
2954 err_out:
2955 	ntfs_destroy_ext_inode(ni);
2956 	ni = NULL;
2957 	goto out;
2958 }
2959 
2960 /*
2961  * ntfs_inode_attach_all_extents - attach all extents for target inode
2962  * @ni:		opened ntfs inode for which perform attach
2963  *
2964  * Return 0 on success and error.
2965  */
2966 int ntfs_inode_attach_all_extents(struct ntfs_inode *ni)
2967 {
2968 	struct attr_list_entry *ale;
2969 	u64 prev_attached = 0;
2970 
2971 	if (!ni) {
2972 		ntfs_debug("Invalid arguments.\n");
2973 		return -EINVAL;
2974 	}
2975 
2976 	if (NInoAttr(ni))
2977 		ni = ni->ext.base_ntfs_ino;
2978 
2979 	ntfs_debug("Entering for inode 0x%llx.\n", ni->mft_no);
2980 
2981 	/* Inode haven't got attribute list, thus nothing to attach. */
2982 	if (!NInoAttrList(ni))
2983 		return 0;
2984 
2985 	if (!ni->attr_list) {
2986 		ntfs_debug("Corrupt in-memory struct.\n");
2987 		return -EINVAL;
2988 	}
2989 
2990 	/* Walk through attribute list and attach all extents. */
2991 	ale = (struct attr_list_entry *)ni->attr_list;
2992 	while ((u8 *)ale < ni->attr_list + ni->attr_list_size) {
2993 		if (ni->mft_no != MREF_LE(ale->mft_reference) &&
2994 				prev_attached != MREF_LE(ale->mft_reference)) {
2995 			if (!ntfs_extent_inode_open(ni, ale->mft_reference)) {
2996 				ntfs_debug("Couldn't attach extent inode.\n");
2997 				return -1;
2998 			}
2999 			prev_attached = MREF_LE(ale->mft_reference);
3000 		}
3001 		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
3002 	}
3003 	return 0;
3004 }
3005 
3006 /*
3007  * ntfs_inode_add_attrlist - add attribute list to inode and fill it
3008  * @ni: opened ntfs inode to which add attribute list
3009  *
3010  * Return 0 on success or error.
3011  */
3012 int ntfs_inode_add_attrlist(struct ntfs_inode *ni)
3013 {
3014 	int err;
3015 	struct ntfs_attr_search_ctx *ctx;
3016 	u8 *al = NULL, *aln;
3017 	int al_len = 0;
3018 	struct attr_list_entry *ale = NULL;
3019 	struct mft_record *ni_mrec;
3020 	u32 attr_al_len;
3021 
3022 	if (!ni)
3023 		return -EINVAL;
3024 
3025 	ntfs_debug("inode %llu\n", ni->mft_no);
3026 
3027 	if (NInoAttrList(ni) || ni->nr_extents) {
3028 		ntfs_error(ni->vol->sb, "Inode already has attribute list");
3029 		return -EEXIST;
3030 	}
3031 
3032 	ni_mrec = map_mft_record(ni);
3033 	if (IS_ERR(ni_mrec))
3034 		return -EIO;
3035 
3036 	/* Form attribute list. */
3037 	ctx = ntfs_attr_get_search_ctx(ni, ni_mrec);
3038 	if (!ctx) {
3039 		err = -ENOMEM;
3040 		goto err_out;
3041 	}
3042 
3043 	/* Walk through all attributes. */
3044 	while (!(err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, 0, 0, NULL, 0, ctx))) {
3045 		int ale_size;
3046 
3047 		if (ctx->attr->type == AT_ATTRIBUTE_LIST) {
3048 			err = -EIO;
3049 			ntfs_error(ni->vol->sb, "Attribute list already present");
3050 			goto put_err_out;
3051 		}
3052 
3053 		ale_size = (sizeof(struct attr_list_entry) + sizeof(__le16) *
3054 				ctx->attr->name_length + 7) & ~7;
3055 		al_len += ale_size;
3056 
3057 		aln = kvrealloc(al, al_len, GFP_NOFS);
3058 		if (!aln) {
3059 			err = -ENOMEM;
3060 			ntfs_error(ni->vol->sb, "Failed to realloc %d bytes", al_len);
3061 			goto put_err_out;
3062 		}
3063 		ale = (struct attr_list_entry *)(aln + ((u8 *)ale - al));
3064 		al = aln;
3065 
3066 		memset(ale, 0, ale_size);
3067 
3068 		/* Add attribute to attribute list. */
3069 		ale->type = ctx->attr->type;
3070 		ale->length = cpu_to_le16((sizeof(struct attr_list_entry) +
3071 					sizeof(__le16) * ctx->attr->name_length + 7) & ~7);
3072 		ale->name_length = ctx->attr->name_length;
3073 		ale->name_offset = (u8 *)ale->name - (u8 *)ale;
3074 		if (ctx->attr->non_resident)
3075 			ale->lowest_vcn =
3076 				ctx->attr->data.non_resident.lowest_vcn;
3077 		else
3078 			ale->lowest_vcn = 0;
3079 		ale->mft_reference = MK_LE_MREF(ni->mft_no,
3080 				le16_to_cpu(ni_mrec->sequence_number));
3081 		ale->instance = ctx->attr->instance;
3082 		memcpy(ale->name, (u8 *)ctx->attr +
3083 				le16_to_cpu(ctx->attr->name_offset),
3084 				ctx->attr->name_length * sizeof(__le16));
3085 		ale = (struct attr_list_entry *)(al + al_len);
3086 	}
3087 
3088 	/* Check for real error occurred. */
3089 	if (err != -ENOENT) {
3090 		ntfs_error(ni->vol->sb, "%s: Attribute lookup failed, inode %llu",
3091 				__func__, ni->mft_no);
3092 		goto put_err_out;
3093 	}
3094 
3095 	/* Set in-memory attribute list. */
3096 	ni->attr_list = al;
3097 	ni->attr_list_size = al_len;
3098 	NInoSetAttrList(ni);
3099 
3100 	attr_al_len = offsetof(struct attr_record, data.resident.reserved) + 1 +
3101 		((al_len + 7) & ~7);
3102 	/* Free space if there is not enough it for $ATTRIBUTE_LIST. */
3103 	if (le32_to_cpu(ni_mrec->bytes_allocated) -
3104 			le32_to_cpu(ni_mrec->bytes_in_use) < attr_al_len) {
3105 		if (ntfs_inode_free_space(ni, (int)attr_al_len)) {
3106 			/* Failed to free space. */
3107 			err = -ENOSPC;
3108 			ntfs_error(ni->vol->sb, "Failed to free space for attrlist");
3109 			goto rollback;
3110 		}
3111 	}
3112 
3113 	/* Add $ATTRIBUTE_LIST to mft record. */
3114 	err = ntfs_resident_attr_record_add(ni, AT_ATTRIBUTE_LIST, AT_UNNAMED, 0,
3115 					    NULL, al_len, 0);
3116 	if (err < 0) {
3117 		ntfs_error(ni->vol->sb, "Couldn't add $ATTRIBUTE_LIST to MFT");
3118 		goto rollback;
3119 	}
3120 
3121 	err = ntfs_attrlist_update(ni);
3122 	if (err < 0)
3123 		goto remove_attrlist_record;
3124 
3125 	ntfs_attr_put_search_ctx(ctx);
3126 	unmap_mft_record(ni);
3127 	return 0;
3128 
3129 remove_attrlist_record:
3130 	/* Prevent ntfs_attr_recorm_rm from freeing attribute list. */
3131 	ni->attr_list = NULL;
3132 	NInoClearAttrList(ni);
3133 	/* Remove $ATTRIBUTE_LIST record. */
3134 	ntfs_attr_reinit_search_ctx(ctx);
3135 	if (!ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0,
3136 				CASE_SENSITIVE, 0, NULL, 0, ctx)) {
3137 		if (ntfs_attr_record_rm(ctx))
3138 			ntfs_error(ni->vol->sb, "Rollback failed to remove attrlist");
3139 	} else {
3140 		ntfs_error(ni->vol->sb, "Rollback failed to find attrlist");
3141 	}
3142 
3143 	/* Setup back in-memory runlist. */
3144 	ni->attr_list = al;
3145 	ni->attr_list_size = al_len;
3146 	NInoSetAttrList(ni);
3147 rollback:
3148 	/*
3149 	 * Scan attribute list for attributes that placed not in the base MFT
3150 	 * record and move them to it.
3151 	 */
3152 	ntfs_attr_reinit_search_ctx(ctx);
3153 	ale = (struct attr_list_entry *)al;
3154 	while ((u8 *)ale < al + al_len) {
3155 		if (MREF_LE(ale->mft_reference) != ni->mft_no) {
3156 			if (!ntfs_attr_lookup(ale->type, ale->name,
3157 						ale->name_length,
3158 						CASE_SENSITIVE,
3159 						le64_to_cpu(ale->lowest_vcn),
3160 						NULL, 0, ctx)) {
3161 				if (ntfs_attr_record_move_to(ctx, ni))
3162 					ntfs_error(ni->vol->sb,
3163 							"Rollback failed to move attribute");
3164 			} else {
3165 				ntfs_error(ni->vol->sb, "Rollback failed to find attr");
3166 			}
3167 			ntfs_attr_reinit_search_ctx(ctx);
3168 		}
3169 		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
3170 	}
3171 
3172 	/* Remove in-memory attribute list. */
3173 	ni->attr_list = NULL;
3174 	ni->attr_list_size = 0;
3175 	NInoClearAttrList(ni);
3176 	NInoClearAttrListDirty(ni);
3177 put_err_out:
3178 	ntfs_attr_put_search_ctx(ctx);
3179 err_out:
3180 	kvfree(al);
3181 	unmap_mft_record(ni);
3182 	return err;
3183 }
3184 
3185 /*
3186  * ntfs_inode_close - close an ntfs inode and free all associated memory
3187  * @ni:		ntfs inode to close
3188  *
3189  * Make sure the ntfs inode @ni is clean.
3190  *
3191  * If the ntfs inode @ni is a base inode, close all associated extent inodes,
3192  * then deallocate all memory attached to it, and finally free the ntfs inode
3193  * structure itself.
3194  *
3195  * If it is an extent inode, we disconnect it from its base inode before we
3196  * destroy it.
3197  *
3198  * It is OK to pass NULL to this function, it is just noop in this case.
3199  *
3200  * Return 0 on success or error.
3201  */
3202 int ntfs_inode_close(struct ntfs_inode *ni)
3203 {
3204 	int err = -1;
3205 	struct ntfs_inode **tmp_nis;
3206 	struct ntfs_inode *base_ni;
3207 	s32 i;
3208 
3209 	if (!ni)
3210 		return 0;
3211 
3212 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
3213 
3214 	/* Is this a base inode with mapped extent inodes? */
3215 	/*
3216 	 * If the inode is an extent inode, disconnect it from the
3217 	 * base inode before destroying it.
3218 	 */
3219 	base_ni = ni->ext.base_ntfs_ino;
3220 	tmp_nis = base_ni->ext.extent_ntfs_inos;
3221 	if (!tmp_nis)
3222 		goto out;
3223 	for (i = 0; i < base_ni->nr_extents; ++i) {
3224 		if (tmp_nis[i] != ni)
3225 			continue;
3226 		/* Found it. Disconnect. */
3227 		memmove(tmp_nis + i, tmp_nis + i + 1,
3228 				(base_ni->nr_extents - i - 1) *
3229 				sizeof(struct ntfs_inode *));
3230 		/* Buffer should be for multiple of four extents. */
3231 		if ((--base_ni->nr_extents) & 3)
3232 			break;
3233 		/*
3234 		 * ElectricFence is unhappy with realloc(x,0) as free(x)
3235 		 * thus we explicitly separate these two cases.
3236 		 */
3237 		if (base_ni->nr_extents) {
3238 			/* Resize the memory buffer. */
3239 			tmp_nis = kvrealloc(tmp_nis, base_ni->nr_extents *
3240 					sizeof(struct ntfs_inode *), GFP_NOFS);
3241 			/* Ignore errors, they don't really matter. */
3242 			if (tmp_nis)
3243 				base_ni->ext.extent_ntfs_inos = tmp_nis;
3244 		} else if (tmp_nis) {
3245 			kvfree(tmp_nis);
3246 			base_ni->ext.extent_ntfs_inos = NULL;
3247 		}
3248 		break;
3249 	}
3250 
3251 out:
3252 	if (NInoDirty(ni))
3253 		ntfs_error(ni->vol->sb, "Releasing dirty inode %llu!\n",
3254 				ni->mft_no);
3255 	if (NInoAttrList(ni) && ni->attr_list)
3256 		kvfree(ni->attr_list);
3257 	ntfs_destroy_ext_inode(ni);
3258 	err = 0;
3259 	ntfs_debug("\n");
3260 	return err;
3261 }
3262 
3263 void ntfs_destroy_ext_inode(struct ntfs_inode *ni)
3264 {
3265 	ntfs_debug("Entering.");
3266 	if (ni == NULL)
3267 		return;
3268 
3269 	ntfs_attr_close(ni);
3270 
3271 	if (NInoDirty(ni))
3272 		ntfs_error(ni->vol->sb, "Releasing dirty ext inode %llu!\n",
3273 				ni->mft_no);
3274 	if (NInoAttrList(ni) && ni->attr_list)
3275 		kvfree(ni->attr_list);
3276 	kfree(ni->mrec);
3277 	kmem_cache_free(ntfs_inode_cache, ni);
3278 }
3279 
3280 static struct ntfs_inode *ntfs_inode_base(struct ntfs_inode *ni)
3281 {
3282 	if (ni->nr_extents == -1)
3283 		return ni->ext.base_ntfs_ino;
3284 	return ni;
3285 }
3286 
3287 static int ntfs_attr_position(__le32 type, struct ntfs_attr_search_ctx *ctx)
3288 {
3289 	int err;
3290 
3291 	err = ntfs_attr_lookup(type, NULL, 0, CASE_SENSITIVE, 0, NULL,
3292 				0, ctx);
3293 	if (err) {
3294 		__le32 atype;
3295 
3296 		if (err != -ENOENT)
3297 			return err;
3298 
3299 		atype = ctx->attr->type;
3300 		if (atype == AT_END)
3301 			return -ENOSPC;
3302 
3303 		/*
3304 		 * if ntfs_external_attr_lookup return -ENOENT, ctx->al_entry
3305 		 * could point to an attribute in an extent mft record, but
3306 		 * ctx->attr and ctx->ntfs_ino always points to an attibute in
3307 		 * a base mft record.
3308 		 */
3309 		if (ctx->al_entry &&
3310 		    MREF_LE(ctx->al_entry->mft_reference) != ctx->ntfs_ino->mft_no) {
3311 			ntfs_attr_reinit_search_ctx(ctx);
3312 			err = ntfs_attr_lookup(atype, NULL, 0, CASE_SENSITIVE, 0, NULL,
3313 					       0, ctx);
3314 			if (err)
3315 				return err;
3316 		}
3317 	}
3318 	return 0;
3319 }
3320 
3321 /*
3322  * ntfs_inode_free_space - free space in the MFT record of inode
3323  * @ni:		ntfs inode in which MFT record free space
3324  * @size:	amount of space needed to free
3325  *
3326  * Return 0 on success or error.
3327  */
3328 int ntfs_inode_free_space(struct ntfs_inode *ni, int size)
3329 {
3330 	struct ntfs_attr_search_ctx *ctx;
3331 	int freed, err;
3332 	struct mft_record *ni_mrec;
3333 	struct super_block *sb;
3334 
3335 	if (!ni || size < 0)
3336 		return -EINVAL;
3337 	ntfs_debug("Entering for inode %llu, size %d\n", ni->mft_no, size);
3338 
3339 	sb = ni->vol->sb;
3340 	ni_mrec = map_mft_record(ni);
3341 	if (IS_ERR(ni_mrec))
3342 		return -EIO;
3343 
3344 	freed = (le32_to_cpu(ni_mrec->bytes_allocated) -
3345 			le32_to_cpu(ni_mrec->bytes_in_use));
3346 
3347 	unmap_mft_record(ni);
3348 
3349 	if (size <= freed)
3350 		return 0;
3351 
3352 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
3353 	if (!ctx) {
3354 		ntfs_error(sb, "%s, Failed to get search context", __func__);
3355 		return -ENOMEM;
3356 	}
3357 
3358 	/*
3359 	 * Chkdsk complain if $STANDARD_INFORMATION is not in the base MFT
3360 	 * record.
3361 	 *
3362 	 * Also we can't move $ATTRIBUTE_LIST from base MFT_RECORD, so position
3363 	 * search context on first attribute after $STANDARD_INFORMATION and
3364 	 * $ATTRIBUTE_LIST.
3365 	 *
3366 	 * Why we reposition instead of simply skip this attributes during
3367 	 * enumeration? Because in case we have got only in-memory attribute
3368 	 * list ntfs_attr_lookup will fail when it will try to find
3369 	 * $ATTRIBUTE_LIST.
3370 	 */
3371 	err = ntfs_attr_position(AT_FILE_NAME, ctx);
3372 	if (err)
3373 		goto put_err_out;
3374 
3375 	while (1) {
3376 		int record_size;
3377 
3378 		/*
3379 		 * Check whether attribute is from different MFT record. If so,
3380 		 * find next, because we don't need such.
3381 		 */
3382 		while (ctx->ntfs_ino->mft_no != ni->mft_no) {
3383 retry:
3384 			err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, CASE_SENSITIVE,
3385 						0, NULL, 0, ctx);
3386 			if (err) {
3387 				if (err != -ENOENT)
3388 					ntfs_error(sb, "Attr lookup failed #2");
3389 				else if (ctx->attr->type == AT_END)
3390 					err = -ENOSPC;
3391 				else
3392 					err = 0;
3393 
3394 				if (err)
3395 					goto put_err_out;
3396 			}
3397 		}
3398 
3399 		if (ntfs_inode_base(ctx->ntfs_ino)->mft_no == FILE_MFT &&
3400 				ctx->attr->type == AT_DATA)
3401 			goto retry;
3402 
3403 		if (ctx->attr->type == AT_INDEX_ROOT)
3404 			goto retry;
3405 
3406 		record_size = le32_to_cpu(ctx->attr->length);
3407 
3408 		/* Move away attribute. */
3409 		err = ntfs_attr_record_move_away(ctx, 0);
3410 		if (err) {
3411 			ntfs_error(sb, "Failed to move out attribute #2");
3412 			break;
3413 		}
3414 		freed += record_size;
3415 
3416 		/* Check whether we done. */
3417 		if (size <= freed) {
3418 			ntfs_attr_put_search_ctx(ctx);
3419 			return 0;
3420 		}
3421 
3422 		/*
3423 		 * Reposition to first attribute after $STANDARD_INFORMATION and
3424 		 * $ATTRIBUTE_LIST (see comments upwards).
3425 		 */
3426 		ntfs_attr_reinit_search_ctx(ctx);
3427 		err = ntfs_attr_position(AT_FILE_NAME, ctx);
3428 		if (err)
3429 			break;
3430 	}
3431 put_err_out:
3432 	ntfs_attr_put_search_ctx(ctx);
3433 	if (err == -ENOSPC)
3434 		ntfs_debug("No attributes left that can be moved out.\n");
3435 	return err;
3436 }
3437 
3438 s64 ntfs_inode_attr_pread(struct inode *vi, s64 pos, s64 count, u8 *buf)
3439 {
3440 	struct address_space *mapping = vi->i_mapping;
3441 	struct folio *folio;
3442 	struct ntfs_inode *ni = NTFS_I(vi);
3443 	s64 isize;
3444 	u32 attr_len, total = 0, offset;
3445 	pgoff_t index;
3446 	int err = 0;
3447 
3448 	WARN_ON(!NInoAttr(ni));
3449 	if (!count)
3450 		return 0;
3451 
3452 	mutex_lock(&ni->mrec_lock);
3453 	isize = i_size_read(vi);
3454 	if (pos > isize) {
3455 		mutex_unlock(&ni->mrec_lock);
3456 		return -EINVAL;
3457 	}
3458 	if (pos + count > isize)
3459 		count = isize - pos;
3460 
3461 	if (!NInoNonResident(ni)) {
3462 		struct ntfs_attr_search_ctx *ctx;
3463 		u8 *attr;
3464 
3465 		ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3466 		if (!ctx) {
3467 			ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3468 			err = -ENOMEM;
3469 			mutex_unlock(&ni->mrec_lock);
3470 			goto out;
3471 		}
3472 
3473 		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3474 				       0, NULL, 0, ctx);
3475 		if (err) {
3476 			ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3477 			ntfs_attr_put_search_ctx(ctx);
3478 			mutex_unlock(&ni->mrec_lock);
3479 			goto out;
3480 		}
3481 
3482 		attr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3483 		memcpy(buf, (u8 *)attr + pos, count);
3484 		ntfs_attr_put_search_ctx(ctx);
3485 		mutex_unlock(&ni->mrec_lock);
3486 		return count;
3487 	}
3488 	mutex_unlock(&ni->mrec_lock);
3489 
3490 	index = pos >> PAGE_SHIFT;
3491 	do {
3492 		/* Update @index and get the next folio. */
3493 		folio = read_mapping_folio(mapping, index, NULL);
3494 		if (IS_ERR(folio))
3495 			break;
3496 
3497 		offset = offset_in_folio(folio, pos);
3498 		attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3499 
3500 		folio_lock(folio);
3501 		memcpy_from_folio(buf, folio, offset, attr_len);
3502 		folio_unlock(folio);
3503 		folio_put(folio);
3504 
3505 		total += attr_len;
3506 		buf += attr_len;
3507 		pos += attr_len;
3508 		count -= attr_len;
3509 		index++;
3510 	} while (count);
3511 out:
3512 	return err ? (s64)err : total;
3513 }
3514 
3515 static inline int ntfs_enlarge_attribute(struct inode *vi, s64 pos, s64 count,
3516 					 struct ntfs_attr_search_ctx *ctx)
3517 {
3518 	struct ntfs_inode *ni = NTFS_I(vi);
3519 	struct super_block *sb = vi->i_sb;
3520 	int ret;
3521 
3522 	if (pos + count <= ni->initialized_size)
3523 		return 0;
3524 
3525 	if (NInoEncrypted(ni) && NInoNonResident(ni))
3526 		return -EACCES;
3527 
3528 	if (NInoCompressed(ni))
3529 		return -EOPNOTSUPP;
3530 
3531 	if (pos + count > ni->data_size) {
3532 		if (ntfs_attr_truncate(ni, pos + count)) {
3533 			ntfs_debug("Failed to truncate attribute");
3534 			return -1;
3535 		}
3536 
3537 		ntfs_attr_reinit_search_ctx(ctx);
3538 		ret = ntfs_attr_lookup(ni->type,
3539 				       ni->name, ni->name_len, CASE_SENSITIVE,
3540 				       0, NULL, 0, ctx);
3541 		if (ret) {
3542 			ntfs_error(sb, "Failed to look up attr %#x", ni->type);
3543 			return ret;
3544 		}
3545 	}
3546 
3547 	if (!NInoNonResident(ni)) {
3548 		if (likely(i_size_read(vi) < ni->data_size))
3549 			i_size_write(vi, ni->data_size);
3550 		return 0;
3551 	}
3552 
3553 	if (pos + count > ni->initialized_size) {
3554 		ctx->attr->data.non_resident.initialized_size = cpu_to_le64(pos + count);
3555 		mark_mft_record_dirty(ctx->ntfs_ino);
3556 		ni->initialized_size = pos + count;
3557 		if (i_size_read(vi) < ni->initialized_size)
3558 			i_size_write(vi, ni->initialized_size);
3559 	}
3560 	return 0;
3561 }
3562 
3563 static s64 __ntfs_inode_resident_attr_pwrite(struct inode *vi,
3564 					     s64 pos, s64 count, u8 *buf,
3565 					     struct ntfs_attr_search_ctx *ctx)
3566 {
3567 	struct ntfs_inode *ni = NTFS_I(vi);
3568 	struct folio *folio;
3569 	struct address_space *mapping = vi->i_mapping;
3570 	u8 *addr;
3571 	int err = 0;
3572 
3573 	WARN_ON(NInoNonResident(ni));
3574 	if (pos + count > PAGE_SIZE) {
3575 		ntfs_error(vi->i_sb, "Out of write into resident attr %#x", ni->type);
3576 		return -EINVAL;
3577 	}
3578 
3579 	/* Copy to mft record page */
3580 	addr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3581 	memcpy(addr + pos, buf, count);
3582 	mark_mft_record_dirty(ctx->ntfs_ino);
3583 
3584 	/* Keep the first page clean and uptodate */
3585 	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
3586 				   mapping_gfp_mask(mapping));
3587 	if (IS_ERR(folio)) {
3588 		err = PTR_ERR(folio);
3589 		ntfs_error(vi->i_sb, "Failed to read a page 0 for attr %#x: %d",
3590 			   ni->type, err);
3591 		goto out;
3592 	}
3593 	if (!folio_test_uptodate(folio))
3594 		folio_fill_tail(folio, 0, addr,
3595 				le32_to_cpu(ctx->attr->data.resident.value_length));
3596 	else
3597 		memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, count);
3598 	folio_mark_uptodate(folio);
3599 	folio_unlock(folio);
3600 	folio_put(folio);
3601 out:
3602 	return err ? err : count;
3603 }
3604 
3605 static s64 __ntfs_inode_non_resident_attr_pwrite(struct inode *vi,
3606 						 s64 pos, s64 count, u8 *buf,
3607 						 struct ntfs_attr_search_ctx *ctx,
3608 						 bool sync)
3609 {
3610 	struct ntfs_inode *ni = NTFS_I(vi);
3611 	struct address_space *mapping = vi->i_mapping;
3612 	struct folio *folio;
3613 	pgoff_t index;
3614 	unsigned long offset, length;
3615 	size_t attr_len;
3616 	s64 ret = 0, written = 0;
3617 
3618 	WARN_ON(!NInoNonResident(ni));
3619 
3620 	index = pos >> PAGE_SHIFT;
3621 	while (count) {
3622 		if (count == PAGE_SIZE) {
3623 			folio = __filemap_get_folio(vi->i_mapping, index,
3624 					FGP_CREAT | FGP_LOCK,
3625 					mapping_gfp_mask(mapping));
3626 			if (IS_ERR(folio)) {
3627 				ret = -ENOMEM;
3628 				break;
3629 			}
3630 		} else {
3631 			folio = read_mapping_folio(mapping, index, NULL);
3632 			if (IS_ERR(folio)) {
3633 				ret = PTR_ERR(folio);
3634 				ntfs_error(vi->i_sb, "Failed to read a page %lu for attr %#x: %ld",
3635 						index, ni->type, PTR_ERR(folio));
3636 				break;
3637 			}
3638 
3639 			folio_lock(folio);
3640 		}
3641 
3642 		if (count == PAGE_SIZE) {
3643 			offset = 0;
3644 			attr_len = count;
3645 		} else {
3646 			offset = offset_in_folio(folio, pos);
3647 			attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3648 		}
3649 		memcpy_to_folio(folio, offset, buf, attr_len);
3650 
3651 		if (sync) {
3652 			struct ntfs_volume *vol = ni->vol;
3653 			s64 lcn, lcn_count;
3654 			unsigned int lcn_folio_off = 0;
3655 			struct bio *bio;
3656 			u64 rl_length = 0;
3657 			s64 vcn;
3658 			struct runlist_element *rl;
3659 
3660 			lcn_count = max_t(s64, 1, ntfs_bytes_to_cluster(vol, attr_len));
3661 			vcn = ntfs_pidx_to_cluster(vol, folio->index);
3662 
3663 			do {
3664 				down_write(&ni->runlist.lock);
3665 				rl = ntfs_attr_vcn_to_rl(ni, vcn, &lcn);
3666 				if (IS_ERR(rl)) {
3667 					ret = PTR_ERR(rl);
3668 					up_write(&ni->runlist.lock);
3669 					goto err_unlock_folio;
3670 				}
3671 
3672 				rl_length = rl->length - (vcn - rl->vcn);
3673 				if (rl_length < lcn_count) {
3674 					lcn_count -= rl_length;
3675 				} else {
3676 					rl_length = lcn_count;
3677 					lcn_count = 0;
3678 				}
3679 				up_write(&ni->runlist.lock);
3680 
3681 				if (vol->cluster_size_bits > PAGE_SHIFT) {
3682 					lcn_folio_off = folio->index << PAGE_SHIFT;
3683 					lcn_folio_off &= vol->cluster_size_mask;
3684 				}
3685 
3686 				bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
3687 						GFP_NOIO);
3688 				bio->bi_iter.bi_sector =
3689 					ntfs_bytes_to_sector(vol,
3690 							ntfs_cluster_to_bytes(vol, lcn) +
3691 							lcn_folio_off);
3692 
3693 				length = min_t(unsigned long,
3694 					       ntfs_cluster_to_bytes(vol, rl_length),
3695 					       folio_size(folio));
3696 				if (!bio_add_folio(bio, folio, length, offset)) {
3697 					ret = -EIO;
3698 					bio_put(bio);
3699 					goto err_unlock_folio;
3700 				}
3701 
3702 				submit_bio_wait(bio);
3703 				bio_put(bio);
3704 				vcn += rl_length;
3705 				offset += length;
3706 			} while (lcn_count != 0);
3707 
3708 			folio_mark_uptodate(folio);
3709 		} else {
3710 			folio_mark_uptodate(folio);
3711 			folio_mark_dirty(folio);
3712 		}
3713 err_unlock_folio:
3714 		folio_unlock(folio);
3715 		folio_put(folio);
3716 
3717 		if (ret)
3718 			break;
3719 
3720 		written += attr_len;
3721 		buf += attr_len;
3722 		pos += attr_len;
3723 		count -= attr_len;
3724 		index++;
3725 
3726 		cond_resched();
3727 	}
3728 
3729 	return ret ? ret : written;
3730 }
3731 
3732 s64 ntfs_inode_attr_pwrite(struct inode *vi, s64 pos, s64 count, u8 *buf, bool sync)
3733 {
3734 	struct ntfs_inode *ni = NTFS_I(vi);
3735 	struct ntfs_attr_search_ctx *ctx;
3736 	s64 ret;
3737 
3738 	WARN_ON(!NInoAttr(ni));
3739 
3740 	ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3741 	if (!ctx) {
3742 		ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3743 		return -ENOMEM;
3744 	}
3745 
3746 	ret = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3747 			       0, NULL, 0, ctx);
3748 	if (ret) {
3749 		ntfs_attr_put_search_ctx(ctx);
3750 		ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3751 		return ret;
3752 	}
3753 
3754 	mutex_lock(&ni->mrec_lock);
3755 	ret = ntfs_enlarge_attribute(vi, pos, count, ctx);
3756 	mutex_unlock(&ni->mrec_lock);
3757 	if (ret)
3758 		goto out;
3759 
3760 	if (NInoNonResident(ni))
3761 		ret = __ntfs_inode_non_resident_attr_pwrite(vi, pos, count, buf, ctx, sync);
3762 	else
3763 		ret = __ntfs_inode_resident_attr_pwrite(vi, pos, count, buf, ctx);
3764 out:
3765 	ntfs_attr_put_search_ctx(ctx);
3766 	return ret;
3767 }
3768 
3769 struct folio *ntfs_get_locked_folio(struct address_space *mapping,
3770 		pgoff_t index, pgoff_t end_index, struct file_ra_state *ra)
3771 {
3772 	struct folio *folio;
3773 
3774 	folio = filemap_lock_folio(mapping, index);
3775 	if (IS_ERR(folio)) {
3776 		if (PTR_ERR(folio) != -ENOENT)
3777 			return folio;
3778 
3779 		page_cache_sync_readahead(mapping, ra, NULL, index,
3780 				end_index - index);
3781 		folio = read_mapping_folio(mapping, index, NULL);
3782 		if (!IS_ERR(folio))
3783 			folio_lock(folio);
3784 	}
3785 
3786 	return folio;
3787 }
3788