xref: /linux/fs/ntfs/inode.c (revision d8f1df2e133f203cae3f458cba44efa327b093d9)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NTFS kernel inode handling.
4  *
5  * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
6  * Copyright (c) 2025 LG Electronics Co., Ltd.
7  */
8 
9 #include <linux/writeback.h>
10 #include <linux/seq_file.h>
11 
12 #include "lcnalloc.h"
13 #include "time.h"
14 #include "ntfs.h"
15 #include "index.h"
16 #include "attrlist.h"
17 #include "reparse.h"
18 #include "ea.h"
19 #include "attrib.h"
20 #include "iomap.h"
21 #include "object_id.h"
22 
23 /*
24  * ntfs_test_inode - compare two (possibly fake) inodes for equality
25  * @vi:		vfs inode which to test
26  * @data:	data which is being tested with
27  *
28  * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
29  * inode @vi for equality with the ntfs attribute @data.
30  *
31  * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
32  * @na->name and @na->name_len are then ignored.
33  *
34  * Return 1 if the attributes match and 0 if not.
35  *
36  * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
37  * allowed to sleep.
38  */
39 int ntfs_test_inode(struct inode *vi, void *data)
40 {
41 	struct ntfs_attr *na = data;
42 	struct ntfs_inode *ni = NTFS_I(vi);
43 
44 	if (vi->i_ino != na->mft_no)
45 		return 0;
46 
47 	/* If !NInoAttr(ni), @vi is a normal file or directory inode. */
48 	if (likely(!NInoAttr(ni))) {
49 		/* If not looking for a normal inode this is a mismatch. */
50 		if (unlikely(na->type != AT_UNUSED))
51 			return 0;
52 	} else {
53 		/* A fake inode describing an attribute. */
54 		if (ni->type != na->type)
55 			return 0;
56 		if (ni->name_len != na->name_len)
57 			return 0;
58 		if (na->name_len && memcmp(ni->name, na->name,
59 				na->name_len * sizeof(__le16)))
60 			return 0;
61 		if (!ni->ext.base_ntfs_ino)
62 			return 0;
63 	}
64 
65 	/* Match! */
66 	return 1;
67 }
68 
69 /*
70  * ntfs_init_locked_inode - initialize an inode
71  * @vi:		vfs inode to initialize
72  * @data:	data which to initialize @vi to
73  *
74  * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
75  * order to enable ntfs_test_inode() to do its work.
76  *
77  * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
78  * In that case, @na->name and @na->name_len should be set to NULL and 0,
79  * respectively. Although that is not strictly necessary as
80  * ntfs_read_locked_inode() will fill them in later.
81  *
82  * Return 0 on success and error.
83  *
84  * NOTE: This function runs with the inode->i_lock spin lock held so it is not
85  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
86  */
87 static int ntfs_init_locked_inode(struct inode *vi, void *data)
88 {
89 	struct ntfs_attr *na = data;
90 	struct ntfs_inode *ni = NTFS_I(vi);
91 
92 	vi->i_ino = (unsigned long)na->mft_no;
93 
94 	if (na->type == AT_INDEX_ALLOCATION)
95 		NInoSetMstProtected(ni);
96 	else
97 		ni->type = na->type;
98 
99 	ni->name = na->name;
100 	ni->name_len = na->name_len;
101 	ni->folio = NULL;
102 	atomic_set(&ni->count, 1);
103 
104 	/* If initializing a normal inode, we are done. */
105 	if (likely(na->type == AT_UNUSED))
106 		return 0;
107 
108 	/* It is a fake inode. */
109 	NInoSetAttr(ni);
110 
111 	/*
112 	 * We have I30 global constant as an optimization as it is the name
113 	 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
114 	 * allocation but that is ok. And most attributes are unnamed anyway,
115 	 * thus the fraction of named attributes with name != I30 is actually
116 	 * absolutely tiny.
117 	 */
118 	if (na->name_len && na->name != I30) {
119 		unsigned int i;
120 
121 		i = na->name_len * sizeof(__le16);
122 		ni->name = kmalloc(i + sizeof(__le16), GFP_ATOMIC);
123 		if (!ni->name)
124 			return -ENOMEM;
125 		memcpy(ni->name, na->name, i);
126 		ni->name[na->name_len] = 0;
127 	}
128 	return 0;
129 }
130 
131 static int ntfs_read_locked_inode(struct inode *vi);
132 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
133 static int ntfs_read_locked_index_inode(struct inode *base_vi,
134 		struct inode *vi);
135 
136 /*
137  * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
138  * @sb:		super block of mounted volume
139  * @mft_no:	mft record number / inode number to obtain
140  *
141  * Obtain the struct inode corresponding to a specific normal inode (i.e. a
142  * file or directory).
143  *
144  * If the inode is in the cache, it is just returned with an increased
145  * reference count. Otherwise, a new struct inode is allocated and initialized,
146  * and finally ntfs_read_locked_inode() is called to read in the inode and
147  * fill in the remainder of the inode structure.
148  *
149  * Return the struct inode on success. Check the return value with IS_ERR() and
150  * if true, the function failed and the error code is obtained from PTR_ERR().
151  */
152 struct inode *ntfs_iget(struct super_block *sb, u64 mft_no)
153 {
154 	struct inode *vi;
155 	int err;
156 	struct ntfs_attr na;
157 
158 	na.mft_no = mft_no;
159 	na.type = AT_UNUSED;
160 	na.name = NULL;
161 	na.name_len = 0;
162 
163 	vi = iget5_locked(sb, mft_no, ntfs_test_inode,
164 			ntfs_init_locked_inode, &na);
165 	if (unlikely(!vi))
166 		return ERR_PTR(-ENOMEM);
167 
168 	err = 0;
169 
170 	/* If this is a freshly allocated inode, need to read it now. */
171 	if (inode_state_read_once(vi) & I_NEW) {
172 		err = ntfs_read_locked_inode(vi);
173 		unlock_new_inode(vi);
174 	}
175 	/*
176 	 * There is no point in keeping bad inodes around if the failure was
177 	 * due to ENOMEM. We want to be able to retry again later.
178 	 */
179 	if (unlikely(err == -ENOMEM)) {
180 		iput(vi);
181 		vi = ERR_PTR(err);
182 	}
183 	return vi;
184 }
185 
186 /*
187  * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
188  * @base_vi:	vfs base inode containing the attribute
189  * @type:	attribute type
190  * @name:	Unicode name of the attribute (NULL if unnamed)
191  * @name_len:	length of @name in Unicode characters (0 if unnamed)
192  *
193  * Obtain the (fake) struct inode corresponding to the attribute specified by
194  * @type, @name, and @name_len, which is present in the base mft record
195  * specified by the vfs inode @base_vi.
196  *
197  * If the attribute inode is in the cache, it is just returned with an
198  * increased reference count. Otherwise, a new struct inode is allocated and
199  * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
200  * attribute and fill in the inode structure.
201  *
202  * Note, for index allocation attributes, you need to use ntfs_index_iget()
203  * instead of ntfs_attr_iget() as working with indices is a lot more complex.
204  *
205  * Return the struct inode of the attribute inode on success. Check the return
206  * value with IS_ERR() and if true, the function failed and the error code is
207  * obtained from PTR_ERR().
208  */
209 struct inode *ntfs_attr_iget(struct inode *base_vi, __le32 type,
210 		__le16 *name, u32 name_len)
211 {
212 	struct inode *vi;
213 	int err;
214 	struct ntfs_attr na;
215 
216 	/* Make sure no one calls ntfs_attr_iget() for indices. */
217 	WARN_ON(type == AT_INDEX_ALLOCATION);
218 
219 	na.mft_no = base_vi->i_ino;
220 	na.type = type;
221 	na.name = name;
222 	na.name_len = name_len;
223 
224 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
225 			ntfs_init_locked_inode, &na);
226 	if (unlikely(!vi))
227 		return ERR_PTR(-ENOMEM);
228 	err = 0;
229 
230 	/* If this is a freshly allocated inode, need to read it now. */
231 	if (inode_state_read_once(vi) & I_NEW) {
232 		err = ntfs_read_locked_attr_inode(base_vi, vi);
233 		unlock_new_inode(vi);
234 	}
235 	/*
236 	 * There is no point in keeping bad attribute inodes around. This also
237 	 * simplifies things in that we never need to check for bad attribute
238 	 * inodes elsewhere.
239 	 */
240 	if (unlikely(err)) {
241 		iput(vi);
242 		vi = ERR_PTR(err);
243 	}
244 	return vi;
245 }
246 
247 /*
248  * ntfs_index_iget - obtain a struct inode corresponding to an index
249  * @base_vi:	vfs base inode containing the index related attributes
250  * @name:	Unicode name of the index
251  * @name_len:	length of @name in Unicode characters
252  *
253  * Obtain the (fake) struct inode corresponding to the index specified by @name
254  * and @name_len, which is present in the base mft record specified by the vfs
255  * inode @base_vi.
256  *
257  * If the index inode is in the cache, it is just returned with an increased
258  * reference count.  Otherwise, a new struct inode is allocated and
259  * initialized, and finally ntfs_read_locked_index_inode() is called to read
260  * the index related attributes and fill in the inode structure.
261  *
262  * Return the struct inode of the index inode on success. Check the return
263  * value with IS_ERR() and if true, the function failed and the error code is
264  * obtained from PTR_ERR().
265  */
266 struct inode *ntfs_index_iget(struct inode *base_vi, __le16 *name,
267 		u32 name_len)
268 {
269 	struct inode *vi;
270 	int err;
271 	struct ntfs_attr na;
272 
273 	na.mft_no = base_vi->i_ino;
274 	na.type = AT_INDEX_ALLOCATION;
275 	na.name = name;
276 	na.name_len = name_len;
277 
278 	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
279 			ntfs_init_locked_inode, &na);
280 	if (unlikely(!vi))
281 		return ERR_PTR(-ENOMEM);
282 
283 	err = 0;
284 
285 	/* If this is a freshly allocated inode, need to read it now. */
286 	if (inode_state_read_once(vi) & I_NEW) {
287 		err = ntfs_read_locked_index_inode(base_vi, vi);
288 		unlock_new_inode(vi);
289 	}
290 	/*
291 	 * There is no point in keeping bad index inodes around.  This also
292 	 * simplifies things in that we never need to check for bad index
293 	 * inodes elsewhere.
294 	 */
295 	if (unlikely(err)) {
296 		iput(vi);
297 		vi = ERR_PTR(err);
298 	}
299 	return vi;
300 }
301 
302 struct inode *ntfs_alloc_big_inode(struct super_block *sb)
303 {
304 	struct ntfs_inode *ni;
305 
306 	ntfs_debug("Entering.");
307 	ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
308 	if (likely(ni != NULL)) {
309 		ni->state = 0;
310 		ni->type = 0;
311 		ni->mft_no = 0;
312 		return VFS_I(ni);
313 	}
314 	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
315 	return NULL;
316 }
317 
318 void ntfs_free_big_inode(struct inode *inode)
319 {
320 	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
321 }
322 
323 static int ntfs_non_resident_dealloc_clusters(struct ntfs_inode *ni)
324 {
325 	struct super_block *sb = ni->vol->sb;
326 	struct ntfs_attr_search_ctx *actx;
327 	int err = 0;
328 
329 	actx = ntfs_attr_get_search_ctx(ni, NULL);
330 	if (!actx)
331 		return -ENOMEM;
332 	WARN_ON(actx->mrec->link_count != 0);
333 
334 	/**
335 	 * ntfs_truncate_vfs cannot be called in evict() context due
336 	 * to some limitations, which are the @ni vfs inode is marked
337 	 * with I_FREEING, and etc.
338 	 */
339 	if (NInoRunlistDirty(ni)) {
340 		err = ntfs_cluster_free_from_rl(ni->vol, ni->runlist.rl);
341 		if (err)
342 			ntfs_error(sb,
343 					"Failed to free clusters. Leaving inconsistent metadata.\n");
344 	}
345 
346 	while ((err = ntfs_attrs_walk(actx)) == 0) {
347 		if (actx->attr->non_resident &&
348 				(!NInoRunlistDirty(ni) || actx->attr->type != AT_DATA)) {
349 			struct runlist_element *rl;
350 			size_t new_rl_count;
351 
352 			rl = ntfs_mapping_pairs_decompress(ni->vol, actx->attr, NULL,
353 					&new_rl_count);
354 			if (IS_ERR(rl)) {
355 				err = PTR_ERR(rl);
356 				ntfs_error(sb,
357 					   "Failed to decompress runlist. Leaving inconsistent metadata.\n");
358 				continue;
359 			}
360 
361 			err = ntfs_cluster_free_from_rl(ni->vol, rl);
362 			if (err)
363 				ntfs_error(sb,
364 					   "Failed to free attribute clusters. Leaving inconsistent metadata.\n");
365 			kvfree(rl);
366 		}
367 	}
368 
369 	ntfs_release_dirty_clusters(ni->vol, ni->i_dealloc_clusters);
370 	ntfs_attr_put_search_ctx(actx);
371 	return err;
372 }
373 
374 int ntfs_drop_big_inode(struct inode *inode)
375 {
376 	struct ntfs_inode *ni = NTFS_I(inode);
377 
378 	if (!inode_unhashed(inode) && inode_state_read_once(inode) & I_SYNC) {
379 		if (ni->type == AT_DATA || ni->type == AT_INDEX_ALLOCATION) {
380 			if (!inode->i_nlink) {
381 				struct ntfs_inode *ni = NTFS_I(inode);
382 
383 				if (ni->data_size == 0)
384 					return 0;
385 
386 				/* To avoid evict_inode call simultaneously */
387 				atomic_inc(&inode->i_count);
388 				spin_unlock(&inode->i_lock);
389 
390 				truncate_setsize(VFS_I(ni), 0);
391 				ntfs_truncate_vfs(VFS_I(ni), 0, 1);
392 
393 				sb_start_intwrite(inode->i_sb);
394 				i_size_write(inode, 0);
395 				ni->allocated_size = ni->initialized_size = ni->data_size = 0;
396 
397 				truncate_inode_pages_final(inode->i_mapping);
398 				sb_end_intwrite(inode->i_sb);
399 
400 				spin_lock(&inode->i_lock);
401 				atomic_dec(&inode->i_count);
402 			}
403 		}
404 		return 0;
405 	}
406 
407 	return inode_generic_drop(inode);
408 }
409 
410 static inline struct ntfs_inode *ntfs_alloc_extent_inode(void)
411 {
412 	struct ntfs_inode *ni;
413 
414 	ntfs_debug("Entering.");
415 	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
416 	if (likely(ni != NULL)) {
417 		ni->state = 0;
418 		return ni;
419 	}
420 	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
421 	return NULL;
422 }
423 
424 static void ntfs_destroy_extent_inode(struct ntfs_inode *ni)
425 {
426 	ntfs_debug("Entering.");
427 
428 	if (!atomic_dec_and_test(&ni->count))
429 		WARN_ON(1);
430 	if (ni->folio)
431 		folio_put(ni->folio);
432 	kfree(ni->mrec);
433 	kmem_cache_free(ntfs_inode_cache, ni);
434 }
435 
436 static struct lock_class_key attr_inode_mrec_lock_class;
437 static struct lock_class_key attr_list_inode_mrec_lock_class;
438 
439 /*
440  * The attribute runlist lock has separate locking rules from the
441  * normal runlist lock, so split the two lock-classes:
442  */
443 static struct lock_class_key attr_list_rl_lock_class;
444 
445 /*
446  * __ntfs_init_inode - initialize ntfs specific part of an inode
447  * @sb:		super block of mounted volume
448  * @ni:		freshly allocated ntfs inode which to initialize
449  *
450  * Initialize an ntfs inode to defaults.
451  *
452  * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
453  * untouched. Make sure to initialize them elsewhere.
454  */
455 void __ntfs_init_inode(struct super_block *sb, struct ntfs_inode *ni)
456 {
457 	ntfs_debug("Entering.");
458 	rwlock_init(&ni->size_lock);
459 	ni->initialized_size = ni->allocated_size = 0;
460 	ni->seq_no = 0;
461 	atomic_set(&ni->count, 1);
462 	ni->vol = NTFS_SB(sb);
463 	ntfs_init_runlist(&ni->runlist);
464 	mutex_init(&ni->mrec_lock);
465 	if (ni->type == AT_ATTRIBUTE_LIST) {
466 		lockdep_set_class(&ni->mrec_lock,
467 				  &attr_list_inode_mrec_lock_class);
468 		lockdep_set_class(&ni->runlist.lock,
469 				  &attr_list_rl_lock_class);
470 	} else if (NInoAttr(ni)) {
471 		lockdep_set_class(&ni->mrec_lock,
472 				  &attr_inode_mrec_lock_class);
473 	}
474 
475 	ni->folio = NULL;
476 	ni->folio_ofs = 0;
477 	ni->mrec = NULL;
478 	ni->attr_list_size = 0;
479 	ni->attr_list = NULL;
480 	ni->itype.index.block_size = 0;
481 	ni->itype.index.vcn_size = 0;
482 	ni->itype.index.collation_rule = 0;
483 	ni->itype.index.block_size_bits = 0;
484 	ni->itype.index.vcn_size_bits = 0;
485 	mutex_init(&ni->extent_lock);
486 	ni->nr_extents = 0;
487 	ni->ext.base_ntfs_ino = NULL;
488 	ni->flags = 0;
489 	ni->mft_lcn[0] = LCN_RL_NOT_MAPPED;
490 	ni->mft_lcn_count = 0;
491 	ni->reparse_tag = 0;
492 	ni->reparse_flags = 0;
493 	ni->target = NULL;
494 	ni->i_dealloc_clusters = 0;
495 }
496 
497 /*
498  * Extent inodes get MFT-mapped in a nested way, while the base inode
499  * is still mapped. Teach this nesting to the lock validator by creating
500  * a separate class for nested inode's mrec_lock's:
501  */
502 static struct lock_class_key extent_inode_mrec_lock_key;
503 
504 inline struct ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
505 		u64 mft_no)
506 {
507 	struct ntfs_inode *ni = ntfs_alloc_extent_inode();
508 
509 	ntfs_debug("Entering.");
510 	if (likely(ni != NULL)) {
511 		__ntfs_init_inode(sb, ni);
512 		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
513 		ni->mft_no = mft_no;
514 		ni->type = AT_UNUSED;
515 		ni->name = NULL;
516 		ni->name_len = 0;
517 	}
518 	return ni;
519 }
520 
521 /*
522  * ntfs_is_extended_system_file - check if a file is in the $Extend directory
523  * @ctx:	initialized attribute search context
524  *
525  * Search all file name attributes in the inode described by the attribute
526  * search context @ctx and check if any of the names are in the $Extend system
527  * directory.
528  *
529  * Return values:
530  *	   3: file is $ObjId in $Extend directory
531  *	   2: file is $Reparse in $Extend directory
532  *	   1: file is in $Extend directory
533  *	   0: file is not in $Extend directory
534  *    -errno: failed to determine if the file is in the $Extend directory
535  */
536 static int ntfs_is_extended_system_file(struct ntfs_attr_search_ctx *ctx)
537 {
538 	int nr_links, err;
539 
540 	/* Restart search. */
541 	ntfs_attr_reinit_search_ctx(ctx);
542 
543 	/* Get number of hard links. */
544 	nr_links = le16_to_cpu(ctx->mrec->link_count);
545 
546 	/* Loop through all hard links. */
547 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
548 			ctx))) {
549 		struct file_name_attr *file_name_attr;
550 		struct attr_record *attr = ctx->attr;
551 		u8 *p, *p2;
552 
553 		nr_links--;
554 		/*
555 		 * Maximum sanity checking as we are called on an inode that
556 		 * we suspect might be corrupt.
557 		 */
558 		p = (u8 *)attr + le32_to_cpu(attr->length);
559 		if (p < (u8 *)ctx->mrec || (u8 *)p > (u8 *)ctx->mrec +
560 				le32_to_cpu(ctx->mrec->bytes_in_use)) {
561 err_corrupt_attr:
562 			ntfs_error(ctx->ntfs_ino->vol->sb,
563 					"Corrupt file name attribute. You should run chkdsk.");
564 			return -EIO;
565 		}
566 		if (attr->non_resident) {
567 			ntfs_error(ctx->ntfs_ino->vol->sb,
568 					"Non-resident file name. You should run chkdsk.");
569 			return -EIO;
570 		}
571 		if (attr->flags) {
572 			ntfs_error(ctx->ntfs_ino->vol->sb,
573 					"File name with invalid flags. You should run chkdsk.");
574 			return -EIO;
575 		}
576 		if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
577 			ntfs_error(ctx->ntfs_ino->vol->sb,
578 					"Unindexed file name. You should run chkdsk.");
579 			return -EIO;
580 		}
581 		file_name_attr = (struct file_name_attr *)((u8 *)attr +
582 				le16_to_cpu(attr->data.resident.value_offset));
583 		p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
584 		if (p2 < (u8 *)attr || p2 > p)
585 			goto err_corrupt_attr;
586 		/* This attribute is ok, but is it in the $Extend directory? */
587 		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) {
588 			unsigned char *s;
589 
590 			s = ntfs_attr_name_get(ctx->ntfs_ino->vol,
591 					file_name_attr->file_name,
592 					file_name_attr->file_name_length);
593 			if (!s)
594 				return 1;
595 			if (!strcmp("$Reparse", s)) {
596 				ntfs_attr_name_free(&s);
597 				return 2; /* it's reparse point file */
598 			}
599 			if (!strcmp("$ObjId", s)) {
600 				ntfs_attr_name_free(&s);
601 				return 3; /* it's object id file */
602 			}
603 			ntfs_attr_name_free(&s);
604 			return 1;	/* YES, it's an extended system file. */
605 		}
606 	}
607 	if (unlikely(err != -ENOENT))
608 		return err;
609 	if (unlikely(nr_links)) {
610 		ntfs_error(ctx->ntfs_ino->vol->sb,
611 			"Inode hard link count doesn't match number of name attributes. You should run chkdsk.");
612 		return -EIO;
613 	}
614 	return 0;	/* NO, it is not an extended system file. */
615 }
616 
617 static struct lock_class_key ntfs_dir_inval_lock_key;
618 
619 void ntfs_set_vfs_operations(struct inode *inode, mode_t mode, dev_t dev)
620 {
621 	if (S_ISDIR(mode)) {
622 		if (!NInoAttr(NTFS_I(inode))) {
623 			inode->i_op = &ntfs_dir_inode_ops;
624 			inode->i_fop = &ntfs_dir_ops;
625 		}
626 		inode->i_mapping->a_ops = &ntfs_aops;
627 		lockdep_set_class(&inode->i_mapping->invalidate_lock,
628 				  &ntfs_dir_inval_lock_key);
629 	} else if (S_ISLNK(mode)) {
630 		inode->i_op = &ntfs_symlink_inode_operations;
631 		inode->i_mapping->a_ops = &ntfs_aops;
632 	} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
633 		inode->i_op = &ntfs_special_inode_operations;
634 		init_special_inode(inode, inode->i_mode, dev);
635 	} else {
636 		if (!NInoAttr(NTFS_I(inode))) {
637 			inode->i_op = &ntfs_file_inode_ops;
638 			inode->i_fop = &ntfs_file_ops;
639 		}
640 		if (inode->i_ino == FILE_MFT)
641 			inode->i_mapping->a_ops = &ntfs_mft_aops;
642 		else
643 			inode->i_mapping->a_ops = &ntfs_aops;
644 	}
645 }
646 
647 /*
648  * ntfs_read_locked_inode - read an inode from its device
649  * @vi:		inode to read
650  *
651  * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
652  * described by @vi into memory from the device.
653  *
654  * The only fields in @vi that we need to/can look at when the function is
655  * called are i_sb, pointing to the mounted device's super block, and i_ino,
656  * the number of the inode to load.
657  *
658  * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
659  * for reading and sets up the necessary @vi fields as well as initializing
660  * the ntfs inode.
661  *
662  * Q: What locks are held when the function is called?
663  * A: i_state has I_NEW set, hence the inode is locked, also
664  *    i_count is set to 1, so it is not going to go away
665  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
666  *    is allowed to write to them. We should of course be honouring them but
667  *    we need to do that using the IS_* macros defined in include/linux/fs.h.
668  *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
669  *
670  * Return 0 on success and -errno on error.
671  */
672 static int ntfs_read_locked_inode(struct inode *vi)
673 {
674 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
675 	struct ntfs_inode *ni = NTFS_I(vi);
676 	struct mft_record *m;
677 	struct attr_record *a;
678 	struct standard_information *si;
679 	struct ntfs_attr_search_ctx *ctx;
680 	int err = 0;
681 	__le16 *name = I30;
682 	unsigned int name_len = 4, flags = 0;
683 	int extend_sys = 0;
684 	dev_t dev = 0;
685 	bool vol_err = true;
686 
687 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
688 
689 	if (uid_valid(vol->uid)) {
690 		vi->i_uid = vol->uid;
691 		flags |= NTFS_VOL_UID;
692 	} else
693 		vi->i_uid = GLOBAL_ROOT_UID;
694 
695 	if (gid_valid(vol->gid)) {
696 		vi->i_gid = vol->gid;
697 		flags |= NTFS_VOL_GID;
698 	} else
699 		vi->i_gid = GLOBAL_ROOT_GID;
700 
701 	vi->i_mode = 0777;
702 
703 	/*
704 	 * Initialize the ntfs specific part of @vi special casing
705 	 * FILE_MFT which we need to do at mount time.
706 	 */
707 	if (vi->i_ino != FILE_MFT)
708 		ntfs_init_big_inode(vi);
709 
710 	m = map_mft_record(ni);
711 	if (IS_ERR(m)) {
712 		err = PTR_ERR(m);
713 		goto err_out;
714 	}
715 
716 	ctx = ntfs_attr_get_search_ctx(ni, m);
717 	if (!ctx) {
718 		err = -ENOMEM;
719 		goto unm_err_out;
720 	}
721 
722 	if (!(m->flags & MFT_RECORD_IN_USE)) {
723 		err = -ENOENT;
724 		vol_err = false;
725 		goto unm_err_out;
726 	}
727 
728 	if (m->base_mft_record) {
729 		ntfs_error(vi->i_sb, "Inode is an extent inode!");
730 		goto unm_err_out;
731 	}
732 
733 	/* Transfer information from mft record into vfs and ntfs inodes. */
734 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
735 
736 	if (le16_to_cpu(m->link_count) < 1) {
737 		ntfs_error(vi->i_sb, "Inode link count is 0!");
738 		goto unm_err_out;
739 	}
740 	set_nlink(vi, le16_to_cpu(m->link_count));
741 
742 	/* If read-only, no one gets write permissions. */
743 	if (IS_RDONLY(vi))
744 		vi->i_mode &= ~0222;
745 
746 	/*
747 	 * Find the standard information attribute in the mft record. At this
748 	 * stage we haven't setup the attribute list stuff yet, so this could
749 	 * in fact fail if the standard information is in an extent record, but
750 	 * I don't think this actually ever happens.
751 	 */
752 	ntfs_attr_reinit_search_ctx(ctx);
753 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
754 			ctx);
755 	if (unlikely(err)) {
756 		if (err == -ENOENT)
757 			ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is missing.");
758 		goto unm_err_out;
759 	}
760 	a = ctx->attr;
761 	/* Get the standard information attribute value. */
762 	si = (struct standard_information *)((u8 *)a +
763 			le16_to_cpu(a->data.resident.value_offset));
764 
765 	/* Transfer information from the standard information into vi. */
766 	/*
767 	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
768 	 * but they are close enough, and in the end it doesn't really matter
769 	 * that much...
770 	 */
771 	/*
772 	 * mtime is the last change of the data within the file. Not changed
773 	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
774 	 */
775 	ni->i_crtime = ntfs2utc(si->creation_time);
776 
777 	inode_set_mtime_to_ts(vi, ntfs2utc(si->last_data_change_time));
778 	/*
779 	 * ctime is the last change of the metadata of the file. This obviously
780 	 * always changes, when mtime is changed. ctime can be changed on its
781 	 * own, mtime is then not changed, e.g. when a file is renamed.
782 	 */
783 	inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
784 	/*
785 	 * Last access to the data within the file. Not changed during a rename
786 	 * for example but changed whenever the file is written to.
787 	 */
788 	inode_set_atime_to_ts(vi, ntfs2utc(si->last_access_time));
789 	ni->flags = si->file_attributes;
790 
791 	/* Find the attribute list attribute if present. */
792 	ntfs_attr_reinit_search_ctx(ctx);
793 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
794 	if (err) {
795 		if (unlikely(err != -ENOENT)) {
796 			ntfs_error(vi->i_sb, "Failed to lookup attribute list attribute.");
797 			goto unm_err_out;
798 		}
799 	} else {
800 		if (vi->i_ino == FILE_MFT)
801 			goto skip_attr_list_load;
802 		ntfs_debug("Attribute list found in inode 0x%llx.", ni->mft_no);
803 		NInoSetAttrList(ni);
804 		a = ctx->attr;
805 		if (a->flags & ATTR_COMPRESSION_MASK) {
806 			ntfs_error(vi->i_sb,
807 				"Attribute list attribute is compressed.");
808 			goto unm_err_out;
809 		}
810 		if (a->flags & ATTR_IS_ENCRYPTED ||
811 				a->flags & ATTR_IS_SPARSE) {
812 			if (a->non_resident) {
813 				ntfs_error(vi->i_sb,
814 					"Non-resident attribute list attribute is encrypted/sparse.");
815 				goto unm_err_out;
816 			}
817 			ntfs_warning(vi->i_sb,
818 				"Resident attribute list attribute in inode 0x%llx is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.",
819 				ni->mft_no);
820 		}
821 		/* Now allocate memory for the attribute list. */
822 		ni->attr_list_size = (u32)ntfs_attr_size(a);
823 		if (!ni->attr_list_size) {
824 			ntfs_error(vi->i_sb, "Attr_list_size is zero");
825 			goto unm_err_out;
826 		}
827 		ni->attr_list = kvzalloc(ni->attr_list_size, GFP_NOFS);
828 		if (!ni->attr_list) {
829 			ntfs_error(vi->i_sb,
830 				"Not enough memory to allocate buffer for attribute list.");
831 			err = -ENOMEM;
832 			goto unm_err_out;
833 		}
834 		if (a->non_resident) {
835 			NInoSetAttrListNonResident(ni);
836 			if (a->data.non_resident.lowest_vcn) {
837 				ntfs_error(vi->i_sb, "Attribute list has non zero lowest_vcn.");
838 				goto unm_err_out;
839 			}
840 
841 			/* Now load the attribute list. */
842 			err = load_attribute_list(ni, ni->attr_list, ni->attr_list_size);
843 			if (err) {
844 				ntfs_error(vi->i_sb, "Failed to load attribute list attribute.");
845 				goto unm_err_out;
846 			}
847 		} else /* if (!a->non_resident) */ {
848 			/* Now copy the attribute list. */
849 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
850 					a->data.resident.value_offset),
851 					le32_to_cpu(
852 					a->data.resident.value_length));
853 			/* A resident list is not validated on load; check it now. */
854 			if (!ntfs_attr_list_is_valid(ni->attr_list,
855 						     ni->attr_list_size)) {
856 				ntfs_error(vi->i_sb, "Corrupt attribute list.");
857 				goto unm_err_out;
858 			}
859 		}
860 	}
861 skip_attr_list_load:
862 	err = ntfs_attr_lookup(AT_EA_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx);
863 	if (!err) {
864 		NInoSetHasEA(ni);
865 		ntfs_ea_get_wsl_inode(vi, &dev, flags);
866 	}
867 
868 	if (ni->flags & FILE_ATTR_REPARSE_POINT) {
869 		unsigned int mode;
870 
871 		mode = ntfs_make_symlink(ni);
872 		if (mode)
873 			vi->i_mode |= mode;
874 		else {
875 			vi->i_mode &= ~S_IFLNK;
876 			if (m->flags & MFT_RECORD_IS_DIRECTORY)
877 				vi->i_mode |= S_IFDIR;
878 			else
879 				vi->i_mode |= S_IFREG;
880 		}
881 	} else if (m->flags & MFT_RECORD_IS_DIRECTORY) {
882 		vi->i_mode |= S_IFDIR;
883 	} else {
884 		vi->i_mode |= S_IFREG;
885 	}
886 
887 	if (S_ISDIR(vi->i_mode)) {
888 		/*
889 		 * Apply the directory permissions mask set in the mount
890 		 * options.
891 		 */
892 		vi->i_mode &= ~vol->dmask;
893 		/* Things break without this kludge! */
894 		if (vi->i_nlink > 1)
895 			set_nlink(vi, 1);
896 	} else {
897 		/* Apply the file permissions mask set in the mount options. */
898 		vi->i_mode &= ~vol->fmask;
899 	}
900 
901 	/*
902 	 * If an attribute list is present we now have the attribute list value
903 	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
904 	 */
905 	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
906 		struct index_root *ir;
907 
908 view_index_meta:
909 		/* It is a directory, find index root attribute. */
910 		ntfs_attr_reinit_search_ctx(ctx);
911 		err = ntfs_attr_lookup(AT_INDEX_ROOT, name, name_len, CASE_SENSITIVE,
912 				0, NULL, 0, ctx);
913 		if (unlikely(err)) {
914 			if (err == -ENOENT)
915 				ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
916 			goto unm_err_out;
917 		}
918 		a = ctx->attr;
919 		/* Set up the state. */
920 		if (unlikely(a->non_resident)) {
921 			ntfs_error(vol->sb,
922 				"$INDEX_ROOT attribute is not resident.");
923 			goto unm_err_out;
924 		}
925 		/* Ensure the attribute name is placed before the value. */
926 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
927 				le16_to_cpu(a->data.resident.value_offset)))) {
928 			ntfs_error(vol->sb,
929 				"$INDEX_ROOT attribute name is placed after the attribute value.");
930 			goto unm_err_out;
931 		}
932 		/*
933 		 * Compressed/encrypted index root just means that the newly
934 		 * created files in that directory should be created compressed/
935 		 * encrypted. However index root cannot be both compressed and
936 		 * encrypted.
937 		 */
938 		if (a->flags & ATTR_COMPRESSION_MASK) {
939 			NInoSetCompressed(ni);
940 			ni->flags |= FILE_ATTR_COMPRESSED;
941 		}
942 		if (a->flags & ATTR_IS_ENCRYPTED) {
943 			if (a->flags & ATTR_COMPRESSION_MASK) {
944 				ntfs_error(vi->i_sb, "Found encrypted and compressed attribute.");
945 				goto unm_err_out;
946 			}
947 			NInoSetEncrypted(ni);
948 			ni->flags |= FILE_ATTR_ENCRYPTED;
949 		}
950 		if (a->flags & ATTR_IS_SPARSE) {
951 			NInoSetSparse(ni);
952 			ni->flags |= FILE_ATTR_SPARSE_FILE;
953 		}
954 		ir = (struct index_root *)((u8 *)a +
955 				le16_to_cpu(a->data.resident.value_offset));
956 		if (ntfs_index_root_inconsistent(ni->vol, a, ir, ni->mft_no) ||
957 		    ntfs_index_entries_inconsistent(ni->vol, &ir->index,
958 						    ir->collation_rule, ni->mft_no)) {
959 			ntfs_error(vi->i_sb, "Directory index is corrupt.");
960 			goto unm_err_out;
961 		}
962 
963 		if (extend_sys) {
964 			if (ir->type) {
965 				ntfs_error(vi->i_sb, "Indexed attribute is not zero.");
966 				goto unm_err_out;
967 			}
968 		} else {
969 			if (ir->type != AT_FILE_NAME) {
970 				ntfs_error(vi->i_sb, "Indexed attribute is not $FILE_NAME.");
971 				goto unm_err_out;
972 			}
973 
974 			if (ir->collation_rule != COLLATION_FILE_NAME) {
975 				ntfs_error(vi->i_sb,
976 					"Index collation rule is not COLLATION_FILE_NAME.");
977 				goto unm_err_out;
978 			}
979 		}
980 
981 		ni->itype.index.collation_rule = ir->collation_rule;
982 		ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
983 		if (ni->itype.index.block_size &
984 				(ni->itype.index.block_size - 1)) {
985 			ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
986 					ni->itype.index.block_size);
987 			goto unm_err_out;
988 		}
989 		if (ni->itype.index.block_size > PAGE_SIZE) {
990 			ntfs_error(vi->i_sb,
991 				"Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
992 				ni->itype.index.block_size,
993 				PAGE_SIZE);
994 			err = -EOPNOTSUPP;
995 			goto unm_err_out;
996 		}
997 		if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
998 			ntfs_error(vi->i_sb,
999 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
1000 				ni->itype.index.block_size,
1001 				NTFS_BLOCK_SIZE);
1002 			err = -EOPNOTSUPP;
1003 			goto unm_err_out;
1004 		}
1005 		ni->itype.index.block_size_bits =
1006 				ffs(ni->itype.index.block_size) - 1;
1007 		/* Determine the size of a vcn in the directory index. */
1008 		if (vol->cluster_size <= ni->itype.index.block_size) {
1009 			ni->itype.index.vcn_size = vol->cluster_size;
1010 			ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1011 		} else {
1012 			ni->itype.index.vcn_size = vol->sector_size;
1013 			ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1014 		}
1015 
1016 		/* Setup the index allocation attribute, even if not present. */
1017 		ni->type = AT_INDEX_ROOT;
1018 		ni->name = name;
1019 		ni->name_len = name_len;
1020 		vi->i_size = ni->initialized_size = ni->data_size =
1021 			le32_to_cpu(a->data.resident.value_length);
1022 		ni->allocated_size = (ni->data_size + 7) & ~7;
1023 		/* We are done with the mft record, so we release it. */
1024 		ntfs_attr_put_search_ctx(ctx);
1025 		unmap_mft_record(ni);
1026 		m = NULL;
1027 		ctx = NULL;
1028 		/* Setup the operations for this inode. */
1029 		ntfs_set_vfs_operations(vi, vi->i_mode, 0);
1030 		if (ir->index.flags & LARGE_INDEX)
1031 			NInoSetIndexAllocPresent(ni);
1032 	} else {
1033 		/* It is a file. */
1034 		ntfs_attr_reinit_search_ctx(ctx);
1035 
1036 		/* Setup the data attribute, even if not present. */
1037 		ni->type = AT_DATA;
1038 		ni->name = AT_UNNAMED;
1039 		ni->name_len = 0;
1040 
1041 		/* Find first extent of the unnamed data attribute. */
1042 		err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
1043 		if (unlikely(err)) {
1044 			vi->i_size = ni->initialized_size =
1045 					ni->allocated_size = 0;
1046 			if (err != -ENOENT) {
1047 				ntfs_error(vi->i_sb, "Failed to lookup $DATA attribute.");
1048 				goto unm_err_out;
1049 			}
1050 			/*
1051 			 * FILE_Secure does not have an unnamed $DATA
1052 			 * attribute, so we special case it here.
1053 			 */
1054 			if (vi->i_ino == FILE_Secure)
1055 				goto no_data_attr_special_case;
1056 			/*
1057 			 * Most if not all the system files in the $Extend
1058 			 * system directory do not have unnamed data
1059 			 * attributes so we need to check if the parent
1060 			 * directory of the file is FILE_Extend and if it is
1061 			 * ignore this error. To do this we need to get the
1062 			 * name of this inode from the mft record as the name
1063 			 * contains the back reference to the parent directory.
1064 			 */
1065 			extend_sys = ntfs_is_extended_system_file(ctx);
1066 			if (extend_sys > 0) {
1067 				if (m->flags & MFT_RECORD_IS_VIEW_INDEX) {
1068 					if (extend_sys == 2) {
1069 						name = reparse_index_name;
1070 						name_len = 2;
1071 						goto view_index_meta;
1072 					} else if (extend_sys == 3) {
1073 						name = objid_index_name;
1074 						name_len = 2;
1075 						goto view_index_meta;
1076 					}
1077 				}
1078 				goto no_data_attr_special_case;
1079 			}
1080 
1081 			err = extend_sys;
1082 			ntfs_error(vi->i_sb, "$DATA attribute is missing, err : %d", err);
1083 			goto unm_err_out;
1084 		}
1085 		a = ctx->attr;
1086 		/* Setup the state. */
1087 		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1088 			if (a->flags & ATTR_COMPRESSION_MASK) {
1089 				NInoSetCompressed(ni);
1090 				ni->flags |= FILE_ATTR_COMPRESSED;
1091 				if (vol->cluster_size > 4096) {
1092 					ntfs_error(vi->i_sb,
1093 						"Found compressed data but compression is disabled due to cluster size (%i) > 4kiB.",
1094 						vol->cluster_size);
1095 					goto unm_err_out;
1096 				}
1097 				if ((a->flags & ATTR_COMPRESSION_MASK)
1098 						!= ATTR_IS_COMPRESSED) {
1099 					ntfs_error(vi->i_sb,
1100 						"Found unknown compression method or corrupt file.");
1101 					goto unm_err_out;
1102 				}
1103 			}
1104 			if (a->flags & ATTR_IS_SPARSE) {
1105 				NInoSetSparse(ni);
1106 				ni->flags |= FILE_ATTR_SPARSE_FILE;
1107 			}
1108 		}
1109 		if (a->flags & ATTR_IS_ENCRYPTED) {
1110 			if (NInoCompressed(ni)) {
1111 				ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1112 				goto unm_err_out;
1113 			}
1114 			NInoSetEncrypted(ni);
1115 			ni->flags |= FILE_ATTR_ENCRYPTED;
1116 		}
1117 		if (a->non_resident) {
1118 			NInoSetNonResident(ni);
1119 			if (NInoCompressed(ni) || NInoSparse(ni)) {
1120 				if (NInoCompressed(ni) &&
1121 				    a->data.non_resident.compression_unit != 4) {
1122 					ntfs_error(vi->i_sb,
1123 						"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1124 						a->data.non_resident.compression_unit);
1125 					err = -EOPNOTSUPP;
1126 					goto unm_err_out;
1127 				}
1128 
1129 				if (NInoSparse(ni) &&
1130 				    a->data.non_resident.compression_unit &&
1131 				    a->data.non_resident.compression_unit !=
1132 				     vol->sparse_compression_unit) {
1133 					ntfs_error(vi->i_sb,
1134 						   "Found non-standard compression unit (%u instead of 0 or %d).  Cannot handle this.",
1135 						   a->data.non_resident.compression_unit,
1136 						   vol->sparse_compression_unit);
1137 					err = -EOPNOTSUPP;
1138 					goto unm_err_out;
1139 				}
1140 
1141 
1142 				if (a->data.non_resident.compression_unit) {
1143 					ni->itype.compressed.block_size = 1U <<
1144 							(a->data.non_resident.compression_unit +
1145 							vol->cluster_size_bits);
1146 					ni->itype.compressed.block_size_bits =
1147 							ffs(ni->itype.compressed.block_size) - 1;
1148 					ni->itype.compressed.block_clusters =
1149 							1U << a->data.non_resident.compression_unit;
1150 				} else {
1151 					ni->itype.compressed.block_size = 0;
1152 					ni->itype.compressed.block_size_bits =
1153 							0;
1154 					ni->itype.compressed.block_clusters =
1155 							0;
1156 				}
1157 				ni->itype.compressed.size = le64_to_cpu(
1158 						a->data.non_resident.compressed_size);
1159 			}
1160 			if (a->data.non_resident.lowest_vcn) {
1161 				ntfs_error(vi->i_sb,
1162 					"First extent of $DATA attribute has non zero lowest_vcn.");
1163 				goto unm_err_out;
1164 			}
1165 			vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1166 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1167 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1168 		} else { /* Resident attribute. */
1169 			vi->i_size = ni->data_size = ni->initialized_size = le32_to_cpu(
1170 					a->data.resident.value_length);
1171 			ni->allocated_size = le32_to_cpu(a->length) -
1172 					le16_to_cpu(
1173 					a->data.resident.value_offset);
1174 			if (vi->i_size > ni->allocated_size) {
1175 				ntfs_error(vi->i_sb,
1176 					"Resident data attribute is corrupt (size exceeds allocation).");
1177 				goto unm_err_out;
1178 			}
1179 		}
1180 no_data_attr_special_case:
1181 		/* We are done with the mft record, so we release it. */
1182 		ntfs_attr_put_search_ctx(ctx);
1183 		unmap_mft_record(ni);
1184 		m = NULL;
1185 		ctx = NULL;
1186 		/* Setup the operations for this inode. */
1187 		ntfs_set_vfs_operations(vi, vi->i_mode, dev);
1188 	}
1189 
1190 	if (NVolSysImmutable(vol) && (ni->flags & FILE_ATTR_SYSTEM) &&
1191 	    !S_ISFIFO(vi->i_mode) && !S_ISSOCK(vi->i_mode) && !S_ISLNK(vi->i_mode))
1192 		vi->i_flags |= S_IMMUTABLE;
1193 
1194 	/*
1195 	 * The number of 512-byte blocks used on disk (for stat). This is in so
1196 	 * far inaccurate as it doesn't account for any named streams or other
1197 	 * special non-resident attributes, but that is how Windows works, too,
1198 	 * so we are at least consistent with Windows, if not entirely
1199 	 * consistent with the Linux Way. Doing it the Linux Way would cause a
1200 	 * significant slowdown as it would involve iterating over all
1201 	 * attributes in the mft record and adding the allocated/compressed
1202 	 * sizes of all non-resident attributes present to give us the Linux
1203 	 * correct size that should go into i_blocks (after division by 512).
1204 	 */
1205 	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1206 		vi->i_blocks = ni->itype.compressed.size >> 9;
1207 	else
1208 		vi->i_blocks = ni->allocated_size >> 9;
1209 
1210 	if (S_ISLNK(vi->i_mode) && ni->target)
1211 		vi->i_size = strlen(ni->target);
1212 
1213 	ntfs_debug("Done.");
1214 	return 0;
1215 unm_err_out:
1216 	if (!err)
1217 		err = -EIO;
1218 	if (ctx)
1219 		ntfs_attr_put_search_ctx(ctx);
1220 	if (m)
1221 		unmap_mft_record(ni);
1222 err_out:
1223 	if (err != -EOPNOTSUPP && err != -ENOMEM && vol_err == true) {
1224 		ntfs_error(vol->sb,
1225 			"Failed with error code %i.  Marking corrupt inode 0x%llx as bad.  Run chkdsk.",
1226 			err, ni->mft_no);
1227 		NVolSetErrors(vol);
1228 	}
1229 	return err;
1230 }
1231 
1232 /*
1233  * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1234  * @base_vi:	base inode
1235  * @vi:		attribute inode to read
1236  *
1237  * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1238  * attribute inode described by @vi into memory from the base mft record
1239  * described by @base_ni.
1240  *
1241  * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1242  * reading and looks up the attribute described by @vi before setting up the
1243  * necessary fields in @vi as well as initializing the ntfs inode.
1244  *
1245  * Q: What locks are held when the function is called?
1246  * A: i_state has I_NEW set, hence the inode is locked, also
1247  *    i_count is set to 1, so it is not going to go away
1248  *
1249  * Return 0 on success and -errno on error.
1250  *
1251  * Note this cannot be called for AT_INDEX_ALLOCATION.
1252  */
1253 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1254 {
1255 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1256 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi);
1257 	struct mft_record *m;
1258 	struct attr_record *a;
1259 	struct ntfs_attr_search_ctx *ctx;
1260 	int err = 0;
1261 
1262 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1263 
1264 	ntfs_init_big_inode(vi);
1265 
1266 	/* Just mirror the values from the base inode. */
1267 	vi->i_uid	= base_vi->i_uid;
1268 	vi->i_gid	= base_vi->i_gid;
1269 	set_nlink(vi, base_vi->i_nlink);
1270 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1271 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1272 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1273 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1274 
1275 	/* Set inode type to zero but preserve permissions. */
1276 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1277 
1278 	m = map_mft_record(base_ni);
1279 	if (IS_ERR(m)) {
1280 		err = PTR_ERR(m);
1281 		goto err_out;
1282 	}
1283 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1284 	if (!ctx) {
1285 		err = -ENOMEM;
1286 		goto unm_err_out;
1287 	}
1288 	/* Find the attribute. */
1289 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1290 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1291 	if (unlikely(err))
1292 		goto unm_err_out;
1293 	a = ctx->attr;
1294 	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1295 		if (a->flags & ATTR_COMPRESSION_MASK) {
1296 			NInoSetCompressed(ni);
1297 			ni->flags |= FILE_ATTR_COMPRESSED;
1298 			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1299 					ni->name_len)) {
1300 				ntfs_error(vi->i_sb,
1301 					   "Found compressed non-data or named data attribute.");
1302 				goto unm_err_out;
1303 			}
1304 			if (vol->cluster_size > 4096) {
1305 				ntfs_error(vi->i_sb,
1306 					"Found compressed attribute but compression is disabled due to cluster size (%i) > 4kiB.",
1307 					vol->cluster_size);
1308 				goto unm_err_out;
1309 			}
1310 			if ((a->flags & ATTR_COMPRESSION_MASK) !=
1311 					ATTR_IS_COMPRESSED) {
1312 				ntfs_error(vi->i_sb, "Found unknown compression method.");
1313 				goto unm_err_out;
1314 			}
1315 		}
1316 		/*
1317 		 * The compressed/sparse flag set in an index root just means
1318 		 * to compress all files.
1319 		 */
1320 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1321 			ntfs_error(vi->i_sb,
1322 				"Found mst protected attribute but the attribute is %s.",
1323 				NInoCompressed(ni) ? "compressed" : "sparse");
1324 			goto unm_err_out;
1325 		}
1326 		if (a->flags & ATTR_IS_SPARSE) {
1327 			NInoSetSparse(ni);
1328 			ni->flags |= FILE_ATTR_SPARSE_FILE;
1329 		}
1330 	}
1331 	if (a->flags & ATTR_IS_ENCRYPTED) {
1332 		if (NInoCompressed(ni)) {
1333 			ntfs_error(vi->i_sb, "Found encrypted and compressed data.");
1334 			goto unm_err_out;
1335 		}
1336 		/*
1337 		 * The encryption flag set in an index root just means to
1338 		 * encrypt all files.
1339 		 */
1340 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1341 			ntfs_error(vi->i_sb,
1342 				"Found mst protected attribute but the attribute is encrypted.");
1343 			goto unm_err_out;
1344 		}
1345 		if (ni->type != AT_DATA) {
1346 			ntfs_error(vi->i_sb,
1347 				"Found encrypted non-data attribute.");
1348 			goto unm_err_out;
1349 		}
1350 		NInoSetEncrypted(ni);
1351 		ni->flags |= FILE_ATTR_ENCRYPTED;
1352 	}
1353 	if (!a->non_resident) {
1354 		/* Ensure the attribute name is placed before the value. */
1355 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1356 				le16_to_cpu(a->data.resident.value_offset)))) {
1357 			ntfs_error(vol->sb,
1358 				"Attribute name is placed after the attribute value.");
1359 			goto unm_err_out;
1360 		}
1361 		if (NInoMstProtected(ni)) {
1362 			ntfs_error(vi->i_sb,
1363 				"Found mst protected attribute but the attribute is resident.");
1364 			goto unm_err_out;
1365 		}
1366 		vi->i_size = ni->initialized_size = ni->data_size = le32_to_cpu(
1367 				a->data.resident.value_length);
1368 		ni->allocated_size = le32_to_cpu(a->length) -
1369 				le16_to_cpu(a->data.resident.value_offset);
1370 		if (vi->i_size > ni->allocated_size) {
1371 			ntfs_error(vi->i_sb,
1372 				"Resident attribute is corrupt (size exceeds allocation).");
1373 			goto unm_err_out;
1374 		}
1375 	} else {
1376 		NInoSetNonResident(ni);
1377 		/*
1378 		 * Ensure the attribute name is placed before the mapping pairs
1379 		 * array.
1380 		 */
1381 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1382 				le16_to_cpu(
1383 				a->data.non_resident.mapping_pairs_offset)))) {
1384 			ntfs_error(vol->sb,
1385 				"Attribute name is placed after the mapping pairs array.");
1386 			goto unm_err_out;
1387 		}
1388 		if (NInoCompressed(ni) || NInoSparse(ni)) {
1389 			if (NInoCompressed(ni) && a->data.non_resident.compression_unit != 4) {
1390 				ntfs_error(vi->i_sb,
1391 					"Found non-standard compression unit (%u instead of 4).  Cannot handle this.",
1392 					a->data.non_resident.compression_unit);
1393 				err = -EOPNOTSUPP;
1394 				goto unm_err_out;
1395 			}
1396 			if (a->data.non_resident.compression_unit) {
1397 				ni->itype.compressed.block_size = 1U <<
1398 						(a->data.non_resident.compression_unit +
1399 						vol->cluster_size_bits);
1400 				ni->itype.compressed.block_size_bits =
1401 						ffs(ni->itype.compressed.block_size) - 1;
1402 				ni->itype.compressed.block_clusters = 1U <<
1403 						a->data.non_resident.compression_unit;
1404 			} else {
1405 				ni->itype.compressed.block_size = 0;
1406 				ni->itype.compressed.block_size_bits = 0;
1407 				ni->itype.compressed.block_clusters = 0;
1408 			}
1409 			ni->itype.compressed.size = le64_to_cpu(
1410 					a->data.non_resident.compressed_size);
1411 		}
1412 		if (a->data.non_resident.lowest_vcn) {
1413 			ntfs_error(vi->i_sb, "First extent of attribute has non-zero lowest_vcn.");
1414 			goto unm_err_out;
1415 		}
1416 		vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1417 		ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1418 		ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1419 	}
1420 	vi->i_mapping->a_ops = &ntfs_aops;
1421 	if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1422 		vi->i_blocks = ni->itype.compressed.size >> 9;
1423 	else
1424 		vi->i_blocks = ni->allocated_size >> 9;
1425 	/*
1426 	 * Make sure the base inode does not go away and attach it to the
1427 	 * attribute inode.
1428 	 */
1429 	if (!igrab(base_vi)) {
1430 		err = -ENOENT;
1431 		goto unm_err_out;
1432 	}
1433 	ni->ext.base_ntfs_ino = base_ni;
1434 	ni->nr_extents = -1;
1435 
1436 	ntfs_attr_put_search_ctx(ctx);
1437 	unmap_mft_record(base_ni);
1438 
1439 	ntfs_debug("Done.");
1440 	return 0;
1441 
1442 unm_err_out:
1443 	if (!err)
1444 		err = -EIO;
1445 	if (ctx)
1446 		ntfs_attr_put_search_ctx(ctx);
1447 	unmap_mft_record(base_ni);
1448 err_out:
1449 	if (err != -ENOENT)
1450 		ntfs_error(vol->sb,
1451 			"Failed with error code %i while reading attribute inode (mft_no 0x%llx, type 0x%x, name_len %i).  Marking corrupt inode and base inode 0x%llx as bad.  Run chkdsk.",
1452 			err, ni->mft_no, ni->type, ni->name_len,
1453 			base_ni->mft_no);
1454 	if (err != -ENOENT && err != -ENOMEM)
1455 		NVolSetErrors(vol);
1456 	return err;
1457 }
1458 
1459 /*
1460  * ntfs_read_locked_index_inode - read an index inode from its base inode
1461  * @base_vi:	base inode
1462  * @vi:		index inode to read
1463  *
1464  * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1465  * index inode described by @vi into memory from the base mft record described
1466  * by @base_ni.
1467  *
1468  * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1469  * reading and looks up the attributes relating to the index described by @vi
1470  * before setting up the necessary fields in @vi as well as initializing the
1471  * ntfs inode.
1472  *
1473  * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1474  * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
1475  * are setup like directory inodes since directories are a special case of
1476  * indices ao they need to be treated in much the same way.  Most importantly,
1477  * for small indices the index allocation attribute might not actually exist.
1478  * However, the index root attribute always exists but this does not need to
1479  * have an inode associated with it and this is why we define a new inode type
1480  * index.  Also, like for directories, we need to have an attribute inode for
1481  * the bitmap attribute corresponding to the index allocation attribute and we
1482  * can store this in the appropriate field of the inode, just like we do for
1483  * normal directory inodes.
1484  *
1485  * Q: What locks are held when the function is called?
1486  * A: i_state has I_NEW set, hence the inode is locked, also
1487  *    i_count is set to 1, so it is not going to go away
1488  *
1489  * Return 0 on success and -errno on error.
1490  */
1491 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1492 {
1493 	loff_t bvi_size;
1494 	struct ntfs_volume *vol = NTFS_SB(vi->i_sb);
1495 	struct ntfs_inode *ni = NTFS_I(vi), *base_ni = NTFS_I(base_vi), *bni;
1496 	struct inode *bvi;
1497 	struct mft_record *m;
1498 	struct attr_record *a;
1499 	struct ntfs_attr_search_ctx *ctx;
1500 	struct index_root *ir;
1501 	int err = 0;
1502 
1503 	ntfs_debug("Entering for i_ino 0x%llx.", ni->mft_no);
1504 	lockdep_assert_held(&base_ni->mrec_lock);
1505 
1506 	ntfs_init_big_inode(vi);
1507 	/* Just mirror the values from the base inode. */
1508 	vi->i_uid	= base_vi->i_uid;
1509 	vi->i_gid	= base_vi->i_gid;
1510 	set_nlink(vi, base_vi->i_nlink);
1511 	inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
1512 	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
1513 	inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
1514 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1515 	/* Set inode type to zero but preserve permissions. */
1516 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1517 	/* Map the mft record for the base inode. */
1518 	m = map_mft_record(base_ni);
1519 	if (IS_ERR(m)) {
1520 		err = PTR_ERR(m);
1521 		goto err_out;
1522 	}
1523 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1524 	if (!ctx) {
1525 		err = -ENOMEM;
1526 		goto unm_err_out;
1527 	}
1528 	/* Find the index root attribute. */
1529 	err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
1530 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1531 	if (unlikely(err)) {
1532 		if (err == -ENOENT)
1533 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is missing.");
1534 		goto unm_err_out;
1535 	}
1536 	a = ctx->attr;
1537 	/* Set up the state. */
1538 	if (unlikely(a->non_resident)) {
1539 		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1540 		goto unm_err_out;
1541 	}
1542 	/* Ensure the attribute name is placed before the value. */
1543 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1544 			le16_to_cpu(a->data.resident.value_offset)))) {
1545 		ntfs_error(vol->sb,
1546 			"$INDEX_ROOT attribute name is placed after the attribute value.");
1547 		goto unm_err_out;
1548 	}
1549 
1550 	ir = (struct index_root *)((u8 *)a + le16_to_cpu(a->data.resident.value_offset));
1551 	if (ntfs_index_root_inconsistent(vol, a, ir, ni->mft_no) ||
1552 	    ntfs_index_entries_inconsistent(vol, &ir->index,
1553 					    ir->collation_rule, ni->mft_no)) {
1554 		ntfs_error(vi->i_sb, "Index is corrupt.");
1555 		goto unm_err_out;
1556 	}
1557 
1558 	ni->itype.index.collation_rule = ir->collation_rule;
1559 	ntfs_debug("Index collation rule is 0x%x.",
1560 			le32_to_cpu(ir->collation_rule));
1561 	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1562 	if (!is_power_of_2(ni->itype.index.block_size)) {
1563 		ntfs_error(vi->i_sb, "Index block size (%u) is not a power of two.",
1564 				ni->itype.index.block_size);
1565 		goto unm_err_out;
1566 	}
1567 	if (ni->itype.index.block_size > PAGE_SIZE) {
1568 		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE (%ld) is not supported.",
1569 				ni->itype.index.block_size, PAGE_SIZE);
1570 		err = -EOPNOTSUPP;
1571 		goto unm_err_out;
1572 	}
1573 	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1574 		ntfs_error(vi->i_sb,
1575 				"Index block size (%u) < NTFS_BLOCK_SIZE (%i) is not supported.",
1576 				ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1577 		err = -EOPNOTSUPP;
1578 		goto unm_err_out;
1579 	}
1580 	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
1581 	/* Determine the size of a vcn in the index. */
1582 	if (vol->cluster_size <= ni->itype.index.block_size) {
1583 		ni->itype.index.vcn_size = vol->cluster_size;
1584 		ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1585 	} else {
1586 		ni->itype.index.vcn_size = vol->sector_size;
1587 		ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1588 	}
1589 
1590 	/* Find index allocation attribute. */
1591 	ntfs_attr_reinit_search_ctx(ctx);
1592 	err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
1593 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1594 	if (unlikely(err)) {
1595 		if (err == -ENOENT) {
1596 			/* No index allocation. */
1597 			vi->i_size = ni->initialized_size = ni->allocated_size = 0;
1598 			/* We are done with the mft record, so we release it. */
1599 			ntfs_attr_put_search_ctx(ctx);
1600 			unmap_mft_record(base_ni);
1601 			m = NULL;
1602 			ctx = NULL;
1603 			goto skip_large_index_stuff;
1604 		} else
1605 			ntfs_error(vi->i_sb, "Failed to lookup $INDEX_ALLOCATION attribute.");
1606 		goto unm_err_out;
1607 	}
1608 	NInoSetIndexAllocPresent(ni);
1609 	NInoSetNonResident(ni);
1610 	ni->type = AT_INDEX_ALLOCATION;
1611 
1612 	a = ctx->attr;
1613 	if (!a->non_resident) {
1614 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is resident.");
1615 		goto unm_err_out;
1616 	}
1617 	/*
1618 	 * Ensure the attribute name is placed before the mapping pairs array.
1619 	 */
1620 	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1621 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset)))) {
1622 		ntfs_error(vol->sb,
1623 			"$INDEX_ALLOCATION attribute name is placed after the mapping pairs array.");
1624 		goto unm_err_out;
1625 	}
1626 	if (a->flags & ATTR_IS_ENCRYPTED) {
1627 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is encrypted.");
1628 		goto unm_err_out;
1629 	}
1630 	if (a->flags & ATTR_IS_SPARSE) {
1631 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1632 		goto unm_err_out;
1633 	}
1634 	if (a->flags & ATTR_COMPRESSION_MASK) {
1635 		ntfs_error(vi->i_sb,
1636 			"$INDEX_ALLOCATION attribute is compressed.");
1637 		goto unm_err_out;
1638 	}
1639 	if (a->data.non_resident.lowest_vcn) {
1640 		ntfs_error(vi->i_sb,
1641 			"First extent of $INDEX_ALLOCATION attribute has non zero lowest_vcn.");
1642 		goto unm_err_out;
1643 	}
1644 	vi->i_size = ni->data_size = le64_to_cpu(a->data.non_resident.data_size);
1645 	ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
1646 	ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
1647 	/*
1648 	 * We are done with the mft record, so we release it.  Otherwise
1649 	 * we would deadlock in ntfs_attr_iget().
1650 	 */
1651 	ntfs_attr_put_search_ctx(ctx);
1652 	unmap_mft_record(base_ni);
1653 	m = NULL;
1654 	ctx = NULL;
1655 	/* Get the index bitmap attribute inode. */
1656 	bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
1657 	if (IS_ERR(bvi)) {
1658 		ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1659 		err = PTR_ERR(bvi);
1660 		goto unm_err_out;
1661 	}
1662 	bni = NTFS_I(bvi);
1663 	if (NInoCompressed(bni) || NInoEncrypted(bni) ||
1664 			NInoSparse(bni)) {
1665 		ntfs_error(vi->i_sb,
1666 			"$BITMAP attribute is compressed and/or encrypted and/or sparse.");
1667 		goto iput_unm_err_out;
1668 	}
1669 	/* Consistency check bitmap size vs. index allocation size. */
1670 	bvi_size = i_size_read(bvi);
1671 	if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1672 		ntfs_error(vi->i_sb,
1673 			"Index bitmap too small (0x%llx) for index allocation (0x%llx).",
1674 			bvi_size << 3, vi->i_size);
1675 		goto iput_unm_err_out;
1676 	}
1677 	iput(bvi);
1678 skip_large_index_stuff:
1679 	/* Setup the operations for this index inode. */
1680 	ntfs_set_vfs_operations(vi, S_IFDIR, 0);
1681 	vi->i_blocks = ni->allocated_size >> 9;
1682 	/*
1683 	 * Make sure the base inode doesn't go away and attach it to the
1684 	 * index inode.
1685 	 */
1686 	if (!igrab(base_vi))
1687 		goto unm_err_out;
1688 	ni->ext.base_ntfs_ino = base_ni;
1689 	ni->nr_extents = -1;
1690 
1691 	ntfs_debug("Done.");
1692 	return 0;
1693 iput_unm_err_out:
1694 	iput(bvi);
1695 unm_err_out:
1696 	if (!err)
1697 		err = -EIO;
1698 	if (ctx)
1699 		ntfs_attr_put_search_ctx(ctx);
1700 	if (m)
1701 		unmap_mft_record(base_ni);
1702 err_out:
1703 	ntfs_error(vi->i_sb,
1704 		"Failed with error code %i while reading index inode (mft_no 0x%llx, name_len %i.",
1705 		err, ni->mft_no, ni->name_len);
1706 	if (err != -EOPNOTSUPP && err != -ENOMEM)
1707 		NVolSetErrors(vol);
1708 	return err;
1709 }
1710 
1711 /*
1712  * load_attribute_list_mount - load an attribute list into memory
1713  * @vol:		ntfs volume from which to read
1714  * @rl:			runlist of the attribute list
1715  * @al_start:		destination buffer
1716  * @size:		size of the destination buffer in bytes
1717  * @initialized_size:	initialized size of the attribute list
1718  *
1719  * Walk the runlist @rl and load all clusters from it copying them into
1720  * the linear buffer @al. The maximum number of bytes copied to @al is @size
1721  * bytes. Note, @size does not need to be a multiple of the cluster size. If
1722  * @initialized_size is less than @size, the region in @al between
1723  * @initialized_size and @size will be zeroed and not read from disk.
1724  *
1725  * Return 0 on success or -errno on error.
1726  */
1727 static int load_attribute_list_mount(struct ntfs_volume *vol,
1728 		struct runlist_element *rl, u8 *al_start, const s64 size,
1729 		const s64 initialized_size)
1730 {
1731 	s64 lcn;
1732 	u8 *al = al_start;
1733 	u8 *al_end = al + initialized_size;
1734 	struct super_block *sb;
1735 	int err = 0;
1736 	loff_t rl_byte_off, rl_byte_len;
1737 
1738 	ntfs_debug("Entering.");
1739 	if (!vol || !rl || !al || size <= 0 || initialized_size < 0 ||
1740 			initialized_size > size)
1741 		return -EINVAL;
1742 	if (!initialized_size) {
1743 		memset(al, 0, size);
1744 		return 0;
1745 	}
1746 	sb = vol->sb;
1747 
1748 	/* Read all clusters specified by the runlist one run at a time. */
1749 	while (rl->length) {
1750 		lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
1751 		ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
1752 				(unsigned long long)rl->vcn,
1753 				(unsigned long long)lcn);
1754 		/* The attribute list cannot be sparse. */
1755 		if (lcn < 0) {
1756 			ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read attribute list.");
1757 			return -EIO;
1758 		}
1759 
1760 		rl_byte_off = ntfs_cluster_to_bytes(vol, lcn);
1761 		rl_byte_len = ntfs_cluster_to_bytes(vol, rl->length);
1762 
1763 		if (al + rl_byte_len > al_end)
1764 			rl_byte_len = al_end - al;
1765 
1766 		err = ntfs_bdev_read(sb->s_bdev, al, rl_byte_off,
1767 				   round_up(rl_byte_len, SECTOR_SIZE));
1768 		if (err) {
1769 			ntfs_error(sb, "Cannot read attribute list.");
1770 			return -EIO;
1771 		}
1772 
1773 		if (al + rl_byte_len >= al_end) {
1774 			if (initialized_size < size)
1775 				goto initialize;
1776 			goto done;
1777 		}
1778 
1779 		al += rl_byte_len;
1780 		rl++;
1781 	}
1782 	if (initialized_size < size) {
1783 initialize:
1784 		memset(al_start + initialized_size, 0, size - initialized_size);
1785 	}
1786 done:
1787 	return err;
1788 }
1789 
1790 /*
1791  * The MFT inode has special locking, so teach the lock validator
1792  * about this by splitting off the locking rules of the MFT from
1793  * the locking rules of other inodes. The MFT inode can never be
1794  * accessed from the VFS side (or even internally), only by the
1795  * map_mft functions.
1796  */
1797 static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1798 
1799 /*
1800  * ntfs_read_inode_mount - special read_inode for mount time use only
1801  * @vi:		inode to read
1802  *
1803  * Read inode FILE_MFT at mount time, only called with super_block lock
1804  * held from within the read_super() code path.
1805  *
1806  * This function exists because when it is called the page cache for $MFT/$DATA
1807  * is not initialized and hence we cannot get at the contents of mft records
1808  * by calling map_mft_record*().
1809  *
1810  * Further it needs to cope with the circular references problem, i.e. cannot
1811  * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1812  * we do not know where the other extent mft records are yet and again, because
1813  * we cannot call map_mft_record*() yet.  Obviously this applies only when an
1814  * attribute list is actually present in $MFT inode.
1815  *
1816  * We solve these problems by starting with the $DATA attribute before anything
1817  * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
1818  * extent is found, we ntfs_mapping_pairs_decompress() including the implied
1819  * ntfs_runlists_merge().  Each step of the iteration necessarily provides
1820  * sufficient information for the next step to complete.
1821  *
1822  * This should work but there are two possible pit falls (see inline comments
1823  * below), but only time will tell if they are real pits or just smoke...
1824  */
1825 int ntfs_read_inode_mount(struct inode *vi)
1826 {
1827 	s64 next_vcn, last_vcn, highest_vcn;
1828 	struct super_block *sb = vi->i_sb;
1829 	struct ntfs_volume *vol = NTFS_SB(sb);
1830 	struct ntfs_inode *ni = NTFS_I(vi);
1831 	struct mft_record *m = NULL;
1832 	struct attr_record *a;
1833 	struct ntfs_attr_search_ctx *ctx;
1834 	unsigned int i, nr_blocks;
1835 	int err;
1836 	size_t new_rl_count;
1837 
1838 	ntfs_debug("Entering.");
1839 
1840 	/* Initialize the ntfs specific part of @vi. */
1841 	ntfs_init_big_inode(vi);
1842 
1843 
1844 	/* Setup the data attribute. It is special as it is mst protected. */
1845 	NInoSetNonResident(ni);
1846 	NInoSetMstProtected(ni);
1847 	NInoSetSparseDisabled(ni);
1848 	ni->type = AT_DATA;
1849 	ni->name = AT_UNNAMED;
1850 	ni->name_len = 0;
1851 	/*
1852 	 * This sets up our little cheat allowing us to reuse the async read io
1853 	 * completion handler for directories.
1854 	 */
1855 	ni->itype.index.block_size = vol->mft_record_size;
1856 	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1857 
1858 	/* Very important! Needed to be able to call map_mft_record*(). */
1859 	vol->mft_ino = vi;
1860 
1861 	/* Allocate enough memory to read the first mft record. */
1862 	if (vol->mft_record_size > 64 * 1024) {
1863 		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1864 				vol->mft_record_size);
1865 		goto err_out;
1866 	}
1867 
1868 	i = vol->mft_record_size;
1869 	if (i < sb->s_blocksize)
1870 		i = sb->s_blocksize;
1871 
1872 	m = kzalloc(i, GFP_NOFS);
1873 	if (!m) {
1874 		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1875 		goto err_out;
1876 	}
1877 
1878 	/* Determine the first block of the $MFT/$DATA attribute. */
1879 	nr_blocks = ntfs_bytes_to_sector(vol, vol->mft_record_size);
1880 	if (!nr_blocks)
1881 		nr_blocks = 1;
1882 
1883 	/* Load $MFT/$DATA's first mft record. */
1884 	err = ntfs_bdev_read(sb->s_bdev, (char *)m,
1885 			     ntfs_cluster_to_bytes(vol, vol->mft_lcn), i);
1886 	if (err) {
1887 		ntfs_error(sb, "Device read failed.");
1888 		goto err_out;
1889 	}
1890 
1891 	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
1892 		ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
1893 				le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
1894 		goto err_out;
1895 	}
1896 
1897 	/* Apply the mst fixups. */
1898 	if (post_read_mst_fixup((struct ntfs_record *)m, vol->mft_record_size)) {
1899 		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1900 		goto err_out;
1901 	}
1902 
1903 	if (ntfs_mft_record_check(vol, m, FILE_MFT)) {
1904 		ntfs_error(sb, "ntfs_mft_record_check failed. $MFT is corrupt.");
1905 		goto err_out;
1906 	}
1907 
1908 	/* Need this to sanity check attribute list references to $MFT. */
1909 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1910 
1911 	/* Provides read_folio() for map_mft_record(). */
1912 	vi->i_mapping->a_ops = &ntfs_mft_aops;
1913 
1914 	ctx = ntfs_attr_get_search_ctx(ni, m);
1915 	if (!ctx) {
1916 		err = -ENOMEM;
1917 		goto err_out;
1918 	}
1919 
1920 	/* Find the attribute list attribute if present. */
1921 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
1922 	if (err) {
1923 		if (unlikely(err != -ENOENT)) {
1924 			ntfs_error(sb,
1925 				"Failed to lookup attribute list attribute. You should run chkdsk.");
1926 			goto put_err_out;
1927 		}
1928 	} else /* if (!err) */ {
1929 		struct attr_list_entry *al_entry, *next_al_entry;
1930 		u8 *al_end;
1931 		static const char *es = "  Not allowed.  $MFT is corrupt.  You should run chkdsk.";
1932 
1933 		ntfs_debug("Attribute list attribute found in $MFT.");
1934 		NInoSetAttrList(ni);
1935 		a = ctx->attr;
1936 		if (a->flags & ATTR_COMPRESSION_MASK) {
1937 			ntfs_error(sb,
1938 				"Attribute list attribute is compressed.%s",
1939 				es);
1940 			goto put_err_out;
1941 		}
1942 		if (a->flags & ATTR_IS_ENCRYPTED ||
1943 				a->flags & ATTR_IS_SPARSE) {
1944 			if (a->non_resident) {
1945 				ntfs_error(sb,
1946 					"Non-resident attribute list attribute is encrypted/sparse.%s",
1947 					es);
1948 				goto put_err_out;
1949 			}
1950 			ntfs_warning(sb,
1951 				"Resident attribute list attribute in $MFT system file is marked encrypted/sparse which is not true.  However, Windows allows this and chkdsk does not detect or correct it so we will just ignore the invalid flags and pretend they are not set.");
1952 		}
1953 		/* Now allocate memory for the attribute list. */
1954 		ni->attr_list_size = (u32)ntfs_attr_size(a);
1955 		if (!ni->attr_list_size) {
1956 			ntfs_error(sb, "Attr_list_size is zero");
1957 			goto put_err_out;
1958 		}
1959 		ni->attr_list = kvzalloc(round_up(ni->attr_list_size, SECTOR_SIZE),
1960 					 GFP_NOFS);
1961 		if (!ni->attr_list) {
1962 			ntfs_error(sb, "Not enough memory to allocate buffer for attribute list.");
1963 			goto put_err_out;
1964 		}
1965 		if (a->non_resident) {
1966 			struct runlist_element *rl;
1967 			size_t new_rl_count;
1968 
1969 			NInoSetAttrListNonResident(ni);
1970 			if (a->data.non_resident.lowest_vcn) {
1971 				ntfs_error(sb,
1972 					"Attribute list has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
1973 				goto put_err_out;
1974 			}
1975 
1976 			rl = ntfs_mapping_pairs_decompress(vol, a, NULL, &new_rl_count);
1977 			if (IS_ERR(rl)) {
1978 				err = PTR_ERR(rl);
1979 				ntfs_error(sb,
1980 					   "Mapping pairs decompression failed with error code %i.",
1981 					   -err);
1982 				goto put_err_out;
1983 			}
1984 
1985 			err = load_attribute_list_mount(vol, rl, ni->attr_list, ni->attr_list_size,
1986 					le64_to_cpu(a->data.non_resident.initialized_size));
1987 			kvfree(rl);
1988 			if (err) {
1989 				ntfs_error(sb,
1990 					   "Failed to load attribute list with error code %i.",
1991 					   -err);
1992 				goto put_err_out;
1993 			}
1994 		} else /* if (!ctx.attr->non_resident) */ {
1995 			/* Now copy the attribute list. */
1996 			memcpy(ni->attr_list, (u8 *)a + le16_to_cpu(
1997 					a->data.resident.value_offset),
1998 					le32_to_cpu(a->data.resident.value_length));
1999 		}
2000 		/* The attribute list is now setup in memory. */
2001 		al_entry = (struct attr_list_entry *)ni->attr_list;
2002 		al_end = (u8 *)al_entry + ni->attr_list_size;
2003 		for (;; al_entry = next_al_entry) {
2004 			/* Out of bounds check. */
2005 			if ((u8 *)al_entry < ni->attr_list ||
2006 					(u8 *)al_entry > al_end)
2007 				goto em_put_err_out;
2008 			/* Catch the end of the attribute list. */
2009 			if ((u8 *)al_entry == al_end)
2010 				goto em_put_err_out;
2011 			if (!ntfs_attr_list_entry_is_valid(al_entry, al_end))
2012 				goto em_put_err_out;
2013 			next_al_entry = (struct attr_list_entry *)((u8 *)al_entry +
2014 					le16_to_cpu(al_entry->length));
2015 			if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
2016 				goto em_put_err_out;
2017 			if (al_entry->type != AT_DATA)
2018 				continue;
2019 			/* We want an unnamed attribute. */
2020 			if (al_entry->name_length)
2021 				goto em_put_err_out;
2022 			/* Want the first entry, i.e. lowest_vcn == 0. */
2023 			if (al_entry->lowest_vcn)
2024 				goto em_put_err_out;
2025 			/* First entry has to be in the base mft record. */
2026 			if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
2027 				/* MFT references do not match, logic fails. */
2028 				ntfs_error(sb,
2029 					"BUG: The first $DATA extent of $MFT is not in the base mft record.");
2030 				goto put_err_out;
2031 			} else {
2032 				/* Sequence numbers must match. */
2033 				if (MSEQNO_LE(al_entry->mft_reference) !=
2034 						ni->seq_no)
2035 					goto em_put_err_out;
2036 				/* Got it. All is ok. We can stop now. */
2037 				break;
2038 			}
2039 		}
2040 	}
2041 
2042 	ntfs_attr_reinit_search_ctx(ctx);
2043 
2044 	/* Now load all attribute extents. */
2045 	a = NULL;
2046 	next_vcn = last_vcn = highest_vcn = 0;
2047 	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
2048 			ctx))) {
2049 		struct runlist_element *nrl;
2050 
2051 		/* Cache the current attribute. */
2052 		a = ctx->attr;
2053 		/* $MFT must be non-resident. */
2054 		if (!a->non_resident) {
2055 			ntfs_error(sb,
2056 				"$MFT must be non-resident but a resident extent was found. $MFT is corrupt. Run chkdsk.");
2057 			goto put_err_out;
2058 		}
2059 		/* $MFT must be uncompressed and unencrypted. */
2060 		if (a->flags & ATTR_COMPRESSION_MASK ||
2061 				a->flags & ATTR_IS_ENCRYPTED ||
2062 				a->flags & ATTR_IS_SPARSE) {
2063 			ntfs_error(sb,
2064 				"$MFT must be uncompressed, non-sparse, and unencrypted but a compressed/sparse/encrypted extent was found. $MFT is corrupt. Run chkdsk.");
2065 			goto put_err_out;
2066 		}
2067 		/*
2068 		 * Decompress the mapping pairs array of this extent and merge
2069 		 * the result into the existing runlist. No need for locking
2070 		 * as we have exclusive access to the inode at this time and we
2071 		 * are a mount in progress task, too.
2072 		 */
2073 		nrl = ntfs_mapping_pairs_decompress(vol, a, &ni->runlist,
2074 						    &new_rl_count);
2075 		if (IS_ERR(nrl)) {
2076 			ntfs_error(sb,
2077 				"ntfs_mapping_pairs_decompress() failed with error code %ld.",
2078 				PTR_ERR(nrl));
2079 			goto put_err_out;
2080 		}
2081 		ni->runlist.rl = nrl;
2082 		ni->runlist.count = new_rl_count;
2083 
2084 		/* Are we in the first extent? */
2085 		if (!next_vcn) {
2086 			if (a->data.non_resident.lowest_vcn) {
2087 				ntfs_error(sb,
2088 					"First extent of $DATA attribute has non zero lowest_vcn. $MFT is corrupt. You should run chkdsk.");
2089 				goto put_err_out;
2090 			}
2091 			/* Get the last vcn in the $DATA attribute. */
2092 			last_vcn = ntfs_bytes_to_cluster(vol,
2093 					le64_to_cpu(a->data.non_resident.allocated_size));
2094 			/* Fill in the inode size. */
2095 			vi->i_size = le64_to_cpu(a->data.non_resident.data_size);
2096 			ni->initialized_size = le64_to_cpu(a->data.non_resident.initialized_size);
2097 			ni->allocated_size = le64_to_cpu(a->data.non_resident.allocated_size);
2098 			/*
2099 			 * Verify the number of mft records does not exceed
2100 			 * 2^32 - 1.
2101 			 */
2102 			if ((vi->i_size >> vol->mft_record_size_bits) >=
2103 					(1ULL << 32)) {
2104 				ntfs_error(sb, "$MFT is too big! Aborting.");
2105 				goto put_err_out;
2106 			}
2107 			/*
2108 			 * We have got the first extent of the runlist for
2109 			 * $MFT which means it is now relatively safe to call
2110 			 * the normal ntfs_read_inode() function.
2111 			 * Complete reading the inode, this will actually
2112 			 * re-read the mft record for $MFT, this time entering
2113 			 * it into the page cache with which we complete the
2114 			 * kick start of the volume. It should be safe to do
2115 			 * this now as the first extent of $MFT/$DATA is
2116 			 * already known and we would hope that we don't need
2117 			 * further extents in order to find the other
2118 			 * attributes belonging to $MFT. Only time will tell if
2119 			 * this is really the case. If not we will have to play
2120 			 * magic at this point, possibly duplicating a lot of
2121 			 * ntfs_read_inode() at this point. We will need to
2122 			 * ensure we do enough of its work to be able to call
2123 			 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
2124 			 * hope this never happens...
2125 			 */
2126 			err = ntfs_read_locked_inode(vi);
2127 			if (err) {
2128 				ntfs_error(sb, "ntfs_read_inode() of $MFT failed.\n");
2129 				ntfs_attr_put_search_ctx(ctx);
2130 				/* Revert to the safe super operations. */
2131 				kfree(m);
2132 				return -1;
2133 			}
2134 			/*
2135 			 * Re-initialize some specifics about $MFT's inode as
2136 			 * ntfs_read_inode() will have set up the default ones.
2137 			 */
2138 			/* Set uid and gid to root. */
2139 			vi->i_uid = GLOBAL_ROOT_UID;
2140 			vi->i_gid = GLOBAL_ROOT_GID;
2141 			/* Regular file. No access for anyone. */
2142 			vi->i_mode = S_IFREG;
2143 			/* No VFS initiated operations allowed for $MFT. */
2144 			vi->i_op = &ntfs_empty_inode_ops;
2145 			vi->i_fop = &ntfs_empty_file_ops;
2146 		}
2147 
2148 		/* Get the lowest vcn for the next extent. */
2149 		highest_vcn = le64_to_cpu(a->data.non_resident.highest_vcn);
2150 		next_vcn = highest_vcn + 1;
2151 
2152 		/* Only one extent or error, which we catch below. */
2153 		if (next_vcn <= 0)
2154 			break;
2155 
2156 		/* Avoid endless loops due to corruption. */
2157 		if (next_vcn < le64_to_cpu(a->data.non_resident.lowest_vcn)) {
2158 			ntfs_error(sb, "$MFT has corrupt attribute list attribute. Run chkdsk.");
2159 			goto put_err_out;
2160 		}
2161 	}
2162 	if (err != -ENOENT) {
2163 		ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. Run chkdsk.\n");
2164 		goto put_err_out;
2165 	}
2166 	if (!a) {
2167 		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is corrupt. Run chkdsk.");
2168 		goto put_err_out;
2169 	}
2170 	if (highest_vcn && highest_vcn != last_vcn - 1) {
2171 		ntfs_error(sb, "Failed to load the complete runlist for $MFT/$DATA. Run chkdsk.");
2172 		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2173 				(unsigned long long)highest_vcn,
2174 				(unsigned long long)last_vcn - 1);
2175 		goto put_err_out;
2176 	}
2177 	ntfs_attr_put_search_ctx(ctx);
2178 	ntfs_debug("Done.");
2179 	kfree(m);
2180 
2181 	/*
2182 	 * Split the locking rules of the MFT inode from the
2183 	 * locking rules of other inodes:
2184 	 */
2185 	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2186 	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2187 
2188 	return 0;
2189 
2190 em_put_err_out:
2191 	ntfs_error(sb,
2192 		"Couldn't find first extent of $DATA attribute in attribute list. $MFT is corrupt. Run chkdsk.");
2193 put_err_out:
2194 	ntfs_attr_put_search_ctx(ctx);
2195 err_out:
2196 	ntfs_error(sb, "Failed. Marking inode as bad.");
2197 	kfree(m);
2198 	return -1;
2199 }
2200 
2201 static void __ntfs_clear_inode(struct ntfs_inode *ni)
2202 {
2203 	/* Free all alocated memory. */
2204 	if (NInoNonResident(ni) && ni->runlist.rl) {
2205 		kvfree(ni->runlist.rl);
2206 		ni->runlist.rl = NULL;
2207 	}
2208 
2209 	if (ni->attr_list) {
2210 		kvfree(ni->attr_list);
2211 		ni->attr_list = NULL;
2212 	}
2213 
2214 	if (ni->name_len && ni->name != I30 &&
2215 	    ni->name != reparse_index_name &&
2216 	    ni->name != objid_index_name) {
2217 		WARN_ON(!ni->name);
2218 		kfree(ni->name);
2219 	}
2220 }
2221 
2222 void ntfs_clear_extent_inode(struct ntfs_inode *ni)
2223 {
2224 	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);
2225 
2226 	WARN_ON(NInoAttr(ni));
2227 	WARN_ON(ni->nr_extents != -1);
2228 
2229 	__ntfs_clear_inode(ni);
2230 	ntfs_destroy_extent_inode(ni);
2231 }
2232 
2233 static int ntfs_delete_base_inode(struct ntfs_inode *ni)
2234 {
2235 	struct super_block *sb = ni->vol->sb;
2236 	int err;
2237 
2238 	if (NInoAttr(ni) || ni->nr_extents == -1)
2239 		return 0;
2240 
2241 	err = ntfs_non_resident_dealloc_clusters(ni);
2242 
2243 	/*
2244 	 * Deallocate extent mft records and free extent inodes.
2245 	 * No need to lock as no one else has a reference.
2246 	 */
2247 	while (ni->nr_extents) {
2248 		err = ntfs_mft_record_free(ni->vol, *(ni->ext.extent_ntfs_inos));
2249 		if (err)
2250 			ntfs_error(sb,
2251 				"Failed to free extent MFT record. Leaving inconsistent metadata.\n");
2252 		ntfs_inode_close(*(ni->ext.extent_ntfs_inos));
2253 	}
2254 
2255 	/* Deallocate base mft record */
2256 	err = ntfs_mft_record_free(ni->vol, ni);
2257 	if (err)
2258 		ntfs_error(sb, "Failed to free base MFT record. Leaving inconsistent metadata.\n");
2259 	return err;
2260 }
2261 
2262 /*
2263  * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2264  * @vi:		vfs inode pending annihilation
2265  *
2266  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
2267  * is called, which deallocates all memory belonging to the NTFS specific part
2268  * of the inode and returns.
2269  *
2270  * If the MFT record is dirty, we commit it before doing anything else.
2271  */
2272 void ntfs_evict_big_inode(struct inode *vi)
2273 {
2274 	struct ntfs_inode *ni = NTFS_I(vi);
2275 
2276 	truncate_inode_pages_final(&vi->i_data);
2277 
2278 	if (!vi->i_nlink) {
2279 		if (!NInoAttr(ni)) {
2280 			/* Never called with extent inodes */
2281 			WARN_ON(ni->nr_extents == -1);
2282 			ntfs_delete_base_inode(ni);
2283 		}
2284 		goto release;
2285 	}
2286 
2287 	if (NInoDirty(ni)) {
2288 		/* Committing the inode also commits all extent inodes. */
2289 		ntfs_commit_inode(vi);
2290 
2291 		if (NInoDirty(ni)) {
2292 			ntfs_debug("Failed to commit dirty inode 0x%llx.  Losing data!",
2293 				   ni->mft_no);
2294 			NInoClearAttrListDirty(ni);
2295 			NInoClearDirty(ni);
2296 		}
2297 	}
2298 
2299 	/* No need to lock at this stage as no one else has a reference. */
2300 	if (ni->nr_extents > 0) {
2301 		int i;
2302 
2303 		for (i = 0; i < ni->nr_extents; i++) {
2304 			if (ni->ext.extent_ntfs_inos[i])
2305 				ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
2306 		}
2307 		ni->nr_extents = 0;
2308 		kvfree(ni->ext.extent_ntfs_inos);
2309 	}
2310 
2311 release:
2312 	clear_inode(vi);
2313 	__ntfs_clear_inode(ni);
2314 
2315 	if (NInoAttr(ni)) {
2316 		/* Release the base inode if we are holding it. */
2317 		if (ni->nr_extents == -1) {
2318 			iput(VFS_I(ni->ext.base_ntfs_ino));
2319 			ni->nr_extents = 0;
2320 			ni->ext.base_ntfs_ino = NULL;
2321 		}
2322 	}
2323 
2324 	if (!atomic_dec_and_test(&ni->count))
2325 		WARN_ON(1);
2326 	if (ni->folio)
2327 		folio_put(ni->folio);
2328 	kfree(ni->mrec);
2329 	kvfree(ni->target);
2330 }
2331 
2332 /*
2333  * ntfs_show_options - show mount options in /proc/mounts
2334  * @sf:		seq_file in which to write our mount options
2335  * @root:	root of the mounted tree whose mount options to display
2336  *
2337  * Called by the VFS once for each mounted ntfs volume when someone reads
2338  * /proc/mounts in order to display the NTFS specific mount options of each
2339  * mount. The mount options of fs specified by @root are written to the seq file
2340  * @sf and success is returned.
2341  */
2342 int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2343 {
2344 	struct ntfs_volume *vol = NTFS_SB(root->d_sb);
2345 	int i;
2346 
2347 	if (uid_valid(vol->uid))
2348 		seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
2349 	if (gid_valid(vol->gid))
2350 		seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
2351 	if (vol->fmask == vol->dmask)
2352 		seq_printf(sf, ",umask=0%o", vol->fmask);
2353 	else {
2354 		seq_printf(sf, ",fmask=0%o", vol->fmask);
2355 		seq_printf(sf, ",dmask=0%o", vol->dmask);
2356 	}
2357 	seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
2358 	if (NVolCaseSensitive(vol))
2359 		seq_puts(sf, ",case_sensitive");
2360 	else
2361 		seq_puts(sf, ",nocase");
2362 	if (NVolShowSystemFiles(vol))
2363 		seq_puts(sf, ",show_sys_files,showmeta");
2364 	for (i = 0; on_errors_arr[i].val; i++) {
2365 		if (on_errors_arr[i].val == vol->on_errors)
2366 			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
2367 	}
2368 	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2369 	if (NVolSysImmutable(vol))
2370 		seq_puts(sf, ",sys_immutable");
2371 	if (!NVolShowHiddenFiles(vol))
2372 		seq_puts(sf, ",nohidden");
2373 	if (NVolHideDotFiles(vol))
2374 		seq_puts(sf, ",hide_dot_files");
2375 	if (NVolCheckWindowsNames(vol))
2376 		seq_puts(sf, ",windows_names");
2377 	if (NVolDiscard(vol))
2378 		seq_puts(sf, ",discard");
2379 	if (NVolDisableSparse(vol))
2380 		seq_puts(sf, ",disable_sparse");
2381 	if (NVolNativeSymlinkRel(vol))
2382 		seq_puts(sf, ",native_symlink=rel");
2383 	else
2384 		seq_puts(sf, ",native_symlink=raw");
2385 	if (NVolSymlinkNative(vol))
2386 		seq_puts(sf, ",symlink=native");
2387 	else
2388 		seq_puts(sf, ",symlink=wsl");
2389 	if (vol->sb->s_flags & SB_POSIXACL)
2390 		seq_puts(sf, ",acl");
2391 	return 0;
2392 }
2393 
2394 int ntfs_extend_initialized_size(struct inode *vi, const loff_t offset,
2395 				 const loff_t new_size, bool bsync)
2396 {
2397 	struct ntfs_inode *ni = NTFS_I(vi);
2398 	loff_t old_init_size;
2399 	unsigned long flags;
2400 	int err;
2401 
2402 	read_lock_irqsave(&ni->size_lock, flags);
2403 	old_init_size = ni->initialized_size;
2404 	read_unlock_irqrestore(&ni->size_lock, flags);
2405 
2406 	if (!NInoNonResident(ni))
2407 		return -EINVAL;
2408 	if (old_init_size >= new_size)
2409 		return 0;
2410 
2411 	err = ntfs_attr_map_whole_runlist(ni);
2412 	if (err)
2413 		return err;
2414 
2415 	if (!NInoCompressed(ni) && old_init_size < offset) {
2416 		err = iomap_zero_range(vi, old_init_size,
2417 				       offset - old_init_size,
2418 				       NULL, &ntfs_seek_iomap_ops,
2419 				       &ntfs_iomap_folio_ops, NULL);
2420 		if (err)
2421 			return err;
2422 		if (bsync)
2423 			err = filemap_write_and_wait_range(vi->i_mapping,
2424 							   old_init_size,
2425 							   offset - 1);
2426 	}
2427 
2428 
2429 	mutex_lock(&ni->mrec_lock);
2430 	err = ntfs_attr_set_initialized_size(ni, new_size);
2431 	mutex_unlock(&ni->mrec_lock);
2432 	if (err)
2433 		truncate_setsize(vi, old_init_size);
2434 	return err;
2435 }
2436 
2437 int ntfs_truncate_vfs(struct inode *vi, loff_t new_size, loff_t i_size)
2438 {
2439 	struct ntfs_inode *ni = NTFS_I(vi);
2440 	int err;
2441 
2442 	mutex_lock(&ni->mrec_lock);
2443 	err = __ntfs_attr_truncate_vfs(ni, new_size, i_size);
2444 	mutex_unlock(&ni->mrec_lock);
2445 	if (err < 0)
2446 		return err;
2447 
2448 	inode_set_mtime_to_ts(vi, inode_set_ctime_current(vi));
2449 	return 0;
2450 }
2451 
2452 /*
2453  * ntfs_inode_sync_standard_information - update standard information attribute
2454  * @vi:	inode to update standard information
2455  * @m:	mft record
2456  *
2457  * Return 0 on success or -errno on error.
2458  */
2459 static int ntfs_inode_sync_standard_information(struct inode *vi, struct mft_record *m)
2460 {
2461 	struct ntfs_inode *ni = NTFS_I(vi);
2462 	struct ntfs_attr_search_ctx *ctx;
2463 	struct standard_information *si;
2464 	__le64 nt;
2465 	int err = 0;
2466 	bool modified = false;
2467 
2468 	/* Update the access times in the standard information attribute. */
2469 	ctx = ntfs_attr_get_search_ctx(ni, m);
2470 	if (unlikely(!ctx))
2471 		return -ENOMEM;
2472 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
2473 			CASE_SENSITIVE, 0, NULL, 0, ctx);
2474 	if (unlikely(err)) {
2475 		ntfs_attr_put_search_ctx(ctx);
2476 		return err;
2477 	}
2478 	si = (struct standard_information *)((u8 *)ctx->attr +
2479 			le16_to_cpu(ctx->attr->data.resident.value_offset));
2480 	if (si->file_attributes != ni->flags) {
2481 		si->file_attributes = ni->flags;
2482 		modified = true;
2483 	}
2484 
2485 	/* Update the creation times if they have changed. */
2486 	nt = utc2ntfs(ni->i_crtime);
2487 	if (si->creation_time != nt) {
2488 		ntfs_debug("Updating creation time for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2489 				ni->mft_no, le64_to_cpu(si->creation_time),
2490 				le64_to_cpu(nt));
2491 		si->creation_time = nt;
2492 		modified = true;
2493 	}
2494 
2495 	/* Update the access times if they have changed. */
2496 	nt = utc2ntfs(inode_get_mtime(vi));
2497 	if (si->last_data_change_time != nt) {
2498 		ntfs_debug("Updating mtime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2499 				ni->mft_no, le64_to_cpu(si->last_data_change_time),
2500 				le64_to_cpu(nt));
2501 		si->last_data_change_time = nt;
2502 		modified = true;
2503 	}
2504 
2505 	nt = utc2ntfs(inode_get_ctime(vi));
2506 	if (si->last_mft_change_time != nt) {
2507 		ntfs_debug("Updating ctime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2508 				ni->mft_no, le64_to_cpu(si->last_mft_change_time),
2509 				le64_to_cpu(nt));
2510 		si->last_mft_change_time = nt;
2511 		modified = true;
2512 	}
2513 	nt = utc2ntfs(inode_get_atime(vi));
2514 	if (si->last_access_time != nt) {
2515 		ntfs_debug("Updating atime for inode 0x%llx: old = 0x%llx, new = 0x%llx",
2516 				ni->mft_no,
2517 				le64_to_cpu(si->last_access_time),
2518 				le64_to_cpu(nt));
2519 		si->last_access_time = nt;
2520 		modified = true;
2521 	}
2522 
2523 	/*
2524 	 * If we just modified the standard information attribute we need to
2525 	 * mark the mft record it is in dirty.  We do this manually so that
2526 	 * mark_inode_dirty() is not called which would redirty the inode and
2527 	 * hence result in an infinite loop of trying to write the inode.
2528 	 * There is no need to mark the base inode nor the base mft record
2529 	 * dirty, since we are going to write this mft record below in any case
2530 	 * and the base mft record may actually not have been modified so it
2531 	 * might not need to be written out.
2532 	 * NOTE: It is not a problem when the inode for $MFT itself is being
2533 	 * written out as ntfs_mft_mark_dirty() will only set I_DIRTY_PAGES
2534 	 * on the $MFT inode and hence ntfs_write_inode() will not be
2535 	 * re-invoked because of it which in turn is ok since the dirtied mft
2536 	 * record will be cleaned and written out to disk below, i.e. before
2537 	 * this function returns.
2538 	 */
2539 	if (modified)
2540 		NInoSetDirty(ctx->ntfs_ino);
2541 	ntfs_attr_put_search_ctx(ctx);
2542 
2543 	return err;
2544 }
2545 
2546 /*
2547  * ntfs_inode_sync_filename - update FILE_NAME attributes
2548  * @ni:	ntfs inode to update FILE_NAME attributes
2549  *
2550  * Update all FILE_NAME attributes for inode @ni in the index.
2551  *
2552  * Return 0 on success or error.
2553  */
2554 int ntfs_inode_sync_filename(struct ntfs_inode *ni)
2555 {
2556 	struct inode *index_vi;
2557 	struct super_block *sb = VFS_I(ni)->i_sb;
2558 	struct ntfs_attr_search_ctx *ctx = NULL;
2559 	struct ntfs_index_context *ictx;
2560 	struct ntfs_inode *index_ni;
2561 	struct file_name_attr *fn;
2562 	struct file_name_attr *fnx;
2563 	struct reparse_point *rpp;
2564 	__le32 reparse_tag;
2565 	int err = 0;
2566 	unsigned long flags;
2567 
2568 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
2569 
2570 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
2571 	if (!ctx)
2572 		return -ENOMEM;
2573 
2574 	/* Collect the reparse tag, if any */
2575 	reparse_tag = cpu_to_le32(0);
2576 	if (ni->flags & FILE_ATTR_REPARSE_POINT) {
2577 		if (!ntfs_attr_lookup(AT_REPARSE_POINT, NULL,
2578 					0, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
2579 			rpp = (struct reparse_point *)((u8 *)ctx->attr +
2580 					le16_to_cpu(ctx->attr->data.resident.value_offset));
2581 			reparse_tag = rpp->reparse_tag;
2582 		}
2583 		ntfs_attr_reinit_search_ctx(ctx);
2584 	}
2585 
2586 	/* Walk through all FILE_NAME attributes and update them. */
2587 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) {
2588 		fn = (struct file_name_attr *)((u8 *)ctx->attr +
2589 				le16_to_cpu(ctx->attr->data.resident.value_offset));
2590 		if (MREF_LE(fn->parent_directory) == ni->mft_no)
2591 			continue;
2592 
2593 		index_vi = ntfs_iget(sb, MREF_LE(fn->parent_directory));
2594 		if (IS_ERR(index_vi)) {
2595 			ntfs_error(sb, "Failed to open inode %lld with index",
2596 					(long long)MREF_LE(fn->parent_directory));
2597 			continue;
2598 		}
2599 
2600 		index_ni = NTFS_I(index_vi);
2601 
2602 		mutex_lock_nested(&index_ni->mrec_lock, NTFS_INODE_MUTEX_PARENT);
2603 		if (NInoBeingDeleted(ni)) {
2604 			mutex_unlock(&index_ni->mrec_lock);
2605 			iput(index_vi);
2606 			continue;
2607 		}
2608 
2609 		ictx = ntfs_index_ctx_get(index_ni, I30, 4);
2610 		if (!ictx) {
2611 			ntfs_error(sb, "Failed to get index ctx, inode %llu",
2612 					index_ni->mft_no);
2613 			mutex_unlock(&index_ni->mrec_lock);
2614 			iput(index_vi);
2615 			continue;
2616 		}
2617 
2618 		err = ntfs_index_lookup(fn, sizeof(struct file_name_attr), ictx);
2619 		if (err) {
2620 			ntfs_debug("Index lookup failed, inode %llu",
2621 					index_ni->mft_no);
2622 			ntfs_index_ctx_put(ictx);
2623 			mutex_unlock(&index_ni->mrec_lock);
2624 			iput(index_vi);
2625 			continue;
2626 		}
2627 		/* Update flags and file size. */
2628 		fnx = (struct file_name_attr *)ictx->data;
2629 		fnx->file_attributes =
2630 			(fnx->file_attributes & ~FILE_ATTR_VALID_FLAGS) |
2631 			(ni->flags & FILE_ATTR_VALID_FLAGS);
2632 		if (ctx->mrec->flags & MFT_RECORD_IS_DIRECTORY)
2633 			fnx->data_size = fnx->allocated_size = 0;
2634 		else {
2635 			read_lock_irqsave(&ni->size_lock, flags);
2636 			if (NInoSparse(ni) || NInoCompressed(ni))
2637 				fnx->allocated_size = cpu_to_le64(ni->itype.compressed.size);
2638 			else
2639 				fnx->allocated_size = cpu_to_le64(ni->allocated_size);
2640 			fnx->data_size = cpu_to_le64(ni->data_size);
2641 
2642 			/*
2643 			 * The file name record has also to be fixed if some
2644 			 * attribute update implied the unnamed data to be
2645 			 * made non-resident
2646 			 */
2647 			fn->allocated_size = fnx->allocated_size;
2648 			fn->data_size = fnx->data_size;
2649 			read_unlock_irqrestore(&ni->size_lock, flags);
2650 		}
2651 
2652 		/* update or clear the reparse tag in the index */
2653 		fnx->type.rp.reparse_point_tag = reparse_tag;
2654 		fnx->creation_time = fn->creation_time;
2655 		fnx->last_data_change_time = fn->last_data_change_time;
2656 		fnx->last_mft_change_time = fn->last_mft_change_time;
2657 		fnx->last_access_time = fn->last_access_time;
2658 		ntfs_index_entry_mark_dirty(ictx);
2659 		ntfs_icx_ib_sync_write(ictx);
2660 		NInoSetDirty(ctx->ntfs_ino);
2661 		ntfs_index_ctx_put(ictx);
2662 		mutex_unlock(&index_ni->mrec_lock);
2663 		iput(index_vi);
2664 	}
2665 	/* Check for real error occurred. */
2666 	if (err != -ENOENT) {
2667 		ntfs_error(sb, "Attribute lookup failed, err : %d, inode %llu", err,
2668 				ni->mft_no);
2669 	} else
2670 		err = 0;
2671 
2672 	ntfs_attr_put_search_ctx(ctx);
2673 	return err;
2674 }
2675 
2676 int ntfs_get_block_mft_record(struct ntfs_inode *mft_ni, struct ntfs_inode *ni)
2677 {
2678 	s64 vcn;
2679 	struct runlist_element *rl;
2680 
2681 	if (ni->mft_lcn[0] != LCN_RL_NOT_MAPPED)
2682 		return 0;
2683 
2684 	vcn = (s64)ni->mft_no << mft_ni->vol->mft_record_size_bits >>
2685 	      mft_ni->vol->cluster_size_bits;
2686 
2687 	rl = mft_ni->runlist.rl;
2688 	if (!rl) {
2689 		ntfs_error(mft_ni->vol->sb, "$MFT runlist is not present");
2690 		return -EIO;
2691 	}
2692 
2693 	/* Seek to element containing target vcn. */
2694 	while (rl->length && rl[1].vcn <= vcn)
2695 		rl++;
2696 	ni->mft_lcn[0] = ntfs_rl_vcn_to_lcn(rl, vcn);
2697 	ni->mft_lcn_count = 1;
2698 
2699 	if (mft_ni->vol->cluster_size < mft_ni->vol->mft_record_size &&
2700 	    (rl->length - (vcn - rl->vcn)) <= 1) {
2701 		rl++;
2702 		ni->mft_lcn[1] = ntfs_rl_vcn_to_lcn(rl, vcn + 1);
2703 		ni->mft_lcn_count++;
2704 	}
2705 	return 0;
2706 }
2707 
2708 /*
2709  * __ntfs_write_inode - write out a dirty inode
2710  * @vi:		inode to write out
2711  * @sync:	if true, write out synchronously
2712  *
2713  * Write out a dirty inode to disk including any extent inodes if present.
2714  *
2715  * If @sync is true, commit the inode to disk and wait for io completion.  This
2716  * is done using write_mft_record().
2717  *
2718  * If @sync is false, just schedule the write to happen but do not wait for i/o
2719  * completion.
2720  *
2721  * Return 0 on success and -errno on error.
2722  */
2723 int __ntfs_write_inode(struct inode *vi, int sync)
2724 {
2725 	struct ntfs_inode *ni = NTFS_I(vi);
2726 	struct ntfs_inode *mft_ni = NTFS_I(ni->vol->mft_ino);
2727 	struct mft_record *m;
2728 	int err = 0;
2729 	bool need_iput = false;
2730 
2731 	ntfs_debug("Entering for %sinode 0x%llx.", NInoAttr(ni) ? "attr " : "",
2732 			ni->mft_no);
2733 
2734 	if (NVolShutdown(ni->vol))
2735 		return -EIO;
2736 
2737 	/*
2738 	 * Dirty attribute inodes are written via their real inodes so just
2739 	 * clean them here.  Access time updates are taken care off when the
2740 	 * real inode is written.
2741 	 */
2742 	if (NInoAttr(ni) || ni->nr_extents == -1) {
2743 		NInoClearDirty(ni);
2744 		ntfs_debug("Done.");
2745 		return 0;
2746 	}
2747 
2748 	/* igrab prevents vi from being evicted while mrec_lock is hold. */
2749 	if (igrab(vi) != NULL)
2750 		need_iput = true;
2751 
2752 	mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL);
2753 	/* Map, pin, and lock the mft record belonging to the inode. */
2754 	m = map_mft_record(ni);
2755 	if (IS_ERR(m)) {
2756 		mutex_unlock(&ni->mrec_lock);
2757 		err = PTR_ERR(m);
2758 		goto err_out;
2759 	}
2760 
2761 	if (NInoNonResident(ni) && NInoRunlistDirty(ni)) {
2762 		down_write(&ni->runlist.lock);
2763 		err = ntfs_attr_update_mapping_pairs(ni, 0);
2764 		if (!err)
2765 			NInoClearRunlistDirty(ni);
2766 		up_write(&ni->runlist.lock);
2767 	}
2768 
2769 	err = ntfs_inode_sync_standard_information(vi, m);
2770 	if (err)
2771 		goto unm_err_out;
2772 
2773 	/*
2774 	 * when being umounted and inodes are evicted, write_inode()
2775 	 * is called with all inodes being marked with I_FREEING.
2776 	 * then ntfs_inode_sync_filename() waits infinitly because
2777 	 * of ntfs_iget. This situation happens only where sync_filesysem()
2778 	 * from umount fails because of a disk unplug and etc.
2779 	 * the absent of SB_ACTIVE means umounting.
2780 	 */
2781 	if ((vi->i_sb->s_flags & SB_ACTIVE) && NInoTestClearFileNameDirty(ni))
2782 		ntfs_inode_sync_filename(ni);
2783 
2784 	/* Now the access times are updated, write the base mft record. */
2785 	if (NInoDirty(ni)) {
2786 		down_read(&mft_ni->runlist.lock);
2787 		err = ntfs_get_block_mft_record(mft_ni, ni);
2788 		up_read(&mft_ni->runlist.lock);
2789 		if (err)
2790 			goto unm_err_out;
2791 
2792 		err = write_mft_record(ni, m, sync);
2793 		if (err)
2794 			ntfs_error(vi->i_sb, "write_mft_record failed, err : %d\n", err);
2795 	}
2796 	unmap_mft_record(ni);
2797 
2798 	/* Map any unmapped extent mft records with LCNs. */
2799 	down_read(&mft_ni->runlist.lock);
2800 	mutex_lock(&ni->extent_lock);
2801 	if (ni->nr_extents > 0) {
2802 		int i;
2803 
2804 		for (i = 0; i < ni->nr_extents; i++) {
2805 			err = ntfs_get_block_mft_record(mft_ni,
2806 						   ni->ext.extent_ntfs_inos[i]);
2807 			if (err) {
2808 				mutex_unlock(&ni->extent_lock);
2809 				up_read(&mft_ni->runlist.lock);
2810 				mutex_unlock(&ni->mrec_lock);
2811 				goto err_out;
2812 			}
2813 		}
2814 	}
2815 	mutex_unlock(&ni->extent_lock);
2816 	up_read(&mft_ni->runlist.lock);
2817 
2818 	/* Write all attached extent mft records. */
2819 	mutex_lock(&ni->extent_lock);
2820 	if (ni->nr_extents > 0) {
2821 		struct ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
2822 		int i;
2823 
2824 		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
2825 		for (i = 0; i < ni->nr_extents; i++) {
2826 			struct ntfs_inode *tni = extent_nis[i];
2827 
2828 			if (NInoDirty(tni)) {
2829 				struct mft_record *tm;
2830 				int ret;
2831 
2832 				mutex_lock(&tni->mrec_lock);
2833 				tm = map_mft_record(tni);
2834 				if (IS_ERR(tm)) {
2835 					mutex_unlock(&tni->mrec_lock);
2836 					if (!err || err == -ENOMEM)
2837 						err = PTR_ERR(tm);
2838 					continue;
2839 				}
2840 
2841 				ret = write_mft_record(tni, tm, sync);
2842 				unmap_mft_record(tni);
2843 				mutex_unlock(&tni->mrec_lock);
2844 
2845 				if (unlikely(ret)) {
2846 					if (!err || err == -ENOMEM)
2847 						err = ret;
2848 				}
2849 			}
2850 		}
2851 	}
2852 	mutex_unlock(&ni->extent_lock);
2853 	mutex_unlock(&ni->mrec_lock);
2854 
2855 	if (unlikely(err))
2856 		goto err_out;
2857 	if (need_iput)
2858 		iput(vi);
2859 	ntfs_debug("Done.");
2860 	return 0;
2861 unm_err_out:
2862 	unmap_mft_record(ni);
2863 	mutex_unlock(&ni->mrec_lock);
2864 err_out:
2865 	if (err == -ENOMEM)
2866 		mark_inode_dirty(vi);
2867 	else {
2868 		ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
2869 		NVolSetErrors(ni->vol);
2870 	}
2871 	if (need_iput)
2872 		iput(vi);
2873 	return err;
2874 }
2875 
2876 /*
2877  * ntfs_extent_inode_open - load an extent inode and attach it to its base
2878  * @base_ni:	base ntfs inode
2879  * @mref:	mft reference of the extent inode to load (in little endian)
2880  *
2881  * First check if the extent inode @mref is already attached to the base ntfs
2882  * inode @base_ni, and if so, return a pointer to the attached extent inode.
2883  *
2884  * If the extent inode is not already attached to the base inode, allocate an
2885  * ntfs_inode structure and initialize it for the given inode @mref. @mref
2886  * specifies the inode number / mft record to read, including the sequence
2887  * number, which can be 0 if no sequence number checking is to be performed.
2888  *
2889  * Then, allocate a buffer for the mft record, read the mft record from the
2890  * volume @base_ni->vol, and attach it to the ntfs_inode structure (->mrec).
2891  * The mft record is mst deprotected and sanity checked for validity and we
2892  * abort if deprotection or checks fail.
2893  *
2894  * Finally attach the ntfs inode to its base inode @base_ni and return a
2895  * pointer to the ntfs_inode structure on success or NULL on error, with errno
2896  * set to the error code.
2897  *
2898  * Note, extent inodes are never closed directly. They are automatically
2899  * disposed off by the closing of the base inode.
2900  */
2901 static struct ntfs_inode *ntfs_extent_inode_open(struct ntfs_inode *base_ni,
2902 		const __le64 mref)
2903 {
2904 	u64 mft_no = MREF_LE(mref);
2905 	struct ntfs_inode *ni = NULL;
2906 	struct ntfs_inode **extent_nis;
2907 	int i;
2908 	struct mft_record *ni_mrec;
2909 	struct super_block *sb;
2910 
2911 	if (!base_ni)
2912 		return NULL;
2913 
2914 	sb = base_ni->vol->sb;
2915 	ntfs_debug("Opening extent inode %llu (base mft record %llu).\n",
2916 			mft_no, base_ni->mft_no);
2917 
2918 	/* Is the extent inode already open and attached to the base inode? */
2919 	if (base_ni->nr_extents > 0) {
2920 		extent_nis = base_ni->ext.extent_ntfs_inos;
2921 		for (i = 0; i < base_ni->nr_extents; i++) {
2922 			u16 seq_no;
2923 
2924 			ni = extent_nis[i];
2925 			if (mft_no != ni->mft_no)
2926 				continue;
2927 			ni_mrec = map_mft_record(ni);
2928 			if (IS_ERR(ni_mrec)) {
2929 				ntfs_error(sb, "failed to map mft record for %llu",
2930 						ni->mft_no);
2931 				goto out;
2932 			}
2933 			/* Verify the sequence number if given. */
2934 			seq_no = MSEQNO_LE(mref);
2935 			if (seq_no &&
2936 			    seq_no != le16_to_cpu(ni_mrec->sequence_number)) {
2937 				ntfs_error(sb, "Found stale extent mft reference mft=%llu",
2938 						ni->mft_no);
2939 				unmap_mft_record(ni);
2940 				goto out;
2941 			}
2942 			unmap_mft_record(ni);
2943 			goto out;
2944 		}
2945 	}
2946 	/* Wasn't there, we need to load the extent inode. */
2947 	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
2948 	if (!ni)
2949 		goto out;
2950 
2951 	ni->seq_no = (u16)MSEQNO_LE(mref);
2952 	ni->nr_extents = -1;
2953 	ni->ext.base_ntfs_ino = base_ni;
2954 	/* Attach extent inode to base inode, reallocating memory if needed. */
2955 	if (!(base_ni->nr_extents & 3)) {
2956 		i = (base_ni->nr_extents + 4) * sizeof(struct ntfs_inode *);
2957 
2958 		extent_nis = kvzalloc(i, GFP_NOFS);
2959 		if (!extent_nis)
2960 			goto err_out;
2961 		if (base_ni->nr_extents) {
2962 			memcpy(extent_nis, base_ni->ext.extent_ntfs_inos,
2963 					i - 4 * sizeof(struct ntfs_inode *));
2964 			kvfree(base_ni->ext.extent_ntfs_inos);
2965 		}
2966 		base_ni->ext.extent_ntfs_inos = extent_nis;
2967 	}
2968 	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
2969 
2970 out:
2971 	ntfs_debug("\n");
2972 	return ni;
2973 err_out:
2974 	ntfs_destroy_ext_inode(ni);
2975 	ni = NULL;
2976 	goto out;
2977 }
2978 
2979 /*
2980  * ntfs_inode_attach_all_extents - attach all extents for target inode
2981  * @ni:		opened ntfs inode for which perform attach
2982  *
2983  * Return 0 on success and error.
2984  */
2985 int ntfs_inode_attach_all_extents(struct ntfs_inode *ni)
2986 {
2987 	struct attr_list_entry *ale;
2988 	u64 prev_attached = 0;
2989 
2990 	if (!ni) {
2991 		ntfs_debug("Invalid arguments.\n");
2992 		return -EINVAL;
2993 	}
2994 
2995 	if (NInoAttr(ni))
2996 		ni = ni->ext.base_ntfs_ino;
2997 
2998 	ntfs_debug("Entering for inode 0x%llx.\n", ni->mft_no);
2999 
3000 	/* Inode haven't got attribute list, thus nothing to attach. */
3001 	if (!NInoAttrList(ni))
3002 		return 0;
3003 
3004 	if (!ni->attr_list) {
3005 		ntfs_debug("Corrupt in-memory struct.\n");
3006 		return -EINVAL;
3007 	}
3008 
3009 	/* Walk through attribute list and attach all extents. */
3010 	ale = (struct attr_list_entry *)ni->attr_list;
3011 	while ((u8 *)ale < ni->attr_list + ni->attr_list_size) {
3012 		if (ni->mft_no != MREF_LE(ale->mft_reference) &&
3013 				prev_attached != MREF_LE(ale->mft_reference)) {
3014 			if (!ntfs_extent_inode_open(ni, ale->mft_reference)) {
3015 				ntfs_debug("Couldn't attach extent inode.\n");
3016 				return -1;
3017 			}
3018 			prev_attached = MREF_LE(ale->mft_reference);
3019 		}
3020 		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
3021 	}
3022 	return 0;
3023 }
3024 
3025 /*
3026  * ntfs_inode_add_attrlist - add attribute list to inode and fill it
3027  * @ni: opened ntfs inode to which add attribute list
3028  *
3029  * Return 0 on success or error.
3030  */
3031 int ntfs_inode_add_attrlist(struct ntfs_inode *ni)
3032 {
3033 	int err;
3034 	struct ntfs_attr_search_ctx *ctx;
3035 	u8 *al = NULL, *aln;
3036 	int al_len = 0;
3037 	struct attr_list_entry *ale = NULL;
3038 	struct mft_record *ni_mrec;
3039 	u32 attr_al_len;
3040 
3041 	if (!ni)
3042 		return -EINVAL;
3043 
3044 	ntfs_debug("inode %llu\n", ni->mft_no);
3045 
3046 	if (NInoAttrList(ni) || ni->nr_extents) {
3047 		ntfs_error(ni->vol->sb, "Inode already has attribute list");
3048 		return -EEXIST;
3049 	}
3050 
3051 	ni_mrec = map_mft_record(ni);
3052 	if (IS_ERR(ni_mrec))
3053 		return -EIO;
3054 
3055 	/* Form attribute list. */
3056 	ctx = ntfs_attr_get_search_ctx(ni, ni_mrec);
3057 	if (!ctx) {
3058 		err = -ENOMEM;
3059 		goto err_out;
3060 	}
3061 
3062 	/* Walk through all attributes. */
3063 	while (!(err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, 0, 0, NULL, 0, ctx))) {
3064 		int ale_size;
3065 
3066 		if (ctx->attr->type == AT_ATTRIBUTE_LIST) {
3067 			err = -EIO;
3068 			ntfs_error(ni->vol->sb, "Attribute list already present");
3069 			goto put_err_out;
3070 		}
3071 
3072 		ale_size = (sizeof(struct attr_list_entry) + sizeof(__le16) *
3073 				ctx->attr->name_length + 7) & ~7;
3074 		al_len += ale_size;
3075 
3076 		aln = kvrealloc(al, al_len, GFP_NOFS);
3077 		if (!aln) {
3078 			err = -ENOMEM;
3079 			ntfs_error(ni->vol->sb, "Failed to realloc %d bytes", al_len);
3080 			goto put_err_out;
3081 		}
3082 		ale = (struct attr_list_entry *)(aln + ((u8 *)ale - al));
3083 		al = aln;
3084 
3085 		memset(ale, 0, ale_size);
3086 
3087 		/* Add attribute to attribute list. */
3088 		ale->type = ctx->attr->type;
3089 		ale->length = cpu_to_le16((sizeof(struct attr_list_entry) +
3090 					sizeof(__le16) * ctx->attr->name_length + 7) & ~7);
3091 		ale->name_length = ctx->attr->name_length;
3092 		ale->name_offset = (u8 *)ale->name - (u8 *)ale;
3093 		if (ctx->attr->non_resident)
3094 			ale->lowest_vcn =
3095 				ctx->attr->data.non_resident.lowest_vcn;
3096 		else
3097 			ale->lowest_vcn = 0;
3098 		ale->mft_reference = MK_LE_MREF(ni->mft_no,
3099 				le16_to_cpu(ni_mrec->sequence_number));
3100 		ale->instance = ctx->attr->instance;
3101 		memcpy(ale->name, (u8 *)ctx->attr +
3102 				le16_to_cpu(ctx->attr->name_offset),
3103 				ctx->attr->name_length * sizeof(__le16));
3104 		ale = (struct attr_list_entry *)(al + al_len);
3105 	}
3106 
3107 	/* Check for real error occurred. */
3108 	if (err != -ENOENT) {
3109 		ntfs_error(ni->vol->sb, "%s: Attribute lookup failed, inode %llu",
3110 				__func__, ni->mft_no);
3111 		goto put_err_out;
3112 	}
3113 
3114 	/* Set in-memory attribute list. */
3115 	ni->attr_list = al;
3116 	ni->attr_list_size = al_len;
3117 	NInoSetAttrList(ni);
3118 
3119 	attr_al_len = offsetof(struct attr_record, data.resident.reserved) + 1 +
3120 		((al_len + 7) & ~7);
3121 	/* Free space if there is not enough it for $ATTRIBUTE_LIST. */
3122 	if (le32_to_cpu(ni_mrec->bytes_allocated) -
3123 			le32_to_cpu(ni_mrec->bytes_in_use) < attr_al_len) {
3124 		if (ntfs_inode_free_space(ni, (int)attr_al_len)) {
3125 			/* Failed to free space. */
3126 			err = -ENOSPC;
3127 			ntfs_error(ni->vol->sb, "Failed to free space for attrlist");
3128 			goto rollback;
3129 		}
3130 	}
3131 
3132 	/* Add $ATTRIBUTE_LIST to mft record. */
3133 	err = ntfs_resident_attr_record_add(ni, AT_ATTRIBUTE_LIST, AT_UNNAMED, 0,
3134 					    NULL, al_len, 0);
3135 	if (err < 0) {
3136 		ntfs_error(ni->vol->sb, "Couldn't add $ATTRIBUTE_LIST to MFT");
3137 		goto rollback;
3138 	}
3139 
3140 	err = ntfs_attrlist_update(ni);
3141 	if (err < 0)
3142 		goto remove_attrlist_record;
3143 
3144 	ntfs_attr_put_search_ctx(ctx);
3145 	unmap_mft_record(ni);
3146 	return 0;
3147 
3148 remove_attrlist_record:
3149 	/* Prevent ntfs_attr_recorm_rm from freeing attribute list. */
3150 	ni->attr_list = NULL;
3151 	NInoClearAttrList(ni);
3152 	/* Remove $ATTRIBUTE_LIST record. */
3153 	ntfs_attr_reinit_search_ctx(ctx);
3154 	if (!ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0,
3155 				CASE_SENSITIVE, 0, NULL, 0, ctx)) {
3156 		if (ntfs_attr_record_rm(ctx))
3157 			ntfs_error(ni->vol->sb, "Rollback failed to remove attrlist");
3158 	} else {
3159 		ntfs_error(ni->vol->sb, "Rollback failed to find attrlist");
3160 	}
3161 
3162 	/* Setup back in-memory runlist. */
3163 	ni->attr_list = al;
3164 	ni->attr_list_size = al_len;
3165 	NInoSetAttrList(ni);
3166 rollback:
3167 	/*
3168 	 * Scan attribute list for attributes that placed not in the base MFT
3169 	 * record and move them to it.
3170 	 */
3171 	ntfs_attr_reinit_search_ctx(ctx);
3172 	ale = (struct attr_list_entry *)al;
3173 	while ((u8 *)ale < al + al_len) {
3174 		if (MREF_LE(ale->mft_reference) != ni->mft_no) {
3175 			if (!ntfs_attr_lookup(ale->type, ale->name,
3176 						ale->name_length,
3177 						CASE_SENSITIVE,
3178 						le64_to_cpu(ale->lowest_vcn),
3179 						NULL, 0, ctx)) {
3180 				if (ntfs_attr_record_move_to(ctx, ni))
3181 					ntfs_error(ni->vol->sb,
3182 							"Rollback failed to move attribute");
3183 			} else {
3184 				ntfs_error(ni->vol->sb, "Rollback failed to find attr");
3185 			}
3186 			ntfs_attr_reinit_search_ctx(ctx);
3187 		}
3188 		ale = (struct attr_list_entry *)((u8 *)ale + le16_to_cpu(ale->length));
3189 	}
3190 
3191 	/* Remove in-memory attribute list. */
3192 	ni->attr_list = NULL;
3193 	ni->attr_list_size = 0;
3194 	NInoClearAttrList(ni);
3195 	NInoClearAttrListDirty(ni);
3196 put_err_out:
3197 	ntfs_attr_put_search_ctx(ctx);
3198 err_out:
3199 	kvfree(al);
3200 	unmap_mft_record(ni);
3201 	return err;
3202 }
3203 
3204 /*
3205  * ntfs_inode_close - close an ntfs inode and free all associated memory
3206  * @ni:		ntfs inode to close
3207  *
3208  * Make sure the ntfs inode @ni is clean.
3209  *
3210  * If the ntfs inode @ni is a base inode, close all associated extent inodes,
3211  * then deallocate all memory attached to it, and finally free the ntfs inode
3212  * structure itself.
3213  *
3214  * If it is an extent inode, we disconnect it from its base inode before we
3215  * destroy it.
3216  *
3217  * It is OK to pass NULL to this function, it is just noop in this case.
3218  *
3219  * Return 0 on success or error.
3220  */
3221 int ntfs_inode_close(struct ntfs_inode *ni)
3222 {
3223 	int err = -1;
3224 	struct ntfs_inode **tmp_nis;
3225 	struct ntfs_inode *base_ni;
3226 	s32 i;
3227 
3228 	if (!ni)
3229 		return 0;
3230 
3231 	ntfs_debug("Entering for inode %llu\n", ni->mft_no);
3232 
3233 	/* Is this a base inode with mapped extent inodes? */
3234 	/*
3235 	 * If the inode is an extent inode, disconnect it from the
3236 	 * base inode before destroying it.
3237 	 */
3238 	base_ni = ni->ext.base_ntfs_ino;
3239 	tmp_nis = base_ni->ext.extent_ntfs_inos;
3240 	if (!tmp_nis)
3241 		goto out;
3242 	for (i = 0; i < base_ni->nr_extents; ++i) {
3243 		if (tmp_nis[i] != ni)
3244 			continue;
3245 		/* Found it. Disconnect. */
3246 		memmove(tmp_nis + i, tmp_nis + i + 1,
3247 				(base_ni->nr_extents - i - 1) *
3248 				sizeof(struct ntfs_inode *));
3249 		/* Buffer should be for multiple of four extents. */
3250 		if ((--base_ni->nr_extents) & 3)
3251 			break;
3252 		/*
3253 		 * ElectricFence is unhappy with realloc(x,0) as free(x)
3254 		 * thus we explicitly separate these two cases.
3255 		 */
3256 		if (base_ni->nr_extents) {
3257 			/* Resize the memory buffer. */
3258 			tmp_nis = kvrealloc(tmp_nis, base_ni->nr_extents *
3259 					sizeof(struct ntfs_inode *), GFP_NOFS);
3260 			/* Ignore errors, they don't really matter. */
3261 			if (tmp_nis)
3262 				base_ni->ext.extent_ntfs_inos = tmp_nis;
3263 		} else if (tmp_nis) {
3264 			kvfree(tmp_nis);
3265 			base_ni->ext.extent_ntfs_inos = NULL;
3266 		}
3267 		break;
3268 	}
3269 
3270 out:
3271 	if (NInoDirty(ni))
3272 		ntfs_error(ni->vol->sb, "Releasing dirty inode %llu!\n",
3273 				ni->mft_no);
3274 	if (NInoAttrList(ni) && ni->attr_list)
3275 		kvfree(ni->attr_list);
3276 	ntfs_destroy_ext_inode(ni);
3277 	err = 0;
3278 	ntfs_debug("\n");
3279 	return err;
3280 }
3281 
3282 void ntfs_destroy_ext_inode(struct ntfs_inode *ni)
3283 {
3284 	ntfs_debug("Entering.");
3285 	if (ni == NULL)
3286 		return;
3287 
3288 	ntfs_attr_close(ni);
3289 
3290 	if (NInoDirty(ni))
3291 		ntfs_error(ni->vol->sb, "Releasing dirty ext inode %llu!\n",
3292 				ni->mft_no);
3293 	if (NInoAttrList(ni) && ni->attr_list)
3294 		kvfree(ni->attr_list);
3295 	kfree(ni->mrec);
3296 	kmem_cache_free(ntfs_inode_cache, ni);
3297 }
3298 
3299 static struct ntfs_inode *ntfs_inode_base(struct ntfs_inode *ni)
3300 {
3301 	if (ni->nr_extents == -1)
3302 		return ni->ext.base_ntfs_ino;
3303 	return ni;
3304 }
3305 
3306 static int ntfs_attr_position(__le32 type, struct ntfs_attr_search_ctx *ctx)
3307 {
3308 	int err;
3309 
3310 	err = ntfs_attr_lookup(type, NULL, 0, CASE_SENSITIVE, 0, NULL,
3311 				0, ctx);
3312 	if (err) {
3313 		__le32 atype;
3314 
3315 		if (err != -ENOENT)
3316 			return err;
3317 
3318 		atype = ctx->attr->type;
3319 		if (atype == AT_END)
3320 			return -ENOSPC;
3321 
3322 		/*
3323 		 * if ntfs_external_attr_lookup return -ENOENT, ctx->al_entry
3324 		 * could point to an attribute in an extent mft record, but
3325 		 * ctx->attr and ctx->ntfs_ino always points to an attibute in
3326 		 * a base mft record.
3327 		 */
3328 		if (ctx->al_entry &&
3329 		    MREF_LE(ctx->al_entry->mft_reference) != ctx->ntfs_ino->mft_no) {
3330 			ntfs_attr_reinit_search_ctx(ctx);
3331 			err = ntfs_attr_lookup(atype, NULL, 0, CASE_SENSITIVE, 0, NULL,
3332 					       0, ctx);
3333 			if (err)
3334 				return err;
3335 		}
3336 	}
3337 	return 0;
3338 }
3339 
3340 /*
3341  * ntfs_inode_free_space - free space in the MFT record of inode
3342  * @ni:		ntfs inode in which MFT record free space
3343  * @size:	amount of space needed to free
3344  *
3345  * Return 0 on success or error.
3346  */
3347 int ntfs_inode_free_space(struct ntfs_inode *ni, int size)
3348 {
3349 	struct ntfs_attr_search_ctx *ctx;
3350 	int freed, err;
3351 	struct mft_record *ni_mrec;
3352 	struct super_block *sb;
3353 
3354 	if (!ni || size < 0)
3355 		return -EINVAL;
3356 	ntfs_debug("Entering for inode %llu, size %d\n", ni->mft_no, size);
3357 
3358 	sb = ni->vol->sb;
3359 	ni_mrec = map_mft_record(ni);
3360 	if (IS_ERR(ni_mrec))
3361 		return -EIO;
3362 
3363 	freed = (le32_to_cpu(ni_mrec->bytes_allocated) -
3364 			le32_to_cpu(ni_mrec->bytes_in_use));
3365 
3366 	unmap_mft_record(ni);
3367 
3368 	if (size <= freed)
3369 		return 0;
3370 
3371 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
3372 	if (!ctx) {
3373 		ntfs_error(sb, "%s, Failed to get search context", __func__);
3374 		return -ENOMEM;
3375 	}
3376 
3377 	/*
3378 	 * Chkdsk complain if $STANDARD_INFORMATION is not in the base MFT
3379 	 * record.
3380 	 *
3381 	 * Also we can't move $ATTRIBUTE_LIST from base MFT_RECORD, so position
3382 	 * search context on first attribute after $STANDARD_INFORMATION and
3383 	 * $ATTRIBUTE_LIST.
3384 	 *
3385 	 * Why we reposition instead of simply skip this attributes during
3386 	 * enumeration? Because in case we have got only in-memory attribute
3387 	 * list ntfs_attr_lookup will fail when it will try to find
3388 	 * $ATTRIBUTE_LIST.
3389 	 */
3390 	err = ntfs_attr_position(AT_FILE_NAME, ctx);
3391 	if (err)
3392 		goto put_err_out;
3393 
3394 	while (1) {
3395 		int record_size;
3396 
3397 		/*
3398 		 * Check whether attribute is from different MFT record. If so,
3399 		 * find next, because we don't need such.
3400 		 */
3401 		while (ctx->ntfs_ino->mft_no != ni->mft_no) {
3402 retry:
3403 			err = ntfs_attr_lookup(AT_UNUSED, NULL, 0, CASE_SENSITIVE,
3404 						0, NULL, 0, ctx);
3405 			if (err) {
3406 				if (err != -ENOENT)
3407 					ntfs_error(sb, "Attr lookup failed #2");
3408 				else if (ctx->attr->type == AT_END)
3409 					err = -ENOSPC;
3410 				else
3411 					err = 0;
3412 
3413 				if (err)
3414 					goto put_err_out;
3415 			}
3416 		}
3417 
3418 		if (ntfs_inode_base(ctx->ntfs_ino)->mft_no == FILE_MFT &&
3419 				ctx->attr->type == AT_DATA)
3420 			goto retry;
3421 
3422 		if (ctx->attr->type == AT_INDEX_ROOT)
3423 			goto retry;
3424 
3425 		record_size = le32_to_cpu(ctx->attr->length);
3426 
3427 		/* Move away attribute. */
3428 		err = ntfs_attr_record_move_away(ctx, 0);
3429 		if (err) {
3430 			ntfs_error(sb, "Failed to move out attribute #2");
3431 			break;
3432 		}
3433 		freed += record_size;
3434 
3435 		/* Check whether we done. */
3436 		if (size <= freed) {
3437 			ntfs_attr_put_search_ctx(ctx);
3438 			return 0;
3439 		}
3440 
3441 		/*
3442 		 * Reposition to first attribute after $STANDARD_INFORMATION and
3443 		 * $ATTRIBUTE_LIST (see comments upwards).
3444 		 */
3445 		ntfs_attr_reinit_search_ctx(ctx);
3446 		err = ntfs_attr_position(AT_FILE_NAME, ctx);
3447 		if (err)
3448 			break;
3449 	}
3450 put_err_out:
3451 	ntfs_attr_put_search_ctx(ctx);
3452 	if (err == -ENOSPC)
3453 		ntfs_debug("No attributes left that can be moved out.\n");
3454 	return err;
3455 }
3456 
3457 s64 ntfs_inode_attr_pread(struct inode *vi, s64 pos, s64 count, u8 *buf)
3458 {
3459 	struct address_space *mapping = vi->i_mapping;
3460 	struct folio *folio;
3461 	struct ntfs_inode *ni = NTFS_I(vi);
3462 	s64 isize;
3463 	u32 attr_len, total = 0, offset;
3464 	pgoff_t index;
3465 	int err = 0;
3466 
3467 	WARN_ON(!NInoAttr(ni));
3468 	if (!count)
3469 		return 0;
3470 
3471 	mutex_lock(&ni->mrec_lock);
3472 	isize = i_size_read(vi);
3473 	if (pos > isize) {
3474 		mutex_unlock(&ni->mrec_lock);
3475 		return -EINVAL;
3476 	}
3477 	if (pos + count > isize)
3478 		count = isize - pos;
3479 
3480 	if (!NInoNonResident(ni)) {
3481 		struct ntfs_attr_search_ctx *ctx;
3482 		u8 *attr;
3483 
3484 		ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3485 		if (!ctx) {
3486 			ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3487 			err = -ENOMEM;
3488 			mutex_unlock(&ni->mrec_lock);
3489 			goto out;
3490 		}
3491 
3492 		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3493 				       0, NULL, 0, ctx);
3494 		if (err) {
3495 			ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3496 			ntfs_attr_put_search_ctx(ctx);
3497 			mutex_unlock(&ni->mrec_lock);
3498 			goto out;
3499 		}
3500 
3501 		attr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3502 		memcpy(buf, (u8 *)attr + pos, count);
3503 		ntfs_attr_put_search_ctx(ctx);
3504 		mutex_unlock(&ni->mrec_lock);
3505 		return count;
3506 	}
3507 	mutex_unlock(&ni->mrec_lock);
3508 
3509 	index = pos >> PAGE_SHIFT;
3510 	do {
3511 		/* Update @index and get the next folio. */
3512 		folio = read_mapping_folio(mapping, index, NULL);
3513 		if (IS_ERR(folio))
3514 			break;
3515 
3516 		offset = offset_in_folio(folio, pos);
3517 		attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3518 
3519 		folio_lock(folio);
3520 		memcpy_from_folio(buf, folio, offset, attr_len);
3521 		folio_unlock(folio);
3522 		folio_put(folio);
3523 
3524 		total += attr_len;
3525 		buf += attr_len;
3526 		pos += attr_len;
3527 		count -= attr_len;
3528 		index++;
3529 	} while (count);
3530 out:
3531 	return err ? (s64)err : total;
3532 }
3533 
3534 static inline int ntfs_enlarge_attribute(struct inode *vi, s64 pos, s64 count,
3535 					 struct ntfs_attr_search_ctx *ctx)
3536 {
3537 	struct ntfs_inode *ni = NTFS_I(vi);
3538 	struct super_block *sb = vi->i_sb;
3539 	int ret;
3540 
3541 	if (pos + count <= ni->initialized_size)
3542 		return 0;
3543 
3544 	if (NInoEncrypted(ni) && NInoNonResident(ni))
3545 		return -EACCES;
3546 
3547 	if (NInoCompressed(ni))
3548 		return -EOPNOTSUPP;
3549 
3550 	if (pos + count > ni->data_size) {
3551 		if (ntfs_attr_truncate(ni, pos + count)) {
3552 			ntfs_debug("Failed to truncate attribute");
3553 			return -1;
3554 		}
3555 
3556 		ntfs_attr_reinit_search_ctx(ctx);
3557 		ret = ntfs_attr_lookup(ni->type,
3558 				       ni->name, ni->name_len, CASE_SENSITIVE,
3559 				       0, NULL, 0, ctx);
3560 		if (ret) {
3561 			ntfs_error(sb, "Failed to look up attr %#x", ni->type);
3562 			return ret;
3563 		}
3564 	}
3565 
3566 	if (!NInoNonResident(ni)) {
3567 		if (likely(i_size_read(vi) < ni->data_size))
3568 			i_size_write(vi, ni->data_size);
3569 		return 0;
3570 	}
3571 
3572 	if (pos + count > ni->initialized_size) {
3573 		ctx->attr->data.non_resident.initialized_size = cpu_to_le64(pos + count);
3574 		mark_mft_record_dirty(ctx->ntfs_ino);
3575 		ni->initialized_size = pos + count;
3576 		if (i_size_read(vi) < ni->initialized_size)
3577 			i_size_write(vi, ni->initialized_size);
3578 	}
3579 	return 0;
3580 }
3581 
3582 static s64 __ntfs_inode_resident_attr_pwrite(struct inode *vi,
3583 					     s64 pos, s64 count, u8 *buf,
3584 					     struct ntfs_attr_search_ctx *ctx)
3585 {
3586 	struct ntfs_inode *ni = NTFS_I(vi);
3587 	struct folio *folio;
3588 	struct address_space *mapping = vi->i_mapping;
3589 	u8 *addr;
3590 	int err = 0;
3591 
3592 	WARN_ON(NInoNonResident(ni));
3593 	if (pos + count > PAGE_SIZE) {
3594 		ntfs_error(vi->i_sb, "Out of write into resident attr %#x", ni->type);
3595 		return -EINVAL;
3596 	}
3597 
3598 	/* Copy to mft record page */
3599 	addr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset);
3600 	memcpy(addr + pos, buf, count);
3601 	mark_mft_record_dirty(ctx->ntfs_ino);
3602 
3603 	/* Keep the first page clean and uptodate */
3604 	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
3605 				   mapping_gfp_mask(mapping));
3606 	if (IS_ERR(folio)) {
3607 		err = PTR_ERR(folio);
3608 		ntfs_error(vi->i_sb, "Failed to read a page 0 for attr %#x: %d",
3609 			   ni->type, err);
3610 		goto out;
3611 	}
3612 	if (!folio_test_uptodate(folio))
3613 		folio_fill_tail(folio, 0, addr,
3614 				le32_to_cpu(ctx->attr->data.resident.value_length));
3615 	else
3616 		memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, count);
3617 	folio_mark_uptodate(folio);
3618 	folio_unlock(folio);
3619 	folio_put(folio);
3620 out:
3621 	return err ? err : count;
3622 }
3623 
3624 static s64 __ntfs_inode_non_resident_attr_pwrite(struct inode *vi,
3625 						 s64 pos, s64 count, u8 *buf,
3626 						 struct ntfs_attr_search_ctx *ctx,
3627 						 bool sync)
3628 {
3629 	struct ntfs_inode *ni = NTFS_I(vi);
3630 	struct address_space *mapping = vi->i_mapping;
3631 	struct folio *folio;
3632 	pgoff_t index;
3633 	unsigned long offset, length;
3634 	size_t attr_len;
3635 	s64 ret = 0, written = 0;
3636 
3637 	WARN_ON(!NInoNonResident(ni));
3638 
3639 	index = pos >> PAGE_SHIFT;
3640 	while (count) {
3641 		if (count == PAGE_SIZE) {
3642 			folio = __filemap_get_folio(vi->i_mapping, index,
3643 					FGP_CREAT | FGP_LOCK,
3644 					mapping_gfp_mask(mapping));
3645 			if (IS_ERR(folio)) {
3646 				ret = -ENOMEM;
3647 				break;
3648 			}
3649 		} else {
3650 			folio = read_mapping_folio(mapping, index, NULL);
3651 			if (IS_ERR(folio)) {
3652 				ret = PTR_ERR(folio);
3653 				ntfs_error(vi->i_sb, "Failed to read a page %lu for attr %#x: %ld",
3654 						index, ni->type, PTR_ERR(folio));
3655 				break;
3656 			}
3657 
3658 			folio_lock(folio);
3659 		}
3660 
3661 		if (count == PAGE_SIZE) {
3662 			offset = 0;
3663 			attr_len = count;
3664 		} else {
3665 			offset = offset_in_folio(folio, pos);
3666 			attr_len = min_t(size_t, (size_t)count, folio_size(folio) - offset);
3667 		}
3668 		memcpy_to_folio(folio, offset, buf, attr_len);
3669 
3670 		if (sync) {
3671 			struct ntfs_volume *vol = ni->vol;
3672 			s64 lcn, lcn_count;
3673 			unsigned int lcn_folio_off = 0;
3674 			struct bio *bio;
3675 			u64 rl_length = 0;
3676 			s64 vcn;
3677 			struct runlist_element *rl;
3678 
3679 			lcn_count = max_t(s64, 1, ntfs_bytes_to_cluster(vol, attr_len));
3680 			vcn = ntfs_pidx_to_cluster(vol, folio->index);
3681 
3682 			do {
3683 				down_write(&ni->runlist.lock);
3684 				rl = ntfs_attr_vcn_to_rl(ni, vcn, &lcn);
3685 				if (IS_ERR(rl)) {
3686 					ret = PTR_ERR(rl);
3687 					up_write(&ni->runlist.lock);
3688 					goto err_unlock_folio;
3689 				}
3690 
3691 				rl_length = rl->length - (vcn - rl->vcn);
3692 				if (rl_length < lcn_count) {
3693 					lcn_count -= rl_length;
3694 				} else {
3695 					rl_length = lcn_count;
3696 					lcn_count = 0;
3697 				}
3698 				up_write(&ni->runlist.lock);
3699 
3700 				if (vol->cluster_size_bits > PAGE_SHIFT) {
3701 					lcn_folio_off = folio->index << PAGE_SHIFT;
3702 					lcn_folio_off &= vol->cluster_size_mask;
3703 				}
3704 
3705 				bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
3706 						GFP_NOIO);
3707 				bio->bi_iter.bi_sector =
3708 					ntfs_bytes_to_sector(vol,
3709 							ntfs_cluster_to_bytes(vol, lcn) +
3710 							lcn_folio_off);
3711 
3712 				length = min_t(unsigned long,
3713 					       ntfs_cluster_to_bytes(vol, rl_length),
3714 					       folio_size(folio));
3715 				if (!bio_add_folio(bio, folio, length, offset)) {
3716 					ret = -EIO;
3717 					bio_put(bio);
3718 					goto err_unlock_folio;
3719 				}
3720 
3721 				submit_bio_wait(bio);
3722 				bio_put(bio);
3723 				vcn += rl_length;
3724 				offset += length;
3725 			} while (lcn_count != 0);
3726 
3727 			folio_mark_uptodate(folio);
3728 		} else {
3729 			folio_mark_uptodate(folio);
3730 			folio_mark_dirty(folio);
3731 		}
3732 err_unlock_folio:
3733 		folio_unlock(folio);
3734 		folio_put(folio);
3735 
3736 		if (ret)
3737 			break;
3738 
3739 		written += attr_len;
3740 		buf += attr_len;
3741 		pos += attr_len;
3742 		count -= attr_len;
3743 		index++;
3744 
3745 		cond_resched();
3746 	}
3747 
3748 	return ret ? ret : written;
3749 }
3750 
3751 s64 ntfs_inode_attr_pwrite(struct inode *vi, s64 pos, s64 count, u8 *buf, bool sync)
3752 {
3753 	struct ntfs_inode *ni = NTFS_I(vi);
3754 	struct ntfs_attr_search_ctx *ctx;
3755 	s64 ret;
3756 
3757 	WARN_ON(!NInoAttr(ni));
3758 
3759 	ctx = ntfs_attr_get_search_ctx(ni->ext.base_ntfs_ino, NULL);
3760 	if (!ctx) {
3761 		ntfs_error(vi->i_sb, "Failed to get attr search ctx");
3762 		return -ENOMEM;
3763 	}
3764 
3765 	ret = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
3766 			       0, NULL, 0, ctx);
3767 	if (ret) {
3768 		ntfs_attr_put_search_ctx(ctx);
3769 		ntfs_error(vi->i_sb, "Failed to look up attr %#x", ni->type);
3770 		return ret;
3771 	}
3772 
3773 	mutex_lock(&ni->mrec_lock);
3774 	ret = ntfs_enlarge_attribute(vi, pos, count, ctx);
3775 	mutex_unlock(&ni->mrec_lock);
3776 	if (ret)
3777 		goto out;
3778 
3779 	if (NInoNonResident(ni))
3780 		ret = __ntfs_inode_non_resident_attr_pwrite(vi, pos, count, buf, ctx, sync);
3781 	else
3782 		ret = __ntfs_inode_resident_attr_pwrite(vi, pos, count, buf, ctx);
3783 out:
3784 	ntfs_attr_put_search_ctx(ctx);
3785 	return ret;
3786 }
3787 
3788 struct folio *ntfs_get_locked_folio(struct address_space *mapping,
3789 		pgoff_t index, pgoff_t end_index, struct file_ra_state *ra)
3790 {
3791 	struct folio *folio;
3792 
3793 	folio = filemap_lock_folio(mapping, index);
3794 	if (IS_ERR(folio)) {
3795 		if (PTR_ERR(folio) != -ENOENT)
3796 			return folio;
3797 
3798 		page_cache_sync_readahead(mapping, ra, NULL, index,
3799 				end_index - index);
3800 		folio = read_mapping_folio(mapping, index, NULL);
3801 		if (!IS_ERR(folio))
3802 			folio_lock(folio);
3803 	}
3804 
3805 	return folio;
3806 }
3807