xref: /linux/fs/ntfs/super.c (revision d8f1df2e133f203cae3f458cba44efa327b093d9)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NTFS kernel super block handling.
4  *
5  * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
6  * Copyright (c) 2001,2002 Richard Russon
7  * Copyright (c) 2025 LG Electronics Co., Ltd.
8  */
9 
10 #include <linux/blkdev.h>	/* For bdev_logical_block_size(). */
11 #include <linux/backing-dev.h>
12 #include <linux/vfs.h>
13 #include <linux/fs_struct.h>
14 #include <linux/sched/mm.h>
15 #include <linux/fs_context.h>
16 #include <linux/fs_parser.h>
17 
18 #include "sysctl.h"
19 #include "logfile.h"
20 #include "index.h"
21 #include "ntfs.h"
22 #include "ea.h"
23 #include "volume.h"
24 
/*
 * A global default upcase table and a corresponding reference count.
 * NOTE(review): the code that fills in and releases these is not in this
 * part of the file - confirm the locking rules against the users.
 */
static __le16 *default_upcase;
static unsigned long ntfs_nr_upcase_users;

/*
 * Driver private workqueue.
 * NOTE(review): creation and the work items queued on it are not visible
 * here - confirm against the rest of the file.
 */
static struct workqueue_struct *ntfs_wq;
30 
/*
 * Error constants/strings used in inode.c::ntfs_show_options().
 *
 * ntfs_parse_param() stores the selected constant in vol->on_errors and
 * ntfs_handle_error() compares vol->on_errors against these values.
 */
enum {
	/* One of these must be present, default is ON_ERRORS_CONTINUE. */
	ON_ERRORS_PANIC = 0x01,
	ON_ERRORS_REMOUNT_RO = 0x02,
	ON_ERRORS_CONTINUE = 0x04,
};

/* Values accepted by the "errors=" mount option and their constants. */
static const struct constant_table ntfs_param_enums[] = {
	{ "panic",		ON_ERRORS_PANIC },
	{ "remount-ro",		ON_ERRORS_REMOUNT_RO },
	{ "continue",		ON_ERRORS_CONTINUE },
	{}
};
45 
/* Values of the "native_symlink=" mount option. */
enum {
	NATIVE_SYMLINK_RAW,
	NATIVE_SYMLINK_REL,
};

/* Strings accepted by "native_symlink=" and the constant each selects. */
static const struct constant_table ntfs_native_symlink_enums[] = {
	{ "raw",		NATIVE_SYMLINK_RAW },
	{ "rel",		NATIVE_SYMLINK_REL },
	{}
};

/* Values of the "symlink=" mount option. */
enum {
	SYMLINK_WSL,
	SYMLINK_NATIVE,
};

/* Strings accepted by "symlink=" and the constant each selects. */
static const struct constant_table ntfs_symlink_enums[] = {
	{ "wsl",		SYMLINK_WSL },
	{ "native",		SYMLINK_NATIVE },
	{}
};
67 
/*
 * Identifiers of the mount options understood by ntfs_parse_param().  Each
 * one is bound to its option name by ntfs_parameters below.
 */
enum {
	Opt_uid,
	Opt_gid,
	Opt_umask,
	Opt_dmask,
	Opt_fmask,
	Opt_errors,
	Opt_nls,
	Opt_charset,
	Opt_show_sys_files,
	Opt_show_meta,
	Opt_case_sensitive,
	Opt_disable_sparse,
	Opt_sparse,
	Opt_mft_zone_multiplier,
	Opt_preallocated_size,
	Opt_sys_immutable,
	Opt_nohidden,
	Opt_hide_dot_files,
	Opt_check_windows_names,
	Opt_acl,
	Opt_discard,
	Opt_nocase,
	Opt_native_symlink,
	Opt_symlink,
};

/*
 * Mount option table handed to fs_parse() by ntfs_parse_param().
 *
 * Some options are aliases as far as ntfs_parse_param() is concerned:
 * "nls" and "iocharset" share one case, as do "show_sys_files" and
 * "showmeta".  "sparse" is accepted but has no effect there.
 */
static const struct fs_parameter_spec ntfs_parameters[] = {
	fsparam_u32("uid",			Opt_uid),
	fsparam_u32("gid",			Opt_gid),
	fsparam_u32oct("umask",			Opt_umask),
	fsparam_u32oct("dmask",			Opt_dmask),
	fsparam_u32oct("fmask",			Opt_fmask),
	fsparam_string("nls",			Opt_nls),
	fsparam_string("iocharset",		Opt_charset),
	fsparam_enum("errors",			Opt_errors, ntfs_param_enums),
	fsparam_flag("show_sys_files",		Opt_show_sys_files),
	fsparam_flag("showmeta",		Opt_show_meta),
	fsparam_flag("case_sensitive",		Opt_case_sensitive),
	fsparam_flag("disable_sparse",		Opt_disable_sparse),
	fsparam_s32("mft_zone_multiplier",	Opt_mft_zone_multiplier),
	fsparam_u64("preallocated_size",	Opt_preallocated_size),
	fsparam_flag("sys_immutable",		Opt_sys_immutable),
	fsparam_flag("nohidden",		Opt_nohidden),
	fsparam_flag("hide_dot_files",		Opt_hide_dot_files),
	fsparam_flag("windows_names",		Opt_check_windows_names),
	fsparam_flag("acl",			Opt_acl),
	fsparam_flag("discard",			Opt_discard),
	fsparam_flag("sparse",			Opt_sparse),
	fsparam_flag("nocase",			Opt_nocase),
	fsparam_enum("native_symlink",		Opt_native_symlink, ntfs_native_symlink_enums),
	fsparam_enum("symlink",			Opt_symlink, ntfs_symlink_enums),
	{}
};
122 
123 static int ntfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
124 {
125 	struct ntfs_volume *vol = fc->s_fs_info;
126 	struct fs_parse_result result;
127 	int opt;
128 
129 	opt = fs_parse(fc, ntfs_parameters, param, &result);
130 	if (opt < 0)
131 		return opt;
132 
133 	switch (opt) {
134 	case Opt_uid:
135 		vol->uid = make_kuid(current_user_ns(), result.uint_32);
136 		break;
137 	case Opt_gid:
138 		vol->gid = make_kgid(current_user_ns(), result.uint_32);
139 		break;
140 	case Opt_umask:
141 		vol->fmask = vol->dmask = result.uint_32;
142 		break;
143 	case Opt_dmask:
144 		vol->dmask = result.uint_32;
145 		break;
146 	case Opt_fmask:
147 		vol->fmask = result.uint_32;
148 		break;
149 	case Opt_errors:
150 		vol->on_errors = result.uint_32;
151 		break;
152 	case Opt_nls:
153 	case Opt_charset:
154 		if (vol->nls_map)
155 			unload_nls(vol->nls_map);
156 		vol->nls_map = load_nls(param->string);
157 		if (!vol->nls_map) {
158 			ntfs_error(vol->sb, "Failed to load NLS table '%s'.",
159 				   param->string);
160 			return -EINVAL;
161 		}
162 		break;
163 	case Opt_mft_zone_multiplier:
164 		if (vol->mft_zone_multiplier && vol->mft_zone_multiplier !=
165 				result.int_32) {
166 			ntfs_error(vol->sb, "Cannot change mft_zone_multiplier on remount.");
167 			return -EINVAL;
168 		}
169 		if (result.int_32 < 1 || result.int_32 > 4) {
170 			ntfs_error(vol->sb,
171 				"Invalid mft_zone_multiplier. Using default value, i.e. 1.");
172 			vol->mft_zone_multiplier = 1;
173 		} else
174 			vol->mft_zone_multiplier = result.int_32;
175 		break;
176 	case Opt_show_sys_files:
177 	case Opt_show_meta:
178 		if (result.boolean)
179 			NVolSetShowSystemFiles(vol);
180 		else
181 			NVolClearShowSystemFiles(vol);
182 		break;
183 	case Opt_case_sensitive:
184 		if (result.boolean)
185 			NVolSetCaseSensitive(vol);
186 		else
187 			NVolClearCaseSensitive(vol);
188 		break;
189 	case Opt_nocase:
190 		if (result.boolean)
191 			NVolClearCaseSensitive(vol);
192 		else
193 			NVolSetCaseSensitive(vol);
194 		break;
195 	case Opt_preallocated_size:
196 		vol->preallocated_size = (loff_t)result.uint_64;
197 		break;
198 	case Opt_sys_immutable:
199 		if (result.boolean)
200 			NVolSetSysImmutable(vol);
201 		else
202 			NVolClearSysImmutable(vol);
203 		break;
204 	case Opt_nohidden:
205 		if (result.boolean)
206 			NVolClearShowHiddenFiles(vol);
207 		else
208 			NVolSetShowHiddenFiles(vol);
209 		break;
210 	case Opt_hide_dot_files:
211 		if (result.boolean)
212 			NVolSetHideDotFiles(vol);
213 		else
214 			NVolClearHideDotFiles(vol);
215 		break;
216 	case Opt_check_windows_names:
217 		if (result.boolean)
218 			NVolSetCheckWindowsNames(vol);
219 		else
220 			NVolClearCheckWindowsNames(vol);
221 		break;
222 	case Opt_acl:
223 #ifdef CONFIG_NTFS_FS_POSIX_ACL
224 		if (result.boolean)
225 			fc->sb_flags |= SB_POSIXACL;
226 		else
227 			fc->sb_flags &= ~SB_POSIXACL;
228 		break;
229 #else
230 		return -EINVAL;
231 #endif
232 	case Opt_discard:
233 		if (result.boolean)
234 			NVolSetDiscard(vol);
235 		else
236 			NVolClearDiscard(vol);
237 		break;
238 	case Opt_disable_sparse:
239 		if (result.boolean)
240 			NVolSetDisableSparse(vol);
241 		else
242 			NVolClearDisableSparse(vol);
243 		break;
244 	case Opt_native_symlink:
245 		if (result.uint_32 == NATIVE_SYMLINK_REL)
246 			NVolSetNativeSymlinkRel(vol);
247 		else
248 			NVolClearNativeSymlinkRel(vol);
249 		break;
250 	case Opt_symlink:
251 		if (result.uint_32 == SYMLINK_NATIVE)
252 			NVolSetSymlinkNative(vol);
253 		else
254 			NVolClearSymlinkNative(vol);
255 		break;
256 	case Opt_sparse:
257 		break;
258 	default:
259 		return -EINVAL;
260 	}
261 
262 	return 0;
263 }
264 
265 static int ntfs_reconfigure(struct fs_context *fc)
266 {
267 	struct super_block *sb = fc->root->d_sb;
268 	struct ntfs_volume *vol = NTFS_SB(sb);
269 
270 	ntfs_debug("Entering with remount");
271 
272 	sync_filesystem(sb);
273 
274 	/*
275 	 * For the read-write compiled driver, if we are remounting read-write,
276 	 * make sure there are no volume errors and that no unsupported volume
277 	 * flags are set.  Also, empty the logfile journal as it would become
278 	 * stale as soon as something is written to the volume and mark the
279 	 * volume dirty so that chkdsk is run if the volume is not umounted
280 	 * cleanly.
281 	 *
282 	 * When remounting read-only, mark the volume clean if no volume errors
283 	 * have occurred.
284 	 */
285 	if (sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY)) {
286 		static const char *es = ".  Cannot remount read-write.";
287 
288 		/* Remounting read-write. */
289 		if (NVolErrors(vol)) {
290 			ntfs_error(sb, "Volume has errors and is read-only%s",
291 					es);
292 			return -EROFS;
293 		}
294 		if (vol->vol_flags & VOLUME_IS_DIRTY) {
295 			ntfs_error(sb, "Volume is dirty and read-only%s", es);
296 			return -EROFS;
297 		}
298 		if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
299 			ntfs_error(sb, "Volume has been modified by chkdsk and is read-only%s", es);
300 			return -EROFS;
301 		}
302 		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
303 			ntfs_error(sb, "Volume has unsupported flags set (0x%x) and is read-only%s",
304 					le16_to_cpu(vol->vol_flags), es);
305 			return -EROFS;
306 		}
307 		if (vol->logfile_ino && !ntfs_empty_logfile(vol->logfile_ino)) {
308 			ntfs_error(sb, "Failed to empty journal LogFile%s",
309 					es);
310 			NVolSetErrors(vol);
311 			return -EROFS;
312 		}
313 	} else if (!sb_rdonly(sb) && (fc->sb_flags & SB_RDONLY)) {
314 		/* Remounting read-only. */
315 		if (!NVolErrors(vol)) {
316 			if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
317 				ntfs_warning(sb,
318 					"Failed to clear dirty bit in volume information flags.  Run chkdsk.");
319 		}
320 	}
321 
322 	ntfs_debug("Done.");
323 	return 0;
324 }
325 
/*
 * Pairs each ON_ERRORS_* value with the string used for it in the "errors="
 * mount option.  The list ends with a { 0, NULL } entry.
 * NOTE(review): the array has external linkage, so it is presumably read
 * from another file (the ON_ERRORS_* comment mentions ntfs_show_options())
 * - confirm.
 */
const struct option_t on_errors_arr[] = {
	{ ON_ERRORS_PANIC,	"panic" },
	{ ON_ERRORS_REMOUNT_RO,	"remount-ro", },
	{ ON_ERRORS_CONTINUE,	"continue", },
	{ 0,			NULL }
};
332 
333 void ntfs_handle_error(struct super_block *sb)
334 {
335 	struct ntfs_volume *vol = NTFS_SB(sb);
336 
337 	if (sb_rdonly(sb))
338 		return;
339 
340 	if (vol->on_errors == ON_ERRORS_REMOUNT_RO) {
341 		sb->s_flags |= SB_RDONLY;
342 		pr_crit("(device %s): Filesystem has been set read-only\n",
343 			sb->s_id);
344 	} else if (vol->on_errors == ON_ERRORS_PANIC) {
345 		panic("ntfs: (device %s): panic from previous error\n",
346 		      sb->s_id);
347 	} else if (vol->on_errors == ON_ERRORS_CONTINUE) {
348 		if (errseq_check(&sb->s_wb_err, vol->wb_err) == -ENODEV) {
349 			NVolSetShutdown(vol);
350 			vol->wb_err = sb->s_wb_err;
351 		}
352 	}
353 }
354 
355 /*
356  * ntfs_write_volume_flags - write new flags to the volume information flags
357  * @vol:	ntfs volume on which to modify the flags
358  * @flags:	new flags value for the volume information flags
359  *
360  * Internal function.  You probably want to use ntfs_{set,clear}_volume_flags()
361  * instead (see below).
362  *
363  * Replace the volume information flags on the volume @vol with the value
364  * supplied in @flags.  Note, this overwrites the volume information flags, so
365  * make sure to combine the flags you want to modify with the old flags and use
366  * the result when calling ntfs_write_volume_flags().
367  *
368  * Return 0 on success and -errno on error.
369  */
370 static int ntfs_write_volume_flags(struct ntfs_volume *vol, const __le16 flags)
371 {
372 	struct ntfs_inode *ni = NTFS_I(vol->vol_ino);
373 	struct volume_information *vi;
374 	struct ntfs_attr_search_ctx *ctx;
375 	int err;
376 
377 	ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.",
378 			le16_to_cpu(vol->vol_flags), le16_to_cpu(flags));
379 	mutex_lock(&ni->mrec_lock);
380 	if (vol->vol_flags == flags)
381 		goto done;
382 
383 	ctx = ntfs_attr_get_search_ctx(ni, NULL);
384 	if (!ctx) {
385 		err = -ENOMEM;
386 		goto put_unm_err_out;
387 	}
388 
389 	err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0,
390 			ctx);
391 	if (err)
392 		goto put_unm_err_out;
393 
394 	vi = (struct volume_information *)((u8 *)ctx->attr +
395 			le16_to_cpu(ctx->attr->data.resident.value_offset));
396 	vol->vol_flags = vi->flags = flags;
397 	mark_mft_record_dirty(ctx->ntfs_ino);
398 	ntfs_attr_put_search_ctx(ctx);
399 done:
400 	mutex_unlock(&ni->mrec_lock);
401 	ntfs_debug("Done.");
402 	return 0;
403 put_unm_err_out:
404 	if (ctx)
405 		ntfs_attr_put_search_ctx(ctx);
406 	mutex_unlock(&ni->mrec_lock);
407 	ntfs_error(vol->sb, "Failed with error code %i.", -err);
408 	return err;
409 }
410 
411 /*
412  * ntfs_set_volume_flags - set bits in the volume information flags
413  * @vol:	ntfs volume on which to modify the flags
414  * @flags:	flags to set on the volume
415  *
416  * Set the bits in @flags in the volume information flags on the volume @vol.
417  *
418  * Return 0 on success and -errno on error.
419  */
420 int ntfs_set_volume_flags(struct ntfs_volume *vol, __le16 flags)
421 {
422 	flags &= VOLUME_FLAGS_MASK;
423 	return ntfs_write_volume_flags(vol, vol->vol_flags | flags);
424 }
425 
426 /*
427  * ntfs_clear_volume_flags - clear bits in the volume information flags
428  * @vol:	ntfs volume on which to modify the flags
429  * @flags:	flags to clear on the volume
430  *
431  * Clear the bits in @flags in the volume information flags on the volume @vol.
432  *
433  * Return 0 on success and -errno on error.
434  */
435 int ntfs_clear_volume_flags(struct ntfs_volume *vol, __le16 flags)
436 {
437 	flags &= VOLUME_FLAGS_MASK;
438 	flags = vol->vol_flags & cpu_to_le16(~le16_to_cpu(flags));
439 	return ntfs_write_volume_flags(vol, flags);
440 }
441 
442 int ntfs_write_volume_label(struct ntfs_volume *vol, char *label)
443 {
444 	struct ntfs_inode *vol_ni = NTFS_I(vol->vol_ino);
445 	struct ntfs_attr_search_ctx *ctx;
446 	char *new_label;
447 	__le16 *uname;
448 	int uname_len, ret;
449 
450 	uname_len = ntfs_nlstoucs(vol, label, strlen(label),
451 				  &uname, FSLABEL_MAX);
452 	if (uname_len < 0) {
453 		ntfs_error(vol->sb,
454 			"Failed to convert volume label '%s' to Unicode.",
455 			label);
456 		return uname_len;
457 	}
458 
459 	if (uname_len > NTFS_MAX_LABEL_LEN) {
460 		ntfs_error(vol->sb,
461 			   "Volume label is too long (max %d characters).",
462 			   NTFS_MAX_LABEL_LEN);
463 		kvfree(uname);
464 		return -EINVAL;
465 	}
466 
467 	/*
468 	 * Allocate the in-memory label copy up front. If kstrdup() fails we
469 	 * bail out before touching on-disk metadata, so the in-memory label
470 	 * and the on-disk label stay in sync.
471 	 */
472 	new_label = kstrdup(label, GFP_KERNEL);
473 	if (!new_label) {
474 		kvfree(uname);
475 		return -ENOMEM;
476 	}
477 
478 	mutex_lock(&vol_ni->mrec_lock);
479 	ctx = ntfs_attr_get_search_ctx(vol_ni, NULL);
480 	if (!ctx) {
481 		ret = -ENOMEM;
482 		goto out;
483 	}
484 
485 	ret = ntfs_attr_lookup(AT_VOLUME_NAME, NULL, 0, 0, 0, NULL, 0,
486 			       ctx);
487 	if (!ret)
488 		ret = ntfs_attr_record_rm(ctx);
489 	else if (ret == -ENOENT)
490 		ret = 0;
491 	ntfs_attr_put_search_ctx(ctx);
492 	if (ret)
493 		goto out;
494 
495 	ret = ntfs_resident_attr_record_add(vol_ni, AT_VOLUME_NAME, AT_UNNAMED, 0,
496 					    (u8 *)uname, uname_len * sizeof(__le16), 0);
497 out:
498 	if (ret >= 0) {
499 		char *old_label;
500 
501 		mutex_lock(&vol->volume_label_lock);
502 		old_label = vol->volume_label;
503 		vol->volume_label = new_label;
504 		mutex_unlock(&vol->volume_label_lock);
505 
506 		kfree(old_label);
507 		mark_inode_dirty_sync(vol->vol_ino);
508 		ret = 0;
509 	}
510 	mutex_unlock(&vol_ni->mrec_lock);
511 	kvfree(uname);
512 
513 	if (ret < 0)
514 		kfree(new_label);
515 	return ret;
516 }
517 
518 /*
519  * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector
520  * @sb:		Super block of the device to which @b belongs.
521  * @b:		Boot sector of device @sb to check.
522  * @silent:	If 'true', all output will be silenced.
523  *
524  * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot
525  * sector. Returns 'true' if it is valid and 'false' if not.
526  *
527  * @sb is only needed for warning/error output, i.e. it can be NULL when silent
528  * is 'true'.
529  */
530 static bool is_boot_sector_ntfs(const struct super_block *sb,
531 		const struct ntfs_boot_sector *b, const bool silent)
532 {
533 	/*
534 	 * Check that checksum == sum of u32 values from b to the checksum
535 	 * field.  If checksum is zero, no checking is done.  We will work when
536 	 * the checksum test fails, since some utilities update the boot sector
537 	 * ignoring the checksum which leaves the checksum out-of-date.  We
538 	 * report a warning if this is the case.
539 	 */
540 	if ((void *)b < (void *)&b->checksum && b->checksum && !silent) {
541 		__le32 *u;
542 		u32 i;
543 
544 		for (i = 0, u = (__le32 *)b; u < (__le32 *)(&b->checksum); ++u)
545 			i += le32_to_cpup(u);
546 		if (le32_to_cpu(b->checksum) != i)
547 			ntfs_warning(sb, "Invalid boot sector checksum.");
548 	}
549 	/* Check OEMidentifier is "NTFS    " */
550 	if (b->oem_id != magicNTFS)
551 		goto not_ntfs;
552 	/* Check bytes per sector value is between 256 and 4096. */
553 	if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 ||
554 	    le16_to_cpu(b->bpb.bytes_per_sector) > 0x1000)
555 		goto not_ntfs;
556 	/*
557 	 * Check sectors per cluster value is valid and the cluster size
558 	 * is not above the maximum (2MB).
559 	 */
560 	if (b->bpb.sectors_per_cluster > 0x80 &&
561 	    b->bpb.sectors_per_cluster < 0xf4)
562 		goto not_ntfs;
563 
564 	/* Check reserved/unused fields are really zero. */
565 	if (le16_to_cpu(b->bpb.reserved_sectors) ||
566 			le16_to_cpu(b->bpb.root_entries) ||
567 			le16_to_cpu(b->bpb.sectors) ||
568 			le16_to_cpu(b->bpb.sectors_per_fat) ||
569 			le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats)
570 		goto not_ntfs;
571 	/* Check clusters per file mft record value is valid. */
572 	if ((u8)b->clusters_per_mft_record < 0xe1 ||
573 			(u8)b->clusters_per_mft_record > 0xf7)
574 		switch (b->clusters_per_mft_record) {
575 		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
576 			break;
577 		default:
578 			goto not_ntfs;
579 		}
580 	/* Check clusters per index block value is valid. */
581 	if ((u8)b->clusters_per_index_record < 0xe1 ||
582 			(u8)b->clusters_per_index_record > 0xf7)
583 		switch (b->clusters_per_index_record) {
584 		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
585 			break;
586 		default:
587 			goto not_ntfs;
588 		}
589 	/*
590 	 * Check for valid end of sector marker. We will work without it, but
591 	 * many BIOSes will refuse to boot from a bootsector if the magic is
592 	 * incorrect, so we emit a warning.
593 	 */
594 	if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
595 		ntfs_warning(sb, "Invalid end of sector marker.");
596 	return true;
597 not_ntfs:
598 	return false;
599 }
600 
601 /*
602  * read_ntfs_boot_sector - read the NTFS boot sector of a device
603  * @sb:		super block of device to read the boot sector from
604  * @silent:	if true, suppress all output
605  *
606  * Reads the boot sector from the device and validates it.
607  */
608 static char *read_ntfs_boot_sector(struct super_block *sb,
609 		const int silent)
610 {
611 	char *boot_sector;
612 
613 	boot_sector = kzalloc(PAGE_SIZE, GFP_NOFS);
614 	if (!boot_sector)
615 		return NULL;
616 
617 	if (ntfs_bdev_read(sb->s_bdev, boot_sector, 0, PAGE_SIZE)) {
618 		if (!silent)
619 			ntfs_error(sb, "Unable to read primary boot sector.");
620 		kfree(boot_sector);
621 		return NULL;
622 	}
623 
624 	if (!is_boot_sector_ntfs(sb, (struct ntfs_boot_sector *)boot_sector,
625 				 silent)) {
626 		if (!silent)
627 			ntfs_error(sb, "Primary boot sector is invalid.");
628 		kfree(boot_sector);
629 		return NULL;
630 	}
631 
632 	return boot_sector;
633 }
634 
635 /*
636  * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol
637  * @vol:	volume structure to initialise with data from boot sector
638  * @b:		boot sector to parse
639  *
640  * Parse the ntfs boot sector @b and store all imporant information therein in
641  * the ntfs super block @vol.  Return 'true' on success and 'false' on error.
642  */
643 static bool parse_ntfs_boot_sector(struct ntfs_volume *vol,
644 		const struct ntfs_boot_sector *b)
645 {
646 	unsigned int sectors_per_cluster, sectors_per_cluster_bits, nr_hidden_sects;
647 	int clusters_per_mft_record, clusters_per_index_record;
648 	s64 ll;
649 
650 	vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector);
651 	vol->sector_size_bits = ffs(vol->sector_size) - 1;
652 	ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size,
653 			vol->sector_size);
654 	ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits,
655 			vol->sector_size_bits);
656 	if (vol->sector_size < vol->sb->s_blocksize) {
657 		ntfs_error(vol->sb,
658 			"Sector size (%i) is smaller than the device block size (%lu).  This is not supported.",
659 			vol->sector_size, vol->sb->s_blocksize);
660 		return false;
661 	}
662 
663 	if (b->bpb.sectors_per_cluster >= 0xf4)
664 		sectors_per_cluster = 1U << -(s8)b->bpb.sectors_per_cluster;
665 	else
666 		sectors_per_cluster = b->bpb.sectors_per_cluster;
667 	ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster);
668 	sectors_per_cluster_bits = ffs(sectors_per_cluster) - 1;
669 	ntfs_debug("sectors_per_cluster_bits = 0x%x",
670 			sectors_per_cluster_bits);
671 	nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors);
672 	ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects);
673 	vol->cluster_size = vol->sector_size << sectors_per_cluster_bits;
674 	vol->cluster_size_mask = vol->cluster_size - 1;
675 	vol->cluster_size_bits = ffs(vol->cluster_size) - 1;
676 	ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size,
677 			vol->cluster_size);
678 	ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
679 	ntfs_debug("vol->cluster_size_bits = %i", vol->cluster_size_bits);
680 	if (vol->cluster_size < vol->sector_size) {
681 		ntfs_error(vol->sb,
682 			"Cluster size (%i) is smaller than the sector size (%i).  This is not supported.",
683 			vol->cluster_size, vol->sector_size);
684 		return false;
685 	}
686 	clusters_per_mft_record = b->clusters_per_mft_record;
687 	ntfs_debug("clusters_per_mft_record = %i (0x%x)",
688 			clusters_per_mft_record, clusters_per_mft_record);
689 	if (clusters_per_mft_record > 0)
690 		vol->mft_record_size = vol->cluster_size <<
691 				(ffs(clusters_per_mft_record) - 1);
692 	else
693 		/*
694 		 * When mft_record_size < cluster_size, clusters_per_mft_record
695 		 * = -log2(mft_record_size) bytes. mft_record_size normaly is
696 		 * 1024 bytes, which is encoded as 0xF6 (-10 in decimal).
697 		 */
698 		vol->mft_record_size = 1 << -clusters_per_mft_record;
699 	vol->mft_record_size_mask = vol->mft_record_size - 1;
700 	vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1;
701 	ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size,
702 			vol->mft_record_size);
703 	ntfs_debug("vol->mft_record_size_mask = 0x%x",
704 			vol->mft_record_size_mask);
705 	ntfs_debug("vol->mft_record_size_bits = %i (0x%x)",
706 			vol->mft_record_size_bits, vol->mft_record_size_bits);
707 	/*
708 	 * We cannot support mft record sizes above the PAGE_SIZE since
709 	 * we store $MFT/$DATA, the table of mft records in the page cache.
710 	 */
711 	if (vol->mft_record_size > PAGE_SIZE) {
712 		ntfs_error(vol->sb,
713 			"Mft record size (%i) exceeds the PAGE_SIZE on your system (%lu).  This is not supported.",
714 			vol->mft_record_size, PAGE_SIZE);
715 		return false;
716 	}
717 	/* We cannot support mft record sizes below the sector size. */
718 	if (vol->mft_record_size < vol->sector_size) {
719 		ntfs_warning(vol->sb, "Mft record size (%i) is smaller than the sector size (%i).",
720 				vol->mft_record_size, vol->sector_size);
721 	}
722 	clusters_per_index_record = b->clusters_per_index_record;
723 	ntfs_debug("clusters_per_index_record = %i (0x%x)",
724 			clusters_per_index_record, clusters_per_index_record);
725 	if (clusters_per_index_record > 0)
726 		vol->index_record_size = vol->cluster_size <<
727 				(ffs(clusters_per_index_record) - 1);
728 	else
729 		/*
730 		 * When index_record_size < cluster_size,
731 		 * clusters_per_index_record = -log2(index_record_size) bytes.
732 		 * index_record_size normaly equals 4096 bytes, which is
733 		 * encoded as 0xF4 (-12 in decimal).
734 		 */
735 		vol->index_record_size = 1 << -clusters_per_index_record;
736 	vol->index_record_size_mask = vol->index_record_size - 1;
737 	vol->index_record_size_bits = ffs(vol->index_record_size) - 1;
738 	ntfs_debug("vol->index_record_size = %i (0x%x)",
739 			vol->index_record_size, vol->index_record_size);
740 	ntfs_debug("vol->index_record_size_mask = 0x%x",
741 			vol->index_record_size_mask);
742 	ntfs_debug("vol->index_record_size_bits = %i (0x%x)",
743 			vol->index_record_size_bits,
744 			vol->index_record_size_bits);
745 	/* We cannot support index record sizes below the sector size. */
746 	if (vol->index_record_size < vol->sector_size) {
747 		ntfs_error(vol->sb,
748 			   "Index record size (%i) is smaller than the sector size (%i).  This is not supported.",
749 			   vol->index_record_size, vol->sector_size);
750 		return false;
751 	}
752 	/*
753 	 * Get the size of the volume in clusters and check for 64-bit-ness.
754 	 * Windows currently only uses 32 bits to save the clusters so we do
755 	 * the same as it is much faster on 32-bit CPUs.
756 	 */
757 	ll = le64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits;
758 	if ((u64)ll >= 1ULL << 32) {
759 		ntfs_error(vol->sb, "Cannot handle 64-bit clusters.");
760 		return false;
761 	}
762 	vol->nr_clusters = ll;
763 	ntfs_debug("vol->nr_clusters = 0x%llx", vol->nr_clusters);
764 	ll = le64_to_cpu(b->mft_lcn);
765 	if (ll >= vol->nr_clusters) {
766 		ntfs_error(vol->sb, "MFT LCN (%lli, 0x%llx) is beyond end of volume.  Weird.",
767 				ll, ll);
768 		return false;
769 	}
770 	vol->mft_lcn = ll;
771 	ntfs_debug("vol->mft_lcn = 0x%llx", vol->mft_lcn);
772 	ll = le64_to_cpu(b->mftmirr_lcn);
773 	if (ll >= vol->nr_clusters) {
774 		ntfs_error(vol->sb, "MFTMirr LCN (%lli, 0x%llx) is beyond end of volume.  Weird.",
775 				ll, ll);
776 		return false;
777 	}
778 	vol->mftmirr_lcn = ll;
779 	ntfs_debug("vol->mftmirr_lcn = 0x%llx", vol->mftmirr_lcn);
780 	/*
781 	 * Work out the size of the mft mirror in number of mft records. If the
782 	 * cluster size is less than or equal to the size taken by four mft
783 	 * records, the mft mirror stores the first four mft records. If the
784 	 * cluster size is bigger than the size taken by four mft records, the
785 	 * mft mirror contains as many mft records as will fit into one
786 	 * cluster.
787 	 */
788 	if (vol->cluster_size <= (4 << vol->mft_record_size_bits))
789 		vol->mftmirr_size = 4;
790 	else
791 		vol->mftmirr_size = vol->cluster_size >>
792 				vol->mft_record_size_bits;
793 	ntfs_debug("vol->mftmirr_size = %i", vol->mftmirr_size);
794 	vol->serial_no = le64_to_cpu(b->volume_serial_number);
795 	ntfs_debug("vol->serial_no = 0x%llx", vol->serial_no);
796 
797 	vol->sparse_compression_unit = 4;
798 	if (vol->cluster_size > 4096) {
799 		switch (vol->cluster_size) {
800 		case 65536:
801 			vol->sparse_compression_unit = 0;
802 			break;
803 		case 32768:
804 			vol->sparse_compression_unit = 1;
805 			break;
806 		case 16384:
807 			vol->sparse_compression_unit = 2;
808 			break;
809 		case 8192:
810 			vol->sparse_compression_unit = 3;
811 			break;
812 		}
813 	}
814 
815 	return true;
816 }
817 
818 /*
819  * ntfs_setup_allocators - initialize the cluster and mft allocators
820  * @vol:	volume structure for which to setup the allocators
821  *
822  * Setup the cluster (lcn) and mft allocators to the starting values.
823  */
824 static void ntfs_setup_allocators(struct ntfs_volume *vol)
825 {
826 	s64 mft_zone_size, mft_lcn;
827 
828 	ntfs_debug("vol->mft_zone_multiplier = 0x%x",
829 			vol->mft_zone_multiplier);
830 	/* Determine the size of the MFT zone. */
831 	mft_zone_size = vol->nr_clusters;
832 	switch (vol->mft_zone_multiplier) {  /* % of volume size in clusters */
833 	case 4:
834 		mft_zone_size >>= 1;			/* 50%   */
835 		break;
836 	case 3:
837 		mft_zone_size = (mft_zone_size +
838 				(mft_zone_size >> 1)) >> 2;	/* 37.5% */
839 		break;
840 	case 2:
841 		mft_zone_size >>= 2;			/* 25%   */
842 		break;
843 	/* case 1: */
844 	default:
845 		mft_zone_size >>= 3;			/* 12.5% */
846 		break;
847 	}
848 	/* Setup the mft zone. */
849 	vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn;
850 	ntfs_debug("vol->mft_zone_pos = 0x%llx", vol->mft_zone_pos);
851 	/*
852 	 * Calculate the mft_lcn for an unmodified NTFS volume (see mkntfs
853 	 * source) and if the actual mft_lcn is in the expected place or even
854 	 * further to the front of the volume, extend the mft_zone to cover the
855 	 * beginning of the volume as well.  This is in order to protect the
856 	 * area reserved for the mft bitmap as well within the mft_zone itself.
857 	 * On non-standard volumes we do not protect it as the overhead would
858 	 * be higher than the speed increase we would get by doing it.
859 	 */
860 	mft_lcn = NTFS_B_TO_CLU(vol, 8192 + 2 * vol->cluster_size - 1);
861 	if (mft_lcn * vol->cluster_size < 16 * 1024)
862 		mft_lcn = (16 * 1024 + vol->cluster_size - 1) >>
863 				vol->cluster_size_bits;
864 	if (vol->mft_zone_start <= mft_lcn)
865 		vol->mft_zone_start = 0;
866 	ntfs_debug("vol->mft_zone_start = 0x%llx", vol->mft_zone_start);
867 	/*
868 	 * Need to cap the mft zone on non-standard volumes so that it does
869 	 * not point outside the boundaries of the volume.  We do this by
870 	 * halving the zone size until we are inside the volume.
871 	 */
872 	vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
873 	while (vol->mft_zone_end >= vol->nr_clusters) {
874 		mft_zone_size >>= 1;
875 		vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
876 	}
877 	ntfs_debug("vol->mft_zone_end = 0x%llx", vol->mft_zone_end);
878 	/*
879 	 * Set the current position within each data zone to the start of the
880 	 * respective zone.
881 	 */
882 	vol->data1_zone_pos = vol->mft_zone_end;
883 	ntfs_debug("vol->data1_zone_pos = 0x%llx", vol->data1_zone_pos);
884 	vol->data2_zone_pos = 0;
885 	ntfs_debug("vol->data2_zone_pos = 0x%llx", vol->data2_zone_pos);
886 
887 	/* Set the mft data allocation position to mft record 24. */
888 	vol->mft_data_pos = 24;
889 	ntfs_debug("vol->mft_data_pos = 0x%llx", vol->mft_data_pos);
890 }
891 
892 static struct lock_class_key mftmirr_runlist_lock_key,
893 			     mftmirr_mrec_lock_key;
894 /*
895  * load_and_init_mft_mirror - load and setup the mft mirror inode for a volume
896  * @vol:	ntfs super block describing device whose mft mirror to load
897  *
898  * Return 'true' on success or 'false' on error.
899  */
900 static bool load_and_init_mft_mirror(struct ntfs_volume *vol)
901 {
902 	struct inode *tmp_ino;
903 	struct ntfs_inode *tmp_ni;
904 
905 	ntfs_debug("Entering.");
906 	/* Get mft mirror inode. */
907 	tmp_ino = ntfs_iget(vol->sb, FILE_MFTMirr);
908 	if (IS_ERR(tmp_ino)) {
909 		if (!IS_ERR(tmp_ino))
910 			iput(tmp_ino);
911 		/* Caller will display error message. */
912 		return false;
913 	}
914 	lockdep_set_class(&NTFS_I(tmp_ino)->runlist.lock,
915 			  &mftmirr_runlist_lock_key);
916 	lockdep_set_class(&NTFS_I(tmp_ino)->mrec_lock,
917 			  &mftmirr_mrec_lock_key);
918 	/*
919 	 * Re-initialize some specifics about $MFTMirr's inode as
920 	 * ntfs_read_inode() will have set up the default ones.
921 	 */
922 	/* Set uid and gid to root. */
923 	tmp_ino->i_uid = GLOBAL_ROOT_UID;
924 	tmp_ino->i_gid = GLOBAL_ROOT_GID;
925 	/* Regular file.  No access for anyone. */
926 	tmp_ino->i_mode = S_IFREG;
927 	/* No VFS initiated operations allowed for $MFTMirr. */
928 	tmp_ino->i_op = &ntfs_empty_inode_ops;
929 	tmp_ino->i_fop = &ntfs_empty_file_ops;
930 	/* Put in our special address space operations. */
931 	tmp_ino->i_mapping->a_ops = &ntfs_aops;
932 	tmp_ni = NTFS_I(tmp_ino);
933 	/* The $MFTMirr, like the $MFT is multi sector transfer protected. */
934 	NInoSetMstProtected(tmp_ni);
935 	NInoSetSparseDisabled(tmp_ni);
936 	/*
937 	 * Set up our little cheat allowing us to reuse the async read io
938 	 * completion handler for directories.
939 	 */
940 	tmp_ni->itype.index.block_size = vol->mft_record_size;
941 	tmp_ni->itype.index.block_size_bits = vol->mft_record_size_bits;
942 	vol->mftmirr_ino = tmp_ino;
943 	ntfs_debug("Done.");
944 	return true;
945 }
946 
947 /*
948  * check_mft_mirror - compare contents of the mft mirror with the mft
949  * @vol:	ntfs super block describing device whose mft mirror to check
950  *
951  * Return 'true' on success or 'false' on error.
952  *
953  * Note, this function also results in the mft mirror runlist being completely
954  * mapped into memory.  The mft mirror write code requires this and will BUG()
955  * should it find an unmapped runlist element.
956  */
957 static bool check_mft_mirror(struct ntfs_volume *vol)
958 {
959 	struct super_block *sb = vol->sb;
960 	struct ntfs_inode *mirr_ni;
961 	struct folio *mft_folio = NULL, *mirr_folio = NULL;
962 	u8 *kmft = NULL, *kmirr = NULL;
963 	struct runlist_element *rl, rl2[2];
964 	pgoff_t index;
965 	int mrecs_per_page, i;
966 
967 	ntfs_debug("Entering.");
968 	/* Compare contents of $MFT and $MFTMirr. */
969 	mrecs_per_page = PAGE_SIZE / vol->mft_record_size;
970 	index = i = 0;
971 	do {
972 		u32 bytes;
973 
974 		/* Switch pages if necessary. */
975 		if (!(i % mrecs_per_page)) {
976 			if (index) {
977 				kunmap_local(kmirr);
978 				folio_put(mirr_folio);
979 				kunmap_local(kmft);
980 				folio_put(mft_folio);
981 			}
982 			/* Get the $MFT page. */
983 			mft_folio = read_mapping_folio(vol->mft_ino->i_mapping,
984 					index, NULL);
985 			if (IS_ERR(mft_folio)) {
986 				ntfs_error(sb, "Failed to read $MFT.");
987 				return false;
988 			}
989 			kmft = kmap_local_folio(mft_folio, 0);
990 			/* Get the $MFTMirr page. */
991 			mirr_folio = read_mapping_folio(vol->mftmirr_ino->i_mapping,
992 					index, NULL);
993 			if (IS_ERR(mirr_folio)) {
994 				ntfs_error(sb, "Failed to read $MFTMirr.");
995 				goto mft_unmap_out;
996 			}
997 			kmirr = kmap_local_folio(mirr_folio, 0);
998 			++index;
999 		}
1000 
1001 		/* Do not check the record if it is not in use. */
1002 		if (((struct mft_record *)kmft)->flags & MFT_RECORD_IN_USE) {
1003 			/* Make sure the record is ok. */
1004 			if (ntfs_is_baad_recordp((__le32 *)kmft)) {
1005 				ntfs_error(sb,
1006 					"Incomplete multi sector transfer detected in mft record %i.",
1007 					i);
1008 mm_unmap_out:
1009 				kunmap_local(kmirr);
1010 				folio_put(mirr_folio);
1011 mft_unmap_out:
1012 				kunmap_local(kmft);
1013 				folio_put(mft_folio);
1014 				return false;
1015 			}
1016 		}
1017 		/* Do not check the mirror record if it is not in use. */
1018 		if (((struct mft_record *)kmirr)->flags & MFT_RECORD_IN_USE) {
1019 			if (ntfs_is_baad_recordp((__le32 *)kmirr)) {
1020 				ntfs_error(sb,
1021 					"Incomplete multi sector transfer detected in mft mirror record %i.",
1022 					i);
1023 				goto mm_unmap_out;
1024 			}
1025 		}
1026 		/* Get the amount of data in the current record. */
1027 		bytes = le32_to_cpu(((struct mft_record *)kmft)->bytes_in_use);
1028 		if (bytes < sizeof(struct mft_record_old) ||
1029 		    bytes > vol->mft_record_size ||
1030 		    ntfs_is_baad_recordp((__le32 *)kmft)) {
1031 			bytes = le32_to_cpu(((struct mft_record *)kmirr)->bytes_in_use);
1032 			if (bytes < sizeof(struct mft_record_old) ||
1033 			    bytes > vol->mft_record_size ||
1034 			    ntfs_is_baad_recordp((__le32 *)kmirr))
1035 				bytes = vol->mft_record_size;
1036 		}
1037 		/* Compare the two records. */
1038 		if (memcmp(kmft, kmirr, bytes)) {
1039 			ntfs_error(sb,
1040 				   "$MFT and $MFTMirr record %i do not match.  Run chkdsk.",
1041 				   i);
1042 			goto mm_unmap_out;
1043 		}
1044 		kmft += vol->mft_record_size;
1045 		kmirr += vol->mft_record_size;
1046 	} while (++i < vol->mftmirr_size);
1047 	/* Release the last folios. */
1048 	kunmap_local(kmirr);
1049 	folio_put(mirr_folio);
1050 	kunmap_local(kmft);
1051 	folio_put(mft_folio);
1052 
1053 	/* Construct the mft mirror runlist by hand. */
1054 	rl2[0].vcn = 0;
1055 	rl2[0].lcn = vol->mftmirr_lcn;
1056 	rl2[0].length = NTFS_B_TO_CLU(vol, vol->mftmirr_size * vol->mft_record_size +
1057 				vol->cluster_size - 1);
1058 	rl2[1].vcn = rl2[0].length;
1059 	rl2[1].lcn = LCN_ENOENT;
1060 	rl2[1].length = 0;
1061 	/*
1062 	 * Because we have just read all of the mft mirror, we know we have
1063 	 * mapped the full runlist for it.
1064 	 */
1065 	mirr_ni = NTFS_I(vol->mftmirr_ino);
1066 	down_read(&mirr_ni->runlist.lock);
1067 	rl = mirr_ni->runlist.rl;
1068 	/* Compare the two runlists.  They must be identical. */
1069 	i = 0;
1070 	do {
1071 		if (rl2[i].vcn != rl[i].vcn || rl2[i].lcn != rl[i].lcn ||
1072 				rl2[i].length != rl[i].length) {
1073 			ntfs_error(sb, "$MFTMirr location mismatch.  Run chkdsk.");
1074 			up_read(&mirr_ni->runlist.lock);
1075 			return false;
1076 		}
1077 	} while (rl2[i++].length);
1078 	up_read(&mirr_ni->runlist.lock);
1079 	ntfs_debug("Done.");
1080 	return true;
1081 }
1082 
1083 /*
1084  * load_and_check_logfile - load and check the logfile inode for a volume
1085  * @vol: ntfs volume to load the logfile for
1086  * @rp: on success, set to the restart page header
1087  *
1088  * Return 0 on success or errno on error.
1089  */
1090 static int load_and_check_logfile(struct ntfs_volume *vol,
1091 				  struct restart_page_header **rp)
1092 {
1093 	struct inode *tmp_ino;
1094 	int err = 0;
1095 
1096 	ntfs_debug("Entering.");
1097 	tmp_ino = ntfs_iget(vol->sb, FILE_LogFile);
1098 	if (IS_ERR(tmp_ino)) {
1099 		if (!IS_ERR(tmp_ino))
1100 			iput(tmp_ino);
1101 		/* Caller will display error message. */
1102 		return -ENOENT;
1103 	}
1104 	if (!ntfs_check_logfile(tmp_ino, rp))
1105 		err = -EINVAL;
1106 	NInoSetSparseDisabled(NTFS_I(tmp_ino));
1107 	vol->logfile_ino = tmp_ino;
1108 	ntfs_debug("Done.");
1109 	return err;
1110 }
1111 
1112 #define NTFS_HIBERFIL_HEADER_SIZE	4096
1113 
1114 /*
1115  * check_windows_hibernation_status - check if Windows is suspended on a volume
1116  * @vol:	ntfs super block of device to check
1117  *
1118  * Check if Windows is hibernated on the ntfs volume @vol.  This is done by
1119  * looking for the file hiberfil.sys in the root directory of the volume.  If
1120  * the file is not present Windows is definitely not suspended.
1121  *
1122  * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
1123  * definitely suspended (this volume is not the system volume).  Caveat:  on a
1124  * system with many volumes it is possible that the < 4kiB check is bogus but
1125  * for now this should do fine.
1126  *
1127  * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
1128  * hiberfil header (which is the first 4kiB).  If this begins with "hibr",
1129  * Windows is definitely suspended.  If it is completely full of zeroes,
1130  * Windows is definitely not hibernated.  Any other case is treated as if
1131  * Windows is suspended.  This caters for the above mentioned caveat of a
1132  * system with many volumes where no "hibr" magic would be present and there is
1133  * no zero header.
1134  *
1135  * Return 0 if Windows is not hibernated on the volume, >0 if Windows is
1136  * hibernated on the volume, and -errno on error.
1137  */
1138 static int check_windows_hibernation_status(struct ntfs_volume *vol)
1139 {
1140 	static const __le16 hiberfil[13] = { cpu_to_le16('h'),
1141 			cpu_to_le16('i'), cpu_to_le16('b'),
1142 			cpu_to_le16('e'), cpu_to_le16('r'),
1143 			cpu_to_le16('f'), cpu_to_le16('i'),
1144 			cpu_to_le16('l'), cpu_to_le16('.'),
1145 			cpu_to_le16('s'), cpu_to_le16('y'),
1146 			cpu_to_le16('s'), 0 };
1147 	u64 mref;
1148 	struct inode *vi;
1149 	struct folio *folio;
1150 	u32 *kaddr, *kend, *start_addr = NULL;
1151 	struct ntfs_name *name = NULL;
1152 	int ret = 1;
1153 
1154 	ntfs_debug("Entering.");
1155 	/*
1156 	 * Find the inode number for the hibernation file by looking up the
1157 	 * filename hiberfil.sys in the root directory.
1158 	 */
1159 	inode_lock(vol->root_ino);
1160 	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
1161 			&name);
1162 	inode_unlock(vol->root_ino);
1163 	kfree(name);
1164 	if (IS_ERR_MREF(mref)) {
1165 		ret = MREF_ERR(mref);
1166 		/* If the file does not exist, Windows is not hibernated. */
1167 		if (ret == -ENOENT) {
1168 			ntfs_debug("hiberfil.sys not present.  Windows is not hibernated on the volume.");
1169 			return 0;
1170 		}
1171 		/* A real error occurred. */
1172 		ntfs_error(vol->sb, "Failed to find inode number for hiberfil.sys.");
1173 		return ret;
1174 	}
1175 	/* Get the inode. */
1176 	vi = ntfs_iget(vol->sb, MREF(mref));
1177 	if (IS_ERR(vi)) {
1178 		if (!IS_ERR(vi))
1179 			iput(vi);
1180 		ntfs_error(vol->sb, "Failed to load hiberfil.sys.");
1181 		return IS_ERR(vi) ? PTR_ERR(vi) : -EIO;
1182 	}
1183 	if (unlikely(i_size_read(vi) < NTFS_HIBERFIL_HEADER_SIZE)) {
1184 		ntfs_debug("hiberfil.sys is smaller than 4kiB (0x%llx).  Windows is hibernated on the volume.  This is not the system volume.",
1185 				i_size_read(vi));
1186 		goto iput_out;
1187 	}
1188 
1189 	folio = read_mapping_folio(vi->i_mapping, 0, NULL);
1190 	if (IS_ERR(folio)) {
1191 		ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
1192 		ret = PTR_ERR(folio);
1193 		goto iput_out;
1194 	}
1195 	start_addr = (u32 *)kmap_local_folio(folio, 0);
1196 	kaddr = start_addr;
1197 	if (*(__le32 *)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
1198 		ntfs_debug("Magic \"hibr\" found in hiberfil.sys.  Windows is hibernated on the volume.  This is the system volume.");
1199 		goto unm_iput_out;
1200 	}
1201 	kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
1202 	do {
1203 		if (unlikely(*kaddr)) {
1204 			ntfs_debug("hiberfil.sys is larger than 4kiB (0x%llx), does not contain the \"hibr\" magic, and does not have a zero header.  Windows is hibernated on the volume.  This is not the system volume.",
1205 					i_size_read(vi));
1206 			goto unm_iput_out;
1207 		}
1208 	} while (++kaddr < kend);
1209 	ntfs_debug("hiberfil.sys contains a zero header.  Windows is not hibernated on the volume.  This is the system volume.");
1210 	ret = 0;
1211 unm_iput_out:
1212 	kunmap_local(start_addr);
1213 	folio_put(folio);
1214 iput_out:
1215 	iput(vi);
1216 	return ret;
1217 }
1218 
1219 /*
1220  * load_and_init_attrdef - load the attribute definitions table for a volume
1221  * @vol:	ntfs super block describing device whose attrdef to load
1222  *
1223  * Return 'true' on success or 'false' on error.
1224  */
1225 static bool load_and_init_attrdef(struct ntfs_volume *vol)
1226 {
1227 	loff_t i_size;
1228 	struct super_block *sb = vol->sb;
1229 	struct inode *ino;
1230 	struct folio *folio;
1231 	u8 *addr;
1232 	pgoff_t index, max_index;
1233 	unsigned int size;
1234 
1235 	ntfs_debug("Entering.");
1236 	/* Read attrdef table and setup vol->attrdef and vol->attrdef_size. */
1237 	ino = ntfs_iget(sb, FILE_AttrDef);
1238 	if (IS_ERR(ino)) {
1239 		if (!IS_ERR(ino))
1240 			iput(ino);
1241 		goto failed;
1242 	}
1243 	NInoSetSparseDisabled(NTFS_I(ino));
1244 	/* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
1245 	i_size = i_size_read(ino);
1246 	if (i_size <= 0 || i_size > 0x7fffffff)
1247 		goto iput_failed;
1248 	vol->attrdef = kvzalloc(i_size, GFP_NOFS);
1249 	if (!vol->attrdef)
1250 		goto iput_failed;
1251 	index = 0;
1252 	max_index = i_size >> PAGE_SHIFT;
1253 	size = PAGE_SIZE;
1254 	while (index < max_index) {
1255 		/* Read the attrdef table and copy it into the linear buffer. */
1256 read_partial_attrdef_page:
1257 		folio = read_mapping_folio(ino->i_mapping, index, NULL);
1258 		if (IS_ERR(folio))
1259 			goto free_iput_failed;
1260 		addr = kmap_local_folio(folio, 0);
1261 		memcpy((u8 *)vol->attrdef + (index++ << PAGE_SHIFT),
1262 				addr, size);
1263 		kunmap_local(addr);
1264 		folio_put(folio);
1265 	}
1266 	if (size == PAGE_SIZE) {
1267 		size = i_size & ~PAGE_MASK;
1268 		if (size)
1269 			goto read_partial_attrdef_page;
1270 	}
1271 	vol->attrdef_size = i_size;
1272 	ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
1273 	iput(ino);
1274 	return true;
1275 free_iput_failed:
1276 	kvfree(vol->attrdef);
1277 	vol->attrdef = NULL;
1278 iput_failed:
1279 	iput(ino);
1280 failed:
1281 	ntfs_error(sb, "Failed to initialize attribute definition table.");
1282 	return false;
1283 }
1284 
1285 /*
1286  * load_and_init_upcase - load the upcase table for an ntfs volume
1287  * @vol:	ntfs super block describing device whose upcase to load
1288  *
1289  * Return 'true' on success or 'false' on error.
1290  */
1291 static bool load_and_init_upcase(struct ntfs_volume *vol)
1292 {
1293 	loff_t i_size;
1294 	struct super_block *sb = vol->sb;
1295 	struct inode *ino;
1296 	struct folio *folio;
1297 	u8 *addr;
1298 	pgoff_t index, max_index;
1299 	unsigned int size;
1300 
1301 	ntfs_debug("Entering.");
1302 	/* Read upcase table and setup vol->upcase and vol->upcase_len. */
1303 	ino = ntfs_iget(sb, FILE_UpCase);
1304 	if (IS_ERR(ino)) {
1305 		if (!IS_ERR(ino))
1306 			iput(ino);
1307 		goto upcase_failed;
1308 	}
1309 	/*
1310 	 * The upcase size must not be above 64k Unicode characters, must not
1311 	 * be zero and must be a multiple of sizeof(__le16).
1312 	 */
1313 	i_size = i_size_read(ino);
1314 	if (!i_size || i_size & (sizeof(__le16) - 1) ||
1315 			i_size > 64ULL * 1024 * sizeof(__le16))
1316 		goto iput_upcase_failed;
1317 	vol->upcase = kvzalloc(i_size, GFP_NOFS);
1318 	if (!vol->upcase)
1319 		goto iput_upcase_failed;
1320 	index = 0;
1321 	max_index = i_size >> PAGE_SHIFT;
1322 	size = PAGE_SIZE;
1323 	while (index < max_index) {
1324 		/* Read the upcase table and copy it into the linear buffer. */
1325 read_partial_upcase_page:
1326 		folio = read_mapping_folio(ino->i_mapping, index, NULL);
1327 		if (IS_ERR(folio))
1328 			goto iput_upcase_failed;
1329 		addr = kmap_local_folio(folio, 0);
1330 		memcpy((char *)vol->upcase + (index++ << PAGE_SHIFT),
1331 				addr, size);
1332 		kunmap_local(addr);
1333 		folio_put(folio);
1334 	}
1335 	if (size == PAGE_SIZE) {
1336 		size = i_size & ~PAGE_MASK;
1337 		if (size)
1338 			goto read_partial_upcase_page;
1339 	}
1340 	vol->upcase_len = i_size >> sizeof(unsigned char);
1341 	ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
1342 			i_size, 64 * 1024 * sizeof(__le16));
1343 	iput(ino);
1344 	mutex_lock(&ntfs_lock);
1345 	if (!default_upcase) {
1346 		ntfs_debug("Using volume specified $UpCase since default is not present.");
1347 		mutex_unlock(&ntfs_lock);
1348 		return true;
1349 	}
1350 	if (default_upcase_len == vol->upcase_len &&
1351 	    !memcmp(vol->upcase, default_upcase,
1352 		    default_upcase_len * sizeof(*default_upcase))) {
1353 		kvfree(vol->upcase);
1354 		vol->upcase = default_upcase;
1355 		ntfs_nr_upcase_users++;
1356 		mutex_unlock(&ntfs_lock);
1357 		ntfs_debug("Volume specified $UpCase matches default. Using default.");
1358 		return true;
1359 	}
1360 	mutex_unlock(&ntfs_lock);
1361 	ntfs_debug("Using volume specified $UpCase since it does not match the default.");
1362 	return true;
1363 iput_upcase_failed:
1364 	iput(ino);
1365 	kvfree(vol->upcase);
1366 	vol->upcase = NULL;
1367 upcase_failed:
1368 	mutex_lock(&ntfs_lock);
1369 	if (default_upcase) {
1370 		vol->upcase = default_upcase;
1371 		vol->upcase_len = default_upcase_len;
1372 		ntfs_nr_upcase_users++;
1373 		mutex_unlock(&ntfs_lock);
1374 		ntfs_error(sb, "Failed to load $UpCase from the volume. Using default.");
1375 		return true;
1376 	}
1377 	mutex_unlock(&ntfs_lock);
1378 	ntfs_error(sb, "Failed to initialize upcase table.");
1379 	return false;
1380 }
1381 
1382 /*
1383  * The lcn and mft bitmap inodes are NTFS-internal inodes with
1384  * their own special locking rules:
1385  */
1386 static struct lock_class_key
1387 	lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
1388 	mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
1389 
1390 /*
1391  * load_system_files - open the system files using normal functions
1392  * @vol:	ntfs super block describing device whose system files to load
1393  *
1394  * Open the system files with normal access functions and complete setting up
1395  * the ntfs super block @vol.
1396  *
1397  * Return 'true' on success or 'false' on error.
1398  */
1399 static bool load_system_files(struct ntfs_volume *vol)
1400 {
1401 	struct super_block *sb = vol->sb;
1402 	struct mft_record *m;
1403 	struct volume_information *vi;
1404 	struct ntfs_attr_search_ctx *ctx;
1405 	struct restart_page_header *rp;
1406 	int err;
1407 
1408 	ntfs_debug("Entering.");
1409 	/* Get mft mirror inode compare the contents of $MFT and $MFTMirr. */
1410 	if (!load_and_init_mft_mirror(vol) || !check_mft_mirror(vol)) {
1411 		/* If a read-write mount, convert it to a read-only mount. */
1412 		if (!sb_rdonly(sb) && vol->on_errors == ON_ERRORS_REMOUNT_RO) {
1413 			static const char *es1 = "Failed to load $MFTMirr";
1414 			static const char *es2 = "$MFTMirr does not match $MFT";
1415 			static const char *es3 = ".  Run ntfsck and/or chkdsk.";
1416 
1417 			sb->s_flags |= SB_RDONLY;
1418 			ntfs_error(sb, "%s.  Mounting read-only%s",
1419 					!vol->mftmirr_ino ? es1 : es2, es3);
1420 		}
1421 		NVolSetErrors(vol);
1422 	}
1423 	/* Get mft bitmap attribute inode. */
1424 	vol->mftbmp_ino = ntfs_attr_iget(vol->mft_ino, AT_BITMAP, NULL, 0);
1425 	if (IS_ERR(vol->mftbmp_ino)) {
1426 		ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
1427 		goto iput_mirr_err_out;
1428 	}
1429 	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
1430 			   &mftbmp_runlist_lock_key);
1431 	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
1432 			   &mftbmp_mrec_lock_key);
1433 	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
1434 	if (!load_and_init_upcase(vol))
1435 		goto iput_mftbmp_err_out;
1436 	/*
1437 	 * Read attribute definitions table and setup @vol->attrdef and
1438 	 * @vol->attrdef_size.
1439 	 */
1440 	if (!load_and_init_attrdef(vol))
1441 		goto iput_upcase_err_out;
1442 	/*
1443 	 * Get the cluster allocation bitmap inode and verify the size, no
1444 	 * need for any locking at this stage as we are already running
1445 	 * exclusively as we are mount in progress task.
1446 	 */
1447 	vol->lcnbmp_ino = ntfs_iget(sb, FILE_Bitmap);
1448 	if (IS_ERR(vol->lcnbmp_ino)) {
1449 		if (!IS_ERR(vol->lcnbmp_ino))
1450 			iput(vol->lcnbmp_ino);
1451 		goto bitmap_failed;
1452 	}
1453 	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
1454 			   &lcnbmp_runlist_lock_key);
1455 	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
1456 			   &lcnbmp_mrec_lock_key);
1457 
1458 	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
1459 	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
1460 		iput(vol->lcnbmp_ino);
1461 bitmap_failed:
1462 		ntfs_error(sb, "Failed to load $Bitmap.");
1463 		goto iput_attrdef_err_out;
1464 	}
1465 	/*
1466 	 * Get the volume inode and setup our cache of the volume flags and
1467 	 * version.
1468 	 */
1469 	vol->vol_ino = ntfs_iget(sb, FILE_Volume);
1470 	if (IS_ERR(vol->vol_ino)) {
1471 		if (!IS_ERR(vol->vol_ino))
1472 			iput(vol->vol_ino);
1473 volume_failed:
1474 		ntfs_error(sb, "Failed to load $Volume.");
1475 		goto iput_lcnbmp_err_out;
1476 	}
1477 	m = map_mft_record(NTFS_I(vol->vol_ino));
1478 	if (IS_ERR(m)) {
1479 iput_volume_failed:
1480 		iput(vol->vol_ino);
1481 		goto volume_failed;
1482 	}
1483 
1484 	ctx = ntfs_attr_get_search_ctx(NTFS_I(vol->vol_ino), m);
1485 	if (!ctx) {
1486 		ntfs_error(sb, "Failed to get attribute search context.");
1487 		goto get_ctx_vol_failed;
1488 	}
1489 
1490 	if (!ntfs_attr_lookup(AT_VOLUME_NAME, NULL, 0, 0, 0, NULL, 0, ctx) &&
1491 	    !ctx->attr->non_resident &&
1492 	    !(ctx->attr->flags & (ATTR_IS_SPARSE | ATTR_IS_COMPRESSED)) &&
1493 	    le32_to_cpu(ctx->attr->data.resident.value_length) > 0) {
1494 		err = ntfs_ucstonls(vol, (__le16 *)((u8 *)ctx->attr +
1495 				    le16_to_cpu(ctx->attr->data.resident.value_offset)),
1496 				    le32_to_cpu(ctx->attr->data.resident.value_length) / 2,
1497 				    &vol->volume_label, NTFS_MAX_LABEL_LEN);
1498 		if (err < 0)
1499 			vol->volume_label = NULL;
1500 	}
1501 
1502 	ntfs_attr_reinit_search_ctx(ctx);
1503 	if (ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0,
1504 			ctx) || ctx->attr->non_resident || ctx->attr->flags) {
1505 		ntfs_attr_put_search_ctx(ctx);
1506 get_ctx_vol_failed:
1507 		unmap_mft_record(NTFS_I(vol->vol_ino));
1508 		goto iput_volume_failed;
1509 	}
1510 	vi = (struct volume_information *)((char *)ctx->attr +
1511 			le16_to_cpu(ctx->attr->data.resident.value_offset));
1512 	/* Copy the volume flags and version to the struct ntfs_volume structure. */
1513 	vol->vol_flags = vi->flags;
1514 	vol->major_ver = vi->major_ver;
1515 	vol->minor_ver = vi->minor_ver;
1516 	ntfs_attr_put_search_ctx(ctx);
1517 	unmap_mft_record(NTFS_I(vol->vol_ino));
1518 	pr_info("volume version %i.%i, dev %s, cluster size %d\n",
1519 		vol->major_ver, vol->minor_ver, sb->s_id, vol->cluster_size);
1520 
1521 	/* Make sure that no unsupported volume flags are set. */
1522 	if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
1523 		static const char *es1a = "Volume is dirty";
1524 		static const char *es1b = "Volume has been modified by chkdsk";
1525 		static const char *es1c = "Volume has unsupported flags set";
1526 		static const char *es2a = ".  Run chkdsk and mount in Windows.";
1527 		static const char *es2b = ".  Mount in Windows.";
1528 		const char *es1, *es2;
1529 
1530 		es2 = es2a;
1531 		if (vol->vol_flags & VOLUME_IS_DIRTY)
1532 			es1 = es1a;
1533 		else if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
1534 			es1 = es1b;
1535 			es2 = es2b;
1536 		} else {
1537 			es1 = es1c;
1538 			ntfs_warning(sb, "Unsupported volume flags 0x%x encountered.",
1539 					(unsigned int)le16_to_cpu(vol->vol_flags));
1540 		}
1541 		/* If a read-write mount, convert it to a read-only mount. */
1542 		if (!sb_rdonly(sb) && vol->on_errors == ON_ERRORS_REMOUNT_RO) {
1543 			sb->s_flags |= SB_RDONLY;
1544 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
1545 		}
1546 		/*
1547 		 * Do not set NVolErrors() because ntfs_remount() re-checks the
1548 		 * flags which we need to do in case any flags have changed.
1549 		 */
1550 	}
1551 	/*
1552 	 * Get the inode for the logfile, check it and determine if the volume
1553 	 * was shutdown cleanly.
1554 	 */
1555 	rp = NULL;
1556 	err = load_and_check_logfile(vol, &rp);
1557 	if (err) {
1558 		/* If a read-write mount, convert it to a read-only mount. */
1559 		if (!sb_rdonly(sb) && vol->on_errors == ON_ERRORS_REMOUNT_RO) {
1560 			sb->s_flags |= SB_RDONLY;
1561 			ntfs_error(sb, "Failed to load LogFile. Mounting read-only.");
1562 		}
1563 		NVolSetErrors(vol);
1564 	}
1565 
1566 	kvfree(rp);
1567 	/* Get the root directory inode so we can do path lookups. */
1568 	vol->root_ino = ntfs_iget(sb, FILE_root);
1569 	if (IS_ERR(vol->root_ino)) {
1570 		if (!IS_ERR(vol->root_ino))
1571 			iput(vol->root_ino);
1572 		ntfs_error(sb, "Failed to load root directory.");
1573 		goto iput_logfile_err_out;
1574 	}
1575 	/*
1576 	 * Check if Windows is suspended to disk on the target volume.  If it
1577 	 * is hibernated, we must not write *anything* to the disk so set
1578 	 * NVolErrors() without setting the dirty volume flag and mount
1579 	 * read-only.  This will prevent read-write remounting and it will also
1580 	 * prevent all writes.
1581 	 */
1582 	err = check_windows_hibernation_status(vol);
1583 	if (unlikely(err)) {
1584 		static const char *es1a = "Failed to determine if Windows is hibernated";
1585 		static const char *es1b = "Windows is hibernated";
1586 		static const char *es2 = ".  Run chkdsk.";
1587 		const char *es1;
1588 
1589 		es1 = err < 0 ? es1a : es1b;
1590 		/* If a read-write mount, convert it to a read-only mount. */
1591 		if (!sb_rdonly(sb) && vol->on_errors == ON_ERRORS_REMOUNT_RO) {
1592 			sb->s_flags |= SB_RDONLY;
1593 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
1594 		}
1595 		NVolSetErrors(vol);
1596 	}
1597 
1598 	/* If (still) a read-write mount, empty the logfile. */
1599 	if (!sb_rdonly(sb) &&
1600 	    vol->logfile_ino && !ntfs_empty_logfile(vol->logfile_ino) &&
1601 	    vol->on_errors == ON_ERRORS_REMOUNT_RO) {
1602 		static const char *es1 = "Failed to empty LogFile";
1603 		static const char *es2 = ".  Mount in Windows.";
1604 
1605 		/* Convert to a read-only mount. */
1606 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
1607 		sb->s_flags |= SB_RDONLY;
1608 		NVolSetErrors(vol);
1609 	}
1610 	/* If on NTFS versions before 3.0, we are done. */
1611 	if (unlikely(vol->major_ver < 3))
1612 		return true;
1613 	/* NTFS 3.0+ specific initialization. */
1614 	/* Get the security descriptors inode. */
1615 	vol->secure_ino = ntfs_iget(sb, FILE_Secure);
1616 	if (IS_ERR(vol->secure_ino)) {
1617 		if (!IS_ERR(vol->secure_ino))
1618 			iput(vol->secure_ino);
1619 		ntfs_error(sb, "Failed to load $Secure.");
1620 		goto iput_root_err_out;
1621 	}
1622 	/* Get the extended system files' directory inode. */
1623 	vol->extend_ino = ntfs_iget(sb, FILE_Extend);
1624 	if (IS_ERR(vol->extend_ino) ||
1625 	    !S_ISDIR(vol->extend_ino->i_mode)) {
1626 		if (!IS_ERR(vol->extend_ino))
1627 			iput(vol->extend_ino);
1628 		ntfs_error(sb, "Failed to load $Extend.");
1629 		goto iput_sec_err_out;
1630 	}
1631 	return true;
1632 
1633 iput_sec_err_out:
1634 	iput(vol->secure_ino);
1635 iput_root_err_out:
1636 	iput(vol->root_ino);
1637 iput_logfile_err_out:
1638 	if (vol->logfile_ino)
1639 		iput(vol->logfile_ino);
1640 	iput(vol->vol_ino);
1641 iput_lcnbmp_err_out:
1642 	iput(vol->lcnbmp_ino);
1643 iput_attrdef_err_out:
1644 	vol->attrdef_size = 0;
1645 	if (vol->attrdef) {
1646 		kvfree(vol->attrdef);
1647 		vol->attrdef = NULL;
1648 	}
1649 iput_upcase_err_out:
1650 	vol->upcase_len = 0;
1651 	mutex_lock(&ntfs_lock);
1652 	if (vol->upcase && vol->upcase == default_upcase) {
1653 		ntfs_nr_upcase_users--;
1654 		vol->upcase = NULL;
1655 	}
1656 	mutex_unlock(&ntfs_lock);
1657 	if (vol->upcase) {
1658 		kvfree(vol->upcase);
1659 		vol->upcase = NULL;
1660 	}
1661 iput_mftbmp_err_out:
1662 	iput(vol->mftbmp_ino);
1663 iput_mirr_err_out:
1664 	iput(vol->mftmirr_ino);
1665 	return false;
1666 }
1667 
1668 static void ntfs_volume_free(struct ntfs_volume *vol)
1669 {
1670 	/* Throw away the table of attribute definitions. */
1671 	vol->attrdef_size = 0;
1672 	if (vol->attrdef) {
1673 		kvfree(vol->attrdef);
1674 		vol->attrdef = NULL;
1675 	}
1676 	vol->upcase_len = 0;
1677 	/*
1678 	 * Destroy the global default upcase table if necessary.  Also decrease
1679 	 * the number of upcase users if we are a user.
1680 	 */
1681 	mutex_lock(&ntfs_lock);
1682 	if (vol->upcase && vol->upcase == default_upcase) {
1683 		ntfs_nr_upcase_users--;
1684 		vol->upcase = NULL;
1685 	}
1686 
1687 	if (!ntfs_nr_upcase_users) {
1688 		kvfree(default_upcase);
1689 		default_upcase = NULL;
1690 	}
1691 
1692 	free_compression_buffers();
1693 
1694 	mutex_unlock(&ntfs_lock);
1695 	if (vol->upcase) {
1696 		kvfree(vol->upcase);
1697 		vol->upcase = NULL;
1698 	}
1699 
1700 	unload_nls(vol->nls_map);
1701 
1702 	kvfree(vol->lcn_empty_bits_per_page);
1703 	kfree(vol->volume_label);
1704 	kfree(vol);
1705 }
1706 
/*
 * ntfs_put_super - called by the vfs to unmount a volume
 * @sb:		vfs superblock of volume to unmount
 *
 * Commit all system inodes, mark the volume clean if it is mounted read-write
 * and no volume errors were recorded, release all system inodes, flush the
 * block device and free the ntfs volume structure.
 *
 * The order of the calls below is significant: all commits happen while the
 * inodes are still held, and the $MFT inode is released last.
 */
static void ntfs_put_super(struct super_block *sb)
{
	struct ntfs_volume *vol = NTFS_SB(sb);

	pr_info("Entering %s, dev %s\n", __func__, sb->s_id);

	/* Wait for the precalc work item to finish or cancel it if pending. */
	cancel_work_sync(&vol->precalc_work);

	/*
	 * Commit all inodes while they are still open in case some of them
	 * cause others to be dirtied.
	 */
	ntfs_commit_inode(vol->vol_ino);

	/* NTFS 3.0+ specific. */
	if (vol->major_ver >= 3) {
		if (vol->extend_ino)
			ntfs_commit_inode(vol->extend_ino);
		if (vol->secure_ino)
			ntfs_commit_inode(vol->secure_ino);
	}

	ntfs_commit_inode(vol->root_ino);

	ntfs_commit_inode(vol->lcnbmp_ino);

	/*
	 * the GFP_NOFS scope is not needed because ntfs_commit_inode
	 * does nothing
	 */
	ntfs_commit_inode(vol->mftbmp_ino);

	/* The logfile and mft mirror inodes are optional. */
	if (vol->logfile_ino)
		ntfs_commit_inode(vol->logfile_ino);

	if (vol->mftmirr_ino)
		ntfs_commit_inode(vol->mftmirr_ino);
	ntfs_commit_inode(vol->mft_ino);

	/*
	 * If a read-write mount and no volume errors have occurred, mark the
	 * volume clean.  Also, re-commit all affected inodes.
	 */
	if (!sb_rdonly(sb)) {
		if (!NVolErrors(vol)) {
			if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
				ntfs_warning(sb,
					"Failed to clear dirty bit in volume information flags.  Run chkdsk.");
			ntfs_commit_inode(vol->vol_ino);
			ntfs_commit_inode(vol->root_ino);
			if (vol->mftmirr_ino)
				ntfs_commit_inode(vol->mftmirr_ino);
			ntfs_commit_inode(vol->mft_ino);
		} else {
			ntfs_warning(sb,
				"Volume has errors.  Leaving volume marked dirty.  Run chkdsk.");
		}
	}

	/* Release the system inodes, clearing each pointer as we go. */
	iput(vol->vol_ino);
	vol->vol_ino = NULL;

	/* NTFS 3.0+ specific clean up. */
	if (vol->major_ver >= 3) {
		if (vol->extend_ino) {
			iput(vol->extend_ino);
			vol->extend_ino = NULL;
		}
		if (vol->secure_ino) {
			iput(vol->secure_ino);
			vol->secure_ino = NULL;
		}
	}

	iput(vol->root_ino);
	vol->root_ino = NULL;

	iput(vol->lcnbmp_ino);
	vol->lcnbmp_ino = NULL;

	iput(vol->mftbmp_ino);
	vol->mftbmp_ino = NULL;

	if (vol->logfile_ino) {
		iput(vol->logfile_ino);
		vol->logfile_ino = NULL;
	}
	if (vol->mftmirr_ino) {
		/* Re-commit the mft mirror and mft just in case. */
		ntfs_commit_inode(vol->mftmirr_ino);
		ntfs_commit_inode(vol->mft_ino);
		iput(vol->mftmirr_ino);
		vol->mftmirr_ino = NULL;
	}
	/*
	 * We should have no dirty inodes left, due to
	 * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
	 * the underlying mft records are written out and cleaned.
	 */
	ntfs_commit_inode(vol->mft_ino);
	/* Synchronously write out the $MFT inode itself. */
	write_inode_now(vol->mft_ino, 1);

	iput(vol->mft_ino);
	vol->mft_ino = NULL;
	/* Ask the block device to flush its write cache. */
	blkdev_issue_flush(sb->s_bdev);

	/* This also frees @vol itself, so it must come last. */
	ntfs_volume_free(vol);
}
1819 
1820 int ntfs_force_shutdown(struct super_block *sb, u32 flags)
1821 {
1822 	struct ntfs_volume *vol = NTFS_SB(sb);
1823 	int ret;
1824 
1825 	if (NVolShutdown(vol))
1826 		return 0;
1827 
1828 	switch (flags) {
1829 	case FS_SHUTDOWN_FLAGS_DEFAULT:
1830 	case FS_SHUTDOWN_FLAGS_LOGFLUSH:
1831 		ret = bdev_freeze(sb->s_bdev);
1832 		if (ret)
1833 			return ret;
1834 		bdev_thaw(sb->s_bdev);
1835 		NVolSetShutdown(vol);
1836 		break;
1837 	case FS_SHUTDOWN_FLAGS_NOLOGFLUSH:
1838 		NVolSetShutdown(vol);
1839 		break;
1840 	default:
1841 		return -EINVAL;
1842 	}
1843 
1844 	return 0;
1845 }
1846 
1847 static void ntfs_shutdown(struct super_block *sb)
1848 {
1849 	ntfs_force_shutdown(sb, FS_SHUTDOWN_FLAGS_NOLOGFLUSH);
1850 
1851 }
1852 
1853 static int ntfs_sync_fs(struct super_block *sb, int wait)
1854 {
1855 	struct ntfs_volume *vol = NTFS_SB(sb);
1856 	int err = 0;
1857 
1858 	if (NVolShutdown(vol))
1859 		return -EIO;
1860 
1861 	if (!wait)
1862 		return 0;
1863 
1864 	/* If there are some dirty buffers in the bdev inode */
1865 	if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) {
1866 		ntfs_warning(sb, "Failed to clear dirty bit in volume information flags.  Run chkdsk.");
1867 		err = -EIO;
1868 	}
1869 	sync_inodes_sb(sb);
1870 	sync_blockdev(sb->s_bdev);
1871 	blkdev_issue_flush(sb->s_bdev);
1872 	return err;
1873 }
1874 
1875 /*
1876  * get_nr_free_clusters - return the number of free clusters on a volume
1877  * @vol:	ntfs volume for which to obtain free cluster count
1878  *
1879  * Calculate the number of free clusters on the mounted NTFS volume @vol. We
1880  * actually calculate the number of clusters in use instead because this
1881  * allows us to not care about partial pages as these will be just zero filled
1882  * and hence not be counted as allocated clusters.
1883  *
1884  * The only particularity is that clusters beyond the end of the logical ntfs
1885  * volume will be marked as allocated to prevent errors which means we have to
1886  * discount those at the end. This is important as the cluster bitmap always
1887  * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside
1888  * the logical volume and marked in use when they are not as they do not exist.
1889  *
1890  * If any pages cannot be read we assume all clusters in the erroring pages are
1891  * in use. This means we return an underestimate on errors which is better than
1892  * an overestimate.
1893  */
1894 s64 get_nr_free_clusters(struct ntfs_volume *vol)
1895 {
1896 	s64 nr_free = vol->nr_clusters;
1897 	u32 nr_used;
1898 	struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
1899 	struct folio *folio;
1900 	pgoff_t index, max_index;
1901 	struct file_ra_state ra = { 0 };
1902 
1903 	ntfs_debug("Entering.");
1904 	/* Serialize accesses to the cluster bitmap. */
1905 
1906 	if (NVolFreeClusterKnown(vol))
1907 		return atomic64_read(&vol->free_clusters);
1908 
1909 	file_ra_state_init(&ra, mapping);
1910 
1911 	/*
1912 	 * Convert the number of bits into bytes rounded up, then convert into
1913 	 * multiples of PAGE_SIZE, rounding up so that if we have one
1914 	 * full and one partial page max_index = 2.
1915 	 */
1916 	max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >>
1917 			PAGE_SHIFT;
1918 	/* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
1919 	ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
1920 			max_index, PAGE_SIZE / 4);
1921 	for (index = 0; index < max_index; index++) {
1922 		unsigned long *kaddr;
1923 
1924 		/*
1925 		 * Get folio from page cache, getting it from backing store
1926 		 * if necessary, and increment the use count.
1927 		 */
1928 		folio = ntfs_get_locked_folio(mapping, index, max_index, &ra);
1929 
1930 		/* Ignore pages which errored synchronously. */
1931 		if (IS_ERR(folio)) {
1932 			ntfs_debug("Skipping page (index 0x%lx).", index);
1933 			nr_free -= PAGE_SIZE * 8;
1934 			vol->lcn_empty_bits_per_page[index] = 0;
1935 			continue;
1936 		}
1937 
1938 		kaddr = kmap_local_folio(folio, 0);
1939 		/*
1940 		 * Subtract the number of set bits. If this
1941 		 * is the last page and it is partial we don't really care as
1942 		 * it just means we do a little extra work but it won't affect
1943 		 * the result as all out of range bytes are set to zero by
1944 		 * ntfs_readpage().
1945 		 */
1946 		nr_used = bitmap_weight(kaddr, PAGE_SIZE * BITS_PER_BYTE);
1947 		nr_free -= nr_used;
1948 		vol->lcn_empty_bits_per_page[index] = PAGE_SIZE * BITS_PER_BYTE - nr_used;
1949 		kunmap_local(kaddr);
1950 		folio_unlock(folio);
1951 		folio_put(folio);
1952 	}
1953 	ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
1954 	/*
1955 	 * Fixup for eventual bits outside logical ntfs volume (see function
1956 	 * description above).
1957 	 */
1958 	if (vol->nr_clusters & 63)
1959 		nr_free += 64 - (vol->nr_clusters & 63);
1960 
1961 	/* If errors occurred we may well have gone below zero, fix this. */
1962 	if (nr_free < 0)
1963 		nr_free = 0;
1964 	else
1965 		atomic64_set(&vol->free_clusters, nr_free);
1966 
1967 	NVolSetFreeClusterKnown(vol);
1968 	wake_up_all(&vol->free_waitq);
1969 	ntfs_debug("Exiting.");
1970 	return nr_free;
1971 }
1972 
1973 /*
1974  * @nr_clusters is the number of clusters requested for allocation.
1975  *
1976  * Return the number of clusters available for allocation within
1977  * the range of @nr_clusters, which is counts that considered
1978  * for delayed allocation.
1979  */
1980 s64 ntfs_available_clusters_count(struct ntfs_volume *vol, s64 nr_clusters)
1981 {
1982 	s64 free_clusters;
1983 
1984 	/* wait event */
1985 	if (!NVolFreeClusterKnown(vol))
1986 		wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
1987 
1988 	free_clusters = atomic64_read(&vol->free_clusters) -
1989 		atomic64_read(&vol->dirty_clusters);
1990 	if (free_clusters <= 0)
1991 		return -ENOSPC;
1992 	else if (free_clusters < nr_clusters)
1993 		nr_clusters = free_clusters;
1994 
1995 	return nr_clusters;
1996 }
1997 
1998 /*
1999  * __get_nr_free_mft_records - return the number of free inodes on a volume
2000  * @vol:	ntfs volume for which to obtain free inode count
2001  * @nr_free:	number of mft records in filesystem
2002  * @max_index:	maximum number of pages containing set bits
2003  *
2004  * Calculate the number of free mft records (inodes) on the mounted NTFS
2005  * volume @vol. We actually calculate the number of mft records in use instead
2006  * because this allows us to not care about partial pages as these will be just
2007  * zero filled and hence not be counted as allocated mft record.
2008  *
2009  * If any pages cannot be read we assume all mft records in the erroring pages
2010  * are in use. This means we return an underestimate on errors which is better
2011  * than an overestimate.
2012  *
2013  * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing.
2014  */
2015 static unsigned long __get_nr_free_mft_records(struct ntfs_volume *vol,
2016 		s64 nr_free, const pgoff_t max_index)
2017 {
2018 	struct address_space *mapping = vol->mftbmp_ino->i_mapping;
2019 	struct folio *folio;
2020 	pgoff_t index;
2021 	struct file_ra_state ra = { 0 };
2022 
2023 	ntfs_debug("Entering.");
2024 
2025 	file_ra_state_init(&ra, mapping);
2026 
2027 	/* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
2028 	ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = 0x%lx.",
2029 			max_index, PAGE_SIZE / 4);
2030 	for (index = 0; index < max_index; index++) {
2031 		unsigned long *kaddr;
2032 
2033 		/*
2034 		 * Get folio from page cache, getting it from backing store
2035 		 * if necessary, and increment the use count.
2036 		 */
2037 		folio = ntfs_get_locked_folio(mapping, index, max_index, &ra);
2038 
2039 		/* Ignore pages which errored synchronously. */
2040 		if (IS_ERR(folio)) {
2041 			ntfs_debug("read_mapping_page() error. Skipping page (index 0x%lx).",
2042 					index);
2043 			nr_free -= PAGE_SIZE * 8;
2044 			continue;
2045 		}
2046 
2047 		kaddr = kmap_local_folio(folio, 0);
2048 		/*
2049 		 * Subtract the number of set bits. If this
2050 		 * is the last page and it is partial we don't really care as
2051 		 * it just means we do a little extra work but it won't affect
2052 		 * the result as all out of range bytes are set to zero by
2053 		 * ntfs_readpage().
2054 		 */
2055 		nr_free -= bitmap_weight(kaddr,
2056 					PAGE_SIZE * BITS_PER_BYTE);
2057 		kunmap_local(kaddr);
2058 		folio_unlock(folio);
2059 		folio_put(folio);
2060 	}
2061 	ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
2062 			index - 1);
2063 	/* If errors occurred we may well have gone below zero, fix this. */
2064 	if (nr_free < 0)
2065 		nr_free = 0;
2066 	else
2067 		atomic64_set(&vol->free_mft_records, nr_free);
2068 
2069 	ntfs_debug("Exiting.");
2070 	return nr_free;
2071 }
2072 
2073 /*
2074  * ntfs_statfs - return information about mounted NTFS volume
2075  * @dentry:	dentry from mounted volume
2076  * @sfs:	statfs structure in which to return the information
2077  *
2078  * Return information about the mounted NTFS volume @dentry in the statfs structure
2079  * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
2080  * called). We interpret the values to be correct of the moment in time at
2081  * which we are called. Most values are variable otherwise and this isn't just
2082  * the free values but the totals as well. For example we can increase the
2083  * total number of file nodes if we run out and we can keep doing this until
2084  * there is no more space on the volume left at all.
2085  *
2086  * Called from vfs_statfs which is used to handle the statfs, fstatfs, and
2087  * ustat system calls.
2088  *
2089  * Return 0 on success or -errno on error.
2090  */
2091 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
2092 {
2093 	struct super_block *sb = dentry->d_sb;
2094 	s64 size;
2095 	struct ntfs_volume *vol = NTFS_SB(sb);
2096 	struct ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
2097 	unsigned long flags;
2098 
2099 	ntfs_debug("Entering.");
2100 	/* Type of filesystem. */
2101 	sfs->f_type   = NTFS_SB_MAGIC;
2102 	/* Optimal transfer block size. */
2103 	sfs->f_bsize = vol->cluster_size;
2104 	/* Fundamental file system block size, used as the unit. */
2105 	sfs->f_frsize = vol->cluster_size;
2106 
2107 	/*
2108 	 * Total data blocks in filesystem in units of f_bsize and since
2109 	 * inodes are also stored in data blocs ($MFT is a file) this is just
2110 	 * the total clusters.
2111 	 */
2112 	sfs->f_blocks = vol->nr_clusters;
2113 
2114 	/* wait event */
2115 	if (!NVolFreeClusterKnown(vol))
2116 		wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
2117 
2118 	/* Free data blocks in filesystem in units of f_bsize. */
2119 	size = atomic64_read(&vol->free_clusters) -
2120 		atomic64_read(&vol->dirty_clusters);
2121 	if (size < 0LL)
2122 		size = 0LL;
2123 
2124 	/* Free blocks avail to non-superuser, same as above on NTFS. */
2125 	sfs->f_bavail = sfs->f_bfree = size;
2126 
2127 	/* Number of inodes in filesystem (at this point in time). */
2128 	read_lock_irqsave(&mft_ni->size_lock, flags);
2129 	sfs->f_files = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
2130 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
2131 
2132 	/* Free inodes in fs (based on current total count). */
2133 	sfs->f_ffree = atomic64_read(&vol->free_mft_records);
2134 
2135 	/*
2136 	 * File system id. This is extremely *nix flavour dependent and even
2137 	 * within Linux itself all fs do their own thing. I interpret this to
2138 	 * mean a unique id associated with the mounted fs and not the id
2139 	 * associated with the filesystem driver, the latter is already given
2140 	 * by the filesystem type in sfs->f_type. Thus we use the 64-bit
2141 	 * volume serial number splitting it into two 32-bit parts. We enter
2142 	 * the least significant 32-bits in f_fsid[0] and the most significant
2143 	 * 32-bits in f_fsid[1].
2144 	 */
2145 	sfs->f_fsid = u64_to_fsid(vol->serial_no);
2146 	/* Maximum length of filenames. */
2147 	sfs->f_namelen	   = NTFS_MAX_NAME_LEN;
2148 
2149 	return 0;
2150 }
2151 
2152 static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
2153 {
2154 	return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
2155 }
2156 
2157 /*
2158  * The complete super operations.
2159  */
2160 static const struct super_operations ntfs_sops = {
2161 	.alloc_inode	= ntfs_alloc_big_inode,	  /* VFS: Allocate new inode. */
2162 	.free_inode	= ntfs_free_big_inode, /* VFS: Deallocate inode. */
2163 	.drop_inode	= ntfs_drop_big_inode,
2164 	.write_inode	= ntfs_write_inode,	/* VFS: Write dirty inode to disk. */
2165 	.put_super	= ntfs_put_super,	/* Syscall: umount. */
2166 	.shutdown	= ntfs_shutdown,
2167 	.sync_fs	= ntfs_sync_fs,		/* Syscall: sync. */
2168 	.statfs		= ntfs_statfs,		/* Syscall: statfs */
2169 	.evict_inode	= ntfs_evict_big_inode,
2170 	.show_options	= ntfs_show_options,	/* Show mount options in proc. */
2171 };
2172 
2173 static void precalc_free_clusters(struct work_struct *work)
2174 {
2175 	struct ntfs_volume *vol = container_of(work, struct ntfs_volume, precalc_work);
2176 	s64 nr_free;
2177 
2178 	nr_free = get_nr_free_clusters(vol);
2179 
2180 	ntfs_debug("pre-calculate free clusters(%lld) using workqueue",
2181 			nr_free);
2182 }
2183 
2184 static struct lock_class_key ntfs_mft_inval_lock_key;
2185 
2186 /*
2187  * ntfs_fill_super - mount an ntfs filesystem
2188  * @sb: super block of the device to mount
2189  * @fc: filesystem context containing mount options
2190  *
2191  * ntfs_fill_super() is called by the VFS to mount the device described by @sb
2192  * with the mount otions in @data with the NTFS filesystem.
2193  *
2194  * If @silent is true, remain silent even if errors are detected. This is used
2195  * during bootup, when the kernel tries to mount the root filesystem with all
2196  * registered filesystems one after the other until one succeeds. This implies
2197  * that all filesystems except the correct one will quite correctly and
2198  * expectedly return an error, but nobody wants to see error messages when in
2199  * fact this is what is supposed to happen.
2200  */
2201 static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
2202 {
2203 	char *boot;
2204 	struct inode *tmp_ino;
2205 	int blocksize, result;
2206 	pgoff_t lcn_bit_pages;
2207 	struct ntfs_volume *vol = NTFS_SB(sb);
2208 	int silent = fc->sb_flags & SB_SILENT;
2209 
2210 	vol->sb = sb;
2211 
2212 	/*
2213 	 * We do a pretty difficult piece of bootstrap by reading the
2214 	 * MFT (and other metadata) from disk into memory. We'll only
2215 	 * release this metadata during umount, so the locking patterns
2216 	 * observed during bootstrap do not count. So turn off the
2217 	 * observation of locking patterns (strictly for this context
2218 	 * only) while mounting NTFS. [The validator is still active
2219 	 * otherwise, even for this context: it will for example record
2220 	 * lock class registrations.]
2221 	 */
2222 	lockdep_off();
2223 	ntfs_debug("Entering.");
2224 
2225 	if (vol->nls_map && !strcmp(vol->nls_map->charset, "utf8"))
2226 		vol->nls_utf8 = true;
2227 	if (NVolDisableSparse(vol))
2228 		vol->preallocated_size = 0;
2229 
2230 	if (NVolDiscard(vol) && !bdev_max_discard_sectors(sb->s_bdev)) {
2231 		ntfs_warning(
2232 			sb,
2233 			"Discard requested but device does not support discard.  Discard disabled.");
2234 		NVolClearDiscard(vol);
2235 	}
2236 
2237 	/* We support sector sizes up to the PAGE_SIZE. */
2238 	if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) {
2239 		if (!silent)
2240 			ntfs_error(sb,
2241 				"Device has unsupported sector size (%i).  The maximum supported sector size on this architecture is %lu bytes.",
2242 				bdev_logical_block_size(sb->s_bdev),
2243 				PAGE_SIZE);
2244 		goto err_out_now;
2245 	}
2246 
2247 	/*
2248 	 * Setup the device access block size to NTFS_BLOCK_SIZE or the hard
2249 	 * sector size, whichever is bigger.
2250 	 */
2251 	blocksize = sb_min_blocksize(sb, NTFS_BLOCK_SIZE);
2252 	if (blocksize < NTFS_BLOCK_SIZE) {
2253 		if (!silent)
2254 			ntfs_error(sb, "Unable to set device block size.");
2255 		goto err_out_now;
2256 	}
2257 
2258 	ntfs_debug("Set device block size to %i bytes (block size bits %i).",
2259 			blocksize, sb->s_blocksize_bits);
2260 	/* Determine the size of the device in units of block_size bytes. */
2261 	if (!bdev_nr_bytes(sb->s_bdev)) {
2262 		if (!silent)
2263 			ntfs_error(sb, "Unable to determine device size.");
2264 		goto err_out_now;
2265 	}
2266 	vol->nr_blocks = bdev_nr_bytes(sb->s_bdev) >>
2267 			sb->s_blocksize_bits;
2268 	/* Read the boot sector and return unlocked buffer head to it. */
2269 	boot = read_ntfs_boot_sector(sb, silent);
2270 	if (!boot) {
2271 		if (!silent)
2272 			ntfs_error(sb, "Not an NTFS volume.");
2273 		goto err_out_now;
2274 	}
2275 	/*
2276 	 * Extract the data from the boot sector and setup the ntfs volume
2277 	 * using it.
2278 	 */
2279 	result = parse_ntfs_boot_sector(vol, (struct ntfs_boot_sector *)boot);
2280 	kfree(boot);
2281 	if (!result) {
2282 		if (!silent)
2283 			ntfs_error(sb, "Unsupported NTFS filesystem.");
2284 		goto err_out_now;
2285 	}
2286 
2287 	if (vol->sector_size > blocksize) {
2288 		blocksize = sb_set_blocksize(sb, vol->sector_size);
2289 		if (blocksize != vol->sector_size) {
2290 			if (!silent)
2291 				ntfs_error(sb,
2292 					   "Unable to set device block size to sector size (%i).",
2293 					   vol->sector_size);
2294 			goto err_out_now;
2295 		}
2296 		vol->nr_blocks = bdev_nr_bytes(sb->s_bdev) >>
2297 				sb->s_blocksize_bits;
2298 		ntfs_debug("Changed device block size to %i bytes (block size bits %i) to match volume sector size.",
2299 				blocksize, sb->s_blocksize_bits);
2300 	}
2301 	/* Initialize the cluster and mft allocators. */
2302 	ntfs_setup_allocators(vol);
2303 	/* Setup remaining fields in the super block. */
2304 	sb->s_magic = NTFS_SB_MAGIC;
2305 	/*
2306 	 * Ntfs allows 63 bits for the file size, i.e. correct would be:
2307 	 *	sb->s_maxbytes = ~0ULL >> 1;
2308 	 * But the kernel uses a long as the page cache page index which on
2309 	 * 32-bit architectures is only 32-bits. MAX_LFS_FILESIZE is kernel
2310 	 * defined to the maximum the page cache page index can cope with
2311 	 * without overflowing the index or to 2^63 - 1, whichever is smaller.
2312 	 */
2313 	sb->s_maxbytes = MAX_LFS_FILESIZE;
2314 	/* Ntfs measures time in 100ns intervals. */
2315 	sb->s_time_gran = 100;
2316 
2317 	sb->s_xattr = ntfs_xattr_handlers;
2318 	/*
2319 	 * Now load the metadata required for the page cache and our address
2320 	 * space operations to function. We do this by setting up a specialised
2321 	 * read_inode method and then just calling the normal iget() to obtain
2322 	 * the inode for $MFT which is sufficient to allow our normal inode
2323 	 * operations and associated address space operations to function.
2324 	 */
2325 	sb->s_op = &ntfs_sops;
2326 	tmp_ino = new_inode(sb);
2327 	if (!tmp_ino) {
2328 		if (!silent)
2329 			ntfs_error(sb, "Failed to load essential metadata.");
2330 		goto err_out_now;
2331 	}
2332 
2333 	tmp_ino->i_ino = FILE_MFT;
2334 	insert_inode_hash(tmp_ino);
2335 	if (ntfs_read_inode_mount(tmp_ino) < 0) {
2336 		if (!silent)
2337 			ntfs_error(sb, "Failed to load essential metadata.");
2338 		goto iput_tmp_ino_err_out_now;
2339 	}
2340 	lockdep_set_class(&tmp_ino->i_mapping->invalidate_lock,
2341 			  &ntfs_mft_inval_lock_key);
2342 
2343 	mutex_lock(&ntfs_lock);
2344 
2345 	/*
2346 	 * Generate the global default upcase table if necessary.  Also
2347 	 * temporarily increment the number of upcase users to avoid race
2348 	 * conditions with concurrent (u)mounts.
2349 	 */
2350 	if (!default_upcase)
2351 		default_upcase = generate_default_upcase();
2352 	ntfs_nr_upcase_users++;
2353 	mutex_unlock(&ntfs_lock);
2354 
2355 	lcn_bit_pages = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT;
2356 	vol->lcn_empty_bits_per_page = kvmalloc_array(lcn_bit_pages, sizeof(unsigned int),
2357 						      GFP_KERNEL);
2358 	if (!vol->lcn_empty_bits_per_page) {
2359 		ntfs_error(sb,
2360 			   "Unable to allocate pages for storing LCN empty bit counts\n");
2361 		goto unl_upcase_iput_tmp_ino_err_out_now;
2362 	}
2363 
2364 	/*
2365 	 * From now on, ignore @silent parameter. If we fail below this line,
2366 	 * it will be due to a corrupt fs or a system error, so we report it.
2367 	 */
2368 	/*
2369 	 * Open the system files with normal access functions and complete
2370 	 * setting up the ntfs super block.
2371 	 */
2372 	if (!load_system_files(vol)) {
2373 		ntfs_error(sb, "Failed to load system files.");
2374 		goto unl_upcase_iput_tmp_ino_err_out_now;
2375 	}
2376 
2377 	/* We grab a reference, simulating an ntfs_iget(). */
2378 	ihold(vol->root_ino);
2379 	sb->s_root = d_make_root(vol->root_ino);
2380 	if (sb->s_root) {
2381 		s64 nr_records;
2382 
2383 		ntfs_debug("Exiting, status successful.");
2384 
2385 		/* Release the default upcase if it has no users. */
2386 		mutex_lock(&ntfs_lock);
2387 		if (!--ntfs_nr_upcase_users && default_upcase) {
2388 			kvfree(default_upcase);
2389 			default_upcase = NULL;
2390 		}
2391 		mutex_unlock(&ntfs_lock);
2392 		sb->s_export_op = &ntfs_export_ops;
2393 		lockdep_on();
2394 
2395 		nr_records = __get_nr_free_mft_records(vol,
2396 				i_size_read(vol->mft_ino) >> vol->mft_record_size_bits,
2397 				((((NTFS_I(vol->mft_ino)->initialized_size >>
2398 				    vol->mft_record_size_bits) +
2399 				   7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT);
2400 		ntfs_debug("Free mft records(%lld)", nr_records);
2401 
2402 		init_waitqueue_head(&vol->free_waitq);
2403 		INIT_WORK(&vol->precalc_work, precalc_free_clusters);
2404 		queue_work(ntfs_wq, &vol->precalc_work);
2405 		return 0;
2406 	}
2407 	ntfs_error(sb, "Failed to allocate root directory.");
2408 	/* Clean up after the successful load_system_files() call from above. */
2409 	iput(vol->vol_ino);
2410 	vol->vol_ino = NULL;
2411 	/* NTFS 3.0+ specific clean up. */
2412 	if (vol->major_ver >= 3) {
2413 		if (vol->extend_ino) {
2414 			iput(vol->extend_ino);
2415 			vol->extend_ino = NULL;
2416 		}
2417 		if (vol->secure_ino) {
2418 			iput(vol->secure_ino);
2419 			vol->secure_ino = NULL;
2420 		}
2421 	}
2422 	iput(vol->root_ino);
2423 	vol->root_ino = NULL;
2424 	iput(vol->lcnbmp_ino);
2425 	vol->lcnbmp_ino = NULL;
2426 	iput(vol->mftbmp_ino);
2427 	vol->mftbmp_ino = NULL;
2428 	if (vol->logfile_ino) {
2429 		iput(vol->logfile_ino);
2430 		vol->logfile_ino = NULL;
2431 	}
2432 	if (vol->mftmirr_ino) {
2433 		iput(vol->mftmirr_ino);
2434 		vol->mftmirr_ino = NULL;
2435 	}
2436 	/* Throw away the table of attribute definitions. */
2437 	vol->attrdef_size = 0;
2438 	if (vol->attrdef) {
2439 		kvfree(vol->attrdef);
2440 		vol->attrdef = NULL;
2441 	}
2442 	vol->upcase_len = 0;
2443 	mutex_lock(&ntfs_lock);
2444 	if (vol->upcase && vol->upcase == default_upcase) {
2445 		ntfs_nr_upcase_users--;
2446 		vol->upcase = NULL;
2447 	}
2448 	mutex_unlock(&ntfs_lock);
2449 	if (vol->upcase) {
2450 		kvfree(vol->upcase);
2451 		vol->upcase = NULL;
2452 	}
2453 	if (vol->nls_map) {
2454 		unload_nls(vol->nls_map);
2455 		vol->nls_map = NULL;
2456 	}
2457 	/* Error exit code path. */
2458 unl_upcase_iput_tmp_ino_err_out_now:
2459 	/*
2460 	 * Decrease the number of upcase users and destroy the global default
2461 	 * upcase table if necessary.
2462 	 */
2463 	mutex_lock(&ntfs_lock);
2464 	if (!--ntfs_nr_upcase_users && default_upcase) {
2465 		kvfree(default_upcase);
2466 		default_upcase = NULL;
2467 	}
2468 
2469 	mutex_unlock(&ntfs_lock);
2470 iput_tmp_ino_err_out_now:
2471 	iput(tmp_ino);
2472 	if (vol->mft_ino && vol->mft_ino != tmp_ino)
2473 		iput(vol->mft_ino);
2474 	vol->mft_ino = NULL;
2475 	/* Errors at this stage are irrelevant. */
2476 err_out_now:
2477 	sb->s_fs_info = NULL;
2478 	kvfree(vol->lcn_empty_bits_per_page);
2479 	kfree(vol->volume_label);
2480 	unload_nls(vol->nls_map);
2481 	kfree(vol);
2482 	ntfs_debug("Failed, returning -EINVAL.");
2483 	lockdep_on();
2484 	return -EINVAL;
2485 }
2486 
/* Slab caches for efficient allocation/deallocation of inodes. */
struct kmem_cache *ntfs_big_inode_cache;
struct kmem_cache *ntfs_inode_cache;

/*
 * Slab cache for Unicode name buffers.  NTFS names are at most
 * NTFS_MAX_NAME_LEN (255) Unicode characters long, so handing out buffers of
 * the maximum length plus room for a terminating NULL Unicode character
 * optimizes their allocation and deallocation.
 */
struct kmem_cache *ntfs_name_cache;
2497 
/*
 * Constructor of the big inode slab cache: initialize the VFS inode that
 * belongs to the ntfs inode @foo.
 */
static void ntfs_big_inode_init_once(void *foo)
{
	inode_init_once(VFS_I((struct ntfs_inode *)foo));
}
2505 
/* Driver wide mutex. */
DEFINE_MUTEX(ntfs_lock);

/*
 * Slab caches to optimize allocations and deallocations of index contexts
 * and attribute search contexts, respectively.
 */
struct kmem_cache *ntfs_index_ctx_cache;
struct kmem_cache *ntfs_attr_ctx_cache;
2515 
2516 static int ntfs_get_tree(struct fs_context *fc)
2517 {
2518 	return get_tree_bdev(fc, ntfs_fill_super);
2519 }
2520 
2521 static void ntfs_free_fs_context(struct fs_context *fc)
2522 {
2523 	struct ntfs_volume *vol = fc->s_fs_info;
2524 
2525 	if (vol)
2526 		ntfs_volume_free(vol);
2527 }
2528 
2529 static const struct fs_context_operations ntfs_context_ops = {
2530 	.parse_param	= ntfs_parse_param,
2531 	.get_tree	= ntfs_get_tree,
2532 	.free		= ntfs_free_fs_context,
2533 	.reconfigure	= ntfs_reconfigure,
2534 };
2535 
2536 static int ntfs_init_fs_context(struct fs_context *fc)
2537 {
2538 	struct ntfs_volume *vol;
2539 
2540 	/* Allocate a new struct ntfs_volume and place it in sb->s_fs_info. */
2541 	vol = kmalloc(sizeof(struct ntfs_volume), GFP_NOFS);
2542 	if (!vol)
2543 		return -ENOMEM;
2544 
2545 	/* Initialize struct ntfs_volume structure. */
2546 	*vol = (struct ntfs_volume) {
2547 		.uid = INVALID_UID,
2548 		.gid = INVALID_GID,
2549 		.fmask = 0,
2550 		.dmask = 0,
2551 		.mft_zone_multiplier = 1,
2552 		.on_errors = ON_ERRORS_CONTINUE,
2553 		.nls_map = load_nls_default(),
2554 		.preallocated_size = NTFS_DEF_PREALLOC_SIZE,
2555 	};
2556 
2557 	NVolSetShowHiddenFiles(vol);
2558 	NVolSetCaseSensitive(vol);
2559 	init_rwsem(&vol->mftbmp_lock);
2560 	init_rwsem(&vol->lcnbmp_lock);
2561 	mutex_init(&vol->volume_label_lock);
2562 
2563 	fc->s_fs_info = vol;
2564 	fc->ops = &ntfs_context_ops;
2565 	return 0;
2566 }
2567 
2568 static struct file_system_type ntfs_fs_type = {
2569 	.owner                  = THIS_MODULE,
2570 	.name                   = "ntfs",
2571 	.init_fs_context        = ntfs_init_fs_context,
2572 	.parameters             = ntfs_parameters,
2573 	.kill_sb                = kill_block_super,
2574 	.fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
2575 };
2576 MODULE_ALIAS_FS("ntfs");
2577 
2578 static int ntfs_workqueue_init(void)
2579 {
2580 	ntfs_wq = alloc_workqueue("ntfs-bg-io", WQ_PERCPU, 0);
2581 	if (!ntfs_wq)
2582 		return -ENOMEM;
2583 	return 0;
2584 }
2585 
2586 static void ntfs_workqueue_destroy(void)
2587 {
2588 	destroy_workqueue(ntfs_wq);
2589 	ntfs_wq = NULL;
2590 }
2591 
/*
 * Stable names for the slab caches, shared between the cache creation calls
 * and the error messages in init_ntfs_fs().
 */
static const char ntfs_big_inode_cache_name[] = "ntfs_big_inode_cache";
static const char ntfs_inode_cache_name[] = "ntfs_inode_cache";
static const char ntfs_name_cache_name[] = "ntfs_name_cache";
static const char ntfs_attr_ctx_cache_name[] = "ntfs_attr_ctx_cache";
static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache";
2598 
2599 static int __init init_ntfs_fs(void)
2600 {
2601 	int err = 0;
2602 
2603 	err = ntfs_workqueue_init();
2604 	if (err) {
2605 		pr_crit("Failed to register workqueue!\n");
2606 		return err;
2607 	}
2608 
2609 	ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name,
2610 			sizeof(struct ntfs_index_context), 0 /* offset */,
2611 			SLAB_HWCACHE_ALIGN, NULL /* ctor */);
2612 	if (!ntfs_index_ctx_cache) {
2613 		pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
2614 		goto ictx_err_out;
2615 	}
2616 	ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
2617 			sizeof(struct ntfs_attr_search_ctx), 0 /* offset */,
2618 			SLAB_HWCACHE_ALIGN, NULL /* ctor */);
2619 	if (!ntfs_attr_ctx_cache) {
2620 		pr_crit("NTFS: Failed to create %s!\n",
2621 			ntfs_attr_ctx_cache_name);
2622 		goto actx_err_out;
2623 	}
2624 
2625 	ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name,
2626 			(NTFS_MAX_NAME_LEN+2) * sizeof(__le16), 0,
2627 			SLAB_HWCACHE_ALIGN, NULL);
2628 	if (!ntfs_name_cache) {
2629 		pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
2630 		goto name_err_out;
2631 	}
2632 
2633 	ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name,
2634 			sizeof(struct ntfs_inode), 0, SLAB_RECLAIM_ACCOUNT, NULL);
2635 	if (!ntfs_inode_cache) {
2636 		pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
2637 		goto inode_err_out;
2638 	}
2639 
2640 	ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
2641 			sizeof(struct big_ntfs_inode), 0, SLAB_HWCACHE_ALIGN |
2642 			SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
2643 			ntfs_big_inode_init_once);
2644 	if (!ntfs_big_inode_cache) {
2645 		pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
2646 		goto big_inode_err_out;
2647 	}
2648 
2649 	/* Register the ntfs sysctls. */
2650 	err = ntfs_sysctl(1);
2651 	if (err) {
2652 		pr_crit("Failed to register NTFS sysctls!\n");
2653 		goto sysctl_err_out;
2654 	}
2655 
2656 	err = register_filesystem(&ntfs_fs_type);
2657 	if (!err) {
2658 		ntfs_debug("NTFS driver registered successfully.");
2659 		return 0; /* Success! */
2660 	}
2661 	pr_crit("Failed to register NTFS filesystem driver!\n");
2662 
2663 	/* Unregister the ntfs sysctls. */
2664 	ntfs_sysctl(0);
2665 sysctl_err_out:
2666 	kmem_cache_destroy(ntfs_big_inode_cache);
2667 big_inode_err_out:
2668 	kmem_cache_destroy(ntfs_inode_cache);
2669 inode_err_out:
2670 	kmem_cache_destroy(ntfs_name_cache);
2671 name_err_out:
2672 	kmem_cache_destroy(ntfs_attr_ctx_cache);
2673 actx_err_out:
2674 	kmem_cache_destroy(ntfs_index_ctx_cache);
2675 ictx_err_out:
2676 	if (!err) {
2677 		pr_crit("Aborting NTFS filesystem driver registration...\n");
2678 		err = -ENOMEM;
2679 	}
2680 	return err;
2681 }
2682 
2683 static void __exit exit_ntfs_fs(void)
2684 {
2685 	ntfs_debug("Unregistering NTFS driver.");
2686 
2687 	unregister_filesystem(&ntfs_fs_type);
2688 
2689 	/*
2690 	 * Make sure all delayed rcu free inodes are flushed before we
2691 	 * destroy cache.
2692 	 */
2693 	rcu_barrier();
2694 	kmem_cache_destroy(ntfs_big_inode_cache);
2695 	kmem_cache_destroy(ntfs_inode_cache);
2696 	kmem_cache_destroy(ntfs_name_cache);
2697 	kmem_cache_destroy(ntfs_attr_ctx_cache);
2698 	kmem_cache_destroy(ntfs_index_ctx_cache);
2699 	ntfs_workqueue_destroy();
2700 	/* Unregister the ntfs sysctls. */
2701 	ntfs_sysctl(0);
2702 }
2703 
2704 module_init(init_ntfs_fs);
2705 module_exit(exit_ntfs_fs);
2706 
2707 MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); /* Original read-only NTFS driver */
2708 MODULE_AUTHOR("Namjae Jeon <linkinjeon@kernel.org>"); /* Add write, iomap and various features */
2709 MODULE_DESCRIPTION("NTFS read-write filesystem driver");
2710 MODULE_LICENSE("GPL");
2711 #ifdef DEBUG
2712 module_param(debug_msgs, uint, 0);
2713 MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
2714 #endif
2715