xref: /linux/fs/ntfs3/fsntfs.c (revision f69e98a91a01fd7c5755dd710e94a17d6e9f583f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *
4  * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5  *
6  */
7 
8 #include <linux/blkdev.h>
9 #include <linux/buffer_head.h>
10 #include <linux/fs.h>
11 #include <linux/kernel.h>
12 
13 #include "debug.h"
14 #include "ntfs.h"
15 #include "ntfs_fs.h"
16 
17 // clang-format off
18 const struct cpu_str NAME_MFT = {
19 	4, 0, { '$', 'M', 'F', 'T' },
20 };
21 const struct cpu_str NAME_MIRROR = {
22 	8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' },
23 };
24 const struct cpu_str NAME_LOGFILE = {
25 	8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' },
26 };
27 const struct cpu_str NAME_VOLUME = {
28 	7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' },
29 };
30 const struct cpu_str NAME_ATTRDEF = {
31 	8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' },
32 };
33 const struct cpu_str NAME_ROOT = {
34 	1, 0, { '.' },
35 };
36 const struct cpu_str NAME_BITMAP = {
37 	7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' },
38 };
39 const struct cpu_str NAME_BOOT = {
40 	5, 0, { '$', 'B', 'o', 'o', 't' },
41 };
42 const struct cpu_str NAME_BADCLUS = {
43 	8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' },
44 };
45 const struct cpu_str NAME_QUOTA = {
46 	6, 0, { '$', 'Q', 'u', 'o', 't', 'a' },
47 };
48 const struct cpu_str NAME_SECURE = {
49 	7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' },
50 };
51 const struct cpu_str NAME_UPCASE = {
52 	7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' },
53 };
54 const struct cpu_str NAME_EXTEND = {
55 	7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' },
56 };
57 const struct cpu_str NAME_OBJID = {
58 	6, 0, { '$', 'O', 'b', 'j', 'I', 'd' },
59 };
60 const struct cpu_str NAME_REPARSE = {
61 	8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' },
62 };
63 const struct cpu_str NAME_USNJRNL = {
64 	8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' },
65 };
66 const __le16 BAD_NAME[4] = {
67 	cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'),
68 };
69 const __le16 I30_NAME[4] = {
70 	cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'),
71 };
72 const __le16 SII_NAME[4] = {
73 	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'),
74 };
75 const __le16 SDH_NAME[4] = {
76 	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'),
77 };
78 const __le16 SDS_NAME[4] = {
79 	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'),
80 };
81 const __le16 SO_NAME[2] = {
82 	cpu_to_le16('$'), cpu_to_le16('O'),
83 };
84 const __le16 SQ_NAME[2] = {
85 	cpu_to_le16('$'), cpu_to_le16('Q'),
86 };
87 const __le16 SR_NAME[2] = {
88 	cpu_to_le16('$'), cpu_to_le16('R'),
89 };
90 
91 #ifdef CONFIG_NTFS3_LZX_XPRESS
92 const __le16 WOF_NAME[17] = {
93 	cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'),
94 	cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'),
95 	cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'),
96 	cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'),
97 	cpu_to_le16('a'),
98 };
99 #endif
100 
101 // clang-format on
102 
103 /*
104  * ntfs_fix_pre_write - Insert fixups into @rhdr before writing to disk.
105  */
106 bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes)
107 {
108 	u16 *fixup, *ptr;
109 	u16 sample;
110 	u16 fo = le16_to_cpu(rhdr->fix_off);
111 	u16 fn = le16_to_cpu(rhdr->fix_num);
112 
113 	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
114 	    fn * SECTOR_SIZE > bytes) {
115 		return false;
116 	}
117 
118 	/* Get fixup pointer. */
119 	fixup = Add2Ptr(rhdr, fo);
120 
121 	if (*fixup >= 0x7FFF)
122 		*fixup = 1;
123 	else
124 		*fixup += 1;
125 
126 	sample = *fixup;
127 
128 	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
129 
130 	while (fn--) {
131 		*++fixup = *ptr;
132 		*ptr = sample;
133 		ptr += SECTOR_SIZE / sizeof(short);
134 	}
135 	return true;
136 }
137 
138 /*
139  * ntfs_fix_post_read - Remove fixups after reading from disk.
140  *
141  * Return: < 0 if error, 0 if ok, 1 if need to update fixups.
142  */
143 int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
144 		       bool simple)
145 {
146 	int ret;
147 	u16 *fixup, *ptr;
148 	u16 sample, fo, fn;
149 
150 	fo = le16_to_cpu(rhdr->fix_off);
151 	fn = simple ? ((bytes >> SECTOR_SHIFT) + 1)
152 		    : le16_to_cpu(rhdr->fix_num);
153 
154 	/* Check errors. */
155 	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
156 	    fn * SECTOR_SIZE > bytes) {
157 		return -EINVAL; /* Native chkntfs returns ok! */
158 	}
159 
160 	/* Get fixup pointer. */
161 	fixup = Add2Ptr(rhdr, fo);
162 	sample = *fixup;
163 	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
164 	ret = 0;
165 
166 	while (fn--) {
167 		/* Test current word. */
168 		if (*ptr != sample) {
169 			/* Fixup does not match! Is it serious error? */
170 			ret = -E_NTFS_FIXUP;
171 		}
172 
173 		/* Replace fixup. */
174 		*ptr = *++fixup;
175 		ptr += SECTOR_SIZE / sizeof(short);
176 	}
177 
178 	return ret;
179 }
180 
181 /*
182  * ntfs_extend_init - Load $Extend file.
183  */
184 int ntfs_extend_init(struct ntfs_sb_info *sbi)
185 {
186 	int err;
187 	struct super_block *sb = sbi->sb;
188 	struct inode *inode, *inode2;
189 	struct MFT_REF ref;
190 
191 	if (sbi->volume.major_ver < 3) {
192 		ntfs_notice(sb, "Skip $Extend 'cause NTFS version");
193 		return 0;
194 	}
195 
196 	ref.low = cpu_to_le32(MFT_REC_EXTEND);
197 	ref.high = 0;
198 	ref.seq = cpu_to_le16(MFT_REC_EXTEND);
199 	inode = ntfs_iget5(sb, &ref, &NAME_EXTEND);
200 	if (IS_ERR(inode)) {
201 		err = PTR_ERR(inode);
202 		ntfs_err(sb, "Failed to load $Extend.");
203 		inode = NULL;
204 		goto out;
205 	}
206 
207 	/* If ntfs_iget5() reads from disk it never returns bad inode. */
208 	if (!S_ISDIR(inode->i_mode)) {
209 		err = -EINVAL;
210 		goto out;
211 	}
212 
213 	/* Try to find $ObjId */
214 	inode2 = dir_search_u(inode, &NAME_OBJID, NULL);
215 	if (inode2 && !IS_ERR(inode2)) {
216 		if (is_bad_inode(inode2)) {
217 			iput(inode2);
218 		} else {
219 			sbi->objid.ni = ntfs_i(inode2);
220 			sbi->objid_no = inode2->i_ino;
221 		}
222 	}
223 
224 	/* Try to find $Quota */
225 	inode2 = dir_search_u(inode, &NAME_QUOTA, NULL);
226 	if (inode2 && !IS_ERR(inode2)) {
227 		sbi->quota_no = inode2->i_ino;
228 		iput(inode2);
229 	}
230 
231 	/* Try to find $Reparse */
232 	inode2 = dir_search_u(inode, &NAME_REPARSE, NULL);
233 	if (inode2 && !IS_ERR(inode2)) {
234 		sbi->reparse.ni = ntfs_i(inode2);
235 		sbi->reparse_no = inode2->i_ino;
236 	}
237 
238 	/* Try to find $UsnJrnl */
239 	inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL);
240 	if (inode2 && !IS_ERR(inode2)) {
241 		sbi->usn_jrnl_no = inode2->i_ino;
242 		iput(inode2);
243 	}
244 
245 	err = 0;
246 out:
247 	iput(inode);
248 	return err;
249 }
250 
251 int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi)
252 {
253 	int err = 0;
254 	struct super_block *sb = sbi->sb;
255 	bool initialized = false;
256 	struct MFT_REF ref;
257 	struct inode *inode;
258 
259 	/* Check for 4GB. */
260 	if (ni->vfs_inode.i_size >= 0x100000000ull) {
261 		ntfs_err(sb, "\x24LogFile is too big");
262 		err = -EINVAL;
263 		goto out;
264 	}
265 
266 	sbi->flags |= NTFS_FLAGS_LOG_REPLAYING;
267 
268 	ref.low = cpu_to_le32(MFT_REC_MFT);
269 	ref.high = 0;
270 	ref.seq = cpu_to_le16(1);
271 
272 	inode = ntfs_iget5(sb, &ref, NULL);
273 
274 	if (IS_ERR(inode))
275 		inode = NULL;
276 
277 	if (!inode) {
278 		/* Try to use MFT copy. */
279 		u64 t64 = sbi->mft.lbo;
280 
281 		sbi->mft.lbo = sbi->mft.lbo2;
282 		inode = ntfs_iget5(sb, &ref, NULL);
283 		sbi->mft.lbo = t64;
284 		if (IS_ERR(inode))
285 			inode = NULL;
286 	}
287 
288 	if (!inode) {
289 		err = -EINVAL;
290 		ntfs_err(sb, "Failed to load $MFT.");
291 		goto out;
292 	}
293 
294 	sbi->mft.ni = ntfs_i(inode);
295 
296 	/* LogFile should not contains attribute list. */
297 	err = ni_load_all_mi(sbi->mft.ni);
298 	if (!err)
299 		err = log_replay(ni, &initialized);
300 
301 	iput(inode);
302 	sbi->mft.ni = NULL;
303 
304 	sync_blockdev(sb->s_bdev);
305 	invalidate_bdev(sb->s_bdev);
306 
307 	if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
308 		err = 0;
309 		goto out;
310 	}
311 
312 	if (sb_rdonly(sb) || !initialized)
313 		goto out;
314 
315 	/* Fill LogFile by '-1' if it is initialized. */
316 	err = ntfs_bio_fill_1(sbi, &ni->file.run);
317 
318 out:
319 	sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING;
320 
321 	return err;
322 }
323 
324 /*
325  * ntfs_query_def
326  *
327  * Return: Current ATTR_DEF_ENTRY for given attribute type.
328  */
329 const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi,
330 					    enum ATTR_TYPE type)
331 {
332 	int type_in = le32_to_cpu(type);
333 	size_t min_idx = 0;
334 	size_t max_idx = sbi->def_entries - 1;
335 
336 	while (min_idx <= max_idx) {
337 		size_t i = min_idx + ((max_idx - min_idx) >> 1);
338 		const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i;
339 		int diff = le32_to_cpu(entry->type) - type_in;
340 
341 		if (!diff)
342 			return entry;
343 		if (diff < 0)
344 			min_idx = i + 1;
345 		else if (i)
346 			max_idx = i - 1;
347 		else
348 			return NULL;
349 	}
350 	return NULL;
351 }
352 
353 /*
354  * ntfs_look_for_free_space - Look for a free space in bitmap.
355  */
356 int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
357 			     CLST *new_lcn, CLST *new_len,
358 			     enum ALLOCATE_OPT opt)
359 {
360 	int err;
361 	CLST alen;
362 	struct super_block *sb = sbi->sb;
363 	size_t alcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen;
364 	struct wnd_bitmap *wnd = &sbi->used.bitmap;
365 
366 	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
367 	if (opt & ALLOCATE_MFT) {
368 		zlen = wnd_zone_len(wnd);
369 
370 		if (!zlen) {
371 			err = ntfs_refresh_zone(sbi);
372 			if (err)
373 				goto up_write;
374 
375 			zlen = wnd_zone_len(wnd);
376 		}
377 
378 		if (!zlen) {
379 			ntfs_err(sbi->sb, "no free space to extend mft");
380 			err = -ENOSPC;
381 			goto up_write;
382 		}
383 
384 		lcn = wnd_zone_bit(wnd);
385 		alen = min_t(CLST, len, zlen);
386 
387 		wnd_zone_set(wnd, lcn + alen, zlen - alen);
388 
389 		err = wnd_set_used(wnd, lcn, alen);
390 		if (err)
391 			goto up_write;
392 
393 		alcn = lcn;
394 		goto space_found;
395 	}
396 	/*
397 	 * 'Cause cluster 0 is always used this value means that we should use
398 	 * cached value of 'next_free_lcn' to improve performance.
399 	 */
400 	if (!lcn)
401 		lcn = sbi->used.next_free_lcn;
402 
403 	if (lcn >= wnd->nbits)
404 		lcn = 0;
405 
406 	alen = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &alcn);
407 	if (alen)
408 		goto space_found;
409 
410 	/* Try to use clusters from MftZone. */
411 	zlen = wnd_zone_len(wnd);
412 	zeroes = wnd_zeroes(wnd);
413 
414 	/* Check too big request */
415 	if (len > zeroes + zlen || zlen <= NTFS_MIN_MFT_ZONE) {
416 		err = -ENOSPC;
417 		goto up_write;
418 	}
419 
420 	/* How many clusters to cat from zone. */
421 	zlcn = wnd_zone_bit(wnd);
422 	zlen2 = zlen >> 1;
423 	ztrim = clamp_val(len, zlen2, zlen);
424 	new_zlen = max_t(size_t, zlen - ztrim, NTFS_MIN_MFT_ZONE);
425 
426 	wnd_zone_set(wnd, zlcn, new_zlen);
427 
428 	/* Allocate continues clusters. */
429 	alen = wnd_find(wnd, len, 0,
430 			BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &alcn);
431 	if (!alen) {
432 		err = -ENOSPC;
433 		goto up_write;
434 	}
435 
436 space_found:
437 	err = 0;
438 	*new_len = alen;
439 	*new_lcn = alcn;
440 
441 	ntfs_unmap_meta(sb, alcn, alen);
442 
443 	/* Set hint for next requests. */
444 	if (!(opt & ALLOCATE_MFT))
445 		sbi->used.next_free_lcn = alcn + alen;
446 up_write:
447 	up_write(&wnd->rw_lock);
448 	return err;
449 }
450 
451 /*
452  * ntfs_extend_mft - Allocate additional MFT records.
453  *
454  * sbi->mft.bitmap is locked for write.
455  *
456  * NOTE: recursive:
457  *	ntfs_look_free_mft ->
458  *	ntfs_extend_mft ->
459  *	attr_set_size ->
460  *	ni_insert_nonresident ->
461  *	ni_insert_attr ->
462  *	ni_ins_attr_ext ->
463  *	ntfs_look_free_mft ->
464  *	ntfs_extend_mft
465  *
466  * To avoid recursive always allocate space for two new MFT records
467  * see attrib.c: "at least two MFT to avoid recursive loop".
468  */
469 static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
470 {
471 	int err;
472 	struct ntfs_inode *ni = sbi->mft.ni;
473 	size_t new_mft_total;
474 	u64 new_mft_bytes, new_bitmap_bytes;
475 	struct ATTRIB *attr;
476 	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
477 
478 	new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127;
479 	new_mft_bytes = (u64)new_mft_total << sbi->record_bits;
480 
481 	/* Step 1: Resize $MFT::DATA. */
482 	down_write(&ni->file.run_lock);
483 	err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
484 			    new_mft_bytes, NULL, false, &attr);
485 
486 	if (err) {
487 		up_write(&ni->file.run_lock);
488 		goto out;
489 	}
490 
491 	attr->nres.valid_size = attr->nres.data_size;
492 	new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits;
493 	ni->mi.dirty = true;
494 
495 	/* Step 2: Resize $MFT::BITMAP. */
496 	new_bitmap_bytes = bitmap_size(new_mft_total);
497 
498 	err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run,
499 			    new_bitmap_bytes, &new_bitmap_bytes, true, NULL);
500 
501 	/* Refresh MFT Zone if necessary. */
502 	down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS);
503 
504 	ntfs_refresh_zone(sbi);
505 
506 	up_write(&sbi->used.bitmap.rw_lock);
507 	up_write(&ni->file.run_lock);
508 
509 	if (err)
510 		goto out;
511 
512 	err = wnd_extend(wnd, new_mft_total);
513 
514 	if (err)
515 		goto out;
516 
517 	ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total);
518 
519 	err = _ni_write_inode(&ni->vfs_inode, 0);
520 out:
521 	return err;
522 }
523 
524 /*
525  * ntfs_look_free_mft - Look for a free MFT record.
526  */
527 int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
528 		       struct ntfs_inode *ni, struct mft_inode **mi)
529 {
530 	int err = 0;
531 	size_t zbit, zlen, from, to, fr;
532 	size_t mft_total;
533 	struct MFT_REF ref;
534 	struct super_block *sb = sbi->sb;
535 	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
536 	u32 ir;
537 
538 	static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >=
539 		      MFT_REC_FREE - MFT_REC_RESERVED);
540 
541 	if (!mft)
542 		down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
543 
544 	zlen = wnd_zone_len(wnd);
545 
546 	/* Always reserve space for MFT. */
547 	if (zlen) {
548 		if (mft) {
549 			zbit = wnd_zone_bit(wnd);
550 			*rno = zbit;
551 			wnd_zone_set(wnd, zbit + 1, zlen - 1);
552 		}
553 		goto found;
554 	}
555 
556 	/* No MFT zone. Find the nearest to '0' free MFT. */
557 	if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) {
558 		/* Resize MFT */
559 		mft_total = wnd->nbits;
560 
561 		err = ntfs_extend_mft(sbi);
562 		if (!err) {
563 			zbit = mft_total;
564 			goto reserve_mft;
565 		}
566 
567 		if (!mft || MFT_REC_FREE == sbi->mft.next_reserved)
568 			goto out;
569 
570 		err = 0;
571 
572 		/*
573 		 * Look for free record reserved area [11-16) ==
574 		 * [MFT_REC_RESERVED, MFT_REC_FREE ) MFT bitmap always
575 		 * marks it as used.
576 		 */
577 		if (!sbi->mft.reserved_bitmap) {
578 			/* Once per session create internal bitmap for 5 bits. */
579 			sbi->mft.reserved_bitmap = 0xFF;
580 
581 			ref.high = 0;
582 			for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) {
583 				struct inode *i;
584 				struct ntfs_inode *ni;
585 				struct MFT_REC *mrec;
586 
587 				ref.low = cpu_to_le32(ir);
588 				ref.seq = cpu_to_le16(ir);
589 
590 				i = ntfs_iget5(sb, &ref, NULL);
591 				if (IS_ERR(i)) {
592 next:
593 					ntfs_notice(
594 						sb,
595 						"Invalid reserved record %x",
596 						ref.low);
597 					continue;
598 				}
599 				if (is_bad_inode(i)) {
600 					iput(i);
601 					goto next;
602 				}
603 
604 				ni = ntfs_i(i);
605 
606 				mrec = ni->mi.mrec;
607 
608 				if (!is_rec_base(mrec))
609 					goto next;
610 
611 				if (mrec->hard_links)
612 					goto next;
613 
614 				if (!ni_std(ni))
615 					goto next;
616 
617 				if (ni_find_attr(ni, NULL, NULL, ATTR_NAME,
618 						 NULL, 0, NULL, NULL))
619 					goto next;
620 
621 				__clear_bit(ir - MFT_REC_RESERVED,
622 					    &sbi->mft.reserved_bitmap);
623 			}
624 		}
625 
626 		/* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */
627 		zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap,
628 					  MFT_REC_FREE, MFT_REC_RESERVED);
629 		if (zbit >= MFT_REC_FREE) {
630 			sbi->mft.next_reserved = MFT_REC_FREE;
631 			goto out;
632 		}
633 
634 		zlen = 1;
635 		sbi->mft.next_reserved = zbit;
636 	} else {
637 reserve_mft:
638 		zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4;
639 		if (zbit + zlen > wnd->nbits)
640 			zlen = wnd->nbits - zbit;
641 
642 		while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen))
643 			zlen -= 1;
644 
645 		/* [zbit, zbit + zlen) will be used for MFT itself. */
646 		from = sbi->mft.used;
647 		if (from < zbit)
648 			from = zbit;
649 		to = zbit + zlen;
650 		if (from < to) {
651 			ntfs_clear_mft_tail(sbi, from, to);
652 			sbi->mft.used = to;
653 		}
654 	}
655 
656 	if (mft) {
657 		*rno = zbit;
658 		zbit += 1;
659 		zlen -= 1;
660 	}
661 
662 	wnd_zone_set(wnd, zbit, zlen);
663 
664 found:
665 	if (!mft) {
666 		/* The request to get record for general purpose. */
667 		if (sbi->mft.next_free < MFT_REC_USER)
668 			sbi->mft.next_free = MFT_REC_USER;
669 
670 		for (;;) {
671 			if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) {
672 			} else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) {
673 				sbi->mft.next_free = sbi->mft.bitmap.nbits;
674 			} else {
675 				*rno = fr;
676 				sbi->mft.next_free = *rno + 1;
677 				break;
678 			}
679 
680 			err = ntfs_extend_mft(sbi);
681 			if (err)
682 				goto out;
683 		}
684 	}
685 
686 	if (ni && !ni_add_subrecord(ni, *rno, mi)) {
687 		err = -ENOMEM;
688 		goto out;
689 	}
690 
691 	/* We have found a record that are not reserved for next MFT. */
692 	if (*rno >= MFT_REC_FREE)
693 		wnd_set_used(wnd, *rno, 1);
694 	else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited)
695 		__set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
696 
697 out:
698 	if (!mft)
699 		up_write(&wnd->rw_lock);
700 
701 	return err;
702 }
703 
704 /*
705  * ntfs_mark_rec_free - Mark record as free.
706  */
707 void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
708 {
709 	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
710 
711 	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
712 	if (rno >= wnd->nbits)
713 		goto out;
714 
715 	if (rno >= MFT_REC_FREE) {
716 		if (!wnd_is_used(wnd, rno, 1))
717 			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
718 		else
719 			wnd_set_free(wnd, rno, 1);
720 	} else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) {
721 		__clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
722 	}
723 
724 	if (rno < wnd_zone_bit(wnd))
725 		wnd_zone_set(wnd, rno, 1);
726 	else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER)
727 		sbi->mft.next_free = rno;
728 
729 out:
730 	up_write(&wnd->rw_lock);
731 }
732 
733 /*
734  * ntfs_clear_mft_tail - Format empty records [from, to).
735  *
736  * sbi->mft.bitmap is locked for write.
737  */
738 int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to)
739 {
740 	int err;
741 	u32 rs;
742 	u64 vbo;
743 	struct runs_tree *run;
744 	struct ntfs_inode *ni;
745 
746 	if (from >= to)
747 		return 0;
748 
749 	rs = sbi->record_size;
750 	ni = sbi->mft.ni;
751 	run = &ni->file.run;
752 
753 	down_read(&ni->file.run_lock);
754 	vbo = (u64)from * rs;
755 	for (; from < to; from++, vbo += rs) {
756 		struct ntfs_buffers nb;
757 
758 		err = ntfs_get_bh(sbi, run, vbo, rs, &nb);
759 		if (err)
760 			goto out;
761 
762 		err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0);
763 		nb_put(&nb);
764 		if (err)
765 			goto out;
766 	}
767 
768 out:
769 	sbi->mft.used = from;
770 	up_read(&ni->file.run_lock);
771 	return err;
772 }
773 
774 /*
775  * ntfs_refresh_zone - Refresh MFT zone.
776  *
777  * sbi->used.bitmap is locked for rw.
778  * sbi->mft.bitmap is locked for write.
779  * sbi->mft.ni->file.run_lock for write.
780  */
781 int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
782 {
783 	CLST zone_limit, zone_max, lcn, vcn, len;
784 	size_t lcn_s, zlen;
785 	struct wnd_bitmap *wnd = &sbi->used.bitmap;
786 	struct ntfs_inode *ni = sbi->mft.ni;
787 
788 	/* Do not change anything unless we have non empty MFT zone. */
789 	if (wnd_zone_len(wnd))
790 		return 0;
791 
792 	/*
793 	 * Compute the MFT zone at two steps.
794 	 * It would be nice if we are able to allocate 1/8 of
795 	 * total clusters for MFT but not more then 512 MB.
796 	 */
797 	zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits;
798 	zone_max = wnd->nbits >> 3;
799 	if (zone_max > zone_limit)
800 		zone_max = zone_limit;
801 
802 	vcn = bytes_to_cluster(sbi,
803 			       (u64)sbi->mft.bitmap.nbits << sbi->record_bits);
804 
805 	if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL))
806 		lcn = SPARSE_LCN;
807 
808 	/* We should always find Last Lcn for MFT. */
809 	if (lcn == SPARSE_LCN)
810 		return -EINVAL;
811 
812 	lcn_s = lcn + 1;
813 
814 	/* Try to allocate clusters after last MFT run. */
815 	zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s);
816 	if (!zlen) {
817 		ntfs_notice(sbi->sb, "MftZone: unavailable");
818 		return 0;
819 	}
820 
821 	/* Truncate too large zone. */
822 	wnd_zone_set(wnd, lcn_s, zlen);
823 
824 	return 0;
825 }
826 
827 /*
828  * ntfs_update_mftmirr - Update $MFTMirr data.
829  */
830 int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
831 {
832 	int err;
833 	struct super_block *sb = sbi->sb;
834 	u32 blocksize = sb->s_blocksize;
835 	sector_t block1, block2;
836 	u32 bytes;
837 
838 	if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
839 		return 0;
840 
841 	err = 0;
842 	bytes = sbi->mft.recs_mirr << sbi->record_bits;
843 	block1 = sbi->mft.lbo >> sb->s_blocksize_bits;
844 	block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits;
845 
846 	for (; bytes >= blocksize; bytes -= blocksize) {
847 		struct buffer_head *bh1, *bh2;
848 
849 		bh1 = sb_bread(sb, block1++);
850 		if (!bh1) {
851 			err = -EIO;
852 			goto out;
853 		}
854 
855 		bh2 = sb_getblk(sb, block2++);
856 		if (!bh2) {
857 			put_bh(bh1);
858 			err = -EIO;
859 			goto out;
860 		}
861 
862 		if (buffer_locked(bh2))
863 			__wait_on_buffer(bh2);
864 
865 		lock_buffer(bh2);
866 		memcpy(bh2->b_data, bh1->b_data, blocksize);
867 		set_buffer_uptodate(bh2);
868 		mark_buffer_dirty(bh2);
869 		unlock_buffer(bh2);
870 
871 		put_bh(bh1);
872 		bh1 = NULL;
873 
874 		if (wait)
875 			err = sync_dirty_buffer(bh2);
876 
877 		put_bh(bh2);
878 		if (err)
879 			goto out;
880 	}
881 
882 	sbi->flags &= ~NTFS_FLAGS_MFTMIRR;
883 
884 out:
885 	return err;
886 }
887 
888 /*
889  * ntfs_set_state
890  *
891  * Mount: ntfs_set_state(NTFS_DIRTY_DIRTY)
892  * Umount: ntfs_set_state(NTFS_DIRTY_CLEAR)
893  * NTFS error: ntfs_set_state(NTFS_DIRTY_ERROR)
894  */
895 int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty)
896 {
897 	int err;
898 	struct ATTRIB *attr;
899 	struct VOLUME_INFO *info;
900 	struct mft_inode *mi;
901 	struct ntfs_inode *ni;
902 
903 	/*
904 	 * Do not change state if fs was real_dirty.
905 	 * Do not change state if fs already dirty(clear).
906 	 * Do not change any thing if mounted read only.
907 	 */
908 	if (sbi->volume.real_dirty || sb_rdonly(sbi->sb))
909 		return 0;
910 
911 	/* Check cached value. */
912 	if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) ==
913 	    (sbi->volume.flags & VOLUME_FLAG_DIRTY))
914 		return 0;
915 
916 	ni = sbi->volume.ni;
917 	if (!ni)
918 		return -EINVAL;
919 
920 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY);
921 
922 	attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi);
923 	if (!attr) {
924 		err = -EINVAL;
925 		goto out;
926 	}
927 
928 	info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
929 	if (!info) {
930 		err = -EINVAL;
931 		goto out;
932 	}
933 
934 	switch (dirty) {
935 	case NTFS_DIRTY_ERROR:
936 		ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors");
937 		sbi->volume.real_dirty = true;
938 		fallthrough;
939 	case NTFS_DIRTY_DIRTY:
940 		info->flags |= VOLUME_FLAG_DIRTY;
941 		break;
942 	case NTFS_DIRTY_CLEAR:
943 		info->flags &= ~VOLUME_FLAG_DIRTY;
944 		break;
945 	}
946 	/* Cache current volume flags. */
947 	sbi->volume.flags = info->flags;
948 	mi->dirty = true;
949 	err = 0;
950 
951 out:
952 	ni_unlock(ni);
953 	if (err)
954 		return err;
955 
956 	mark_inode_dirty(&ni->vfs_inode);
957 	/* verify(!ntfs_update_mftmirr()); */
958 
959 	/*
960 	 * If we used wait=1, sync_inode_metadata waits for the io for the
961 	 * inode to finish. It hangs when media is removed.
962 	 * So wait=0 is sent down to sync_inode_metadata
963 	 * and filemap_fdatawrite is used for the data blocks.
964 	 */
965 	err = sync_inode_metadata(&ni->vfs_inode, 0);
966 	if (!err)
967 		err = filemap_fdatawrite(ni->vfs_inode.i_mapping);
968 
969 	return err;
970 }
971 
972 /*
973  * security_hash - Calculates a hash of security descriptor.
974  */
975 static inline __le32 security_hash(const void *sd, size_t bytes)
976 {
977 	u32 hash = 0;
978 	const __le32 *ptr = sd;
979 
980 	bytes >>= 2;
981 	while (bytes--)
982 		hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++);
983 	return cpu_to_le32(hash);
984 }
985 
986 int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
987 {
988 	struct block_device *bdev = sb->s_bdev;
989 	u32 blocksize = sb->s_blocksize;
990 	u64 block = lbo >> sb->s_blocksize_bits;
991 	u32 off = lbo & (blocksize - 1);
992 	u32 op = blocksize - off;
993 
994 	for (; bytes; block += 1, off = 0, op = blocksize) {
995 		struct buffer_head *bh = __bread(bdev, block, blocksize);
996 
997 		if (!bh)
998 			return -EIO;
999 
1000 		if (op > bytes)
1001 			op = bytes;
1002 
1003 		memcpy(buffer, bh->b_data + off, op);
1004 
1005 		put_bh(bh);
1006 
1007 		bytes -= op;
1008 		buffer = Add2Ptr(buffer, op);
1009 	}
1010 
1011 	return 0;
1012 }
1013 
1014 int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes,
1015 		  const void *buf, int wait)
1016 {
1017 	u32 blocksize = sb->s_blocksize;
1018 	struct block_device *bdev = sb->s_bdev;
1019 	sector_t block = lbo >> sb->s_blocksize_bits;
1020 	u32 off = lbo & (blocksize - 1);
1021 	u32 op = blocksize - off;
1022 	struct buffer_head *bh;
1023 
1024 	if (!wait && (sb->s_flags & SB_SYNCHRONOUS))
1025 		wait = 1;
1026 
1027 	for (; bytes; block += 1, off = 0, op = blocksize) {
1028 		if (op > bytes)
1029 			op = bytes;
1030 
1031 		if (op < blocksize) {
1032 			bh = __bread(bdev, block, blocksize);
1033 			if (!bh) {
1034 				ntfs_err(sb, "failed to read block %llx",
1035 					 (u64)block);
1036 				return -EIO;
1037 			}
1038 		} else {
1039 			bh = __getblk(bdev, block, blocksize);
1040 			if (!bh)
1041 				return -ENOMEM;
1042 		}
1043 
1044 		if (buffer_locked(bh))
1045 			__wait_on_buffer(bh);
1046 
1047 		lock_buffer(bh);
1048 		if (buf) {
1049 			memcpy(bh->b_data + off, buf, op);
1050 			buf = Add2Ptr(buf, op);
1051 		} else {
1052 			memset(bh->b_data + off, -1, op);
1053 		}
1054 
1055 		set_buffer_uptodate(bh);
1056 		mark_buffer_dirty(bh);
1057 		unlock_buffer(bh);
1058 
1059 		if (wait) {
1060 			int err = sync_dirty_buffer(bh);
1061 
1062 			if (err) {
1063 				ntfs_err(
1064 					sb,
1065 					"failed to sync buffer at block %llx, error %d",
1066 					(u64)block, err);
1067 				put_bh(bh);
1068 				return err;
1069 			}
1070 		}
1071 
1072 		put_bh(bh);
1073 
1074 		bytes -= op;
1075 	}
1076 	return 0;
1077 }
1078 
1079 int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run,
1080 		      u64 vbo, const void *buf, size_t bytes, int sync)
1081 {
1082 	struct super_block *sb = sbi->sb;
1083 	u8 cluster_bits = sbi->cluster_bits;
1084 	u32 off = vbo & sbi->cluster_mask;
1085 	CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next;
1086 	u64 lbo, len;
1087 	size_t idx;
1088 
1089 	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
1090 		return -ENOENT;
1091 
1092 	if (lcn == SPARSE_LCN)
1093 		return -EINVAL;
1094 
1095 	lbo = ((u64)lcn << cluster_bits) + off;
1096 	len = ((u64)clen << cluster_bits) - off;
1097 
1098 	for (;;) {
1099 		u32 op = min_t(u64, len, bytes);
1100 		int err = ntfs_sb_write(sb, lbo, op, buf, sync);
1101 
1102 		if (err)
1103 			return err;
1104 
1105 		bytes -= op;
1106 		if (!bytes)
1107 			break;
1108 
1109 		vcn_next = vcn + clen;
1110 		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
1111 		    vcn != vcn_next)
1112 			return -ENOENT;
1113 
1114 		if (lcn == SPARSE_LCN)
1115 			return -EINVAL;
1116 
1117 		if (buf)
1118 			buf = Add2Ptr(buf, op);
1119 
1120 		lbo = ((u64)lcn << cluster_bits);
1121 		len = ((u64)clen << cluster_bits);
1122 	}
1123 
1124 	return 0;
1125 }
1126 
1127 struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi,
1128 				   const struct runs_tree *run, u64 vbo)
1129 {
1130 	struct super_block *sb = sbi->sb;
1131 	u8 cluster_bits = sbi->cluster_bits;
1132 	CLST lcn;
1133 	u64 lbo;
1134 
1135 	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL))
1136 		return ERR_PTR(-ENOENT);
1137 
1138 	lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask);
1139 
1140 	return ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
1141 }
1142 
1143 int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run,
1144 		     u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb)
1145 {
1146 	int err;
1147 	struct super_block *sb = sbi->sb;
1148 	u32 blocksize = sb->s_blocksize;
1149 	u8 cluster_bits = sbi->cluster_bits;
1150 	u32 off = vbo & sbi->cluster_mask;
1151 	u32 nbh = 0;
1152 	CLST vcn_next, vcn = vbo >> cluster_bits;
1153 	CLST lcn, clen;
1154 	u64 lbo, len;
1155 	size_t idx;
1156 	struct buffer_head *bh;
1157 
1158 	if (!run) {
1159 		/* First reading of $Volume + $MFTMirr + $LogFile goes here. */
1160 		if (vbo > MFT_REC_VOL * sbi->record_size) {
1161 			err = -ENOENT;
1162 			goto out;
1163 		}
1164 
1165 		/* Use absolute boot's 'MFTCluster' to read record. */
1166 		lbo = vbo + sbi->mft.lbo;
1167 		len = sbi->record_size;
1168 	} else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
1169 		err = -ENOENT;
1170 		goto out;
1171 	} else {
1172 		if (lcn == SPARSE_LCN) {
1173 			err = -EINVAL;
1174 			goto out;
1175 		}
1176 
1177 		lbo = ((u64)lcn << cluster_bits) + off;
1178 		len = ((u64)clen << cluster_bits) - off;
1179 	}
1180 
1181 	off = lbo & (blocksize - 1);
1182 	if (nb) {
1183 		nb->off = off;
1184 		nb->bytes = bytes;
1185 	}
1186 
1187 	for (;;) {
1188 		u32 len32 = len >= bytes ? bytes : len;
1189 		sector_t block = lbo >> sb->s_blocksize_bits;
1190 
1191 		do {
1192 			u32 op = blocksize - off;
1193 
1194 			if (op > len32)
1195 				op = len32;
1196 
1197 			bh = ntfs_bread(sb, block);
1198 			if (!bh) {
1199 				err = -EIO;
1200 				goto out;
1201 			}
1202 
1203 			if (buf) {
1204 				memcpy(buf, bh->b_data + off, op);
1205 				buf = Add2Ptr(buf, op);
1206 			}
1207 
1208 			if (!nb) {
1209 				put_bh(bh);
1210 			} else if (nbh >= ARRAY_SIZE(nb->bh)) {
1211 				err = -EINVAL;
1212 				goto out;
1213 			} else {
1214 				nb->bh[nbh++] = bh;
1215 				nb->nbufs = nbh;
1216 			}
1217 
1218 			bytes -= op;
1219 			if (!bytes)
1220 				return 0;
1221 			len32 -= op;
1222 			block += 1;
1223 			off = 0;
1224 
1225 		} while (len32);
1226 
1227 		vcn_next = vcn + clen;
1228 		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
1229 		    vcn != vcn_next) {
1230 			err = -ENOENT;
1231 			goto out;
1232 		}
1233 
1234 		if (lcn == SPARSE_LCN) {
1235 			err = -EINVAL;
1236 			goto out;
1237 		}
1238 
1239 		lbo = ((u64)lcn << cluster_bits);
1240 		len = ((u64)clen << cluster_bits);
1241 	}
1242 
1243 out:
1244 	if (!nbh)
1245 		return err;
1246 
1247 	while (nbh) {
1248 		put_bh(nb->bh[--nbh]);
1249 		nb->bh[nbh] = NULL;
1250 	}
1251 
1252 	nb->nbufs = 0;
1253 	return err;
1254 }
1255 
1256 /*
1257  * ntfs_read_bh
1258  *
1259  * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups.
1260  */
1261 int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
1262 		 struct NTFS_RECORD_HEADER *rhdr, u32 bytes,
1263 		 struct ntfs_buffers *nb)
1264 {
1265 	int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb);
1266 
1267 	if (err)
1268 		return err;
1269 	return ntfs_fix_post_read(rhdr, nb->bytes, true);
1270 }
1271 
1272 int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
1273 		u32 bytes, struct ntfs_buffers *nb)
1274 {
1275 	int err = 0;
1276 	struct super_block *sb = sbi->sb;
1277 	u32 blocksize = sb->s_blocksize;
1278 	u8 cluster_bits = sbi->cluster_bits;
1279 	CLST vcn_next, vcn = vbo >> cluster_bits;
1280 	u32 off;
1281 	u32 nbh = 0;
1282 	CLST lcn, clen;
1283 	u64 lbo, len;
1284 	size_t idx;
1285 
1286 	nb->bytes = bytes;
1287 
1288 	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
1289 		err = -ENOENT;
1290 		goto out;
1291 	}
1292 
1293 	off = vbo & sbi->cluster_mask;
1294 	lbo = ((u64)lcn << cluster_bits) + off;
1295 	len = ((u64)clen << cluster_bits) - off;
1296 
1297 	nb->off = off = lbo & (blocksize - 1);
1298 
1299 	for (;;) {
1300 		u32 len32 = min_t(u64, len, bytes);
1301 		sector_t block = lbo >> sb->s_blocksize_bits;
1302 
1303 		do {
1304 			u32 op;
1305 			struct buffer_head *bh;
1306 
1307 			if (nbh >= ARRAY_SIZE(nb->bh)) {
1308 				err = -EINVAL;
1309 				goto out;
1310 			}
1311 
1312 			op = blocksize - off;
1313 			if (op > len32)
1314 				op = len32;
1315 
1316 			if (op == blocksize) {
1317 				bh = sb_getblk(sb, block);
1318 				if (!bh) {
1319 					err = -ENOMEM;
1320 					goto out;
1321 				}
1322 				if (buffer_locked(bh))
1323 					__wait_on_buffer(bh);
1324 				set_buffer_uptodate(bh);
1325 			} else {
1326 				bh = ntfs_bread(sb, block);
1327 				if (!bh) {
1328 					err = -EIO;
1329 					goto out;
1330 				}
1331 			}
1332 
1333 			nb->bh[nbh++] = bh;
1334 			bytes -= op;
1335 			if (!bytes) {
1336 				nb->nbufs = nbh;
1337 				return 0;
1338 			}
1339 
1340 			block += 1;
1341 			len32 -= op;
1342 			off = 0;
1343 		} while (len32);
1344 
1345 		vcn_next = vcn + clen;
1346 		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
1347 		    vcn != vcn_next) {
1348 			err = -ENOENT;
1349 			goto out;
1350 		}
1351 
1352 		lbo = ((u64)lcn << cluster_bits);
1353 		len = ((u64)clen << cluster_bits);
1354 	}
1355 
1356 out:
1357 	while (nbh) {
1358 		put_bh(nb->bh[--nbh]);
1359 		nb->bh[nbh] = NULL;
1360 	}
1361 
1362 	nb->nbufs = 0;
1363 
1364 	return err;
1365 }
1366 
1367 int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
1368 		  struct ntfs_buffers *nb, int sync)
1369 {
1370 	int err = 0;
1371 	struct super_block *sb = sbi->sb;
1372 	u32 block_size = sb->s_blocksize;
1373 	u32 bytes = nb->bytes;
1374 	u32 off = nb->off;
1375 	u16 fo = le16_to_cpu(rhdr->fix_off);
1376 	u16 fn = le16_to_cpu(rhdr->fix_num);
1377 	u32 idx;
1378 	__le16 *fixup;
1379 	__le16 sample;
1380 
1381 	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
1382 	    fn * SECTOR_SIZE > bytes) {
1383 		return -EINVAL;
1384 	}
1385 
1386 	for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) {
1387 		u32 op = block_size - off;
1388 		char *bh_data;
1389 		struct buffer_head *bh = nb->bh[idx];
1390 		__le16 *ptr, *end_data;
1391 
1392 		if (op > bytes)
1393 			op = bytes;
1394 
1395 		if (buffer_locked(bh))
1396 			__wait_on_buffer(bh);
1397 
1398 		lock_buffer(nb->bh[idx]);
1399 
1400 		bh_data = bh->b_data + off;
1401 		end_data = Add2Ptr(bh_data, op);
1402 		memcpy(bh_data, rhdr, op);
1403 
1404 		if (!idx) {
1405 			u16 t16;
1406 
1407 			fixup = Add2Ptr(bh_data, fo);
1408 			sample = *fixup;
1409 			t16 = le16_to_cpu(sample);
1410 			if (t16 >= 0x7FFF) {
1411 				sample = *fixup = cpu_to_le16(1);
1412 			} else {
1413 				sample = cpu_to_le16(t16 + 1);
1414 				*fixup = sample;
1415 			}
1416 
1417 			*(__le16 *)Add2Ptr(rhdr, fo) = sample;
1418 		}
1419 
1420 		ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short));
1421 
1422 		do {
1423 			*++fixup = *ptr;
1424 			*ptr = sample;
1425 			ptr += SECTOR_SIZE / sizeof(short);
1426 		} while (ptr < end_data);
1427 
1428 		set_buffer_uptodate(bh);
1429 		mark_buffer_dirty(bh);
1430 		unlock_buffer(bh);
1431 
1432 		if (sync) {
1433 			int err2 = sync_dirty_buffer(bh);
1434 
1435 			if (!err && err2)
1436 				err = err2;
1437 		}
1438 
1439 		bytes -= op;
1440 		rhdr = Add2Ptr(rhdr, op);
1441 	}
1442 
1443 	return err;
1444 }
1445 
1446 /*
1447  * ntfs_bio_pages - Read/write pages from/to disk.
1448  */
1449 int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
1450 		   struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
1451 		   u32 op)
1452 {
1453 	int err = 0;
1454 	struct bio *new, *bio = NULL;
1455 	struct super_block *sb = sbi->sb;
1456 	struct block_device *bdev = sb->s_bdev;
1457 	struct page *page;
1458 	u8 cluster_bits = sbi->cluster_bits;
1459 	CLST lcn, clen, vcn, vcn_next;
1460 	u32 add, off, page_idx;
1461 	u64 lbo, len;
1462 	size_t run_idx;
1463 	struct blk_plug plug;
1464 
1465 	if (!bytes)
1466 		return 0;
1467 
1468 	blk_start_plug(&plug);
1469 
1470 	/* Align vbo and bytes to be 512 bytes aligned. */
1471 	lbo = (vbo + bytes + 511) & ~511ull;
1472 	vbo = vbo & ~511ull;
1473 	bytes = lbo - vbo;
1474 
1475 	vcn = vbo >> cluster_bits;
1476 	if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) {
1477 		err = -ENOENT;
1478 		goto out;
1479 	}
1480 	off = vbo & sbi->cluster_mask;
1481 	page_idx = 0;
1482 	page = pages[0];
1483 
1484 	for (;;) {
1485 		lbo = ((u64)lcn << cluster_bits) + off;
1486 		len = ((u64)clen << cluster_bits) - off;
1487 new_bio:
1488 		new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS);
1489 		if (bio) {
1490 			bio_chain(bio, new);
1491 			submit_bio(bio);
1492 		}
1493 		bio = new;
1494 		bio->bi_iter.bi_sector = lbo >> 9;
1495 
1496 		while (len) {
1497 			off = vbo & (PAGE_SIZE - 1);
1498 			add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len;
1499 
1500 			if (bio_add_page(bio, page, add, off) < add)
1501 				goto new_bio;
1502 
1503 			if (bytes <= add)
1504 				goto out;
1505 			bytes -= add;
1506 			vbo += add;
1507 
1508 			if (add + off == PAGE_SIZE) {
1509 				page_idx += 1;
1510 				if (WARN_ON(page_idx >= nr_pages)) {
1511 					err = -EINVAL;
1512 					goto out;
1513 				}
1514 				page = pages[page_idx];
1515 			}
1516 
1517 			if (len <= add)
1518 				break;
1519 			len -= add;
1520 			lbo += add;
1521 		}
1522 
1523 		vcn_next = vcn + clen;
1524 		if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) ||
1525 		    vcn != vcn_next) {
1526 			err = -ENOENT;
1527 			goto out;
1528 		}
1529 		off = 0;
1530 	}
1531 out:
1532 	if (bio) {
1533 		if (!err)
1534 			err = submit_bio_wait(bio);
1535 		bio_put(bio);
1536 	}
1537 	blk_finish_plug(&plug);
1538 
1539 	return err;
1540 }
1541 
1542 /*
1543  * ntfs_bio_fill_1 - Helper for ntfs_loadlog_and_replay().
1544  *
1545  * Fill on-disk logfile range by (-1)
1546  * this means empty logfile.
1547  */
1548 int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
1549 {
1550 	int err = 0;
1551 	struct super_block *sb = sbi->sb;
1552 	struct block_device *bdev = sb->s_bdev;
1553 	u8 cluster_bits = sbi->cluster_bits;
1554 	struct bio *new, *bio = NULL;
1555 	CLST lcn, clen;
1556 	u64 lbo, len;
1557 	size_t run_idx;
1558 	struct page *fill;
1559 	void *kaddr;
1560 	struct blk_plug plug;
1561 
1562 	fill = alloc_page(GFP_KERNEL);
1563 	if (!fill)
1564 		return -ENOMEM;
1565 
1566 	kaddr = kmap_atomic(fill);
1567 	memset(kaddr, -1, PAGE_SIZE);
1568 	kunmap_atomic(kaddr);
1569 	flush_dcache_page(fill);
1570 	lock_page(fill);
1571 
1572 	if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) {
1573 		err = -ENOENT;
1574 		goto out;
1575 	}
1576 
1577 	/*
1578 	 * TODO: Try blkdev_issue_write_same.
1579 	 */
1580 	blk_start_plug(&plug);
1581 	do {
1582 		lbo = (u64)lcn << cluster_bits;
1583 		len = (u64)clen << cluster_bits;
1584 new_bio:
1585 		new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS);
1586 		if (bio) {
1587 			bio_chain(bio, new);
1588 			submit_bio(bio);
1589 		}
1590 		bio = new;
1591 		bio->bi_iter.bi_sector = lbo >> 9;
1592 
1593 		for (;;) {
1594 			u32 add = len > PAGE_SIZE ? PAGE_SIZE : len;
1595 
1596 			if (bio_add_page(bio, fill, add, 0) < add)
1597 				goto new_bio;
1598 
1599 			lbo += add;
1600 			if (len <= add)
1601 				break;
1602 			len -= add;
1603 		}
1604 	} while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));
1605 
1606 	if (!err)
1607 		err = submit_bio_wait(bio);
1608 	bio_put(bio);
1609 
1610 	blk_finish_plug(&plug);
1611 out:
1612 	unlock_page(fill);
1613 	put_page(fill);
1614 
1615 	return err;
1616 }
1617 
1618 int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
1619 		    u64 vbo, u64 *lbo, u64 *bytes)
1620 {
1621 	u32 off;
1622 	CLST lcn, len;
1623 	u8 cluster_bits = sbi->cluster_bits;
1624 
1625 	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL))
1626 		return -ENOENT;
1627 
1628 	off = vbo & sbi->cluster_mask;
1629 	*lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off);
1630 	*bytes = ((u64)len << cluster_bits) - off;
1631 
1632 	return 0;
1633 }
1634 
1635 struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
1636 {
1637 	int err = 0;
1638 	struct super_block *sb = sbi->sb;
1639 	struct inode *inode = new_inode(sb);
1640 	struct ntfs_inode *ni;
1641 
1642 	if (!inode)
1643 		return ERR_PTR(-ENOMEM);
1644 
1645 	ni = ntfs_i(inode);
1646 
1647 	err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0,
1648 			    false);
1649 	if (err)
1650 		goto out;
1651 
1652 	inode->i_ino = rno;
1653 	if (insert_inode_locked(inode) < 0) {
1654 		err = -EIO;
1655 		goto out;
1656 	}
1657 
1658 out:
1659 	if (err) {
1660 		iput(inode);
1661 		ni = ERR_PTR(err);
1662 	}
1663 	return ni;
1664 }
1665 
1666 /*
1667  * O:BAG:BAD:(A;OICI;FA;;;WD)
1668  * Owner S-1-5-32-544 (Administrators)
1669  * Group S-1-5-32-544 (Administrators)
1670  * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS
1671  */
1672 const u8 s_default_security[] __aligned(8) = {
1673 	0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
1674 	0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00,
1675 	0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00,
1676 	0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
1677 	0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00,
1678 	0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
1679 	0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
1680 };
1681 
1682 static_assert(sizeof(s_default_security) == 0x50);
1683 
1684 static inline u32 sid_length(const struct SID *sid)
1685 {
1686 	return struct_size(sid, SubAuthority, sid->SubAuthorityCount);
1687 }
1688 
1689 /*
1690  * is_acl_valid
1691  *
1692  * Thanks Mark Harmstone for idea.
1693  */
1694 static bool is_acl_valid(const struct ACL *acl, u32 len)
1695 {
1696 	const struct ACE_HEADER *ace;
1697 	u32 i;
1698 	u16 ace_count, ace_size;
1699 
1700 	if (acl->AclRevision != ACL_REVISION &&
1701 	    acl->AclRevision != ACL_REVISION_DS) {
1702 		/*
1703 		 * This value should be ACL_REVISION, unless the ACL contains an
1704 		 * object-specific ACE, in which case this value must be ACL_REVISION_DS.
1705 		 * All ACEs in an ACL must be at the same revision level.
1706 		 */
1707 		return false;
1708 	}
1709 
1710 	if (acl->Sbz1)
1711 		return false;
1712 
1713 	if (le16_to_cpu(acl->AclSize) > len)
1714 		return false;
1715 
1716 	if (acl->Sbz2)
1717 		return false;
1718 
1719 	len -= sizeof(struct ACL);
1720 	ace = (struct ACE_HEADER *)&acl[1];
1721 	ace_count = le16_to_cpu(acl->AceCount);
1722 
1723 	for (i = 0; i < ace_count; i++) {
1724 		if (len < sizeof(struct ACE_HEADER))
1725 			return false;
1726 
1727 		ace_size = le16_to_cpu(ace->AceSize);
1728 		if (len < ace_size)
1729 			return false;
1730 
1731 		len -= ace_size;
1732 		ace = Add2Ptr(ace, ace_size);
1733 	}
1734 
1735 	return true;
1736 }
1737 
1738 bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len)
1739 {
1740 	u32 sd_owner, sd_group, sd_sacl, sd_dacl;
1741 
1742 	if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE))
1743 		return false;
1744 
1745 	if (sd->Revision != 1)
1746 		return false;
1747 
1748 	if (sd->Sbz1)
1749 		return false;
1750 
1751 	if (!(sd->Control & SE_SELF_RELATIVE))
1752 		return false;
1753 
1754 	sd_owner = le32_to_cpu(sd->Owner);
1755 	if (sd_owner) {
1756 		const struct SID *owner = Add2Ptr(sd, sd_owner);
1757 
1758 		if (sd_owner + offsetof(struct SID, SubAuthority) > len)
1759 			return false;
1760 
1761 		if (owner->Revision != 1)
1762 			return false;
1763 
1764 		if (sd_owner + sid_length(owner) > len)
1765 			return false;
1766 	}
1767 
1768 	sd_group = le32_to_cpu(sd->Group);
1769 	if (sd_group) {
1770 		const struct SID *group = Add2Ptr(sd, sd_group);
1771 
1772 		if (sd_group + offsetof(struct SID, SubAuthority) > len)
1773 			return false;
1774 
1775 		if (group->Revision != 1)
1776 			return false;
1777 
1778 		if (sd_group + sid_length(group) > len)
1779 			return false;
1780 	}
1781 
1782 	sd_sacl = le32_to_cpu(sd->Sacl);
1783 	if (sd_sacl) {
1784 		const struct ACL *sacl = Add2Ptr(sd, sd_sacl);
1785 
1786 		if (sd_sacl + sizeof(struct ACL) > len)
1787 			return false;
1788 
1789 		if (!is_acl_valid(sacl, len - sd_sacl))
1790 			return false;
1791 	}
1792 
1793 	sd_dacl = le32_to_cpu(sd->Dacl);
1794 	if (sd_dacl) {
1795 		const struct ACL *dacl = Add2Ptr(sd, sd_dacl);
1796 
1797 		if (sd_dacl + sizeof(struct ACL) > len)
1798 			return false;
1799 
1800 		if (!is_acl_valid(dacl, len - sd_dacl))
1801 			return false;
1802 	}
1803 
1804 	return true;
1805 }
1806 
1807 /*
1808  * ntfs_security_init - Load and parse $Secure.
1809  */
1810 int ntfs_security_init(struct ntfs_sb_info *sbi)
1811 {
1812 	int err;
1813 	struct super_block *sb = sbi->sb;
1814 	struct inode *inode;
1815 	struct ntfs_inode *ni;
1816 	struct MFT_REF ref;
1817 	struct ATTRIB *attr;
1818 	struct ATTR_LIST_ENTRY *le;
1819 	u64 sds_size;
1820 	size_t off;
1821 	struct NTFS_DE *ne;
1822 	struct NTFS_DE_SII *sii_e;
1823 	struct ntfs_fnd *fnd_sii = NULL;
1824 	const struct INDEX_ROOT *root_sii;
1825 	const struct INDEX_ROOT *root_sdh;
1826 	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
1827 	struct ntfs_index *indx_sii = &sbi->security.index_sii;
1828 
1829 	ref.low = cpu_to_le32(MFT_REC_SECURE);
1830 	ref.high = 0;
1831 	ref.seq = cpu_to_le16(MFT_REC_SECURE);
1832 
1833 	inode = ntfs_iget5(sb, &ref, &NAME_SECURE);
1834 	if (IS_ERR(inode)) {
1835 		err = PTR_ERR(inode);
1836 		ntfs_err(sb, "Failed to load $Secure.");
1837 		inode = NULL;
1838 		goto out;
1839 	}
1840 
1841 	ni = ntfs_i(inode);
1842 
1843 	le = NULL;
1844 
1845 	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME,
1846 			    ARRAY_SIZE(SDH_NAME), NULL, NULL);
1847 	if (!attr) {
1848 		err = -EINVAL;
1849 		goto out;
1850 	}
1851 
1852 	root_sdh = resident_data(attr);
1853 	if (root_sdh->type != ATTR_ZERO ||
1854 	    root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) {
1855 		err = -EINVAL;
1856 		goto out;
1857 	}
1858 
1859 	err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH);
1860 	if (err)
1861 		goto out;
1862 
1863 	attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME,
1864 			    ARRAY_SIZE(SII_NAME), NULL, NULL);
1865 	if (!attr) {
1866 		err = -EINVAL;
1867 		goto out;
1868 	}
1869 
1870 	root_sii = resident_data(attr);
1871 	if (root_sii->type != ATTR_ZERO ||
1872 	    root_sii->rule != NTFS_COLLATION_TYPE_UINT) {
1873 		err = -EINVAL;
1874 		goto out;
1875 	}
1876 
1877 	err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII);
1878 	if (err)
1879 		goto out;
1880 
1881 	fnd_sii = fnd_get();
1882 	if (!fnd_sii) {
1883 		err = -ENOMEM;
1884 		goto out;
1885 	}
1886 
1887 	sds_size = inode->i_size;
1888 
1889 	/* Find the last valid Id. */
1890 	sbi->security.next_id = SECURITY_ID_FIRST;
1891 	/* Always write new security at the end of bucket. */
1892 	sbi->security.next_off =
1893 		ALIGN(sds_size - SecurityDescriptorsBlockSize, 16);
1894 
1895 	off = 0;
1896 	ne = NULL;
1897 
1898 	for (;;) {
1899 		u32 next_id;
1900 
1901 		err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii);
1902 		if (err || !ne)
1903 			break;
1904 
1905 		sii_e = (struct NTFS_DE_SII *)ne;
1906 		if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR)
1907 			continue;
1908 
1909 		next_id = le32_to_cpu(sii_e->sec_id) + 1;
1910 		if (next_id >= sbi->security.next_id)
1911 			sbi->security.next_id = next_id;
1912 	}
1913 
1914 	sbi->security.ni = ni;
1915 	inode = NULL;
1916 out:
1917 	iput(inode);
1918 	fnd_put(fnd_sii);
1919 
1920 	return err;
1921 }
1922 
1923 /*
1924  * ntfs_get_security_by_id - Read security descriptor by id.
1925  */
1926 int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
1927 			    struct SECURITY_DESCRIPTOR_RELATIVE **sd,
1928 			    size_t *size)
1929 {
1930 	int err;
1931 	int diff;
1932 	struct ntfs_inode *ni = sbi->security.ni;
1933 	struct ntfs_index *indx = &sbi->security.index_sii;
1934 	void *p = NULL;
1935 	struct NTFS_DE_SII *sii_e;
1936 	struct ntfs_fnd *fnd_sii;
1937 	struct SECURITY_HDR d_security;
1938 	const struct INDEX_ROOT *root_sii;
1939 	u32 t32;
1940 
1941 	*sd = NULL;
1942 
1943 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
1944 
1945 	fnd_sii = fnd_get();
1946 	if (!fnd_sii) {
1947 		err = -ENOMEM;
1948 		goto out;
1949 	}
1950 
1951 	root_sii = indx_get_root(indx, ni, NULL, NULL);
1952 	if (!root_sii) {
1953 		err = -EINVAL;
1954 		goto out;
1955 	}
1956 
1957 	/* Try to find this SECURITY descriptor in SII indexes. */
1958 	err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id),
1959 			NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii);
1960 	if (err)
1961 		goto out;
1962 
1963 	if (diff)
1964 		goto out;
1965 
1966 	t32 = le32_to_cpu(sii_e->sec_hdr.size);
1967 	if (t32 < SIZEOF_SECURITY_HDR) {
1968 		err = -EINVAL;
1969 		goto out;
1970 	}
1971 
1972 	if (t32 > SIZEOF_SECURITY_HDR + 0x10000) {
1973 		/* Looks like too big security. 0x10000 - is arbitrary big number. */
1974 		err = -EFBIG;
1975 		goto out;
1976 	}
1977 
1978 	*size = t32 - SIZEOF_SECURITY_HDR;
1979 
1980 	p = kmalloc(*size, GFP_NOFS);
1981 	if (!p) {
1982 		err = -ENOMEM;
1983 		goto out;
1984 	}
1985 
1986 	err = ntfs_read_run_nb(sbi, &ni->file.run,
1987 			       le64_to_cpu(sii_e->sec_hdr.off), &d_security,
1988 			       sizeof(d_security), NULL);
1989 	if (err)
1990 		goto out;
1991 
1992 	if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) {
1993 		err = -EINVAL;
1994 		goto out;
1995 	}
1996 
1997 	err = ntfs_read_run_nb(sbi, &ni->file.run,
1998 			       le64_to_cpu(sii_e->sec_hdr.off) +
1999 				       SIZEOF_SECURITY_HDR,
2000 			       p, *size, NULL);
2001 	if (err)
2002 		goto out;
2003 
2004 	*sd = p;
2005 	p = NULL;
2006 
2007 out:
2008 	kfree(p);
2009 	fnd_put(fnd_sii);
2010 	ni_unlock(ni);
2011 
2012 	return err;
2013 }
2014 
2015 /*
2016  * ntfs_insert_security - Insert security descriptor into $Secure::SDS.
2017  *
2018  * SECURITY Descriptor Stream data is organized into chunks of 256K bytes
2019  * and it contains a mirror copy of each security descriptor.  When writing
2020  * to a security descriptor at location X, another copy will be written at
2021  * location (X+256K).
2022  * When writing a security descriptor that will cross the 256K boundary,
2023  * the pointer will be advanced by 256K to skip
2024  * over the mirror portion.
2025  */
2026 int ntfs_insert_security(struct ntfs_sb_info *sbi,
2027 			 const struct SECURITY_DESCRIPTOR_RELATIVE *sd,
2028 			 u32 size_sd, __le32 *security_id, bool *inserted)
2029 {
2030 	int err, diff;
2031 	struct ntfs_inode *ni = sbi->security.ni;
2032 	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
2033 	struct ntfs_index *indx_sii = &sbi->security.index_sii;
2034 	struct NTFS_DE_SDH *e;
2035 	struct NTFS_DE_SDH sdh_e;
2036 	struct NTFS_DE_SII sii_e;
2037 	struct SECURITY_HDR *d_security;
2038 	u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR;
2039 	u32 aligned_sec_size = ALIGN(new_sec_size, 16);
2040 	struct SECURITY_KEY hash_key;
2041 	struct ntfs_fnd *fnd_sdh = NULL;
2042 	const struct INDEX_ROOT *root_sdh;
2043 	const struct INDEX_ROOT *root_sii;
2044 	u64 mirr_off, new_sds_size;
2045 	u32 next, left;
2046 
2047 	static_assert((1 << Log2OfSecurityDescriptorsBlockSize) ==
2048 		      SecurityDescriptorsBlockSize);
2049 
2050 	hash_key.hash = security_hash(sd, size_sd);
2051 	hash_key.sec_id = SECURITY_ID_INVALID;
2052 
2053 	if (inserted)
2054 		*inserted = false;
2055 	*security_id = SECURITY_ID_INVALID;
2056 
2057 	/* Allocate a temporal buffer. */
2058 	d_security = kzalloc(aligned_sec_size, GFP_NOFS);
2059 	if (!d_security)
2060 		return -ENOMEM;
2061 
2062 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
2063 
2064 	fnd_sdh = fnd_get();
2065 	if (!fnd_sdh) {
2066 		err = -ENOMEM;
2067 		goto out;
2068 	}
2069 
2070 	root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL);
2071 	if (!root_sdh) {
2072 		err = -EINVAL;
2073 		goto out;
2074 	}
2075 
2076 	root_sii = indx_get_root(indx_sii, ni, NULL, NULL);
2077 	if (!root_sii) {
2078 		err = -EINVAL;
2079 		goto out;
2080 	}
2081 
2082 	/*
2083 	 * Check if such security already exists.
2084 	 * Use "SDH" and hash -> to get the offset in "SDS".
2085 	 */
2086 	err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key),
2087 			&d_security->key.sec_id, &diff, (struct NTFS_DE **)&e,
2088 			fnd_sdh);
2089 	if (err)
2090 		goto out;
2091 
2092 	while (e) {
2093 		if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) {
2094 			err = ntfs_read_run_nb(sbi, &ni->file.run,
2095 					       le64_to_cpu(e->sec_hdr.off),
2096 					       d_security, new_sec_size, NULL);
2097 			if (err)
2098 				goto out;
2099 
2100 			if (le32_to_cpu(d_security->size) == new_sec_size &&
2101 			    d_security->key.hash == hash_key.hash &&
2102 			    !memcmp(d_security + 1, sd, size_sd)) {
2103 				*security_id = d_security->key.sec_id;
2104 				/* Such security already exists. */
2105 				err = 0;
2106 				goto out;
2107 			}
2108 		}
2109 
2110 		err = indx_find_sort(indx_sdh, ni, root_sdh,
2111 				     (struct NTFS_DE **)&e, fnd_sdh);
2112 		if (err)
2113 			goto out;
2114 
2115 		if (!e || e->key.hash != hash_key.hash)
2116 			break;
2117 	}
2118 
2119 	/* Zero unused space. */
2120 	next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1);
2121 	left = SecurityDescriptorsBlockSize - next;
2122 
2123 	/* Zero gap until SecurityDescriptorsBlockSize. */
2124 	if (left < new_sec_size) {
2125 		/* Zero "left" bytes from sbi->security.next_off. */
2126 		sbi->security.next_off += SecurityDescriptorsBlockSize + left;
2127 	}
2128 
2129 	/* Zero tail of previous security. */
2130 	//used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1);
2131 
2132 	/*
2133 	 * Example:
2134 	 * 0x40438 == ni->vfs_inode.i_size
2135 	 * 0x00440 == sbi->security.next_off
2136 	 * need to zero [0x438-0x440)
2137 	 * if (next > used) {
2138 	 *  u32 tozero = next - used;
2139 	 *  zero "tozero" bytes from sbi->security.next_off - tozero
2140 	 */
2141 
2142 	/* Format new security descriptor. */
2143 	d_security->key.hash = hash_key.hash;
2144 	d_security->key.sec_id = cpu_to_le32(sbi->security.next_id);
2145 	d_security->off = cpu_to_le64(sbi->security.next_off);
2146 	d_security->size = cpu_to_le32(new_sec_size);
2147 	memcpy(d_security + 1, sd, size_sd);
2148 
2149 	/* Write main SDS bucket. */
2150 	err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off,
2151 				d_security, aligned_sec_size, 0);
2152 
2153 	if (err)
2154 		goto out;
2155 
2156 	mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize;
2157 	new_sds_size = mirr_off + aligned_sec_size;
2158 
2159 	if (new_sds_size > ni->vfs_inode.i_size) {
2160 		err = attr_set_size(ni, ATTR_DATA, SDS_NAME,
2161 				    ARRAY_SIZE(SDS_NAME), &ni->file.run,
2162 				    new_sds_size, &new_sds_size, false, NULL);
2163 		if (err)
2164 			goto out;
2165 	}
2166 
2167 	/* Write copy SDS bucket. */
2168 	err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security,
2169 				aligned_sec_size, 0);
2170 	if (err)
2171 		goto out;
2172 
2173 	/* Fill SII entry. */
2174 	sii_e.de.view.data_off =
2175 		cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr));
2176 	sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
2177 	sii_e.de.view.res = 0;
2178 	sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY);
2179 	sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id));
2180 	sii_e.de.flags = 0;
2181 	sii_e.de.res = 0;
2182 	sii_e.sec_id = d_security->key.sec_id;
2183 	memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
2184 
2185 	err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0);
2186 	if (err)
2187 		goto out;
2188 
2189 	/* Fill SDH entry. */
2190 	sdh_e.de.view.data_off =
2191 		cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr));
2192 	sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
2193 	sdh_e.de.view.res = 0;
2194 	sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY);
2195 	sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key));
2196 	sdh_e.de.flags = 0;
2197 	sdh_e.de.res = 0;
2198 	sdh_e.key.hash = d_security->key.hash;
2199 	sdh_e.key.sec_id = d_security->key.sec_id;
2200 	memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
2201 	sdh_e.magic[0] = cpu_to_le16('I');
2202 	sdh_e.magic[1] = cpu_to_le16('I');
2203 
2204 	fnd_clear(fnd_sdh);
2205 	err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1,
2206 				fnd_sdh, 0);
2207 	if (err)
2208 		goto out;
2209 
2210 	*security_id = d_security->key.sec_id;
2211 	if (inserted)
2212 		*inserted = true;
2213 
2214 	/* Update Id and offset for next descriptor. */
2215 	sbi->security.next_id += 1;
2216 	sbi->security.next_off += aligned_sec_size;
2217 
2218 out:
2219 	fnd_put(fnd_sdh);
2220 	mark_inode_dirty(&ni->vfs_inode);
2221 	ni_unlock(ni);
2222 	kfree(d_security);
2223 
2224 	return err;
2225 }
2226 
2227 /*
2228  * ntfs_reparse_init - Load and parse $Extend/$Reparse.
2229  */
2230 int ntfs_reparse_init(struct ntfs_sb_info *sbi)
2231 {
2232 	int err;
2233 	struct ntfs_inode *ni = sbi->reparse.ni;
2234 	struct ntfs_index *indx = &sbi->reparse.index_r;
2235 	struct ATTRIB *attr;
2236 	struct ATTR_LIST_ENTRY *le;
2237 	const struct INDEX_ROOT *root_r;
2238 
2239 	if (!ni)
2240 		return 0;
2241 
2242 	le = NULL;
2243 	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME,
2244 			    ARRAY_SIZE(SR_NAME), NULL, NULL);
2245 	if (!attr) {
2246 		err = -EINVAL;
2247 		goto out;
2248 	}
2249 
2250 	root_r = resident_data(attr);
2251 	if (root_r->type != ATTR_ZERO ||
2252 	    root_r->rule != NTFS_COLLATION_TYPE_UINTS) {
2253 		err = -EINVAL;
2254 		goto out;
2255 	}
2256 
2257 	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR);
2258 	if (err)
2259 		goto out;
2260 
2261 out:
2262 	return err;
2263 }
2264 
2265 /*
2266  * ntfs_objid_init - Load and parse $Extend/$ObjId.
2267  */
2268 int ntfs_objid_init(struct ntfs_sb_info *sbi)
2269 {
2270 	int err;
2271 	struct ntfs_inode *ni = sbi->objid.ni;
2272 	struct ntfs_index *indx = &sbi->objid.index_o;
2273 	struct ATTRIB *attr;
2274 	struct ATTR_LIST_ENTRY *le;
2275 	const struct INDEX_ROOT *root;
2276 
2277 	if (!ni)
2278 		return 0;
2279 
2280 	le = NULL;
2281 	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME,
2282 			    ARRAY_SIZE(SO_NAME), NULL, NULL);
2283 	if (!attr) {
2284 		err = -EINVAL;
2285 		goto out;
2286 	}
2287 
2288 	root = resident_data(attr);
2289 	if (root->type != ATTR_ZERO ||
2290 	    root->rule != NTFS_COLLATION_TYPE_UINTS) {
2291 		err = -EINVAL;
2292 		goto out;
2293 	}
2294 
2295 	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO);
2296 	if (err)
2297 		goto out;
2298 
2299 out:
2300 	return err;
2301 }
2302 
2303 int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid)
2304 {
2305 	int err;
2306 	struct ntfs_inode *ni = sbi->objid.ni;
2307 	struct ntfs_index *indx = &sbi->objid.index_o;
2308 
2309 	if (!ni)
2310 		return -EINVAL;
2311 
2312 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID);
2313 
2314 	err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL);
2315 
2316 	mark_inode_dirty(&ni->vfs_inode);
2317 	ni_unlock(ni);
2318 
2319 	return err;
2320 }
2321 
2322 int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
2323 			const struct MFT_REF *ref)
2324 {
2325 	int err;
2326 	struct ntfs_inode *ni = sbi->reparse.ni;
2327 	struct ntfs_index *indx = &sbi->reparse.index_r;
2328 	struct NTFS_DE_R re;
2329 
2330 	if (!ni)
2331 		return -EINVAL;
2332 
2333 	memset(&re, 0, sizeof(re));
2334 
2335 	re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero));
2336 	re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R));
2337 	re.de.key_size = cpu_to_le16(sizeof(re.key));
2338 
2339 	re.key.ReparseTag = rtag;
2340 	memcpy(&re.key.ref, ref, sizeof(*ref));
2341 
2342 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
2343 
2344 	err = indx_insert_entry(indx, ni, &re.de, NULL, NULL, 0);
2345 
2346 	mark_inode_dirty(&ni->vfs_inode);
2347 	ni_unlock(ni);
2348 
2349 	return err;
2350 }
2351 
2352 int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
2353 			const struct MFT_REF *ref)
2354 {
2355 	int err, diff;
2356 	struct ntfs_inode *ni = sbi->reparse.ni;
2357 	struct ntfs_index *indx = &sbi->reparse.index_r;
2358 	struct ntfs_fnd *fnd = NULL;
2359 	struct REPARSE_KEY rkey;
2360 	struct NTFS_DE_R *re;
2361 	struct INDEX_ROOT *root_r;
2362 
2363 	if (!ni)
2364 		return -EINVAL;
2365 
2366 	rkey.ReparseTag = rtag;
2367 	rkey.ref = *ref;
2368 
2369 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
2370 
2371 	if (rtag) {
2372 		err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
2373 		goto out1;
2374 	}
2375 
2376 	fnd = fnd_get();
2377 	if (!fnd) {
2378 		err = -ENOMEM;
2379 		goto out1;
2380 	}
2381 
2382 	root_r = indx_get_root(indx, ni, NULL, NULL);
2383 	if (!root_r) {
2384 		err = -EINVAL;
2385 		goto out;
2386 	}
2387 
2388 	/* 1 - forces to ignore rkey.ReparseTag when comparing keys. */
2389 	err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff,
2390 			(struct NTFS_DE **)&re, fnd);
2391 	if (err)
2392 		goto out;
2393 
2394 	if (memcmp(&re->key.ref, ref, sizeof(*ref))) {
2395 		/* Impossible. Looks like volume corrupt? */
2396 		goto out;
2397 	}
2398 
2399 	memcpy(&rkey, &re->key, sizeof(rkey));
2400 
2401 	fnd_put(fnd);
2402 	fnd = NULL;
2403 
2404 	err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
2405 	if (err)
2406 		goto out;
2407 
2408 out:
2409 	fnd_put(fnd);
2410 
2411 out1:
2412 	mark_inode_dirty(&ni->vfs_inode);
2413 	ni_unlock(ni);
2414 
2415 	return err;
2416 }
2417 
2418 static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn,
2419 					  CLST len)
2420 {
2421 	ntfs_unmap_meta(sbi->sb, lcn, len);
2422 	ntfs_discard(sbi, lcn, len);
2423 }
2424 
2425 void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
2426 {
2427 	CLST end, i;
2428 	struct wnd_bitmap *wnd = &sbi->used.bitmap;
2429 
2430 	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
2431 	if (!wnd_is_used(wnd, lcn, len)) {
2432 		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
2433 
2434 		end = lcn + len;
2435 		len = 0;
2436 		for (i = lcn; i < end; i++) {
2437 			if (wnd_is_used(wnd, i, 1)) {
2438 				if (!len)
2439 					lcn = i;
2440 				len += 1;
2441 				continue;
2442 			}
2443 
2444 			if (!len)
2445 				continue;
2446 
2447 			if (trim)
2448 				ntfs_unmap_and_discard(sbi, lcn, len);
2449 
2450 			wnd_set_free(wnd, lcn, len);
2451 			len = 0;
2452 		}
2453 
2454 		if (!len)
2455 			goto out;
2456 	}
2457 
2458 	if (trim)
2459 		ntfs_unmap_and_discard(sbi, lcn, len);
2460 	wnd_set_free(wnd, lcn, len);
2461 
2462 out:
2463 	up_write(&wnd->rw_lock);
2464 }
2465 
2466 /*
2467  * run_deallocate - Deallocate clusters.
2468  */
2469 int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim)
2470 {
2471 	CLST lcn, len;
2472 	size_t idx = 0;
2473 
2474 	while (run_get_entry(run, idx++, NULL, &lcn, &len)) {
2475 		if (lcn == SPARSE_LCN)
2476 			continue;
2477 
2478 		mark_as_free_ex(sbi, lcn, len, trim);
2479 	}
2480 
2481 	return 0;
2482 }
2483