xref: /linux/fs/ntfs/compress.c (revision d9038d99fb5c623f43bcd8b726bfbbe8562648c2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NTFS kernel compressed attributes handling.
4  *
5  * Copyright (c) 2001-2004 Anton Altaparmakov
6  * Copyright (c) 2002 Richard Russon
7  * Copyright (c) 2025 LG Electronics Co., Ltd.
8  *
9  * Part of this file is based on code from the NTFS-3G.
10  * and is copyrighted by the respective authors below:
11  * Copyright (c) 2004-2005 Anton Altaparmakov
12  * Copyright (c) 2004-2006 Szabolcs Szakacsits
13  * Copyright (c)      2005 Yura Pakhuchiy
14  * Copyright (c) 2009-2014 Jean-Pierre Andre
15  * Copyright (c)      2014 Eric Biggers
16  */
17 
18 #include <linux/fs.h>
19 #include <linux/blkdev.h>
20 #include <linux/vmalloc.h>
21 #include <linux/slab.h>
22 
23 #include "attrib.h"
24 #include "inode.h"
25 #include "debug.h"
26 #include "ntfs.h"
27 #include "lcnalloc.h"
28 #include "mft.h"
29 
/*
 * Constants used in the compression code
 */
enum {
	/* Token types and access mask. */
	NTFS_SYMBOL_TOKEN	=	0,	/* Tag bit clear: literal byte. */
	NTFS_PHRASE_TOKEN	=	1,	/* Tag bit set: back-reference. */
	NTFS_TOKEN_MASK		=	1,	/* Mask to extract one tag bit. */

	/* Compression sub-block constants. */
	NTFS_SB_SIZE_MASK	=	0x0fff,	/* sb header: length field (sb size - 3). */
	NTFS_SB_SIZE		=	0x1000,	/* Uncompressed sub-block size (4kiB). */
	NTFS_SB_IS_COMPRESSED	=	0x8000,	/* sb header: set if sb is compressed. */

	/*
	 * The maximum compression block size is by definition 16 * the cluster
	 * size, with the maximum supported cluster size being 4kiB. Thus the
	 * maximum compression buffer size is 64kiB, so we use this when
	 * initializing the compression buffer.
	 */
	NTFS_MAX_CB_SIZE	= 64 * 1024,
};
52 
/*
 * ntfs_compression_buffer - one buffer for the decompression engine
 *
 * Lazily allocated (NTFS_MAX_CB_SIZE bytes, vmalloc'd) by
 * allocate_compression_buffers() and shared by all compressed reads.
 * All access is serialized by ntfs_cb_lock below.
 */
static u8 *ntfs_compression_buffer;

/*
 * ntfs_cb_lock - mutex lock which protects ntfs_compression_buffer
 */
static DEFINE_MUTEX(ntfs_cb_lock);
62 
63 /*
64  * allocate_compression_buffers - allocate the decompression buffers
65  *
66  * Caller has to hold the ntfs_lock mutex.
67  *
68  * Return 0 on success or -ENOMEM if the allocations failed.
69  */
70 int allocate_compression_buffers(void)
71 {
72 	if (ntfs_compression_buffer)
73 		return 0;
74 
75 	ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE);
76 	if (!ntfs_compression_buffer)
77 		return -ENOMEM;
78 	return 0;
79 }
80 
81 /*
82  * free_compression_buffers - free the decompression buffers
83  *
84  * Caller has to hold the ntfs_lock mutex.
85  */
86 void free_compression_buffers(void)
87 {
88 	mutex_lock(&ntfs_cb_lock);
89 	if (!ntfs_compression_buffer) {
90 		mutex_unlock(&ntfs_cb_lock);
91 		return;
92 	}
93 
94 	vfree(ntfs_compression_buffer);
95 	ntfs_compression_buffer = NULL;
96 	mutex_unlock(&ntfs_cb_lock);
97 }
98 
99 /*
100  * zero_partial_compressed_page - zero out of bounds compressed page region
101  * @page: page to zero
102  * @initialized_size: initialized size of the attribute
103  */
104 static void zero_partial_compressed_page(struct page *page,
105 		const s64 initialized_size)
106 {
107 	u8 *kp = page_address(page);
108 	unsigned int kp_ofs;
109 
110 	ntfs_debug("Zeroing page region outside initialized size.");
111 	if (((s64)page->__folio_index << PAGE_SHIFT) >= initialized_size) {
112 		clear_page(kp);
113 		return;
114 	}
115 	kp_ofs = initialized_size & ~PAGE_MASK;
116 	memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs);
117 }
118 
119 /*
120  * handle_bounds_compressed_page - test for&handle out of bounds compressed page
121  * @page: page to check and handle
122  * @i_size: file size
123  * @initialized_size: initialized size of the attribute
124  */
125 static inline void handle_bounds_compressed_page(struct page *page,
126 		const loff_t i_size, const s64 initialized_size)
127 {
128 	if ((page->__folio_index >= (initialized_size >> PAGE_SHIFT)) &&
129 			(initialized_size < i_size))
130 		zero_partial_compressed_page(page, initialized_size);
131 }
132 
/*
 * ntfs_decompress - decompress a compression block into an array of pages
 * @dest_pages:		destination array of pages
 * @completed_pages:	scratch space to track completed pages
 * @dest_index:		current index into @dest_pages (IN/OUT)
 * @dest_ofs:		current offset within @dest_pages[@dest_index] (IN/OUT)
 * @dest_max_index:	maximum index into @dest_pages (IN)
 * @dest_max_ofs:	maximum offset within @dest_pages[@dest_max_index] (IN)
 * @xpage:		the target page (-1 if none) (IN)
 * @xpage_done:		set to 1 if xpage was completed successfully (IN/OUT)
 * @cb_start:		compression block to decompress (IN)
 * @cb_size:		size of compression block @cb_start in bytes (IN)
 * @i_size:		file size when we started the read (IN)
 * @initialized_size:	initialized file size when we started the read (IN)
 *
 * The caller must hold the ntfs_cb_lock mutex, which protects the shared
 * decompression buffer that @cb_start points into.  ntfs_decompress() drops
 * that mutex once it has finished accessing the compression block and before
 * it finalizes the completed destination pages (which may sleep).
 *
 * This decompresses the compression block @cb_start into the array of
 * destination pages @dest_pages starting at index @dest_index into @dest_pages
 * and at offset @dest_pos into the page @dest_pages[@dest_index].
 *
 * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
 * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
 *
 * @cb_start is a pointer to the compression block which needs decompressing
 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
 *
 * Return 0 if success or -EOVERFLOW on error in the compressed stream.
 * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
 * completed during the decompression of the compression block (@cb_start).
 *
 * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up
 * unpredictably! You have been warned!
 *
 * Note to hackers: This function may not sleep until it has finished accessing
 * the compression block @cb_start as it is protected by ntfs_cb_lock.
 */
static int ntfs_decompress(struct page *dest_pages[], int completed_pages[],
		int *dest_index, int *dest_ofs, const int dest_max_index,
		const int dest_max_ofs, const int xpage, char *xpage_done,
		u8 *const cb_start, const u32 cb_size, const loff_t i_size,
		const s64 initialized_size)
{
	/*
	 * Pointers into the compressed data, i.e. the compression block (cb),
	 * and the therein contained sub-blocks (sb).
	 */
	u8 *cb_end = cb_start + cb_size; /* End of cb. */
	u8 *cb = cb_start;	/* Current position in cb. */
	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */

	/* Variables for uncompressed data / destination. */
	struct page *dp;	/* Current destination page being worked on. */
	u8 *dp_addr;		/* Current pointer into dp. */
	u8 *dp_sb_start;	/* Start of current sub-block in dp. */
	u8 *dp_sb_end;		/* End of current sb in dp (dp_sb_start + NTFS_SB_SIZE). */
	u16 do_sb_start;	/* @dest_ofs when starting this sub-block. */
	u16 do_sb_end;		/* @dest_ofs of end of this sb (do_sb_start + NTFS_SB_SIZE). */

	/* Variables for tag and token parsing. */
	u8 tag;			/* Current tag. */
	int token;		/* Loop counter for the eight tokens in tag. */
	int nr_completed_pages = 0;

	/* Default error code. */
	int err = -EOVERFLOW;

	ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
do_next_sb:
	ntfs_debug("Beginning sub-block at offset = 0x%zx in the cb.",
			cb - cb_start);
	/*
	 * Have we reached the end of the compression block or the end of the
	 * decompressed data?  The latter can happen for example if the current
	 * position in the compression block is one byte before its end so the
	 * first two checks do not detect it.
	 */
	if (cb == cb_end || !le16_to_cpup((__le16 *)cb) ||
			(*dest_index == dest_max_index &&
			*dest_ofs == dest_max_ofs)) {
		int i;

		ntfs_debug("Completed. Returning success (0).");
		err = 0;
return_error:
		/* We can sleep from now on, so we drop lock. */
		mutex_unlock(&ntfs_cb_lock);
		/* Second stage: finalize completed pages. */
		if (nr_completed_pages > 0) {
			for (i = 0; i < nr_completed_pages; i++) {
				int di = completed_pages[i];

				dp = dest_pages[di];
				/*
				 * If we are outside the initialized size, zero
				 * the out of bounds page range.
				 */
				handle_bounds_compressed_page(dp, i_size,
						initialized_size);
				flush_dcache_page(dp);
				kunmap_local(page_address(dp));
				SetPageUptodate(dp);
				unlock_page(dp);
				/*
				 * The target page's reference is kept by the
				 * caller; all other pages are released here.
				 */
				if (di == xpage)
					*xpage_done = 1;
				else
					put_page(dp);
				dest_pages[di] = NULL;
			}
		}
		return err;
	}

	/* Setup offsets for the current sub-block destination. */
	do_sb_start = *dest_ofs;
	do_sb_end = do_sb_start + NTFS_SB_SIZE;

	/* Check that we are still within allowed boundaries. */
	if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
		goto return_overflow;

	/* Does the minimum size of a compressed sb overflow valid range? */
	if (cb + 6 > cb_end)
		goto return_overflow;

	/* Setup the current sub-block source pointers and validate range. */
	cb_sb_start = cb;
	/* Low 12 bits of the sb header hold the sb length minus 3. */
	cb_sb_end = cb_sb_start + (le16_to_cpup((__le16 *)cb) & NTFS_SB_SIZE_MASK)
			+ 3;
	if (cb_sb_end > cb_end)
		goto return_overflow;

	/* Get the current destination page. */
	dp = dest_pages[*dest_index];
	if (!dp) {
		/* No page present. Skip decompression of this sub-block. */
		cb = cb_sb_end;

		/* Advance destination position to next sub-block. */
		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK;
		if (!*dest_ofs && (++*dest_index > dest_max_index))
			goto return_overflow;
		goto do_next_sb;
	}

	/* We have a valid destination page. Setup the destination pointers. */
	dp_addr = (u8 *)page_address(dp) + do_sb_start;

	/* Now, we are ready to process the current sub-block (sb). */
	if (!(le16_to_cpup((__le16 *)cb) & NTFS_SB_IS_COMPRESSED)) {
		ntfs_debug("Found uncompressed sub-block.");
		/* This sb is not compressed, just copy it into destination. */

		/* Advance source position to first data byte. */
		cb += 2;

		/* An uncompressed sb must be full size. */
		if (cb_sb_end - cb != NTFS_SB_SIZE)
			goto return_overflow;

		/* Copy the block and advance the source position. */
		memcpy(dp_addr, cb, NTFS_SB_SIZE);
		cb += NTFS_SB_SIZE;

		/* Advance destination position to next sub-block. */
		*dest_ofs += NTFS_SB_SIZE;
		*dest_ofs &= ~PAGE_MASK;
		if (!(*dest_ofs)) {
finalize_page:
			/*
			 * First stage: add current page index to array of
			 * completed pages.  The page is finalized later, at
			 * return_error above, once the cb lock is dropped.
			 */
			completed_pages[nr_completed_pages++] = *dest_index;
			if (++*dest_index > dest_max_index)
				goto return_overflow;
		}
		goto do_next_sb;
	}
	ntfs_debug("Found compressed sub-block.");
	/* This sb is compressed, decompress it into destination. */

	/* Setup destination pointers. */
	dp_sb_start = dp_addr;
	dp_sb_end = dp_sb_start + NTFS_SB_SIZE;

	/* Forward to the first tag in the sub-block. */
	cb += 2;
do_next_tag:
	if (cb == cb_sb_end) {
		/* Check if the decompressed sub-block was not full-length. */
		if (dp_addr < dp_sb_end) {
			int nr_bytes = do_sb_end - *dest_ofs;

			ntfs_debug("Filling incomplete sub-block with zeroes.");
			/* Zero remainder and update destination position. */
			memset(dp_addr, 0, nr_bytes);
			*dest_ofs += nr_bytes;
		}
		/* We have finished the current sub-block. */
		*dest_ofs &= ~PAGE_MASK;
		if (!(*dest_ofs))
			goto finalize_page;
		goto do_next_sb;
	}

	/* Check we are still in range. */
	if (cb > cb_sb_end || dp_addr > dp_sb_end)
		goto return_overflow;

	/* Get the next tag and advance to first token. */
	tag = *cb++;

	/* Parse the eight tokens described by the tag. */
	for (token = 0; token < 8; token++, tag >>= 1) {
		register u16 i;
		u16 lg, pt, length, max_non_overlap;
		u8 *dp_back_addr;

		/* Check if we are done / still in range. */
		if (cb >= cb_sb_end || dp_addr > dp_sb_end)
			break;

		/* Determine token type and parse appropriately.*/
		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
			/*
			 * We have a symbol token, copy the symbol across, and
			 * advance the source and destination positions.
			 */
			*dp_addr++ = *cb++;
			++*dest_ofs;

			/* Continue with the next token. */
			continue;
		}

		/*
		 * We have a phrase token. Make sure it is not the first tag in
		 * the sb as this is illegal and would confuse the code below.
		 */
		if (dp_addr == dp_sb_start)
			goto return_overflow;

		/*
		 * Determine the number of bytes to go back (p) and the number
		 * of bytes to copy (l). We use an optimized algorithm in which
		 * we first calculate log2(current destination position in sb),
		 * which allows determination of l and p in O(1) rather than
		 * O(n). We just need an arch-optimized log2() function now.
		 */
		lg = 0;
		for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
			lg++;

		/* Get the phrase token into i. */
		pt = le16_to_cpup((__le16 *)cb);

		/*
		 * Calculate starting position of the byte sequence in
		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
		 * and make sure we don't go too far back.
		 */
		dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
		if (dp_back_addr < dp_sb_start)
			goto return_overflow;

		/* Now calculate the length of the byte sequence. */
		length = (pt & (0xfff >> lg)) + 3;

		/* Advance destination position and verify it is in range. */
		*dest_ofs += length;
		if (*dest_ofs > do_sb_end)
			goto return_overflow;

		/* The number of non-overlapping bytes. */
		max_non_overlap = dp_addr - dp_back_addr;

		if (length <= max_non_overlap) {
			/* The byte sequence doesn't overlap, just copy it. */
			memcpy(dp_addr, dp_back_addr, length);

			/* Advance destination pointer. */
			dp_addr += length;
		} else {
			/*
			 * The byte sequence does overlap, copy non-overlapping
			 * part and then do a slow byte by byte copy for the
			 * overlapping part. Also, advance the destination
			 * pointer.
			 */
			memcpy(dp_addr, dp_back_addr, max_non_overlap);
			dp_addr += max_non_overlap;
			dp_back_addr += max_non_overlap;
			length -= max_non_overlap;
			while (length--)
				*dp_addr++ = *dp_back_addr++;
		}

		/* Advance source position and continue with the next token. */
		cb += 2;
	}

	/* No tokens left in the current tag. Continue with the next tag. */
	goto do_next_tag;

return_overflow:
	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.");
	goto return_error;
}
444 
/*
 * ntfs_read_compressed_block - read a compressed block into the page cache
 * @folio:	locked folio in the compression block(s) we need to read
 *
 * When we are called the page has already been verified to be locked and the
 * attribute is known to be non-resident, not encrypted, but compressed.
 *
 * 1. Determine which compression block(s) @page is in.
 * 2. Get hold of all pages corresponding to this/these compression block(s).
 * 3. Read the (first) compression block.
 * 4. Decompress it into the corresponding pages.
 * 5. Throw the compressed data away and proceed to 3. for the next compression
 *    block or return success if no more compression blocks left.
 *
 * Warning: We have to be careful what we do about existing pages. They might
 * have been written to so that we would lose data if we were to just overwrite
 * them with the out-of-date uncompressed data.
 *
 * Return 0 if the page @folio wraps was completed, otherwise a negative error
 * code (-EIO if no more specific error is available).
 */
int ntfs_read_compressed_block(struct folio *folio)
{
	struct page *page = &folio->page;
	loff_t i_size;
	s64 initialized_size;
	struct address_space *mapping = page->mapping;
	struct ntfs_inode *ni = NTFS_I(mapping->host);
	struct ntfs_volume *vol = ni->vol;
	struct super_block *sb = vol->sb;
	struct runlist_element *rl;
	unsigned long flags;
	u8 *cb, *cb_pos, *cb_end;
	unsigned long offset, index = page->__folio_index;
	u32 cb_size = ni->itype.compressed.block_size;
	u64 cb_size_mask = cb_size - 1UL;
	s64 vcn;
	s64 lcn;
	/* The first wanted vcn (minimum alignment is PAGE_SIZE). */
	s64 start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >>
			vol->cluster_size_bits;
	/*
	 * The first vcn after the last wanted vcn (minimum alignment is again
	 * PAGE_SIZE.
	 */
	s64 end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1)
			& ~cb_size_mask) >> vol->cluster_size_bits;
	/* Number of compression blocks (cbs) in the wanted vcn range. */
	unsigned int nr_cbs = ntfs_cluster_to_bytes(vol, end_vcn - start_vcn) >>
			ni->itype.compressed.block_size_bits;
	/*
	 * Number of pages required to store the uncompressed data from all
	 * compression blocks (cbs) overlapping @page. Due to alignment
	 * guarantees of start_vcn and end_vcn, no need to round up here.
	 */
	unsigned int nr_pages = ntfs_cluster_to_pidx(vol, end_vcn - start_vcn);
	unsigned int xpage, max_page, cur_page, cur_ofs, i, page_ofs, page_index;
	unsigned int cb_clusters, cb_max_ofs;
	int cb_max_page, err = 0;
	struct page **pages;
	int *completed_pages;
	unsigned char xpage_done = 0;
	struct page *lpage;

	ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = %i.",
			index, cb_size, nr_pages);
	/*
	 * Bad things happen if we get here for anything that is not an
	 * unnamed $DATA attribute.
	 */
	if (ni->type != AT_DATA || ni->name_len) {
		unlock_page(page);
		return -EIO;
	}

	pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
	completed_pages = kmalloc_array(nr_pages + 1, sizeof(int), GFP_NOFS);

	if (unlikely(!pages || !completed_pages)) {
		/* kfree(NULL) is a no-op, so partial allocation is fine. */
		kfree(pages);
		kfree(completed_pages);
		unlock_page(page);
		ntfs_error(vol->sb, "Failed to allocate internal buffers.");
		return -ENOMEM;
	}

	/*
	 * We have already been given one page, this is the one we must do.
	 * Once again, the alignment guarantees keep it simple.
	 */
	offset = ntfs_cluster_to_pidx(vol, start_vcn);
	xpage = index - offset;
	pages[xpage] = page;
	/*
	 * The remaining pages need to be allocated and inserted into the page
	 * cache, alignment guarantees keep all the below much simpler. (-8
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	i_size = i_size_read(VFS_I(ni));
	initialized_size = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) -
			offset;
	/* Is the page fully outside i_size? (truncate in progress) */
	if (xpage >= max_page) {
		kfree(pages);
		kfree(completed_pages);
		zero_user_segments(page, 0, PAGE_SIZE, 0, 0);
		ntfs_debug("Compressed read outside i_size - truncated?");
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}
	if (nr_pages < max_page)
		max_page = nr_pages;

	for (i = 0; i < max_page; i++, offset++) {
		if (i != xpage)
			pages[i] = grab_cache_page_nowait(mapping, offset);
		page = pages[i];
		if (page) {
			/*
			 * We only (re)read the page if it isn't already read
			 * in and/or dirty or we would be losing data or at
			 * least wasting our time.
			 */
			if (!PageDirty(page) && (!PageUptodate(page))) {
				/*
				 * NOTE(review): the return value of
				 * kmap_local_page() is discarded and the
				 * mapping is later recovered via
				 * page_address() when kunmap_local() is
				 * called - confirm this is safe on HIGHMEM
				 * configurations.
				 */
				kmap_local_page(page);
				continue;
			}
			unlock_page(page);
			put_page(page);
			pages[i] = NULL;
		}
	}

	/*
	 * We have the runlist, and all the destination pages we need to fill.
	 * Now read the first compression block.
	 */
	cur_page = 0;
	cur_ofs = 0;
	cb_clusters = ni->itype.compressed.block_clusters;
do_next_cb:
	nr_cbs--;

	mutex_lock(&ntfs_cb_lock);
	/*
	 * NOTE(review): on allocation failure err is left at 0, so err_out
	 * reports -EIO rather than -ENOMEM - confirm this is intended.
	 */
	if (!ntfs_compression_buffer)
		if (allocate_compression_buffers()) {
			mutex_unlock(&ntfs_cb_lock);
			goto err_out;
		}


	cb = ntfs_compression_buffer;
	cb_pos = cb;
	cb_end = cb + cb_size;

	rl = NULL;
	/*
	 * Read one cluster at a time into the compression buffer.  Note that
	 * start_vcn is advanced to the end of this cb up front, so after the
	 * loop "vcn == start_vcn - cb_clusters" means we broke out on the
	 * very first cluster (i.e. a fully sparse cb) and "vcn == start_vcn"
	 * means the whole cb was read.
	 */
	for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
			vcn++) {
		bool is_retry = false;

		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
				(unsigned long long)vcn,
				(unsigned long long)lcn);
		if (lcn < 0) {
			/*
			 * When we reach the first sparse cluster we have
			 * finished with the cb.
			 */
			if (lcn == LCN_HOLE)
				break;
			if (is_retry || lcn != LCN_RL_NOT_MAPPED) {
				mutex_unlock(&ntfs_cb_lock);
				goto rl_err;
			}
			is_retry = true;
			/*
			 * Attempt to map runlist, dropping lock for the
			 * duration.
			 */
			up_read(&ni->runlist.lock);
			if (!ntfs_map_runlist(ni, vcn))
				goto lock_retry_remap;
			mutex_unlock(&ntfs_cb_lock);
			goto map_rl_err;
		}

		page_ofs = ntfs_cluster_to_poff(vol, lcn);
		page_index = ntfs_cluster_to_pidx(vol, lcn);

		/* Read the cluster via the block device's page cache. */
		lpage = read_mapping_page(sb->s_bdev->bd_mapping,
					  page_index, NULL);
		if (IS_ERR(lpage)) {
			err = PTR_ERR(lpage);
			mutex_unlock(&ntfs_cb_lock);
			goto read_err;
		}

		lock_page(lpage);
		memcpy(cb_pos, page_address(lpage) + page_ofs,
		       vol->cluster_size);
		unlock_page(lpage);
		put_page(lpage);
		cb_pos += vol->cluster_size;
	}

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Just a precaution. */
	if (cb_pos + 2 <= cb + cb_size)
		*(u16 *)cb_pos = 0;

	/* Reset cb_pos back to the beginning. */
	cb_pos = cb;

	/* We now have both source (if present) and destination. */
	ntfs_debug("Successfully read the compression block.");

	/* The last page and maximum offset within it for the current cb. */
	cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size;
	cb_max_ofs = cb_max_page & ~PAGE_MASK;
	cb_max_page >>= PAGE_SHIFT;

	/* Catch end of file inside a compression block. */
	if (cb_max_page > max_page)
		cb_max_page = max_page;

	if (vcn == start_vcn - cb_clusters) {
		/* Sparse cb, zero out page range overlapping the cb. */
		ntfs_debug("Found sparse compression block.");
		/* We can sleep from now on, so we drop lock. */
		mutex_unlock(&ntfs_cb_lock);
		if (cb_max_ofs)
			cb_max_page--;
		for (; cur_page < cb_max_page; cur_page++) {
			page = pages[cur_page];
			if (page) {
				if (likely(!cur_ofs))
					clear_page(page_address(page));
				else
					memset(page_address(page) + cur_ofs, 0,
							PAGE_SIZE -
							cur_ofs);
				flush_dcache_page(page);
				kunmap_local(page_address(page));
				SetPageUptodate(page);
				unlock_page(page);
				if (cur_page == xpage)
					xpage_done = 1;
				else
					put_page(page);
				pages[cur_page] = NULL;
			}
			cb_pos += PAGE_SIZE - cur_ofs;
			cur_ofs = 0;
			if (cb_pos >= cb_end)
				break;
		}
		/* If we have a partial final page, deal with it now. */
		if (cb_max_ofs && cb_pos < cb_end) {
			page = pages[cur_page];
			if (page)
				memset(page_address(page) + cur_ofs, 0,
						cb_max_ofs - cur_ofs);
			/*
			 * No need to update cb_pos at this stage:
			 *	cb_pos += cb_max_ofs - cur_ofs;
			 */
			cur_ofs = cb_max_ofs;
		}
	} else if (vcn == start_vcn) {
		/* We can't sleep so we need two stages. */
		unsigned int cur2_page = cur_page;
		unsigned int cur_ofs2 = cur_ofs;
		u8 *cb_pos2 = cb_pos;

		ntfs_debug("Found uncompressed compression block.");
		/* Uncompressed cb, copy it to the destination pages. */
		if (cb_max_ofs)
			cb_max_page--;
		/* First stage: copy data into destination pages. */
		for (; cur_page < cb_max_page; cur_page++) {
			page = pages[cur_page];
			if (page)
				memcpy(page_address(page) + cur_ofs, cb_pos,
						PAGE_SIZE - cur_ofs);
			cb_pos += PAGE_SIZE - cur_ofs;
			cur_ofs = 0;
			if (cb_pos >= cb_end)
				break;
		}
		/* If we have a partial final page, deal with it now. */
		if (cb_max_ofs && cb_pos < cb_end) {
			page = pages[cur_page];
			if (page)
				memcpy(page_address(page) + cur_ofs, cb_pos,
						cb_max_ofs - cur_ofs);
			cb_pos += cb_max_ofs - cur_ofs;
			cur_ofs = cb_max_ofs;
		}
		/* We can sleep from now on, so drop lock. */
		mutex_unlock(&ntfs_cb_lock);
		/* Second stage: finalize pages. */
		for (; cur2_page < cb_max_page; cur2_page++) {
			page = pages[cur2_page];
			if (page) {
				/*
				 * If we are outside the initialized size, zero
				 * the out of bounds page range.
				 */
				handle_bounds_compressed_page(page, i_size,
						initialized_size);
				flush_dcache_page(page);
				kunmap_local(page_address(page));
				SetPageUptodate(page);
				unlock_page(page);
				if (cur2_page == xpage)
					xpage_done = 1;
				else
					put_page(page);
				pages[cur2_page] = NULL;
			}
			cb_pos2 += PAGE_SIZE - cur_ofs2;
			cur_ofs2 = 0;
			if (cb_pos2 >= cb_end)
				break;
		}
	} else {
		/* Compressed cb, decompress it into the destination page(s). */
		unsigned int prev_cur_page = cur_page;

		ntfs_debug("Found compressed compression block.");
		err = ntfs_decompress(pages, completed_pages, &cur_page,
				&cur_ofs, cb_max_page, cb_max_ofs, xpage,
				&xpage_done, cb_pos, cb_size - (cb_pos - cb),
				i_size, initialized_size);
		/*
		 * We can sleep from now on, lock already dropped by
		 * ntfs_decompress().
		 */
		if (err) {
			ntfs_error(vol->sb,
				"ntfs_decompress() failed in inode 0x%llx with error code %i. Skipping this compression block.",
				ni->mft_no, -err);
			/* Release the unfinished pages. */
			for (; prev_cur_page < cur_page; prev_cur_page++) {
				page = pages[prev_cur_page];
				if (page) {
					flush_dcache_page(page);
					kunmap_local(page_address(page));
					unlock_page(page);
					if (prev_cur_page != xpage)
						put_page(page);
					pages[prev_cur_page] = NULL;
				}
			}
		}
	}

	/* Do we have more work to do? */
	if (nr_cbs)
		goto do_next_cb;

	/* Clean up if we have any pages left. Should never happen. */
	for (cur_page = 0; cur_page < max_page; cur_page++) {
		page = pages[cur_page];
		if (page) {
			ntfs_error(vol->sb,
				"Still have pages left! Terminating them with extreme prejudice.  Inode 0x%llx, page index 0x%lx.",
				ni->mft_no, page->__folio_index);
			flush_dcache_page(page);
			kunmap_local(page_address(page));
			unlock_page(page);
			if (cur_page != xpage)
				put_page(page);
			pages[cur_page] = NULL;
		}
	}

	/* We no longer need the list of pages. */
	kfree(pages);
	kfree(completed_pages);

	/* If we have completed the requested page, we return success. */
	if (likely(xpage_done))
		return 0;

	ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
			"EOVERFLOW" : (!err ? "EIO" : "unknown error"));
	return err < 0 ? err : -EIO;

map_rl_err:
	ntfs_error(vol->sb, "ntfs_map_runlist() failed. Cannot read compression block.");
	goto err_out;

rl_err:
	up_read(&ni->runlist.lock);
	ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read compression block.");
	goto err_out;

read_err:
	up_read(&ni->runlist.lock);
	ntfs_error(vol->sb, "IO error while reading compressed data.");

err_out:
	/* Unmap, unlock and release all pages we still hold. */
	for (i = cur_page; i < max_page; i++) {
		page = pages[i];
		if (page) {
			flush_dcache_page(page);
			kunmap_local(page_address(page));
			unlock_page(page);
			if (i != xpage)
				put_page(page);
		}
	}
	kfree(pages);
	kfree(completed_pages);
	return -EIO;
}
877 
/*
 * Match length at or above which ntfs_best_match() will stop searching for
 * longer matches.
 */
#define NICE_MATCH_LEN		18

/*
 * Maximum number of potential matches that ntfs_best_match() will consider at
 * each position.
 */
#define MAX_SEARCH_DEPTH	24

/* log base 2 of the number of entries in the hash table for match-finding.  */
#define HASH_SHIFT		14

/*
 * Constant for the multiplicative hash function. These hashing constants
 * are used solely for the match-finding algorithm during compression.
 * They are NOT part of the on-disk format. The decompressor does not
 * utilize this hash.
 */
#define HASH_MULTIPLIER		0x1E35A7BD

/* State shared by the compression-side match finder. */
struct compress_context {
	const unsigned char *inbuf;	/* Input buffer being compressed. */
	int bufsize;			/* Number of bytes in @inbuf. */
	int size;			/* Length of best match found (< 3 means none). */
	int rel;			/* Offset of best match found. */
	int mxsz;			/* Maximum allowed match length. */
	s16 head[1 << HASH_SHIFT];	/* Hash bucket -> most recent position. */
	s16 prev[NTFS_SB_SIZE];		/* Chains earlier positions sharing a hash. */
};
910 
911 /*
912  * Hash the next 3-byte sequence in the input buffer
913  */
914 static inline unsigned int ntfs_hash(const u8 *p)
915 {
916 	u32 str;
917 	u32 hash;
918 
919 	/*
920 	 * Unaligned access allowed, and little endian CPU.
921 	 * Callers ensure that at least 4 (not 3) bytes are remaining.
922 	 */
923 	str = *(const u32 *)p & 0xFFFFFF;
924 	hash = str * HASH_MULTIPLIER;
925 
926 	/* High bits are more random than the low bits.  */
927 	return hash >> (32 - HASH_SHIFT);
928 }
929 
930 /*
931  * Search for the longest sequence matching current position
932  *
933  * A hash table, each entry of which points to a chain of sequence
934  * positions sharing the corresponding hash code, is maintained to speed up
935  * searching for matches.  To maintain the hash table, either
936  * ntfs_best_match() or ntfs_skip_position() has to be called for each
937  * consecutive position.
938  *
939  * This function is heavily used; it has to be optimized carefully.
940  *
941  * This function sets pctx->size and pctx->rel to the length and offset,
942  * respectively, of the longest match found.
943  *
944  * The minimum match length is assumed to be 3, and the maximum match
945  * length is assumed to be pctx->mxsz.  If this function produces
946  * pctx->size < 3, then no match was found.
947  *
948  * Note: for the following reasons, this function is not guaranteed to find
949  * *the* longest match up to pctx->mxsz:
950  *
951  *      (1) If this function finds a match of NICE_MATCH_LEN bytes or greater,
952  *          it ends early because a match this long is good enough and it's not
953  *          worth spending more time searching.
954  *
955  *      (2) If this function considers MAX_SEARCH_DEPTH matches with a single
956  *          position, it ends early and returns the longest match found so far.
957  *          This saves a lot of time on degenerate inputs.
958  */
static void ntfs_best_match(struct compress_context *pctx, const int i,
		int best_len)
{
	const u8 * const inbuf = pctx->inbuf;
	const u8 * const strptr = &inbuf[i]; /* String we're matching against */
	s16 * const prev = pctx->prev;
	/* Never match past the end of the buffer nor past the codable size. */
	const int max_len = min(pctx->bufsize - i, pctx->mxsz);
	const int nice_len = min(NICE_MATCH_LEN, max_len);
	int depth_remaining = MAX_SEARCH_DEPTH;
	const u8 *best_matchptr = strptr;
	unsigned int hash;
	s16 cur_match;
	const u8 *matchptr;
	int len;

	/*
	 * Too close to the end of the buffer: no match of the minimum
	 * length (3) can be hashed and extended here, so report whatever
	 * 'best_len' the caller handed in (pctx->size < 3 means no match).
	 */
	if (max_len < 4)
		goto out;

	/* Insert the current sequence into the appropriate hash chain. */
	hash = ntfs_hash(strptr);
	cur_match = pctx->head[hash];
	prev[i] = cur_match;
	pctx->head[hash] = i;

	if (best_len >= max_len) {
		/*
		 * Lazy match is being attempted, but there aren't enough length
		 * bits remaining to code a longer match.
		 */
		goto out;
	}

	/* Search the appropriate hash chain for matches. */

	/*
	 * Chain entries are s16 positions; -1 terminates the chain.  The
	 * depth counter bounds the work done on degenerate (highly
	 * repetitive) inputs.
	 */
	for (; cur_match >= 0 && depth_remaining--; cur_match = prev[cur_match]) {
		matchptr = &inbuf[cur_match];

		/*
		 * Considering the potential match at 'matchptr':  is it longer
		 * than 'best_len'?
		 *
		 * The bytes at index 'best_len' are the most likely to differ,
		 * so check them first.
		 *
		 * The bytes at indices 'best_len - 1' and '0' are less
		 * important to check separately.  But doing so still gives a
		 * slight performance improvement, at least on x86_64, probably
		 * because they create separate branches for the CPU to predict
		 * independently of the branches in the main comparison loops.
		 */
		if (matchptr[best_len] != strptr[best_len] ||
				matchptr[best_len - 1] != strptr[best_len - 1] ||
				matchptr[0] != strptr[0])
			goto next_match;

		/* Verify the remaining bytes up to the current best length. */
		for (len = 1; len < best_len - 1; len++)
			if (matchptr[len] != strptr[len])
				goto next_match;

		/*
		 * The match is the longest found so far ---
		 * at least 'best_len' + 1 bytes.  Continue extending it.
		 */

		best_matchptr = matchptr;

		do {
			if (++best_len >= nice_len) {
				/*
				 * 'nice_len' reached; don't waste time
				 * searching for longer matches.  Extend the
				 * match as far as possible and terminate the
				 * search.
				 */
				while (best_len < max_len &&
				       (best_matchptr[best_len] ==
					strptr[best_len]))
					best_len++;
				goto out;
			}
		} while (best_matchptr[best_len] == strptr[best_len]);

		/* Found a longer match, but 'nice_len' not yet reached.  */

next_match:
		/* Continue to next match in the chain.  */
		;
	}

	/*
	 * Reached end of chain, or ended early due to reaching the maximum
	 * search depth.
	 */

out:
	/* Return the longest match we were able to find.  */
	pctx->size = best_len;
	pctx->rel = best_matchptr - strptr; /* given as a negative number! */
}
1058 
1059 /*
1060  * Advance the match-finder, but don't search for matches.
1061  */
1062 static void ntfs_skip_position(struct compress_context *pctx, const int i)
1063 {
1064 	unsigned int hash;
1065 
1066 	if (pctx->bufsize - i < 4)
1067 		return;
1068 
1069 	/* Insert the current sequence into the appropriate hash chain.  */
1070 	hash = ntfs_hash(pctx->inbuf + i);
1071 	pctx->prev[i] = pctx->head[hash];
1072 	pctx->head[hash] = i;
1073 }
1074 
1075 /*
1076  * Compress a 4096-byte block
1077  *
1078  * Returns a header of two bytes followed by the compressed data.
1079  * If compression is not effective, the header and an uncompressed
1080  * block is returned.
1081  *
1082  * Note : two bytes may be output before output buffer overflow
1083  * is detected, so a 4100-bytes output buffer must be reserved.
1084  *
1085  * Returns the size of the compressed block, including the
1086  * header (minimal size is 2, maximum size is 4098)
1087  * 0 if an error has been met.
1088  */
1089 static unsigned int ntfs_compress_block(const char *inbuf, const int bufsize,
1090 		char *outbuf)
1091 {
1092 	struct compress_context *pctx;
1093 	int i; /* current position */
1094 	int j; /* end of best match from current position */
1095 	int k; /* end of best match from next position */
1096 	int offs; /* offset to best match */
1097 	int bp; /* bits to store offset */
1098 	int bp_cur; /* saved bits to store offset at current position */
1099 	int mxoff; /* max match offset : 1 << bp */
1100 	unsigned int xout;
1101 	unsigned int q; /* aggregated offset and size */
1102 	int have_match; /* do we have a match at the current position? */
1103 	char *ptag; /* location reserved for a tag */
1104 	int tag;    /* current value of tag */
1105 	int ntag;   /* count of bits still undefined in tag */
1106 
1107 	pctx = kvzalloc(sizeof(struct compress_context), GFP_NOFS);
1108 	if (!pctx)
1109 		return -ENOMEM;
1110 
1111 	/*
1112 	 * All hash chains start as empty.  The special value '-1' indicates the
1113 	 * end of each hash chain.
1114 	 */
1115 	memset(pctx->head, 0xFF, sizeof(pctx->head));
1116 
1117 	pctx->inbuf = (const unsigned char *)inbuf;
1118 	pctx->bufsize = bufsize;
1119 	xout = 2;
1120 	i = 0;
1121 	bp = 4;
1122 	mxoff = 1 << bp;
1123 	pctx->mxsz = (1 << (16 - bp)) + 2;
1124 	have_match = 0;
1125 	tag = 0;
1126 	ntag = 8;
1127 	ptag = &outbuf[xout++];
1128 
1129 	while ((i < bufsize) && (xout < (NTFS_SB_SIZE + 2))) {
1130 
1131 		/*
1132 		 * This implementation uses "lazy" parsing: it always chooses
1133 		 * the longest match, unless the match at the next position is
1134 		 * longer.  This is the same strategy used by the high
1135 		 * compression modes of zlib.
1136 		 */
1137 		if (!have_match) {
1138 			/*
1139 			 * Find the longest match at the current position.  But
1140 			 * first adjust the maximum match length if needed.
1141 			 * (This loop might need to run more than one time in
1142 			 * the case that we just output a long match.)
1143 			 */
1144 			while (mxoff < i) {
1145 				bp++;
1146 				mxoff <<= 1;
1147 				pctx->mxsz = (pctx->mxsz + 2) >> 1;
1148 			}
1149 			ntfs_best_match(pctx, i, 2);
1150 		}
1151 
1152 		if (pctx->size >= 3) {
1153 			/* Found a match at the current position.  */
1154 			j = i + pctx->size;
1155 			bp_cur = bp;
1156 			offs = pctx->rel;
1157 
1158 			if (pctx->size >= NICE_MATCH_LEN) {
1159 				/* Choose long matches immediately.  */
1160 				q = (~offs << (16 - bp_cur)) + (j - i - 3);
1161 				outbuf[xout++] = q & 255;
1162 				outbuf[xout++] = (q >> 8) & 255;
1163 				tag |= (1 << (8 - ntag));
1164 
1165 				if (j == bufsize) {
1166 					/*
1167 					 * Shortcut if the match extends to the
1168 					 * end of the buffer.
1169 					 */
1170 					i = j;
1171 					--ntag;
1172 					break;
1173 				}
1174 				i += 1;
1175 				do {
1176 					ntfs_skip_position(pctx, i);
1177 				} while (++i != j);
1178 				have_match = 0;
1179 			} else {
1180 				/*
1181 				 * Check for a longer match at the next
1182 				 * position.
1183 				 */
1184 
1185 				/*
1186 				 * Doesn't need to be while() since we just
1187 				 * adjusted the maximum match length at the
1188 				 * previous position.
1189 				 */
1190 				if (mxoff < i + 1) {
1191 					bp++;
1192 					mxoff <<= 1;
1193 					pctx->mxsz = (pctx->mxsz + 2) >> 1;
1194 				}
1195 				ntfs_best_match(pctx, i + 1, pctx->size);
1196 				k = i + 1 + pctx->size;
1197 
1198 				if (k > (j + 1)) {
1199 					/*
1200 					 * Next match is longer.
1201 					 * Output a literal.
1202 					 */
1203 					outbuf[xout++] = inbuf[i++];
1204 					have_match = 1;
1205 				} else {
1206 					/*
1207 					 * Next match isn't longer.
1208 					 * Output the current match.
1209 					 */
1210 					q = (~offs << (16 - bp_cur)) +
1211 						(j - i - 3);
1212 					outbuf[xout++] = q & 255;
1213 					outbuf[xout++] = (q >> 8) & 255;
1214 					tag |= (1 << (8 - ntag));
1215 
1216 					/*
1217 					 * The minimum match length is 3, and
1218 					 * we've run two bytes through the
1219 					 * matchfinder already.  So the minimum
1220 					 * number of positions we need to skip
1221 					 * is 1.
1222 					 */
1223 					i += 2;
1224 					do {
1225 						ntfs_skip_position(pctx, i);
1226 					} while (++i != j);
1227 					have_match = 0;
1228 				}
1229 			}
1230 		} else {
1231 			/* No match at current position.  Output a literal. */
1232 			outbuf[xout++] = inbuf[i++];
1233 			have_match = 0;
1234 		}
1235 
1236 		/* Store the tag if fully used. */
1237 		if (!--ntag) {
1238 			*ptag = tag;
1239 			ntag = 8;
1240 			ptag = &outbuf[xout++];
1241 			tag = 0;
1242 		}
1243 	}
1244 
1245 	/* Store the last tag if partially used. */
1246 	if (ntag == 8)
1247 		xout--;
1248 	else
1249 		*ptag = tag;
1250 
1251 	/* Determine whether to store the data compressed or uncompressed. */
1252 	if ((i >= bufsize) && (xout < (NTFS_SB_SIZE + 2))) {
1253 		/* Compressed. */
1254 		outbuf[0] = (xout - 3) & 255;
1255 		outbuf[1] = 0xb0 + (((xout - 3) >> 8) & 15);
1256 	} else {
1257 		/* Uncompressed.  */
1258 		memcpy(&outbuf[2], inbuf, bufsize);
1259 		if (bufsize < NTFS_SB_SIZE)
1260 			memset(&outbuf[bufsize + 2], 0, NTFS_SB_SIZE - bufsize);
1261 		outbuf[0] = 0xff;
1262 		outbuf[1] = 0x3f;
1263 		xout = NTFS_SB_SIZE + 2;
1264 	}
1265 
1266 	/*
1267 	 * Free the compression context and return the total number of bytes
1268 	 * written to 'outbuf'.
1269 	 */
1270 	kvfree(pctx);
1271 	return xout;
1272 }
1273 
/*
 * ntfs_write_cb - compress one compression block and write it out
 *
 * @ni:			ntfs inode the data belongs to
 * @pos:		byte position of the write within the attribute
 * @pages:		pages holding the plain (uncompressed) data
 * @pages_per_cb:	number of pages making up one compression block
 *
 * The plain data is compressed sub-block by sub-block.  If the compressed
 * result would not save at least one whole cluster, the data is written
 * uncompressed instead.  A compression block consisting entirely of zeroes
 * is left as a hole (no clusters are allocated for it).
 *
 * Return 0 on success or a negative error code.
 */
static int ntfs_write_cb(struct ntfs_inode *ni, loff_t pos, struct page **pages,
		int pages_per_cb)
{
	struct ntfs_volume *vol = ni->vol;
	char *outbuf = NULL, *pbuf, *inbuf;
	u32 compsz, p, insz = pages_per_cb << PAGE_SHIFT;
	s32 rounded, bio_size;
	unsigned int sz, bsz;
	bool fail = false, allzeroes;
	/* a single compressed zero */
	static char onezero[] = {0x01, 0xb0, 0x00, 0x00};
	/* a couple of compressed zeroes */
	static char twozeroes[] = {0x02, 0xb0, 0x00, 0x00, 0x00};
	/* more compressed zeroes, to be followed by some count */
	static char morezeroes[] = {0x03, 0xb0, 0x02, 0x00};
	struct page **pages_disk = NULL, *pg;
	s64 bio_lcn;
	struct runlist_element *rlc, *rl;
	int i, err;
	/*
	 * Worst-case output size: each NTFS_SB_SIZE sub-block may gain a
	 * 2-byte header, plus 2 terminating bytes, rounded up to whole pages.
	 */
	int pages_count = (round_up(ni->itype.compressed.block_size + 2 *
		(ni->itype.compressed.block_size / NTFS_SB_SIZE) + 2, PAGE_SIZE)) / PAGE_SIZE;
	size_t new_rl_count;
	struct bio *bio = NULL;
	loff_t new_length;
	s64 new_vcn;

	/* Map the input pages into one contiguous, read-only buffer. */
	inbuf = vmap(pages, pages_per_cb, VM_MAP, PAGE_KERNEL_RO);
	if (!inbuf)
		return -ENOMEM;

	/* may need 2 extra bytes per block and 2 more bytes */
	pages_disk = kcalloc(pages_count, sizeof(struct page *), GFP_NOFS);
	if (!pages_disk) {
		vunmap(inbuf);
		return -ENOMEM;
	}

	for (i = 0; i < pages_count; i++) {
		pg = alloc_page(GFP_KERNEL);
		if (!pg) {
			err = -ENOMEM;
			goto out;
		}
		pages_disk[i] = pg;
		lock_page(pg);
		/*
		 * NOTE(review): the address returned by kmap_local_page() is
		 * discarded and the pages are only accessed through the vmap
		 * below; kmap_local mappings are also meant to be released in
		 * reverse order, while the cleanup path unmaps in forward
		 * order via page_address() — confirm this is intentional.
		 */
		kmap_local_page(pg);
	}

	/* Contiguous view of the destination pages for the compressor. */
	outbuf = vmap(pages_disk, pages_count, VM_MAP, PAGE_KERNEL);
	if (!outbuf) {
		err = -ENOMEM;
		goto out;
	}

	/* Compress sub-block by sub-block, tracking whether all data is zero. */
	compsz = 0;
	allzeroes = true;
	for (p = 0; (p < insz) && !fail; p += NTFS_SB_SIZE) {
		if ((p + NTFS_SB_SIZE) < insz)
			bsz = NTFS_SB_SIZE;
		else
			bsz = insz - p;
		pbuf = &outbuf[compsz];
		sz = ntfs_compress_block(&inbuf[p], bsz, pbuf);
		/* fail if all the clusters (or more) are needed */
		if (!sz || ((compsz + sz + vol->cluster_size + 2) >
			    ni->itype.compressed.block_size))
			fail = true;
		else {
			if (allzeroes) {
				/* check whether this is all zeroes */
				switch (sz) {
				case 4:
					allzeroes = !memcmp(pbuf, onezero, 4);
					break;
				case 5:
					allzeroes = !memcmp(pbuf, twozeroes, 5);
					break;
				case 6:
					allzeroes = !memcmp(pbuf, morezeroes, 4);
					break;
				default:
					allzeroes = false;
					break;
				}
			}
			compsz += sz;
		}
	}

	if (!fail && !allzeroes) {
		/* Terminate the block and pad it up to a full cluster. */
		outbuf[compsz++] = 0;
		outbuf[compsz++] = 0;
		rounded = ((compsz - 1) | (vol->cluster_size - 1)) + 1;
		memset(&outbuf[compsz], 0, rounded - compsz);
		bio_size = rounded;
		/* Write the compressed pages instead of the plain ones. */
		pages = pages_disk;
	} else if (allzeroes) {
		/* All zeroes: keep the block sparse, nothing to write. */
		err = 0;
		goto out;
	} else {
		/* No space saved: write the plain data uncompressed. */
		bio_size = insz;
	}

	new_vcn = ntfs_bytes_to_cluster(vol, pos & ~(ni->itype.compressed.block_size - 1));
	new_length = ntfs_bytes_to_cluster(vol, round_up(bio_size, vol->cluster_size));

	/* Drop any clusters previously backing this compression block. */
	err = ntfs_non_resident_attr_punch_hole(ni, new_vcn, ni->itype.compressed.block_clusters);
	if (err < 0)
		goto out;

	rlc = ntfs_cluster_alloc(vol, new_vcn, new_length, -1, DATA_ZONE,
			false, true, true);
	if (IS_ERR(rlc)) {
		err = PTR_ERR(rlc);
		goto out;
	}

	bio_lcn = rlc->lcn;
	down_write(&ni->runlist.lock);
	rl = ntfs_runlists_merge(&ni->runlist, rlc, 0, &new_rl_count);
	if (IS_ERR(rl)) {
		up_write(&ni->runlist.lock);
		ntfs_error(vol->sb, "Failed to merge runlists");
		err = PTR_ERR(rl);
		/* Give the freshly allocated clusters back on failure. */
		if (ntfs_cluster_free_from_rl(vol, rlc))
			ntfs_error(vol->sb, "Failed to free hot clusters.");
		kvfree(rlc);
		goto out;
	}

	ni->runlist.count = new_rl_count;
	ni->runlist.rl = rl;

	err = ntfs_attr_update_mapping_pairs(ni, 0);
	up_write(&ni->runlist.lock);
	if (err) {
		err = -EIO;
		goto out;
	}

	/*
	 * Write the pages to the newly allocated clusters.  The bio is
	 * allocated with a single vec, so when bio_add_page() refuses a
	 * page the current bio is submitted and a new one is started at
	 * the matching sector.
	 */
	i = 0;
	while (bio_size > 0) {
		int page_size;

		if (bio_size >= PAGE_SIZE) {
			page_size = PAGE_SIZE;
			bio_size -= PAGE_SIZE;
		} else {
			page_size = bio_size;
			bio_size = 0;
		}

setup_bio:
		if (!bio) {
			bio = bio_alloc(vol->sb->s_bdev, 1, REQ_OP_WRITE,
					GFP_NOIO);
			bio->bi_iter.bi_sector =
				ntfs_bytes_to_sector(vol,
						ntfs_cluster_to_bytes(vol, bio_lcn + i));
		}

		if (!bio_add_page(bio, pages[i], page_size, 0)) {
			err = submit_bio_wait(bio);
			bio_put(bio);
			if (err)
				goto out;
			bio = NULL;
			goto setup_bio;
		}
		i++;
	}

	/* Submit the final (possibly only) bio. */
	err = submit_bio_wait(bio);
	bio_put(bio);
out:
	/* vunmap(NULL) is a no-op, so the early-error paths are safe. */
	vunmap(outbuf);
	for (i = 0; i < pages_count; i++) {
		pg = pages_disk[i];
		if (pg) {
			kunmap_local(page_address(pg));
			unlock_page(pg);
			put_page(pg);
		}
	}
	kfree(pages_disk);
	vunmap(inbuf);
	NInoSetFileNameDirty(ni);
	mark_mft_record_dirty(ni);

	return err;
}
1465 
1466 int ntfs_compress_write(struct ntfs_inode *ni, loff_t pos, size_t count,
1467 		struct iov_iter *from)
1468 {
1469 	struct folio *folio;
1470 	struct page **pages = NULL, *page;
1471 	int pages_per_cb = ni->itype.compressed.block_size >> PAGE_SHIFT;
1472 	int cb_size = ni->itype.compressed.block_size, cb_off, err = 0;
1473 	int i, ip;
1474 	size_t written = 0;
1475 	struct address_space *mapping = VFS_I(ni)->i_mapping;
1476 
1477 	if (NInoCompressed(ni) && pos + count > ni->allocated_size) {
1478 		int err;
1479 		loff_t end = pos + count;
1480 
1481 		err = ntfs_attr_expand(ni, end,
1482 				round_up(end, ni->itype.compressed.block_size));
1483 		if (err)
1484 			return err;
1485 	}
1486 
1487 	pages = kmalloc_array(pages_per_cb, sizeof(struct page *), GFP_NOFS);
1488 	if (!pages)
1489 		return -ENOMEM;
1490 
1491 	while (count) {
1492 		pgoff_t index;
1493 		size_t copied, bytes;
1494 		int off;
1495 
1496 		off = pos & (cb_size - 1);
1497 		bytes = cb_size - off;
1498 		if (bytes > count)
1499 			bytes = count;
1500 
1501 		cb_off = pos & ~(cb_size - 1);
1502 		index = cb_off >> PAGE_SHIFT;
1503 
1504 		if (unlikely(fault_in_iov_iter_readable(from, bytes))) {
1505 			err = -EFAULT;
1506 			goto out;
1507 		}
1508 
1509 		for (i = 0; i < pages_per_cb; i++) {
1510 			folio = read_mapping_folio(mapping, index + i, NULL);
1511 			if (IS_ERR(folio)) {
1512 				for (ip = 0; ip < i; ip++) {
1513 					folio_unlock(page_folio(pages[ip]));
1514 					folio_put(page_folio(pages[ip]));
1515 				}
1516 				err = PTR_ERR(folio);
1517 				goto out;
1518 			}
1519 
1520 			folio_lock(folio);
1521 			pages[i] = folio_page(folio, 0);
1522 		}
1523 
1524 		WARN_ON(!bytes);
1525 		copied = 0;
1526 		ip = off >> PAGE_SHIFT;
1527 		off = offset_in_page(pos);
1528 
1529 		for (;;) {
1530 			size_t cp, tail = PAGE_SIZE - off;
1531 
1532 			page = pages[ip];
1533 			cp = copy_folio_from_iter_atomic(page_folio(page), off,
1534 					min(tail, bytes), from);
1535 			flush_dcache_page(page);
1536 
1537 			copied += cp;
1538 			bytes -= cp;
1539 			if (!bytes || !cp)
1540 				break;
1541 
1542 			if (cp < tail) {
1543 				off += cp;
1544 			} else {
1545 				ip++;
1546 				off = 0;
1547 			}
1548 		}
1549 
1550 		err = ntfs_write_cb(ni, pos, pages, pages_per_cb);
1551 
1552 		for (i = 0; i < pages_per_cb; i++) {
1553 			folio = page_folio(pages[i]);
1554 			if (i < ip) {
1555 				folio_clear_dirty(folio);
1556 				folio_mark_uptodate(folio);
1557 			}
1558 			folio_unlock(folio);
1559 			folio_put(folio);
1560 		}
1561 
1562 		if (err)
1563 			goto out;
1564 
1565 		cond_resched();
1566 		pos += copied;
1567 		written += copied;
1568 		count = iov_iter_count(from);
1569 	}
1570 
1571 out:
1572 	kfree(pages);
1573 	if (err < 0)
1574 		written = err;
1575 
1576 	return written;
1577 }
1578