xref: /linux/fs/btrfs/zlib.c (revision 26902be0cd0997b34ef13593e35ef3501a3c70b5)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/refcount.h>
#include "btrfs_inode.h"
#include "compression.h"
#include "fs.h"
#include "subpage.h"

/* Workspace buffer size for s390 zlib hardware support. */
#define ZLIB_DFLTCC_BUF_SIZE    (4 * PAGE_SIZE)

struct workspace {
	z_stream strm;
	char *buf;
	unsigned int buf_size;
	struct list_head list;
	int level;
};

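/*
 * Get a zlib workspace from the global workspace pool and record the
 * requested compression level in it.
 */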
struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
	struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
	struct workspace *workspace = list_entry(ws, struct workspace, list);

	workspace->level = level;

	return ws;
}

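/* Free a workspace allocated by zlib_alloc_workspace(). */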
void zlib_free_workspace(struct list_head *ws)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);

	kvfree(workspace->strm.workspace);
	kfree(workspace->buf);
	kfree(workspace);
}

/*
 * For s390 hardware acceleration, the buffer size should be at least
 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
 *
 * But if the block size is larger than the page size, the folios themselves
 * can already be large enough for the s390 hardware to handle.
 */
static bool need_special_buffer(struct btrfs_fs_info *fs_info)
{
	if (!zlib_deflate_dfltcc_enabled())
		return false;
	if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
		return false;
	return true;
}

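/*
 * Allocate a workspace with the zlib deflate/inflate state buffer and a copy
 * buffer sized either for the s390 hardware acceleration or for one block.
 */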
struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
	const u32 blocksize = fs_info->sectorsize;
	struct workspace *workspace;
	int workspacesize;

	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
	if (!workspace)
		return ERR_PTR(-ENOMEM);

	workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			zlib_inflate_workspacesize());
	workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
	workspace->level = level;
	workspace->buf = NULL;
	if (need_special_buffer(fs_info)) {
		workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
					 __GFP_NOMEMALLOC | __GFP_NORETRY |
					 __GFP_NOWARN | GFP_NOIO);
		workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
	}
	if (!workspace->buf) {
		workspace->buf = kmalloc(blocksize, GFP_KERNEL);
		workspace->buf_size = blocksize;
	}
	if (!workspace->strm.workspace || !workspace->buf)
		goto fail;

	INIT_LIST_HEAD(&workspace->list);

	return &workspace->list;
fail:
	zlib_free_workspace(&workspace->list);
	return ERR_PTR(-ENOMEM);
}

/*
 * Helper for S390x with hardware zlib compression support.
 *
 * That hardware acceleration requires a buffer size larger than a single page
 * to get ideal performance, thus we need to do the memory copy rather than
 * use the page cache directly as the input buffer.
 */
static int copy_data_into_buffer(struct address_space *mapping,
				 struct workspace *workspace, u64 filepos,
				 unsigned long length)
{
	u64 cur = filepos;

	/* It's only for hardware accelerated zlib code. */
	ASSERT(zlib_deflate_dfltcc_enabled());

	while (cur < filepos + length) {
		struct folio *folio;
		void *data_in;
		unsigned int offset;
		unsigned long copy_length;
		int ret;

		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
		if (ret < 0)
			return ret;

		offset = offset_in_folio(folio, cur);
		copy_length = min(folio_size(folio) - offset,
				  filepos + length - cur);

		data_in = kmap_local_folio(folio, offset);
		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
		kunmap_local(data_in);
		folio_put(folio);
		cur += copy_length;
	}
	return 0;
}

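/*
 * Compress the range described by @cb and add the resulting compressed folios
 * to its bio.  Returns 0 on success or a negative errno (e.g. -E2BIG if
 * compression does not shrink the data) on failure.
 */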
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_inode *inode = cb->bbio.inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	struct bio *bio = &cb->bbio.bio;
	u64 start = cb->start;
	u32 len = cb->len;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	char *data_in = NULL;
	char *cfolio_out;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	const u32 blocksize = fs_info->sectorsize;
	const u64 orig_end = start + len;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	out_folio = btrfs_alloc_compr_folio(fs_info);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);
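
	/* No input queued yet, direct the output at the first compressed folio. */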
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = min_folio_size;

	while (workspace->strm.total_in < len) {
		/*
		 * Get the next input folio and copy its contents into the
		 * workspace buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, if our folios are
			 * smaller than copy_length, fill the larger workspace
			 * buffer so that we can take full advantage of the
			 * hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* We're making it bigger, give up. */
		if (workspace->strm.total_in > blocksize * 2 &&
		    workspace->strm.total_in < workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
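		/* The compressed data is already as large as the input, give up. */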
		if (workspace->strm.total_out >= len) {
			ret = -E2BIG;
			goto out;
		}
		/* Queue the full folio and allocate a new one. */
		if (workspace->strm.avail_out == 0) {
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}

			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
		/* We're all done. */
		if (workspace->strm.total_in >= len)
			break;
	}

	workspace->strm.avail_in = 0;

	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			if (workspace->strm.total_out >= len) {
				ret = -E2BIG;
				goto out;
			}
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}
			/* Get another folio for the stream end. */
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
	}
	/* Queue the remaining part of the folio. */
	if (workspace->strm.total_out > bio->bi_iter.bi_size) {
		u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out);

		if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
			ret = -E2BIG;
			goto out;
		}
	} else {
		/* The last folio hasn't been utilized. */
		btrfs_free_compr_folio(out_folio);
	}
	out_folio = NULL;
	ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out);
	zlib_deflateEnd(&workspace->strm);

	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
out:
	if (out_folio)
		btrfs_free_compr_folio(out_folio);
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}

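/*
 * Decompress the whole compressed bio of @cb and copy the data into the
 * destination pages described by @cb.  Returns 0 on success or a negative
 * errno on failure.
 */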
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio_iter fi;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;
	size_t total_out = 0;
	size_t srclen = cb->compressed_len;
	unsigned long buf_start;

	bio_first_folio(&fi, &cb->bbio.bio, 0);

	/* We must have at least one folio here, and it must have the correct size. */
	if (unlikely(!fi.folio))
		return -EINVAL;
	ASSERT(folio_size(fi.folio) == min_folio_size);

	data_in = kmap_local_folio(fi.folio, 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can tell
	 * zlib to skip the adler32 check.  A negative window-bits value makes
	 * zlib expect a raw deflate stream without the zlib header.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* We didn't make progress in this inflate call, we're done. */
		if (buf_start == total_out)
			break;

		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

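		/* Current input folio is fully consumed, advance to the next one. */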
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			kunmap_local(data_in);
			bio_next_folio(&fi, &cb->bbio.bio);
			if (!fi.folio) {
				data_in = NULL;
				break;
			}
			ASSERT(folio_size(fi.folio) == min_folio_size);
			data_in = kmap_local_folio(fi.folio, 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}

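/*
 * Decompress a single block: @srclen bytes at @data_in are inflated into
 * @dest_folio at offset @dest_pgoff.  Any part of the destination that could
 * not be filled is zeroed.
 */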
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/*
	 * If it's deflate, and it's got no preset dictionary, then we can tell
	 * zlib to skip the adler32 check.  A negative window-bits value makes
	 * zlib expect a raw deflate stream without the zlib header.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (both the input and the output buffer) is at most one
	 * sector, so there should be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);
	if (ret != Z_STREAM_END)
		goto out;

	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

out:
	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}

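/* Supported zlib compression levels and the filesystem default. */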
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level		= 1,
	.max_level		= 9,
	.default_level		= BTRFS_ZLIB_DEFAULT_LEVEL,
};
519