xref: /linux/fs/btrfs/zlib.c (revision 7ae37b2c94ed30bfefece2b68c727a4474206718)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008 Oracle.  All rights reserved.
4  *
5  * Based on jffs2 zlib code:
6  * Copyright © 2001-2007 Red Hat, Inc.
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
12 #include <linux/zlib.h>
13 #include <linux/zutil.h>
14 #include <linux/mm.h>
15 #include <linux/init.h>
16 #include <linux/err.h>
17 #include <linux/sched.h>
18 #include <linux/pagemap.h>
19 #include <linux/bio.h>
20 #include <linux/refcount.h>
21 #include "btrfs_inode.h"
22 #include "compression.h"
23 #include "fs.h"
24 #include "subpage.h"
25 
26 /* workspace buffer size for s390 zlib hardware support */
27 #define ZLIB_DFLTCC_BUF_SIZE    (4 * PAGE_SIZE)
28 
/*
 * Per-workspace zlib (de)compression context, handed out by the btrfs
 * workspace manager and linked into its list via @list.
 */
struct workspace {
	z_stream strm;		/* zlib stream; strm.workspace is its scratch memory (kvzalloc'ed) */
	char *buf;		/* intermediate input/output buffer */
	unsigned int buf_size;	/* size of @buf: ZLIB_DFLTCC_BUF_SIZE or fs sectorsize */
	struct list_head list;	/* link into the workspace manager's list */
	int level;		/* compression level for the current user */
};
36 
37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
38 {
39 	struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
40 	struct workspace *workspace = list_entry(ws, struct workspace, list);
41 
42 	workspace->level = level;
43 
44 	return ws;
45 }
46 
47 void zlib_free_workspace(struct list_head *ws)
48 {
49 	struct workspace *workspace = list_entry(ws, struct workspace, list);
50 
51 	kvfree(workspace->strm.workspace);
52 	kfree(workspace->buf);
53 	kfree(workspace);
54 }
55 
56 /*
57  * For s390 hardware acceleration, the buffer size should be at least
58  * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
59  *
60  * But if bs > ps we can have large enough folios that meet the s390 hardware
61  * handling.
62  */
63 static bool need_special_buffer(struct btrfs_fs_info *fs_info)
64 {
65 	if (!zlib_deflate_dfltcc_enabled())
66 		return false;
67 	if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
68 		return false;
69 	return true;
70 }
71 
72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
73 {
74 	struct workspace *workspace;
75 	int workspacesize;
76 
77 	workspace = kzalloc_obj(*workspace);
78 	if (!workspace)
79 		return ERR_PTR(-ENOMEM);
80 
81 	workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
82 			zlib_inflate_workspacesize());
83 	workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
84 	workspace->level = level;
85 	workspace->buf = NULL;
86 	if (need_special_buffer(fs_info)) {
87 		workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
88 					 __GFP_NOMEMALLOC | __GFP_NORETRY |
89 					 __GFP_NOWARN | GFP_NOIO);
90 		workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
91 	}
92 	if (!workspace->buf) {
93 		workspace->buf = kmalloc(fs_info->sectorsize, GFP_KERNEL);
94 		workspace->buf_size = fs_info->sectorsize;
95 	}
96 	if (!workspace->strm.workspace || !workspace->buf)
97 		goto fail;
98 
99 	INIT_LIST_HEAD(&workspace->list);
100 
101 	return &workspace->list;
102 fail:
103 	zlib_free_workspace(&workspace->list);
104 	return ERR_PTR(-ENOMEM);
105 }
106 
107 /*
108  * Helper for S390x with hardware zlib compression support.
109  *
110  * That hardware acceleration requires a buffer size larger than a single page
111  * to get ideal performance, thus we need to do the memory copy rather than
112  * use the page cache directly as input buffer.
113  */
114 static int copy_data_into_buffer(struct address_space *mapping,
115 				 struct workspace *workspace, u64 filepos,
116 				 unsigned long length)
117 {
118 	u64 cur = filepos;
119 
120 	/* It's only for hardware accelerated zlib code. */
121 	ASSERT(zlib_deflate_dfltcc_enabled());
122 
123 	while (cur < filepos + length) {
124 		struct folio *folio;
125 		void *data_in;
126 		unsigned int offset;
127 		unsigned long copy_length;
128 		int ret;
129 
130 		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
131 		if (ret < 0)
132 			return ret;
133 
134 		offset = offset_in_folio(folio, cur);
135 		copy_length = min(folio_size(folio) - offset,
136 				  filepos + length - cur);
137 
138 		data_in = kmap_local_folio(folio, offset);
139 		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
140 		kunmap_local(data_in);
141 		folio_put(folio);
142 		cur += copy_length;
143 	}
144 	return 0;
145 }
146 
/*
 * Compress the file range described by @cb and queue the resulting folios
 * on its bio.
 *
 * Input is read from the inode's page cache starting at cb->start for
 * cb->len bytes.  Compressed output folios are appended to cb->bbio.bio.
 *
 * Returns 0 on success, -E2BIG when the compressed result would not be
 * smaller than the input (caller falls back to uncompressed writes),
 * -ENOMEM on allocation failure, or -EIO on a zlib error.
 */
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_inode *inode = cb->bbio.inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	struct bio *bio = &cb->bbio.bio;
	u64 start = cb->start;
	u32 len = cb->len;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	char *data_in = NULL;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	/* Exclusive end of the range being compressed. */
	const u64 orig_end = start + len;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	/* avail_in == 0 forces an input refill on the first loop iteration. */
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = folio_address(out_folio);
	workspace->strm.avail_out = min_folio_size;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to the workspace
		 * buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, and our folio is smaller
			 * than the copy_length, we need to fill the buffer so that
			 * we can take full advantage of hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/* Drop the previous input folio before mapping the next. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* We're making it bigger, give up. */
		if (workspace->strm.total_in > fs_info->sectorsize * 2 &&
		    workspace->strm.total_in < workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/* Output already as large as the input, compression is pointless. */
		if (workspace->strm.total_out >= len) {
			ret = -E2BIG;
			goto out;
		}
		/* Queue the full folio and allocate a new one. */
		if (workspace->strm.avail_out == 0) {
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}

			out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = folio_address(out_folio);
		}
		/* We're all done. */
		if (workspace->strm.total_in >= len)
			break;
	}

	workspace->strm.avail_in = 0;

	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			if (workspace->strm.total_out >= len) {
				ret = -E2BIG;
				goto out;
			}
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}
			/* Get another folio for the stream end. */
			out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = folio_address(out_folio);
		}
	}
	/* Queue the remaining part of the folio. */
	if (workspace->strm.total_out > bio->bi_iter.bi_size) {
		const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;

		ASSERT(cur_len <= folio_size(out_folio));

		if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
			ret = -E2BIG;
			goto out;
		}
	} else {
		/* The last folio hasn't been utilized. */
		btrfs_free_compr_folio(out_folio);
	}
	out_folio = NULL;
	ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out);
	zlib_deflateEnd(&workspace->strm);

	/* Final sanity check: compression must actually have saved space. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
out:
	if (out_folio)
		btrfs_free_compr_folio(out_folio);
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
337 
/*
 * Decompress a whole zlib-compressed extent described by @cb.
 *
 * Compressed input is iterated folio by folio from cb->bbio.bio; each chunk
 * of decompressed data is inflated into workspace->buf and then copied into
 * the destination pages via btrfs_decompress_buf2page().
 *
 * Returns 0 on success, -EINVAL when the bio has no folios, -EIO on a zlib
 * error or a stream that does not terminate with Z_STREAM_END.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio_iter fi;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;
	size_t total_out = 0;
	const size_t srclen = bio_get_size(&cb->bbio.bio);
	unsigned long buf_start;

	bio_first_folio(&fi, &cb->bbio.bio, 0);

	/* We must have at least one folio here, that has the correct size. */
	if (unlikely(!fi.folio))
		return -EINVAL;
	ASSERT(folio_size(fi.folio) == min_folio_size);

	data_in = kmap_local_folio(fi.folio, 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check (negative wbits = raw deflate).
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		/* Copy the freshly inflated bytes into the destination pages. */
		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		/* ret2 == 0 means the destination range is fully satisfied. */
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		/* Reset the intermediate buffer for the next inflate round. */
		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		/* Input folio exhausted, advance to the next one in the bio. */
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			kunmap_local(data_in);
			bio_next_folio(&fi, &cb->bbio.bio);
			if (!fi.folio) {
				data_in = NULL;
				break;
			}
			ASSERT(folio_size(fi.folio) == min_folio_size);
			data_in = kmap_local_folio(fi.folio, 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
440 
441 int zlib_decompress(struct list_head *ws, const u8 *data_in,
442 		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
443 		size_t destlen)
444 {
445 	struct workspace *workspace = list_entry(ws, struct workspace, list);
446 	int ret = 0;
447 	int wbits = MAX_WBITS;
448 	unsigned long to_copy;
449 
450 	workspace->strm.next_in = data_in;
451 	workspace->strm.avail_in = srclen;
452 	workspace->strm.total_in = 0;
453 
454 	workspace->strm.next_out = workspace->buf;
455 	workspace->strm.avail_out = workspace->buf_size;
456 	workspace->strm.total_out = 0;
457 	/* If it's deflate, and it's got no preset dictionary, then
458 	   we can tell zlib to skip the adler32 check. */
459 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
460 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
461 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
462 
463 		wbits = -((data_in[0] >> 4) + 8);
464 		workspace->strm.next_in += 2;
465 		workspace->strm.avail_in -= 2;
466 	}
467 
468 	ret = zlib_inflateInit2(&workspace->strm, wbits);
469 	if (unlikely(ret != Z_OK)) {
470 		struct btrfs_inode *inode = folio_to_inode(dest_folio);
471 
472 		btrfs_err(inode->root->fs_info,
473 		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
474 			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
475 			  folio_pos(dest_folio));
476 		return -EIO;
477 	}
478 
479 	/*
480 	 * Everything (in/out buf) should be at most one sector, there should
481 	 * be no need to switch any input/output buffer.
482 	 */
483 	ret = zlib_inflate(&workspace->strm, Z_FINISH);
484 	to_copy = min(workspace->strm.total_out, destlen);
485 	if (ret != Z_STREAM_END)
486 		goto out;
487 
488 	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);
489 
490 out:
491 	if (unlikely(to_copy != destlen)) {
492 		struct btrfs_inode *inode = folio_to_inode(dest_folio);
493 
494 		btrfs_err(inode->root->fs_info,
495 "zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
496 			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
497 			  folio_pos(dest_folio), to_copy, destlen);
498 		ret = -EIO;
499 	} else {
500 		ret = 0;
501 	}
502 
503 	zlib_inflateEnd(&workspace->strm);
504 
505 	if (unlikely(to_copy < destlen))
506 		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
507 	return ret;
508 }
509 
/* Supported zlib compression levels (1-9) and the btrfs default. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level		= 1,
	.max_level		= 9,
	.default_level		= BTRFS_ZLIB_DEFAULT_LEVEL,
};
515