xref: /linux/fs/btrfs/zlib.c (revision e829083bc46d3d79b9aade758c350ec12342c9bd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008 Oracle.  All rights reserved.
4  *
5  * Based on jffs2 zlib code:
6  * Copyright © 2001-2007 Red Hat, Inc.
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
12 #include <linux/zlib.h>
13 #include <linux/zutil.h>
14 #include <linux/mm.h>
15 #include <linux/init.h>
16 #include <linux/err.h>
17 #include <linux/sched.h>
18 #include <linux/pagemap.h>
19 #include <linux/bio.h>
20 #include <linux/refcount.h>
21 #include "btrfs_inode.h"
22 #include "compression.h"
23 #include "fs.h"
24 #include "subpage.h"
25 
26 /* workspace buffer size for s390 zlib hardware support */
27 #define ZLIB_DFLTCC_BUF_SIZE    (4 * PAGE_SIZE)
28 
/* Per-user zlib state; callers refer to it through the embedded list head. */
struct workspace {
	/* zlib stream state; strm.workspace is a separately allocated area. */
	z_stream strm;
	/*
	 * Staging buffer: receives inflate output, and serves as deflate input
	 * when need_special_buffer() is true.
	 */
	char *buf;
	/* Size of @buf in bytes. */
	unsigned int buf_size;
	/* List linkage; this is the handle passed to and from callers. */
	struct list_head list;
	/* Compression level handed to zlib_deflateInit(). */
	int level;
};
36 
zlib_get_workspace(struct btrfs_fs_info * fs_info,unsigned int level)37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
38 {
39 	struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
40 	struct workspace *workspace = list_entry(ws, struct workspace, list);
41 
42 	workspace->level = level;
43 
44 	return ws;
45 }
46 
zlib_free_workspace(struct list_head * ws)47 void zlib_free_workspace(struct list_head *ws)
48 {
49 	struct workspace *workspace = list_entry(ws, struct workspace, list);
50 
51 	kvfree(workspace->strm.workspace);
52 	kfree(workspace->buf);
53 	kfree(workspace);
54 }
55 
56 /*
57  * For s390 hardware acceleration, the buffer size should be at least
58  * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
59  *
60  * But if bs > ps we can have large enough folios that meet the s390 hardware
61  * handling.
62  */
need_special_buffer(struct btrfs_fs_info * fs_info)63 static bool need_special_buffer(struct btrfs_fs_info *fs_info)
64 {
65 	if (!zlib_deflate_dfltcc_enabled())
66 		return false;
67 	if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
68 		return false;
69 	return true;
70 }
71 
zlib_alloc_workspace(struct btrfs_fs_info * fs_info,unsigned int level)72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
73 {
74 	const u32 blocksize = fs_info->sectorsize;
75 	struct workspace *workspace;
76 	int workspacesize;
77 
78 	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
79 	if (!workspace)
80 		return ERR_PTR(-ENOMEM);
81 
82 	workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
83 			zlib_inflate_workspacesize());
84 	workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
85 	workspace->level = level;
86 	workspace->buf = NULL;
87 	if (need_special_buffer(fs_info)) {
88 		workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
89 					 __GFP_NOMEMALLOC | __GFP_NORETRY |
90 					 __GFP_NOWARN | GFP_NOIO);
91 		workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
92 	}
93 	if (!workspace->buf) {
94 		workspace->buf = kmalloc(blocksize, GFP_KERNEL);
95 		workspace->buf_size = blocksize;
96 	}
97 	if (!workspace->strm.workspace || !workspace->buf)
98 		goto fail;
99 
100 	INIT_LIST_HEAD(&workspace->list);
101 
102 	return &workspace->list;
103 fail:
104 	zlib_free_workspace(&workspace->list);
105 	return ERR_PTR(-ENOMEM);
106 }
107 
108 /*
109  * Helper for S390x with hardware zlib compression support.
110  *
111  * That hardware acceleration requires a buffer size larger than a single page
112  * to get ideal performance, thus we need to do the memory copy rather than
113  * use the page cache directly as input buffer.
114  */
copy_data_into_buffer(struct address_space * mapping,struct workspace * workspace,u64 filepos,unsigned long length)115 static int copy_data_into_buffer(struct address_space *mapping,
116 				 struct workspace *workspace, u64 filepos,
117 				 unsigned long length)
118 {
119 	u64 cur = filepos;
120 
121 	/* It's only for hardware accelerated zlib code. */
122 	ASSERT(zlib_deflate_dfltcc_enabled());
123 
124 	while (cur < filepos + length) {
125 		struct folio *folio;
126 		void *data_in;
127 		unsigned int offset;
128 		unsigned long copy_length;
129 		int ret;
130 
131 		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
132 		if (ret < 0)
133 			return ret;
134 
135 		offset = offset_in_folio(folio, cur);
136 		copy_length = min(folio_size(folio) - offset,
137 				  filepos + length - cur);
138 
139 		data_in = kmap_local_folio(folio, offset);
140 		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
141 		kunmap_local(data_in);
142 		folio_put(folio);
143 		cur += copy_length;
144 	}
145 	return 0;
146 }
147 
zlib_compress_folios(struct list_head * ws,struct btrfs_inode * inode,u64 start,struct folio ** folios,unsigned long * out_folios,unsigned long * total_in,unsigned long * total_out)148 int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
149 			 u64 start, struct folio **folios, unsigned long *out_folios,
150 			 unsigned long *total_in, unsigned long *total_out)
151 {
152 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
153 	struct workspace *workspace = list_entry(ws, struct workspace, list);
154 	struct address_space *mapping = inode->vfs_inode.i_mapping;
155 	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
156 	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
157 	int ret;
158 	char *data_in = NULL;
159 	char *cfolio_out;
160 	int nr_folios = 0;
161 	struct folio *in_folio = NULL;
162 	struct folio *out_folio = NULL;
163 	unsigned long len = *total_out;
164 	unsigned long nr_dest_folios = *out_folios;
165 	const unsigned long max_out = nr_dest_folios << min_folio_shift;
166 	const u32 blocksize = fs_info->sectorsize;
167 	const u64 orig_end = start + len;
168 
169 	*out_folios = 0;
170 	*total_out = 0;
171 	*total_in = 0;
172 
173 	ret = zlib_deflateInit(&workspace->strm, workspace->level);
174 	if (unlikely(ret != Z_OK)) {
175 		btrfs_err(fs_info,
176 	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
177 			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
178 		ret = -EIO;
179 		goto out;
180 	}
181 
182 	workspace->strm.total_in = 0;
183 	workspace->strm.total_out = 0;
184 
185 	out_folio = btrfs_alloc_compr_folio(fs_info);
186 	if (out_folio == NULL) {
187 		ret = -ENOMEM;
188 		goto out;
189 	}
190 	cfolio_out = folio_address(out_folio);
191 	folios[0] = out_folio;
192 	nr_folios = 1;
193 
194 	workspace->strm.next_in = workspace->buf;
195 	workspace->strm.avail_in = 0;
196 	workspace->strm.next_out = cfolio_out;
197 	workspace->strm.avail_out = min_folio_size;
198 
199 	while (workspace->strm.total_in < len) {
200 		/*
201 		 * Get next input pages and copy the contents to
202 		 * the workspace buffer if required.
203 		 */
204 		if (workspace->strm.avail_in == 0) {
205 			unsigned long bytes_left = len - workspace->strm.total_in;
206 			unsigned int copy_length = min(bytes_left, workspace->buf_size);
207 
208 			/*
209 			 * For s390 hardware accelerated zlib, and our folio is smaller
210 			 * than the copy_length, we need to fill the buffer so that
211 			 * we can take full advantage of hardware acceleration.
212 			 */
213 			if (need_special_buffer(fs_info)) {
214 				ret = copy_data_into_buffer(mapping, workspace,
215 							    start, copy_length);
216 				if (ret < 0)
217 					goto out;
218 				start += copy_length;
219 				workspace->strm.next_in = workspace->buf;
220 				workspace->strm.avail_in = copy_length;
221 			} else {
222 				unsigned int cur_len;
223 
224 				if (data_in) {
225 					kunmap_local(data_in);
226 					folio_put(in_folio);
227 					data_in = NULL;
228 				}
229 				ret = btrfs_compress_filemap_get_folio(mapping,
230 						start, &in_folio);
231 				if (ret < 0)
232 					goto out;
233 				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
234 				data_in = kmap_local_folio(in_folio,
235 							   offset_in_folio(in_folio, start));
236 				start += cur_len;
237 				workspace->strm.next_in = data_in;
238 				workspace->strm.avail_in = cur_len;
239 			}
240 		}
241 
242 		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
243 		if (unlikely(ret != Z_OK)) {
244 			btrfs_warn(fs_info,
245 		"zlib compression failed, error %d root %llu inode %llu offset %llu",
246 				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
247 				   start);
248 			zlib_deflateEnd(&workspace->strm);
249 			ret = -EIO;
250 			goto out;
251 		}
252 
253 		/* we're making it bigger, give up */
254 		if (workspace->strm.total_in > blocksize * 2 &&
255 		    workspace->strm.total_in <
256 		    workspace->strm.total_out) {
257 			ret = -E2BIG;
258 			goto out;
259 		}
260 		/* we need another page for writing out.  Test this
261 		 * before the total_in so we will pull in a new page for
262 		 * the stream end if required
263 		 */
264 		if (workspace->strm.avail_out == 0) {
265 			if (nr_folios == nr_dest_folios) {
266 				ret = -E2BIG;
267 				goto out;
268 			}
269 			out_folio = btrfs_alloc_compr_folio(fs_info);
270 			if (out_folio == NULL) {
271 				ret = -ENOMEM;
272 				goto out;
273 			}
274 			cfolio_out = folio_address(out_folio);
275 			folios[nr_folios] = out_folio;
276 			nr_folios++;
277 			workspace->strm.avail_out = min_folio_size;
278 			workspace->strm.next_out = cfolio_out;
279 		}
280 		/* we're all done */
281 		if (workspace->strm.total_in >= len)
282 			break;
283 		if (workspace->strm.total_out > max_out)
284 			break;
285 	}
286 	workspace->strm.avail_in = 0;
287 	/*
288 	 * Call deflate with Z_FINISH flush parameter providing more output
289 	 * space but no more input data, until it returns with Z_STREAM_END.
290 	 */
291 	while (ret != Z_STREAM_END) {
292 		ret = zlib_deflate(&workspace->strm, Z_FINISH);
293 		if (ret == Z_STREAM_END)
294 			break;
295 		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
296 			zlib_deflateEnd(&workspace->strm);
297 			ret = -EIO;
298 			goto out;
299 		} else if (workspace->strm.avail_out == 0) {
300 			/* Get another folio for the stream end. */
301 			if (nr_folios == nr_dest_folios) {
302 				ret = -E2BIG;
303 				goto out;
304 			}
305 			out_folio = btrfs_alloc_compr_folio(fs_info);
306 			if (out_folio == NULL) {
307 				ret = -ENOMEM;
308 				goto out;
309 			}
310 			cfolio_out = folio_address(out_folio);
311 			folios[nr_folios] = out_folio;
312 			nr_folios++;
313 			workspace->strm.avail_out = min_folio_size;
314 			workspace->strm.next_out = cfolio_out;
315 		}
316 	}
317 	zlib_deflateEnd(&workspace->strm);
318 
319 	if (workspace->strm.total_out >= workspace->strm.total_in) {
320 		ret = -E2BIG;
321 		goto out;
322 	}
323 
324 	ret = 0;
325 	*total_out = workspace->strm.total_out;
326 	*total_in = workspace->strm.total_in;
327 out:
328 	*out_folios = nr_folios;
329 	if (data_in) {
330 		kunmap_local(data_in);
331 		folio_put(in_folio);
332 	}
333 
334 	return ret;
335 }
336 
zlib_decompress_bio(struct list_head * ws,struct compressed_bio * cb)337 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
338 {
339 	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
340 	struct workspace *workspace = list_entry(ws, struct workspace, list);
341 	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
342 	int ret = 0, ret2;
343 	int wbits = MAX_WBITS;
344 	char *data_in;
345 	size_t total_out = 0;
346 	unsigned long folio_in_index = 0;
347 	size_t srclen = cb->compressed_len;
348 	unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
349 	unsigned long buf_start;
350 	struct folio **folios_in = cb->compressed_folios;
351 
352 	data_in = kmap_local_folio(folios_in[folio_in_index], 0);
353 	workspace->strm.next_in = data_in;
354 	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
355 	workspace->strm.total_in = 0;
356 
357 	workspace->strm.total_out = 0;
358 	workspace->strm.next_out = workspace->buf;
359 	workspace->strm.avail_out = workspace->buf_size;
360 
361 	/* If it's deflate, and it's got no preset dictionary, then
362 	   we can tell zlib to skip the adler32 check. */
363 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
364 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
365 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
366 
367 		wbits = -((data_in[0] >> 4) + 8);
368 		workspace->strm.next_in += 2;
369 		workspace->strm.avail_in -= 2;
370 	}
371 
372 	ret = zlib_inflateInit2(&workspace->strm, wbits);
373 	if (unlikely(ret != Z_OK)) {
374 		struct btrfs_inode *inode = cb->bbio.inode;
375 
376 		kunmap_local(data_in);
377 		btrfs_err(inode->root->fs_info,
378 	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
379 			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
380 		return -EIO;
381 	}
382 	while (workspace->strm.total_in < srclen) {
383 		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
384 		if (ret != Z_OK && ret != Z_STREAM_END)
385 			break;
386 
387 		buf_start = total_out;
388 		total_out = workspace->strm.total_out;
389 
390 		/* we didn't make progress in this inflate call, we're done */
391 		if (buf_start == total_out)
392 			break;
393 
394 		ret2 = btrfs_decompress_buf2page(workspace->buf,
395 				total_out - buf_start, cb, buf_start);
396 		if (ret2 == 0) {
397 			ret = 0;
398 			goto done;
399 		}
400 
401 		workspace->strm.next_out = workspace->buf;
402 		workspace->strm.avail_out = workspace->buf_size;
403 
404 		if (workspace->strm.avail_in == 0) {
405 			unsigned long tmp;
406 			kunmap_local(data_in);
407 			folio_in_index++;
408 			if (folio_in_index >= total_folios_in) {
409 				data_in = NULL;
410 				break;
411 			}
412 			data_in = kmap_local_folio(folios_in[folio_in_index], 0);
413 			workspace->strm.next_in = data_in;
414 			tmp = srclen - workspace->strm.total_in;
415 			workspace->strm.avail_in = min(tmp, min_folio_size);
416 		}
417 	}
418 	if (unlikely(ret != Z_STREAM_END)) {
419 		btrfs_err(cb->bbio.inode->root->fs_info,
420 		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
421 			  ret, btrfs_root_id(cb->bbio.inode->root),
422 			  btrfs_ino(cb->bbio.inode), cb->start);
423 		ret = -EIO;
424 	} else {
425 		ret = 0;
426 	}
427 done:
428 	zlib_inflateEnd(&workspace->strm);
429 	if (data_in)
430 		kunmap_local(data_in);
431 	return ret;
432 }
433 
/*
 * Inflate a single contiguous compressed buffer into part of a folio.
 *
 * @ws:         workspace list head
 * @data_in:    compressed data
 * @dest_folio: folio receiving the decompressed bytes
 * @dest_pgoff: offset inside @dest_folio where the output starts
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of decompressed bytes expected
 *
 * Whatever was inflated (capped at @destlen) is always copied to @dest_folio,
 * and any shortfall up to @destlen is zero filled.
 *
 * Returns 0 when @destlen bytes were produced, -EIO otherwise (including a
 * failed inflate initialization).
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/*
	 * If it's deflate, and it's got no preset dictionary, then we can tell
	 * zlib to skip the adler32 check.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);

	/*
	 * Copy what was inflated no matter how zlib_inflate() ended.  Skipping
	 * the copy when the stream did not reach Z_STREAM_END used to leave
	 * the folio untouched even though 0 is returned below whenever
	 * @destlen bytes were produced.
	 */
	if (to_copy)
		memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	/* Do not leave stale bytes behind the decompressed data. */
	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}
502 
/* Compression level limits and default level for zlib. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level		= 1,
	.max_level		= 9,
	.default_level		= BTRFS_ZLIB_DEFAULT_LEVEL,
};
508