1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008 Oracle. All rights reserved.
4 *
5 * Based on jffs2 zlib code:
6 * Copyright © 2001-2007 Red Hat, Inc.
7 * Created by David Woodhouse <dwmw2@infradead.org>
8 */
9
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
12 #include <linux/zlib.h>
13 #include <linux/zutil.h>
14 #include <linux/mm.h>
15 #include <linux/init.h>
16 #include <linux/err.h>
17 #include <linux/sched.h>
18 #include <linux/pagemap.h>
19 #include <linux/bio.h>
20 #include <linux/refcount.h>
21 #include "btrfs_inode.h"
22 #include "compression.h"
23 #include "fs.h"
24 #include "subpage.h"
25
26 /* workspace buffer size for s390 zlib hardware support */
27 #define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE)
28
/*
 * Per-instance zlib workspace, linked into the shared btrfs workspace pool.
 */
struct workspace {
	z_stream strm;		/* zlib stream; strm.workspace is the zlib scratch area */
	char *buf;		/* bounce buffer: inflate output, and deflate input on s390 */
	unsigned int buf_size;	/* size of @buf in bytes */
	struct list_head list;	/* link into the btrfs compression workspace pool */
	int level;		/* compression level to use for the next deflate */
};
36
zlib_get_workspace(struct btrfs_fs_info * fs_info,unsigned int level)37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
38 {
39 struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
40 struct workspace *workspace = list_entry(ws, struct workspace, list);
41
42 workspace->level = level;
43
44 return ws;
45 }
46
zlib_free_workspace(struct list_head * ws)47 void zlib_free_workspace(struct list_head *ws)
48 {
49 struct workspace *workspace = list_entry(ws, struct workspace, list);
50
51 kvfree(workspace->strm.workspace);
52 kfree(workspace->buf);
53 kfree(workspace);
54 }
55
56 /*
57 * For s390 hardware acceleration, the buffer size should be at least
58 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
59 *
60 * But if bs > ps we can have large enough folios that meet the s390 hardware
61 * handling.
62 */
need_special_buffer(struct btrfs_fs_info * fs_info)63 static bool need_special_buffer(struct btrfs_fs_info *fs_info)
64 {
65 if (!zlib_deflate_dfltcc_enabled())
66 return false;
67 if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
68 return false;
69 return true;
70 }
71
zlib_alloc_workspace(struct btrfs_fs_info * fs_info,unsigned int level)72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
73 {
74 const u32 blocksize = fs_info->sectorsize;
75 struct workspace *workspace;
76 int workspacesize;
77
78 workspace = kzalloc_obj(*workspace);
79 if (!workspace)
80 return ERR_PTR(-ENOMEM);
81
82 workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
83 zlib_inflate_workspacesize());
84 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
85 workspace->level = level;
86 workspace->buf = NULL;
87 if (need_special_buffer(fs_info)) {
88 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
89 __GFP_NOMEMALLOC | __GFP_NORETRY |
90 __GFP_NOWARN | GFP_NOIO);
91 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
92 }
93 if (!workspace->buf) {
94 workspace->buf = kmalloc(blocksize, GFP_KERNEL);
95 workspace->buf_size = blocksize;
96 }
97 if (!workspace->strm.workspace || !workspace->buf)
98 goto fail;
99
100 INIT_LIST_HEAD(&workspace->list);
101
102 return &workspace->list;
103 fail:
104 zlib_free_workspace(&workspace->list);
105 return ERR_PTR(-ENOMEM);
106 }
107
108 /*
109 * Helper for S390x with hardware zlib compression support.
110 *
111 * That hardware acceleration requires a buffer size larger than a single page
112 * to get ideal performance, thus we need to do the memory copy rather than
113 * use the page cache directly as input buffer.
114 */
static int copy_data_into_buffer(struct address_space *mapping,
				 struct workspace *workspace, u64 filepos,
				 unsigned long length)
{
	u64 pos = filepos;

	/* It's only for hardware accelerated zlib code. */
	ASSERT(zlib_deflate_dfltcc_enabled());

	/* Walk the range folio by folio and copy into the bounce buffer. */
	while (pos < filepos + length) {
		struct folio *folio;
		void *kaddr;
		unsigned int folio_off;
		unsigned long nr_bytes;
		int ret;

		ret = btrfs_compress_filemap_get_folio(mapping, pos, &folio);
		if (ret < 0)
			return ret;

		folio_off = offset_in_folio(folio, pos);
		/* Cap the copy at both the folio end and the range end. */
		nr_bytes = min(folio_size(folio) - folio_off,
			       filepos + length - pos);

		kaddr = kmap_local_folio(folio, folio_off);
		memcpy(workspace->buf + pos - filepos, kaddr, nr_bytes);
		kunmap_local(kaddr);
		folio_put(folio);
		pos += nr_bytes;
	}
	return 0;
}
147
/*
 * Compress the file range [cb->start, cb->start + cb->len) with zlib and add
 * the resulting compressed folios to the bio of @cb.
 *
 * Returns 0 on success, -E2BIG when the data does not compress well enough
 * to be worth it, or a negative errno on other failures.
 */
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_inode *inode = cb->bbio.inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	struct bio *bio = &cb->bbio.bio;
	u64 start = cb->start;
	u32 len = cb->len;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	char *data_in = NULL;
	char *cfolio_out;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	const u32 blocksize = fs_info->sectorsize;
	const u64 orig_end = start + len;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
		"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	out_folio = btrfs_alloc_compr_folio(fs_info);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);

	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = min_folio_size;

	/* Feed input and drain output until the whole range is consumed. */
	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to the workspace
		 * buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, and our folio is smaller
			 * than the copy_length, we need to fill the buffer so that
			 * we can take full advantage of hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/* Release the previously mapped input folio. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				data_in = kmap_local_folio(in_folio,
						offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* We're making it bigger, give up. */
		if (workspace->strm.total_in > blocksize * 2 &&
		    workspace->strm.total_in < workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/* Output grew to at least the input size, not worth storing. */
		if (workspace->strm.total_out >= len) {
			ret = -E2BIG;
			goto out;
		}
		/* Queue the full folio and allocate a new one. */
		if (workspace->strm.avail_out == 0) {
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}

			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
		/* We're all done. */
		if (workspace->strm.total_in >= len)
			break;
	}

	workspace->strm.avail_in = 0;

	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			/* The current output folio is full, queue it. */
			if (workspace->strm.total_out >= len) {
				ret = -E2BIG;
				goto out;
			}
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}
			/* Get another folio for the stream end. */
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
	}
	/* Queue the remaining part of the folio. */
	if (workspace->strm.total_out > bio->bi_iter.bi_size) {
		const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;

		ASSERT(cur_len <= folio_size(out_folio));

		if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
			ret = -E2BIG;
			goto out;
		}
	} else {
		/* The last folio hasn't been utilized. */
		btrfs_free_compr_folio(out_folio);
	}
	out_folio = NULL;
	ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out);
	zlib_deflateEnd(&workspace->strm);

	/* No savings at all, do not store the compressed result. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
out:
	if (out_folio)
		btrfs_free_compr_folio(out_folio);
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
343
/*
 * Decompress the zlib data held in the folios of @cb's bio, writing the
 * inflated bytes out via btrfs_decompress_buf2page().
 *
 * Returns 0 on success (including the case where all destination pages were
 * filled before the stream ended), -EINVAL for an empty bio, or -EIO on
 * zlib init/inflate failure.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio_iter fi;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;
	size_t total_out = 0;
	size_t srclen = cb->compressed_len;
	unsigned long buf_start;

	bio_first_folio(&fi, &cb->bbio.bio, 0);

	/* We must have at least one folio here, that has the correct size. */
	if (unlikely(!fi.folio))
		return -EINVAL;
	ASSERT(folio_size(fi.folio) == min_folio_size);

	data_in = kmap_local_folio(fi.folio, 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0] << 8) + data_in[1]) % 31)) {

		/* Negative wbits makes zlib_inflate expect a raw stream. */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	/* Inflate through the bounce buffer, switching input folios as needed. */
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		/* Copy the newly inflated bytes to the destination pages. */
		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		if (ret2 == 0) {
			/* All destination pages are filled, we are done. */
			ret = 0;
			goto done;
		}

		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		/* Current input folio exhausted, advance to the next one. */
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;

			kunmap_local(data_in);
			bio_next_folio(&fi, &cb->bbio.bio);
			if (!fi.folio) {
				data_in = NULL;
				break;
			}
			ASSERT(folio_size(fi.folio) == min_folio_size);
			data_in = kmap_local_folio(fi.folio, 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
446
/*
 * Decompress one contiguous zlib-compressed block from @data_in into
 * @dest_folio at @dest_pgoff.
 *
 * Returns 0 on success, -EIO on inflate failure or short output; on short
 * output the remainder of the destination range is zero-filled.
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0] << 8) + data_in[1]) % 31)) {

		/* Negative wbits makes zlib_inflate expect a raw stream. */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);
	if (ret != Z_STREAM_END)
		goto out;

	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

out:
	/* Anything short of destlen means corruption or an inflate error. */
	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	/* Zero the tail so stale data is never exposed to the reader. */
	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}
515
/* Valid zlib compression levels and the mount-option default. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level = 1,
	.max_level = 9,
	.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
};
521