1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008 Oracle. All rights reserved.
4 *
5 * Based on jffs2 zlib code:
6 * Copyright © 2001-2007 Red Hat, Inc.
7 * Created by David Woodhouse <dwmw2@infradead.org>
8 */
9
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
12 #include <linux/zlib.h>
13 #include <linux/zutil.h>
14 #include <linux/mm.h>
15 #include <linux/init.h>
16 #include <linux/err.h>
17 #include <linux/sched.h>
18 #include <linux/pagemap.h>
19 #include <linux/bio.h>
20 #include <linux/refcount.h>
21 #include "btrfs_inode.h"
22 #include "compression.h"
23 #include "fs.h"
24 #include "subpage.h"
25
/*
 * Size of the workspace bounce buffer used when s390 zlib hardware support
 * (DFLTCC) is enabled: four pages instead of a single block.
 */
#define ZLIB_DFLTCC_BUF_SIZE    (4 * PAGE_SIZE)
28
/*
 * zlib compression/decompression state.
 *
 * Callers only hold the embedded @list member; every function in this file
 * converts it back to the containing workspace with list_entry().
 */
struct workspace {
	/* zlib stream state, shared by the deflate and inflate paths. */
	z_stream strm;
	/*
	 * Bounce buffer: staged deflate input when the s390 copy path is used,
	 * and the inflate output buffer for decompression.
	 */
	char *buf;
	/* Size of @buf in bytes. */
	unsigned int buf_size;
	/* Handle handed out to callers in place of the workspace itself. */
	struct list_head list;
	/* Compression level passed to zlib_deflateInit(). */
	int level;
};
36
zlib_get_workspace(struct btrfs_fs_info * fs_info,unsigned int level)37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
38 {
39 struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
40 struct workspace *workspace = list_entry(ws, struct workspace, list);
41
42 workspace->level = level;
43
44 return ws;
45 }
46
zlib_free_workspace(struct list_head * ws)47 void zlib_free_workspace(struct list_head *ws)
48 {
49 struct workspace *workspace = list_entry(ws, struct workspace, list);
50
51 kvfree(workspace->strm.workspace);
52 kfree(workspace->buf);
53 kfree(workspace);
54 }
55
56 /*
57 * For s390 hardware acceleration, the buffer size should be at least
58 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
59 *
60 * But if bs > ps we can have large enough folios that meet the s390 hardware
61 * handling.
62 */
need_special_buffer(struct btrfs_fs_info * fs_info)63 static bool need_special_buffer(struct btrfs_fs_info *fs_info)
64 {
65 if (!zlib_deflate_dfltcc_enabled())
66 return false;
67 if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
68 return false;
69 return true;
70 }
71
zlib_alloc_workspace(struct btrfs_fs_info * fs_info,unsigned int level)72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
73 {
74 const u32 blocksize = fs_info->sectorsize;
75 struct workspace *workspace;
76 int workspacesize;
77
78 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
79 if (!workspace)
80 return ERR_PTR(-ENOMEM);
81
82 workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
83 zlib_inflate_workspacesize());
84 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
85 workspace->level = level;
86 workspace->buf = NULL;
87 if (need_special_buffer(fs_info)) {
88 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
89 __GFP_NOMEMALLOC | __GFP_NORETRY |
90 __GFP_NOWARN | GFP_NOIO);
91 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
92 }
93 if (!workspace->buf) {
94 workspace->buf = kmalloc(blocksize, GFP_KERNEL);
95 workspace->buf_size = blocksize;
96 }
97 if (!workspace->strm.workspace || !workspace->buf)
98 goto fail;
99
100 INIT_LIST_HEAD(&workspace->list);
101
102 return &workspace->list;
103 fail:
104 zlib_free_workspace(&workspace->list);
105 return ERR_PTR(-ENOMEM);
106 }
107
108 /*
109 * Helper for S390x with hardware zlib compression support.
110 *
111 * That hardware acceleration requires a buffer size larger than a single page
112 * to get ideal performance, thus we need to do the memory copy rather than
113 * use the page cache directly as input buffer.
114 */
copy_data_into_buffer(struct address_space * mapping,struct workspace * workspace,u64 filepos,unsigned long length)115 static int copy_data_into_buffer(struct address_space *mapping,
116 struct workspace *workspace, u64 filepos,
117 unsigned long length)
118 {
119 u64 cur = filepos;
120
121 /* It's only for hardware accelerated zlib code. */
122 ASSERT(zlib_deflate_dfltcc_enabled());
123
124 while (cur < filepos + length) {
125 struct folio *folio;
126 void *data_in;
127 unsigned int offset;
128 unsigned long copy_length;
129 int ret;
130
131 ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
132 if (ret < 0)
133 return ret;
134
135 offset = offset_in_folio(folio, cur);
136 copy_length = min(folio_size(folio) - offset,
137 filepos + length - cur);
138
139 data_in = kmap_local_folio(folio, offset);
140 memcpy(workspace->buf + cur - filepos, data_in, copy_length);
141 kunmap_local(data_in);
142 cur += copy_length;
143 }
144 return 0;
145 }
146
/*
 * Compress a byte range of an inode's page cache with zlib deflate.
 *
 * @ws:         workspace handle (embedded list of a struct workspace)
 * @inode:      inode whose page cache supplies the input
 * @start:      file offset of the first byte to compress
 * @folios:     array that receives the newly allocated output folios
 * @out_folios: in: number of slots available in @folios;
 *              out: number of folios stored (set on success and on failure)
 * @total_in:   out: number of input bytes consumed, 0 on failure
 * @total_out:  in: number of input bytes to compress;
 *              out: number of compressed bytes produced, 0 on failure
 *
 * Return: 0 on success, -E2BIG when the output does not fit or is not smaller
 * than the input, -ENOMEM when an output folio cannot be allocated, -EIO on a
 * zlib failure, or the error from looking up an input folio.
 */
int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	/* Non-NULL only while an input folio is mapped (non-s390 path). */
	char *data_in = NULL;
	char *cfolio_out;
	int nr_folios = 0;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	/* On entry *total_out is the input length, *out_folios the array capacity. */
	unsigned long len = *total_out;
	unsigned long nr_dest_folios = *out_folios;
	const unsigned long max_out = nr_dest_folios << min_folio_shift;
	const u32 blocksize = fs_info->sectorsize;
	const u64 orig_end = start + len;

	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	/* The first output folio is always needed. */
	out_folio = btrfs_alloc_compr_folio(fs_info);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);
	folios[0] = out_folio;
	nr_folios = 1;

	/* avail_in == 0 makes the first loop iteration fetch input. */
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = min_folio_size;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to
		 * the workspace buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, and our folio is smaller
			 * than the copy_length, we need to fill the buffer so that
			 * we can take full advantage of hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/* Release the previous input folio before taking the next. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				/* Feed zlib straight from the mapped page cache folio. */
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/*
		 * Give up once more than two blocks were consumed and the
		 * output is already larger than the input.
		 */
		if (workspace->strm.total_in > blocksize * 2 &&
		    workspace->strm.total_in <
		    workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/*
		 * We need another folio for writing out.  Test this before
		 * the total_in check so that a new folio is pulled in for the
		 * stream end if required.
		 */
		if (workspace->strm.avail_out == 0) {
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
		/* All input consumed, we're done. */
		if (workspace->strm.total_in >= len)
			break;
		/* Output already exceeds what the destination array can hold. */
		if (workspace->strm.total_out > max_out)
			break;
	}
	workspace->strm.avail_in = 0;
	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			/* Get another folio for the stream end. */
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
	}
	zlib_deflateEnd(&workspace->strm);

	/* Compression that does not shrink the data is reported as failure. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_out = workspace->strm.total_out;
	*total_in = workspace->strm.total_in;
out:
	/*
	 * Report every folio stored so far, also on failure (presumably so the
	 * caller can release them; confirm against the caller).
	 */
	*out_folios = nr_folios;
	/* Drop the input folio still mapped by the non-s390 path, if any. */
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
335
/*
 * Decompress the zlib stream held in a compressed bio's folios.
 *
 * @ws: workspace handle (embedded list of a struct workspace)
 * @cb: compressed bio; supplies the compressed folios and their total length,
 *      and is the destination passed to btrfs_decompress_buf2page()
 *
 * Input is consumed one compressed folio at a time, in chunks of the
 * filesystem's minimum folio size.  Output is produced into the workspace
 * buffer and handed to btrfs_decompress_buf2page() after every inflate call.
 *
 * Return: 0 when the stream ended cleanly or btrfs_decompress_buf2page()
 * returned 0, -EIO otherwise.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	/* Currently mapped input folio, NULL once the last one was unmapped. */
	char *data_in;
	size_t total_out = 0;
	unsigned long folio_in_index = 0;
	size_t srclen = cb->compressed_len;
	unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
	unsigned long buf_start;
	struct folio **folios_in = cb->compressed_folios;

	data_in = kmap_local_folio(folios_in[folio_in_index], 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then
	 * we can tell zlib to skip the adler32 check: step over the two
	 * header bytes and pass a negative window size.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		/* [buf_start, total_out) is the range produced by this call. */
		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		/* A zero return stops decompression early and counts as success. */
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		/* The buffer was handed off, reuse it from the start. */
		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			kunmap_local(data_in);
			folio_in_index++;
			if (folio_in_index >= total_folios_in) {
				/* Nothing is mapped any more, skip the unmap at done. */
				data_in = NULL;
				break;
			}
			data_in = kmap_local_folio(folios_in[folio_in_index], 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	/* Leaving the loop without reaching the stream end is an error. */
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
432
/*
 * Decompress a single zlib stream from a linear buffer into a folio.
 *
 * @ws:         workspace handle (embedded list of a struct workspace)
 * @data_in:    compressed data
 * @dest_folio: folio receiving the decompressed bytes
 * @dest_pgoff: byte offset inside @dest_folio where output is stored
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of decompressed bytes expected
 *
 * The whole stream is inflated into the workspace buffer with one call and
 * then copied to the folio.  If fewer than @destlen bytes were produced, the
 * remainder of the destination range is zeroed.
 *
 * Return: 0 if the stream ended cleanly and exactly @destlen bytes were
 * stored, -EIO otherwise.
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/*
	 * If it's deflate, and it's got no preset dictionary, then
	 * we can tell zlib to skip the adler32 check: step over the two
	 * header bytes and pass a negative window size.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);
	if (likely(ret == Z_STREAM_END))
		memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

	/*
	 * Both conditions are needed for success.  Checking the length alone
	 * would report success for a stream that failed to inflate after
	 * producing @destlen bytes, although nothing was copied to the folio
	 * in that case.
	 */
	if (unlikely(ret != Z_STREAM_END || to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	/* Never leave the tail of the destination range uninitialized. */
	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}
501
/* Compression levels accepted for zlib, and the level used by default. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level		= 1,
	.max_level		= 9,
	.default_level		= BTRFS_ZLIB_DEFAULT_LEVEL,
};
507