1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008 Oracle. All rights reserved.
4 *
5 * Based on jffs2 zlib code:
6 * Copyright © 2001-2007 Red Hat, Inc.
7 * Created by David Woodhouse <dwmw2@infradead.org>
8 */
9
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
12 #include <linux/zlib.h>
13 #include <linux/zutil.h>
14 #include <linux/mm.h>
15 #include <linux/init.h>
16 #include <linux/err.h>
17 #include <linux/sched.h>
18 #include <linux/pagemap.h>
19 #include <linux/bio.h>
20 #include <linux/refcount.h>
21 #include "btrfs_inode.h"
22 #include "compression.h"
23 #include "fs.h"
24 #include "subpage.h"
25
26 /* workspace buffer size for s390 zlib hardware support */
27 #define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE)
28
/*
 * Per-workspace zlib state, handed out and recycled by the generic btrfs
 * workspace_manager (see zlib_get_workspace()/zlib_alloc_workspace()).
 */
struct workspace {
	z_stream strm;		/* zlib stream; ->workspace is kvzalloc'ed in zlib_alloc_workspace() */
	char *buf;		/* intermediate input/output buffer */
	unsigned int buf_size;	/* size of @buf: ZLIB_DFLTCC_BUF_SIZE with s390 HW support, else PAGE_SIZE */
	struct list_head list;	/* link into the workspace manager's list */
	int level;		/* zlib compression level, set on every get_workspace() */
};

static struct workspace_manager wsm;
38
zlib_get_workspace(unsigned int level)39 struct list_head *zlib_get_workspace(unsigned int level)
40 {
41 struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
42 struct workspace *workspace = list_entry(ws, struct workspace, list);
43
44 workspace->level = level;
45
46 return ws;
47 }
48
zlib_free_workspace(struct list_head * ws)49 void zlib_free_workspace(struct list_head *ws)
50 {
51 struct workspace *workspace = list_entry(ws, struct workspace, list);
52
53 kvfree(workspace->strm.workspace);
54 kfree(workspace->buf);
55 kfree(workspace);
56 }
57
zlib_alloc_workspace(unsigned int level)58 struct list_head *zlib_alloc_workspace(unsigned int level)
59 {
60 struct workspace *workspace;
61 int workspacesize;
62
63 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
64 if (!workspace)
65 return ERR_PTR(-ENOMEM);
66
67 workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
68 zlib_inflate_workspacesize());
69 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
70 workspace->level = level;
71 workspace->buf = NULL;
72 /*
73 * In case of s390 zlib hardware support, allocate lager workspace
74 * buffer. If allocator fails, fall back to a single page buffer.
75 */
76 if (zlib_deflate_dfltcc_enabled()) {
77 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
78 __GFP_NOMEMALLOC | __GFP_NORETRY |
79 __GFP_NOWARN | GFP_NOIO);
80 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
81 }
82 if (!workspace->buf) {
83 workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
84 workspace->buf_size = PAGE_SIZE;
85 }
86 if (!workspace->strm.workspace || !workspace->buf)
87 goto fail;
88
89 INIT_LIST_HEAD(&workspace->list);
90
91 return &workspace->list;
92 fail:
93 zlib_free_workspace(&workspace->list);
94 return ERR_PTR(-ENOMEM);
95 }
96
97 /*
98 * Helper for S390x with hardware zlib compression support.
99 *
100 * That hardware acceleration requires a buffer size larger than a single page
101 * to get ideal performance, thus we need to do the memory copy rather than
102 * use the page cache directly as input buffer.
103 */
copy_data_into_buffer(struct address_space * mapping,struct workspace * workspace,u64 filepos,unsigned long length)104 static int copy_data_into_buffer(struct address_space *mapping,
105 struct workspace *workspace, u64 filepos,
106 unsigned long length)
107 {
108 u64 cur = filepos;
109
110 /* It's only for hardware accelerated zlib code. */
111 ASSERT(zlib_deflate_dfltcc_enabled());
112
113 while (cur < filepos + length) {
114 struct folio *folio;
115 void *data_in;
116 unsigned int offset;
117 unsigned long copy_length;
118 int ret;
119
120 ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
121 if (ret < 0)
122 return ret;
123 /* No large folio support yet. */
124 ASSERT(!folio_test_large(folio));
125
126 offset = offset_in_folio(folio, cur);
127 copy_length = min(folio_size(folio) - offset,
128 filepos + length - cur);
129
130 data_in = kmap_local_folio(folio, offset);
131 memcpy(workspace->buf + cur - filepos, data_in, copy_length);
132 kunmap_local(data_in);
133 cur += copy_length;
134 }
135 return 0;
136 }
137
/*
 * Compress a range of a file into newly allocated folios.
 *
 * @ws:         zlib workspace from zlib_get_workspace()
 * @mapping:    address space supplying the uncompressed input pages
 * @start:      file offset of the first input byte
 * @folios:     array that receives the allocated compressed folios
 * @out_folios: on entry the capacity of @folios; on exit the number of
 *              folios actually used (set on error paths too)
 * @total_in:   on exit, number of input bytes consumed
 * @total_out:  on entry, number of input bytes to compress; on exit, the
 *              compressed size in bytes
 *
 * Returns 0 on success, -E2BIG when the data does not compress (output
 * would be as large or larger than the input, or the destination folio
 * budget is exhausted), -ENOMEM on allocation failure, -EIO on zlib error.
 */
int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret;
	char *data_in = NULL;		/* kmap of the current input folio, if any */
	char *cfolio_out;		/* kernel address of the current output folio */
	int nr_folios = 0;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	unsigned long len = *total_out;	/* number of input bytes to compress */
	unsigned long nr_dest_folios = *out_folios;
	const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
	const u64 orig_end = start + len;

	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_err(inode->root->fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	/* First output folio; more are added as avail_out runs dry. */
	out_folio = btrfs_alloc_compr_folio();
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);
	folios[0] = out_folio;
	nr_folios = 1;

	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = PAGE_SIZE;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to
		 * the workspace buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * This can only happen when hardware zlib compression is
			 * enabled (buf_size > PAGE_SIZE only with dfltcc).
			 */
			if (copy_length > PAGE_SIZE) {
				/* Stage multiple pages into workspace->buf. */
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int pg_off;
				unsigned int cur_len;

				/* Release the previously mapped input folio. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				pg_off = offset_in_page(start);
				cur_len = btrfs_calc_input_length(orig_end, start);
				data_in = kmap_local_folio(in_folio, pg_off);
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_warn(inode->root->fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* we're making it bigger, give up */
		if (workspace->strm.total_in > 8192 &&
		    workspace->strm.total_in <
		    workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/*
		 * We need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required.
		 */
		if (workspace->strm.avail_out == 0) {
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = PAGE_SIZE;
			workspace->strm.next_out = cfolio_out;
		}
		/* we're all done */
		if (workspace->strm.total_in >= len)
			break;
		if (workspace->strm.total_out > max_out)
			break;
	}
	workspace->strm.avail_in = 0;
	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (ret != Z_OK && ret != Z_BUF_ERROR) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			/* Get another folio for the stream end. */
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = PAGE_SIZE;
			workspace->strm.next_out = cfolio_out;
		}
	}
	zlib_deflateEnd(&workspace->strm);

	/* Compression must save at least one byte, otherwise don't bother. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_out = workspace->strm.total_out;
	*total_in = workspace->strm.total_in;
out:
	/* Report folio usage even on error so the caller can free them. */
	*out_folios = nr_folios;
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
325
/*
 * Decompress the whole compressed extent described by @cb.
 *
 * The input is consumed folio by folio from cb->compressed_folios; each
 * chunk is inflated into the workspace buffer and then copied to the
 * destination pages with btrfs_decompress_buf2page().
 *
 * Returns 0 on success (including early completion once all destination
 * range is filled) or -EIO if the stream does not end with Z_STREAM_END.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;			/* kmap of the current input folio */
	size_t total_out = 0;		/* bytes inflated so far (snapshot) */
	unsigned long folio_in_index = 0;
	size_t srclen = cb->compressed_len;
	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
	unsigned long buf_start;
	struct folio **folios_in = cb->compressed_folios;

	data_in = kmap_local_folio(folios_in[folio_in_index], 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		/* Negative wbits: raw deflate, skip the zlib header/checksum. */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		/* Copy the bytes produced by this inflate call out of the buffer. */
		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		if (ret2 == 0) {
			/* Destination fully covered, no need to inflate more. */
			ret = 0;
			goto done;
		}

		/* Output buffer drained; rewind for the next inflate call. */
		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		/* Current input folio exhausted; map the next one. */
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			kunmap_local(data_in);
			folio_in_index++;
			if (folio_in_index >= total_folios_in) {
				data_in = NULL;
				break;
			}
			data_in = kmap_local_folio(folios_in[folio_in_index], 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, PAGE_SIZE);
		}
	}
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
420
/*
 * Decompress one contiguous compressed chunk into a single folio.
 *
 * @ws:         zlib workspace
 * @data_in:    the compressed bytes
 * @dest_folio: destination folio
 * @dest_pgoff: byte offset inside @dest_folio to copy the result to
 * @srclen:     number of compressed bytes in @data_in
 * @destlen:    expected decompressed size
 *
 * Per the comment below, both input and output are expected to fit in the
 * workspace buffer in one pass (at most one sector), so no buffer switching
 * is done.  A short result zero-fills the remainder of the destination
 * range.  Returns 0 on success, -EIO on any decompression failure or size
 * mismatch.
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;		/* bytes actually copied to the folio */

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		/* Negative wbits: raw deflate without zlib header/checksum. */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);
	if (ret != Z_STREAM_END)
		goto out;	/* to_copy != destlen triggers the error path below */

	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

out:
	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
	"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	/* Pad a short result with zeros so the caller sees destlen bytes. */
	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}
489
/* Level limits and workspace manager registered with the compression layer. */
const struct btrfs_compress_op btrfs_zlib_compress = {
	.workspace_manager = &wsm,
	.min_level = 1,
	.max_level = 9,
	.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
};
496