// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/refcount.h>
#include "btrfs_inode.h"
#include "compression.h"
#include "fs.h"
#include "subpage.h"

/* workspace buffer size for s390 zlib hardware support */
#define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE)

/*
 * Reusable zlib state, handed out by the generic btrfs workspace manager
 * (see zlib_get_workspace()/zlib_free_workspace()).
 */
struct workspace {
	z_stream strm;		/* zlib stream; strm.workspace holds the scratch memory */
	char *buf;		/* intermediate input/output copy buffer */
	unsigned int buf_size;	/* size of @buf in bytes */
	struct list_head list;	/* link into the workspace manager's list */
	int level;		/* compression level to use for the next compress call */
};

/*
 * Get a zlib workspace from the generic btrfs workspace manager and record
 * the requested compression level on it for the upcoming compression.
 */
struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
	struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
	struct workspace *workspace = list_entry(ws, struct workspace, list);

	workspace->level = level;

	return ws;
}

/* Free a workspace and all memory attached to it. */
void zlib_free_workspace(struct list_head *ws)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);

	/* strm.workspace comes from kvzalloc(), so kvfree() it. */
	kvfree(workspace->strm.workspace);
	kfree(workspace->buf);
	kfree(workspace);
}

/*
 * For s390 hardware acceleration, the buffer size should be at least
 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
 *
 * But if bs > ps we can have large enough folios that meet the s390 hardware
 * handling.
 */
static bool need_special_buffer(struct btrfs_fs_info *fs_info)
{
	if (!zlib_deflate_dfltcc_enabled())
		return false;
	if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
		return false;
	return true;
}

/*
 * Allocate a zlib workspace.
 *
 * The zlib scratch memory is sized for the larger of deflate and inflate so
 * the same workspace serves both directions.  When the s390 DFLTCC hardware
 * path needs a large buffer we try that first as a best-effort allocation
 * (no retry, no warning) and fall back to a blocksize-sized buffer.
 *
 * Returns the embedded list head, or ERR_PTR(-ENOMEM) on failure.
 */
struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
{
	const u32 blocksize = fs_info->sectorsize;
	struct workspace *workspace;
	int workspacesize;

	workspace = kzalloc_obj(*workspace);
	if (!workspace)
		return ERR_PTR(-ENOMEM);

	workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			    zlib_inflate_workspacesize());
	workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
	workspace->level = level;
	workspace->buf = NULL;
	if (need_special_buffer(fs_info)) {
		/* Best effort; on failure we fall back to the small buffer below. */
		workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
					 __GFP_NOMEMALLOC | __GFP_NORETRY |
					 __GFP_NOWARN | GFP_NOIO);
		workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
	}
	if (!workspace->buf) {
		workspace->buf = kmalloc(blocksize, GFP_KERNEL);
		workspace->buf_size = blocksize;
	}
	if (!workspace->strm.workspace || !workspace->buf)
		goto fail;

	INIT_LIST_HEAD(&workspace->list);

	return &workspace->list;
fail:
	/* Safe on partial allocation: kvfree()/kfree() both accept NULL. */
	zlib_free_workspace(&workspace->list);
	return ERR_PTR(-ENOMEM);
}

/*
 * Helper for S390x with hardware zlib compression support.
 *
 * That hardware acceleration requires a buffer size larger than a single page
 * to get ideal performance, thus we need to do the memory copy rather than
 * use the page cache directly as input buffer.
 *
 * Copies @length bytes starting at file position @filepos from @mapping into
 * workspace->buf.  Returns 0 on success or a negative errno if a folio could
 * not be obtained.
 */
static int copy_data_into_buffer(struct address_space *mapping,
				 struct workspace *workspace, u64 filepos,
				 unsigned long length)
{
	u64 cur = filepos;

	/* It's only for hardware accelerated zlib code. */
	ASSERT(zlib_deflate_dfltcc_enabled());

	while (cur < filepos + length) {
		struct folio *folio;
		void *data_in;
		unsigned int offset;
		unsigned long copy_length;
		int ret;

		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
		if (ret < 0)
			return ret;

		offset = offset_in_folio(folio, cur);
		/* Copy to the end of the folio, capped to the requested range. */
		copy_length = min(folio_size(folio) - offset,
				  filepos + length - cur);

		data_in = kmap_local_folio(folio, offset);
		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
		kunmap_local(data_in);
		folio_put(folio);
		cur += copy_length;
	}
	return 0;
}

/*
 * Compress the file range [cb->start, cb->start + cb->len) from the inode's
 * page cache into freshly allocated folios that are added to cb->bbio.bio.
 *
 * Returns 0 on success, -E2BIG when the data does not compress well enough
 * (output would be at least as large as the input, or would not fit the bio),
 * -ENOMEM on allocation failure, or -EIO on a zlib error.
 */
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_inode *inode = cb->bbio.inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	struct bio *bio = &cb->bbio.bio;
	u64 start = cb->start;
	u32 len = cb->len;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	char *data_in = NULL;
	char *cfolio_out;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	const u32 blocksize = fs_info->sectorsize;
	const u64 orig_end = start + len;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	out_folio = btrfs_alloc_compr_folio(fs_info);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);

	/* Output is produced one compressed folio at a time. */
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = min_folio_size;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to the workspace
		 * buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, and our folio is smaller
			 * than the copy_length, we need to fill the buffer so that
			 * we can take full advantage of hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/* Release the previous input folio mapping first. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* We're making it bigger, give up. */
		if (workspace->strm.total_in > blocksize * 2 &&
		    workspace->strm.total_in < workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/* Compressed result is not smaller than the input, give up. */
		if (workspace->strm.total_out >= len) {
			ret = -E2BIG;
			goto out;
		}
		/* Queue the full folio and allocate a new one. */
		if (workspace->strm.avail_out == 0) {
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}

			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
		/* We're all done. */
		if (workspace->strm.total_in >= len)
			break;
	}

	workspace->strm.avail_in = 0;

	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			if (workspace->strm.total_out >= len) {
				ret = -E2BIG;
				goto out;
			}
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}
			/* Get another folio for the stream end. */
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
	}
	/* Queue the remaining part of the folio. */
	if (workspace->strm.total_out > bio->bi_iter.bi_size) {
		const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;

		ASSERT(cur_len <= folio_size(out_folio));

		if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
			ret = -E2BIG;
			goto out;
		}
	} else {
		/* The last folio hasn't been utilized. */
		btrfs_free_compr_folio(out_folio);
	}
	out_folio = NULL;
	ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out);
	zlib_deflateEnd(&workspace->strm);

	/* Final sanity check that compression actually saved space. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
out:
	if (out_folio)
		btrfs_free_compr_folio(out_folio);
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}

/*
 * Decompress the compressed extent held in the folios of cb->bbio.bio into
 * the destination described by @cb.
 *
 * Returns 0 on success, -EINVAL if the bio has no folios, or -EIO on a zlib
 * error or a truncated/invalid stream.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio_iter fi;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;
	size_t total_out = 0;
	size_t srclen = cb->compressed_len;
	unsigned long buf_start;

	bio_first_folio(&fi, &cb->bbio.bio, 0);

	/* We must have at least one folio here, that has the correct size. */
	if (unlikely(!fi.folio))
		return -EINVAL;
	ASSERT(folio_size(fi.folio) == min_folio_size);

	data_in = kmap_local_folio(fi.folio, 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check (negative wbits means a raw
	 * deflate stream to zlib_inflateInit2()).
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		/* Skip the 2-byte zlib header we just validated. */
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		/*
		 * Copy the newly produced bytes out to the destination; a
		 * return of 0 means no more output is needed, we're done.
		 */
		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		/* Input folio exhausted, advance to the next one. */
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;

			kunmap_local(data_in);
			bio_next_folio(&fi, &cb->bbio.bio);
			if (!fi.folio) {
				data_in = NULL;
				break;
			}
			ASSERT(folio_size(fi.folio) == min_folio_size);
			data_in = kmap_local_folio(fi.folio, 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}

/*
 * Decompress one contiguous compressed chunk (at most one sector, per the
 * comment below) from @data_in into @dest_folio at offset @dest_pgoff.
 *
 * Any part of the @destlen destination that could not be filled is zeroed.
 * Returns 0 on success or -EIO if the stream did not decompress to exactly
 * @destlen bytes.
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;
	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);
	if (ret != Z_STREAM_END)
		goto out;

	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

out:
	/* A short result is an error; report it and zero the tail below. */
	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}

/* Compression level bounds advertised to the generic btrfs compression code. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level = 1,
	.max_level = 9,
	.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
};