1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Oracle. All rights reserved. 4 * 5 * Based on jffs2 zlib code: 6 * Copyright © 2001-2007 Red Hat, Inc. 7 * Created by David Woodhouse <dwmw2@infradead.org> 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/slab.h> 12 #include <linux/zlib.h> 13 #include <linux/zutil.h> 14 #include <linux/mm.h> 15 #include <linux/init.h> 16 #include <linux/err.h> 17 #include <linux/sched.h> 18 #include <linux/pagemap.h> 19 #include <linux/bio.h> 20 #include <linux/refcount.h> 21 #include "btrfs_inode.h" 22 #include "compression.h" 23 #include "fs.h" 24 #include "subpage.h" 25 26 /* workspace buffer size for s390 zlib hardware support */ 27 #define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE) 28 29 struct workspace { 30 z_stream strm; 31 char *buf; 32 unsigned int buf_size; 33 struct list_head list; 34 int level; 35 }; 36 37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 38 { 39 struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level); 40 struct workspace *workspace = list_entry(ws, struct workspace, list); 41 42 workspace->level = level; 43 44 return ws; 45 } 46 47 void zlib_free_workspace(struct list_head *ws) 48 { 49 struct workspace *workspace = list_entry(ws, struct workspace, list); 50 51 kvfree(workspace->strm.workspace); 52 kfree(workspace->buf); 53 kfree(workspace); 54 } 55 56 /* 57 * For s390 hardware acceleration, the buffer size should be at least 58 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance. 59 * 60 * But if bs > ps we can have large enough folios that meet the s390 hardware 61 * handling. 62 */ 63 static bool need_special_buffer(struct btrfs_fs_info *fs_info) 64 { 65 if (!zlib_deflate_dfltcc_enabled()) 66 return false; 67 if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE) 68 return false; 69 return true; 70 } 71 72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 73 { 74 struct workspace *workspace; 75 int workspacesize; 76 77 workspace = kzalloc_obj(*workspace); 78 if (!workspace) 79 return ERR_PTR(-ENOMEM); 80 81 workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), 82 zlib_inflate_workspacesize()); 83 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN); 84 workspace->level = level; 85 workspace->buf = NULL; 86 if (need_special_buffer(fs_info)) { 87 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE, 88 __GFP_NOMEMALLOC | __GFP_NORETRY | 89 __GFP_NOWARN | GFP_NOIO); 90 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE; 91 } 92 if (!workspace->buf) { 93 workspace->buf = kmalloc(fs_info->sectorsize, GFP_KERNEL); 94 workspace->buf_size = fs_info->sectorsize; 95 } 96 if (!workspace->strm.workspace || !workspace->buf) 97 goto fail; 98 99 INIT_LIST_HEAD(&workspace->list); 100 101 return &workspace->list; 102 fail: 103 zlib_free_workspace(&workspace->list); 104 return ERR_PTR(-ENOMEM); 105 } 106 107 /* 108 * Helper for S390x with hardware zlib compression support. 109 * 110 * That hardware acceleration requires a buffer size larger than a single page 111 * to get ideal performance, thus we need to do the memory copy rather than 112 * use the page cache directly as input buffer. 113 */ 114 static int copy_data_into_buffer(struct address_space *mapping, 115 struct workspace *workspace, u64 filepos, 116 unsigned long length) 117 { 118 u64 cur = filepos; 119 120 /* It's only for hardware accelerated zlib code. */ 121 ASSERT(zlib_deflate_dfltcc_enabled()); 122 123 while (cur < filepos + length) { 124 struct folio *folio; 125 void *data_in; 126 unsigned int offset; 127 unsigned long copy_length; 128 int ret; 129 130 ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio); 131 if (ret < 0) 132 return ret; 133 134 offset = offset_in_folio(folio, cur); 135 copy_length = min(folio_size(folio) - offset, 136 filepos + length - cur); 137 138 data_in = kmap_local_folio(folio, offset); 139 memcpy(workspace->buf + cur - filepos, data_in, copy_length); 140 kunmap_local(data_in); 141 folio_put(folio); 142 cur += copy_length; 143 } 144 return 0; 145 } 146 147 int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb) 148 { 149 struct btrfs_inode *inode = cb->bbio.inode; 150 struct btrfs_fs_info *fs_info = inode->root->fs_info; 151 struct workspace *workspace = list_entry(ws, struct workspace, list); 152 struct address_space *mapping = inode->vfs_inode.i_mapping; 153 struct bio *bio = &cb->bbio.bio; 154 u64 start = cb->start; 155 u32 len = cb->len; 156 const u32 min_folio_size = btrfs_min_folio_size(fs_info); 157 int ret; 158 char *data_in = NULL; 159 struct folio *in_folio = NULL; 160 struct folio *out_folio = NULL; 161 const u64 orig_end = start + len; 162 163 ret = zlib_deflateInit(&workspace->strm, workspace->level); 164 if (unlikely(ret != Z_OK)) { 165 btrfs_err(fs_info, 166 "zlib compression init failed, error %d root %llu inode %llu offset %llu", 167 ret, btrfs_root_id(inode->root), btrfs_ino(inode), start); 168 ret = -EIO; 169 goto out; 170 } 171 172 workspace->strm.total_in = 0; 173 workspace->strm.total_out = 0; 174 175 out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS); 176 if (out_folio == NULL) { 177 ret = -ENOMEM; 178 goto out; 179 } 180 181 workspace->strm.next_in = workspace->buf; 182 workspace->strm.avail_in = 0; 183 workspace->strm.next_out = folio_address(out_folio); 184 workspace->strm.avail_out = min_folio_size; 185 186 while (workspace->strm.total_in < len) { 187 /* 188 * Get next input pages and copy the contents to the workspace 189 * buffer if required. 190 */ 191 if (workspace->strm.avail_in == 0) { 192 unsigned long bytes_left = len - workspace->strm.total_in; 193 unsigned int copy_length = min(bytes_left, workspace->buf_size); 194 195 /* 196 * For s390 hardware accelerated zlib, and our folio is smaller 197 * than the copy_length, we need to fill the buffer so that 198 * we can take full advantage of hardware acceleration. 199 */ 200 if (need_special_buffer(fs_info)) { 201 ret = copy_data_into_buffer(mapping, workspace, 202 start, copy_length); 203 if (ret < 0) 204 goto out; 205 start += copy_length; 206 workspace->strm.next_in = workspace->buf; 207 workspace->strm.avail_in = copy_length; 208 } else { 209 unsigned int cur_len; 210 211 if (data_in) { 212 kunmap_local(data_in); 213 folio_put(in_folio); 214 data_in = NULL; 215 } 216 ret = btrfs_compress_filemap_get_folio(mapping, 217 start, &in_folio); 218 if (ret < 0) 219 goto out; 220 cur_len = btrfs_calc_input_length(in_folio, orig_end, start); 221 data_in = kmap_local_folio(in_folio, 222 offset_in_folio(in_folio, start)); 223 start += cur_len; 224 workspace->strm.next_in = data_in; 225 workspace->strm.avail_in = cur_len; 226 } 227 } 228 229 ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); 230 if (unlikely(ret != Z_OK)) { 231 btrfs_warn(fs_info, 232 "zlib compression failed, error %d root %llu inode %llu offset %llu", 233 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 234 start); 235 zlib_deflateEnd(&workspace->strm); 236 ret = -EIO; 237 goto out; 238 } 239 240 /* We're making it bigger, give up. */ 241 if (workspace->strm.total_in > fs_info->sectorsize * 2 && 242 workspace->strm.total_in < workspace->strm.total_out) { 243 ret = -E2BIG; 244 goto out; 245 } 246 if (workspace->strm.total_out >= len) { 247 ret = -E2BIG; 248 goto out; 249 } 250 /* Queue the full folio and allocate a new one. */ 251 if (workspace->strm.avail_out == 0) { 252 if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) { 253 ret = -E2BIG; 254 goto out; 255 } 256 257 out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS); 258 if (out_folio == NULL) { 259 ret = -ENOMEM; 260 goto out; 261 } 262 workspace->strm.avail_out = min_folio_size; 263 workspace->strm.next_out = folio_address(out_folio); 264 } 265 /* We're all done. */ 266 if (workspace->strm.total_in >= len) 267 break; 268 } 269 270 workspace->strm.avail_in = 0; 271 272 /* 273 * Call deflate with Z_FINISH flush parameter providing more output 274 * space but no more input data, until it returns with Z_STREAM_END. 275 */ 276 while (ret != Z_STREAM_END) { 277 ret = zlib_deflate(&workspace->strm, Z_FINISH); 278 if (ret == Z_STREAM_END) 279 break; 280 if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) { 281 zlib_deflateEnd(&workspace->strm); 282 ret = -EIO; 283 goto out; 284 } else if (workspace->strm.avail_out == 0) { 285 if (workspace->strm.total_out >= len) { 286 ret = -E2BIG; 287 goto out; 288 } 289 if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) { 290 ret = -E2BIG; 291 goto out; 292 } 293 /* Get another folio for the stream end. */ 294 out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS); 295 if (out_folio == NULL) { 296 ret = -ENOMEM; 297 goto out; 298 } 299 workspace->strm.avail_out = min_folio_size; 300 workspace->strm.next_out = folio_address(out_folio); 301 } 302 } 303 /* Queue the remaining part of the folio. */ 304 if (workspace->strm.total_out > bio->bi_iter.bi_size) { 305 const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size; 306 307 ASSERT(cur_len <= folio_size(out_folio)); 308 309 if (!bio_add_folio(bio, out_folio, cur_len, 0)) { 310 ret = -E2BIG; 311 goto out; 312 } 313 } else { 314 /* The last folio hasn't' been utilized. */ 315 btrfs_free_compr_folio(out_folio); 316 } 317 out_folio = NULL; 318 ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out); 319 zlib_deflateEnd(&workspace->strm); 320 321 if (workspace->strm.total_out >= workspace->strm.total_in) { 322 ret = -E2BIG; 323 goto out; 324 } 325 326 ret = 0; 327 out: 328 if (out_folio) 329 btrfs_free_compr_folio(out_folio); 330 if (data_in) { 331 kunmap_local(data_in); 332 folio_put(in_folio); 333 } 334 335 return ret; 336 } 337 338 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 339 { 340 struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 341 struct workspace *workspace = list_entry(ws, struct workspace, list); 342 struct folio_iter fi; 343 const u32 min_folio_size = btrfs_min_folio_size(fs_info); 344 int ret = 0, ret2; 345 int wbits = MAX_WBITS; 346 char *data_in; 347 size_t total_out = 0; 348 const size_t srclen = bio_get_size(&cb->bbio.bio); 349 unsigned long buf_start; 350 351 bio_first_folio(&fi, &cb->bbio.bio, 0); 352 353 /* We must have at least one folio here, that has the correct size. */ 354 if (unlikely(!fi.folio)) 355 return -EINVAL; 356 ASSERT(folio_size(fi.folio) == min_folio_size); 357 358 data_in = kmap_local_folio(fi.folio, 0); 359 workspace->strm.next_in = data_in; 360 workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size); 361 workspace->strm.total_in = 0; 362 363 workspace->strm.total_out = 0; 364 workspace->strm.next_out = workspace->buf; 365 workspace->strm.avail_out = workspace->buf_size; 366 367 /* If it's deflate, and it's got no preset dictionary, then 368 we can tell zlib to skip the adler32 check. */ 369 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 370 ((data_in[0] & 0x0f) == Z_DEFLATED) && 371 !(((data_in[0]<<8) + data_in[1]) % 31)) { 372 373 wbits = -((data_in[0] >> 4) + 8); 374 workspace->strm.next_in += 2; 375 workspace->strm.avail_in -= 2; 376 } 377 378 ret = zlib_inflateInit2(&workspace->strm, wbits); 379 if (unlikely(ret != Z_OK)) { 380 struct btrfs_inode *inode = cb->bbio.inode; 381 382 kunmap_local(data_in); 383 btrfs_err(inode->root->fs_info, 384 "zlib decompression init failed, error %d root %llu inode %llu offset %llu", 385 ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start); 386 return -EIO; 387 } 388 while (workspace->strm.total_in < srclen) { 389 ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); 390 if (ret != Z_OK && ret != Z_STREAM_END) 391 break; 392 393 buf_start = total_out; 394 total_out = workspace->strm.total_out; 395 396 /* we didn't make progress in this inflate call, we're done */ 397 if (buf_start == total_out) 398 break; 399 400 ret2 = btrfs_decompress_buf2page(workspace->buf, 401 total_out - buf_start, cb, buf_start); 402 if (ret2 == 0) { 403 ret = 0; 404 goto done; 405 } 406 407 workspace->strm.next_out = workspace->buf; 408 workspace->strm.avail_out = workspace->buf_size; 409 410 if (workspace->strm.avail_in == 0) { 411 unsigned long tmp; 412 kunmap_local(data_in); 413 bio_next_folio(&fi, &cb->bbio.bio); 414 if (!fi.folio) { 415 data_in = NULL; 416 break; 417 } 418 ASSERT(folio_size(fi.folio) == min_folio_size); 419 data_in = kmap_local_folio(fi.folio, 0); 420 workspace->strm.next_in = data_in; 421 tmp = srclen - workspace->strm.total_in; 422 workspace->strm.avail_in = min(tmp, min_folio_size); 423 } 424 } 425 if (unlikely(ret != Z_STREAM_END)) { 426 btrfs_err(cb->bbio.inode->root->fs_info, 427 "zlib decompression failed, error %d root %llu inode %llu offset %llu", 428 ret, btrfs_root_id(cb->bbio.inode->root), 429 btrfs_ino(cb->bbio.inode), cb->start); 430 ret = -EIO; 431 } else { 432 ret = 0; 433 } 434 done: 435 zlib_inflateEnd(&workspace->strm); 436 if (data_in) 437 kunmap_local(data_in); 438 return ret; 439 } 440 441 int zlib_decompress(struct list_head *ws, const u8 *data_in, 442 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 443 size_t destlen) 444 { 445 struct workspace *workspace = list_entry(ws, struct workspace, list); 446 int ret = 0; 447 int wbits = MAX_WBITS; 448 unsigned long to_copy; 449 450 workspace->strm.next_in = data_in; 451 workspace->strm.avail_in = srclen; 452 workspace->strm.total_in = 0; 453 454 workspace->strm.next_out = workspace->buf; 455 workspace->strm.avail_out = workspace->buf_size; 456 workspace->strm.total_out = 0; 457 /* If it's deflate, and it's got no preset dictionary, then 458 we can tell zlib to skip the adler32 check. */ 459 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 460 ((data_in[0] & 0x0f) == Z_DEFLATED) && 461 !(((data_in[0]<<8) + data_in[1]) % 31)) { 462 463 wbits = -((data_in[0] >> 4) + 8); 464 workspace->strm.next_in += 2; 465 workspace->strm.avail_in -= 2; 466 } 467 468 ret = zlib_inflateInit2(&workspace->strm, wbits); 469 if (unlikely(ret != Z_OK)) { 470 struct btrfs_inode *inode = folio_to_inode(dest_folio); 471 472 btrfs_err(inode->root->fs_info, 473 "zlib decompression init failed, error %d root %llu inode %llu offset %llu", 474 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 475 folio_pos(dest_folio)); 476 return -EIO; 477 } 478 479 /* 480 * Everything (in/out buf) should be at most one sector, there should 481 * be no need to switch any input/output buffer. 482 */ 483 ret = zlib_inflate(&workspace->strm, Z_FINISH); 484 to_copy = min(workspace->strm.total_out, destlen); 485 if (ret != Z_STREAM_END) 486 goto out; 487 488 memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy); 489 490 out: 491 if (unlikely(to_copy != destlen)) { 492 struct btrfs_inode *inode = folio_to_inode(dest_folio); 493 494 btrfs_err(inode->root->fs_info, 495 "zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu", 496 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 497 folio_pos(dest_folio), to_copy, destlen); 498 ret = -EIO; 499 } else { 500 ret = 0; 501 } 502 503 zlib_inflateEnd(&workspace->strm); 504 505 if (unlikely(to_copy < destlen)) 506 folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy); 507 return ret; 508 } 509 510 const struct btrfs_compress_levels btrfs_zlib_compress = { 511 .min_level = 1, 512 .max_level = 9, 513 .default_level = BTRFS_ZLIB_DEFAULT_LEVEL, 514 }; 515