1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Oracle. All rights reserved. 4 * 5 * Based on jffs2 zlib code: 6 * Copyright © 2001-2007 Red Hat, Inc. 7 * Created by David Woodhouse <dwmw2@infradead.org> 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/slab.h> 12 #include <linux/zlib.h> 13 #include <linux/zutil.h> 14 #include <linux/mm.h> 15 #include <linux/init.h> 16 #include <linux/err.h> 17 #include <linux/sched.h> 18 #include <linux/pagemap.h> 19 #include <linux/bio.h> 20 #include <linux/refcount.h> 21 #include "btrfs_inode.h" 22 #include "compression.h" 23 #include "fs.h" 24 #include "subpage.h" 25 26 /* workspace buffer size for s390 zlib hardware support */ 27 #define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE) 28 29 struct workspace { 30 z_stream strm; 31 char *buf; 32 unsigned int buf_size; 33 struct list_head list; 34 int level; 35 }; 36 37 struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 38 { 39 struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level); 40 struct workspace *workspace = list_entry(ws, struct workspace, list); 41 42 workspace->level = level; 43 44 return ws; 45 } 46 47 void zlib_free_workspace(struct list_head *ws) 48 { 49 struct workspace *workspace = list_entry(ws, struct workspace, list); 50 51 kvfree(workspace->strm.workspace); 52 kfree(workspace->buf); 53 kfree(workspace); 54 } 55 56 /* 57 * For s390 hardware acceleration, the buffer size should be at least 58 * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance. 59 * 60 * But if bs > ps we can have large enough folios that meet the s390 hardware 61 * handling. 62 */ 63 static bool need_special_buffer(struct btrfs_fs_info *fs_info) 64 { 65 if (!zlib_deflate_dfltcc_enabled()) 66 return false; 67 if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE) 68 return false; 69 return true; 70 } 71 72 struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 73 { 74 const u32 blocksize = fs_info->sectorsize; 75 struct workspace *workspace; 76 int workspacesize; 77 78 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); 79 if (!workspace) 80 return ERR_PTR(-ENOMEM); 81 82 workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), 83 zlib_inflate_workspacesize()); 84 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN); 85 workspace->level = level; 86 workspace->buf = NULL; 87 if (need_special_buffer(fs_info)) { 88 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE, 89 __GFP_NOMEMALLOC | __GFP_NORETRY | 90 __GFP_NOWARN | GFP_NOIO); 91 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE; 92 } 93 if (!workspace->buf) { 94 workspace->buf = kmalloc(blocksize, GFP_KERNEL); 95 workspace->buf_size = blocksize; 96 } 97 if (!workspace->strm.workspace || !workspace->buf) 98 goto fail; 99 100 INIT_LIST_HEAD(&workspace->list); 101 102 return &workspace->list; 103 fail: 104 zlib_free_workspace(&workspace->list); 105 return ERR_PTR(-ENOMEM); 106 } 107 108 /* 109 * Helper for S390x with hardware zlib compression support. 110 * 111 * That hardware acceleration requires a buffer size larger than a single page 112 * to get ideal performance, thus we need to do the memory copy rather than 113 * use the page cache directly as input buffer. 114 */ 115 static int copy_data_into_buffer(struct address_space *mapping, 116 struct workspace *workspace, u64 filepos, 117 unsigned long length) 118 { 119 u64 cur = filepos; 120 121 /* It's only for hardware accelerated zlib code. */ 122 ASSERT(zlib_deflate_dfltcc_enabled()); 123 124 while (cur < filepos + length) { 125 struct folio *folio; 126 void *data_in; 127 unsigned int offset; 128 unsigned long copy_length; 129 int ret; 130 131 ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio); 132 if (ret < 0) 133 return ret; 134 135 offset = offset_in_folio(folio, cur); 136 copy_length = min(folio_size(folio) - offset, 137 filepos + length - cur); 138 139 data_in = kmap_local_folio(folio, offset); 140 memcpy(workspace->buf + cur - filepos, data_in, copy_length); 141 kunmap_local(data_in); 142 cur += copy_length; 143 } 144 return 0; 145 } 146 147 int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 148 u64 start, struct folio **folios, unsigned long *out_folios, 149 unsigned long *total_in, unsigned long *total_out) 150 { 151 struct btrfs_fs_info *fs_info = inode->root->fs_info; 152 struct workspace *workspace = list_entry(ws, struct workspace, list); 153 struct address_space *mapping = inode->vfs_inode.i_mapping; 154 const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 155 const u32 min_folio_size = btrfs_min_folio_size(fs_info); 156 int ret; 157 char *data_in = NULL; 158 char *cfolio_out; 159 int nr_folios = 0; 160 struct folio *in_folio = NULL; 161 struct folio *out_folio = NULL; 162 unsigned long len = *total_out; 163 unsigned long nr_dest_folios = *out_folios; 164 const unsigned long max_out = nr_dest_folios << min_folio_shift; 165 const u32 blocksize = fs_info->sectorsize; 166 const u64 orig_end = start + len; 167 168 *out_folios = 0; 169 *total_out = 0; 170 *total_in = 0; 171 172 ret = zlib_deflateInit(&workspace->strm, workspace->level); 173 if (unlikely(ret != Z_OK)) { 174 btrfs_err(fs_info, 175 "zlib compression init failed, error %d root %llu inode %llu offset %llu", 176 ret, btrfs_root_id(inode->root), btrfs_ino(inode), start); 177 ret = -EIO; 178 goto out; 179 } 180 181 workspace->strm.total_in = 0; 182 workspace->strm.total_out = 0; 183 184 out_folio = btrfs_alloc_compr_folio(fs_info); 185 if (out_folio == NULL) { 186 ret = -ENOMEM; 187 goto out; 188 } 189 cfolio_out = folio_address(out_folio); 190 folios[0] = out_folio; 191 nr_folios = 1; 192 193 workspace->strm.next_in = workspace->buf; 194 workspace->strm.avail_in = 0; 195 workspace->strm.next_out = cfolio_out; 196 workspace->strm.avail_out = min_folio_size; 197 198 while (workspace->strm.total_in < len) { 199 /* 200 * Get next input pages and copy the contents to 201 * the workspace buffer if required. 202 */ 203 if (workspace->strm.avail_in == 0) { 204 unsigned long bytes_left = len - workspace->strm.total_in; 205 unsigned int copy_length = min(bytes_left, workspace->buf_size); 206 207 /* 208 * For s390 hardware accelerated zlib, and our folio is smaller 209 * than the copy_length, we need to fill the buffer so that 210 * we can take full advantage of hardware acceleration. 211 */ 212 if (need_special_buffer(fs_info)) { 213 ret = copy_data_into_buffer(mapping, workspace, 214 start, copy_length); 215 if (ret < 0) 216 goto out; 217 start += copy_length; 218 workspace->strm.next_in = workspace->buf; 219 workspace->strm.avail_in = copy_length; 220 } else { 221 unsigned int cur_len; 222 223 if (data_in) { 224 kunmap_local(data_in); 225 folio_put(in_folio); 226 data_in = NULL; 227 } 228 ret = btrfs_compress_filemap_get_folio(mapping, 229 start, &in_folio); 230 if (ret < 0) 231 goto out; 232 cur_len = btrfs_calc_input_length(in_folio, orig_end, start); 233 data_in = kmap_local_folio(in_folio, 234 offset_in_folio(in_folio, start)); 235 start += cur_len; 236 workspace->strm.next_in = data_in; 237 workspace->strm.avail_in = cur_len; 238 } 239 } 240 241 ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); 242 if (unlikely(ret != Z_OK)) { 243 btrfs_warn(fs_info, 244 "zlib compression failed, error %d root %llu inode %llu offset %llu", 245 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 246 start); 247 zlib_deflateEnd(&workspace->strm); 248 ret = -EIO; 249 goto out; 250 } 251 252 /* we're making it bigger, give up */ 253 if (workspace->strm.total_in > blocksize * 2 && 254 workspace->strm.total_in < 255 workspace->strm.total_out) { 256 ret = -E2BIG; 257 goto out; 258 } 259 /* we need another page for writing out. Test this 260 * before the total_in so we will pull in a new page for 261 * the stream end if required 262 */ 263 if (workspace->strm.avail_out == 0) { 264 if (nr_folios == nr_dest_folios) { 265 ret = -E2BIG; 266 goto out; 267 } 268 out_folio = btrfs_alloc_compr_folio(fs_info); 269 if (out_folio == NULL) { 270 ret = -ENOMEM; 271 goto out; 272 } 273 cfolio_out = folio_address(out_folio); 274 folios[nr_folios] = out_folio; 275 nr_folios++; 276 workspace->strm.avail_out = min_folio_size; 277 workspace->strm.next_out = cfolio_out; 278 } 279 /* we're all done */ 280 if (workspace->strm.total_in >= len) 281 break; 282 if (workspace->strm.total_out > max_out) 283 break; 284 } 285 workspace->strm.avail_in = 0; 286 /* 287 * Call deflate with Z_FINISH flush parameter providing more output 288 * space but no more input data, until it returns with Z_STREAM_END. 289 */ 290 while (ret != Z_STREAM_END) { 291 ret = zlib_deflate(&workspace->strm, Z_FINISH); 292 if (ret == Z_STREAM_END) 293 break; 294 if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) { 295 zlib_deflateEnd(&workspace->strm); 296 ret = -EIO; 297 goto out; 298 } else if (workspace->strm.avail_out == 0) { 299 /* Get another folio for the stream end. */ 300 if (nr_folios == nr_dest_folios) { 301 ret = -E2BIG; 302 goto out; 303 } 304 out_folio = btrfs_alloc_compr_folio(fs_info); 305 if (out_folio == NULL) { 306 ret = -ENOMEM; 307 goto out; 308 } 309 cfolio_out = folio_address(out_folio); 310 folios[nr_folios] = out_folio; 311 nr_folios++; 312 workspace->strm.avail_out = min_folio_size; 313 workspace->strm.next_out = cfolio_out; 314 } 315 } 316 zlib_deflateEnd(&workspace->strm); 317 318 if (workspace->strm.total_out >= workspace->strm.total_in) { 319 ret = -E2BIG; 320 goto out; 321 } 322 323 ret = 0; 324 *total_out = workspace->strm.total_out; 325 *total_in = workspace->strm.total_in; 326 out: 327 *out_folios = nr_folios; 328 if (data_in) { 329 kunmap_local(data_in); 330 folio_put(in_folio); 331 } 332 333 return ret; 334 } 335 336 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 337 { 338 struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 339 struct workspace *workspace = list_entry(ws, struct workspace, list); 340 const u32 min_folio_size = btrfs_min_folio_size(fs_info); 341 int ret = 0, ret2; 342 int wbits = MAX_WBITS; 343 char *data_in; 344 size_t total_out = 0; 345 unsigned long folio_in_index = 0; 346 size_t srclen = cb->compressed_len; 347 unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size); 348 unsigned long buf_start; 349 struct folio **folios_in = cb->compressed_folios; 350 351 data_in = kmap_local_folio(folios_in[folio_in_index], 0); 352 workspace->strm.next_in = data_in; 353 workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size); 354 workspace->strm.total_in = 0; 355 356 workspace->strm.total_out = 0; 357 workspace->strm.next_out = workspace->buf; 358 workspace->strm.avail_out = workspace->buf_size; 359 360 /* If it's deflate, and it's got no preset dictionary, then 361 we can tell zlib to skip the adler32 check. */ 362 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 363 ((data_in[0] & 0x0f) == Z_DEFLATED) && 364 !(((data_in[0]<<8) + data_in[1]) % 31)) { 365 366 wbits = -((data_in[0] >> 4) + 8); 367 workspace->strm.next_in += 2; 368 workspace->strm.avail_in -= 2; 369 } 370 371 ret = zlib_inflateInit2(&workspace->strm, wbits); 372 if (unlikely(ret != Z_OK)) { 373 struct btrfs_inode *inode = cb->bbio.inode; 374 375 kunmap_local(data_in); 376 btrfs_err(inode->root->fs_info, 377 "zlib decompression init failed, error %d root %llu inode %llu offset %llu", 378 ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start); 379 return -EIO; 380 } 381 while (workspace->strm.total_in < srclen) { 382 ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); 383 if (ret != Z_OK && ret != Z_STREAM_END) 384 break; 385 386 buf_start = total_out; 387 total_out = workspace->strm.total_out; 388 389 /* we didn't make progress in this inflate call, we're done */ 390 if (buf_start == total_out) 391 break; 392 393 ret2 = btrfs_decompress_buf2page(workspace->buf, 394 total_out - buf_start, cb, buf_start); 395 if (ret2 == 0) { 396 ret = 0; 397 goto done; 398 } 399 400 workspace->strm.next_out = workspace->buf; 401 workspace->strm.avail_out = workspace->buf_size; 402 403 if (workspace->strm.avail_in == 0) { 404 unsigned long tmp; 405 kunmap_local(data_in); 406 folio_in_index++; 407 if (folio_in_index >= total_folios_in) { 408 data_in = NULL; 409 break; 410 } 411 data_in = kmap_local_folio(folios_in[folio_in_index], 0); 412 workspace->strm.next_in = data_in; 413 tmp = srclen - workspace->strm.total_in; 414 workspace->strm.avail_in = min(tmp, min_folio_size); 415 } 416 } 417 if (unlikely(ret != Z_STREAM_END)) { 418 btrfs_err(cb->bbio.inode->root->fs_info, 419 "zlib decompression failed, error %d root %llu inode %llu offset %llu", 420 ret, btrfs_root_id(cb->bbio.inode->root), 421 btrfs_ino(cb->bbio.inode), cb->start); 422 ret = -EIO; 423 } else { 424 ret = 0; 425 } 426 done: 427 zlib_inflateEnd(&workspace->strm); 428 if (data_in) 429 kunmap_local(data_in); 430 return ret; 431 } 432 433 int zlib_decompress(struct list_head *ws, const u8 *data_in, 434 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 435 size_t destlen) 436 { 437 struct workspace *workspace = list_entry(ws, struct workspace, list); 438 int ret = 0; 439 int wbits = MAX_WBITS; 440 unsigned long to_copy; 441 442 workspace->strm.next_in = data_in; 443 workspace->strm.avail_in = srclen; 444 workspace->strm.total_in = 0; 445 446 workspace->strm.next_out = workspace->buf; 447 workspace->strm.avail_out = workspace->buf_size; 448 workspace->strm.total_out = 0; 449 /* If it's deflate, and it's got no preset dictionary, then 450 we can tell zlib to skip the adler32 check. */ 451 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 452 ((data_in[0] & 0x0f) == Z_DEFLATED) && 453 !(((data_in[0]<<8) + data_in[1]) % 31)) { 454 455 wbits = -((data_in[0] >> 4) + 8); 456 workspace->strm.next_in += 2; 457 workspace->strm.avail_in -= 2; 458 } 459 460 ret = zlib_inflateInit2(&workspace->strm, wbits); 461 if (unlikely(ret != Z_OK)) { 462 struct btrfs_inode *inode = folio_to_inode(dest_folio); 463 464 btrfs_err(inode->root->fs_info, 465 "zlib decompression init failed, error %d root %llu inode %llu offset %llu", 466 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 467 folio_pos(dest_folio)); 468 return -EIO; 469 } 470 471 /* 472 * Everything (in/out buf) should be at most one sector, there should 473 * be no need to switch any input/output buffer. 474 */ 475 ret = zlib_inflate(&workspace->strm, Z_FINISH); 476 to_copy = min(workspace->strm.total_out, destlen); 477 if (ret != Z_STREAM_END) 478 goto out; 479 480 memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy); 481 482 out: 483 if (unlikely(to_copy != destlen)) { 484 struct btrfs_inode *inode = folio_to_inode(dest_folio); 485 486 btrfs_err(inode->root->fs_info, 487 "zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu", 488 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 489 folio_pos(dest_folio), to_copy, destlen); 490 ret = -EIO; 491 } else { 492 ret = 0; 493 } 494 495 zlib_inflateEnd(&workspace->strm); 496 497 if (unlikely(to_copy < destlen)) 498 folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy); 499 return ret; 500 } 501 502 const struct btrfs_compress_levels btrfs_zlib_compress = { 503 .min_level = 1, 504 .max_level = 9, 505 .default_level = BTRFS_ZLIB_DEFAULT_LEVEL, 506 }; 507