1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Oracle. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/slab.h> 8 #include <linux/mm.h> 9 #include <linux/init.h> 10 #include <linux/err.h> 11 #include <linux/sched.h> 12 #include <linux/pagemap.h> 13 #include <linux/bio.h> 14 #include <linux/lzo.h> 15 #include <linux/refcount.h> 16 #include "messages.h" 17 #include "compression.h" 18 #include "ctree.h" 19 #include "super.h" 20 #include "btrfs_inode.h" 21 22 #define LZO_LEN 4 23 24 /* 25 * Btrfs LZO compression format 26 * 27 * Regular and inlined LZO compressed data extents consist of: 28 * 29 * 1. Header 30 * Fixed size. LZO_LEN (4) bytes long, LE32. 31 * Records the total size (including the header) of compressed data. 32 * 33 * 2. Segment(s) 34 * Variable size. Each segment includes one segment header, followed by data 35 * payload. 36 * One regular LZO compressed extent can have one or more segments. 37 * For inlined LZO compressed extent, only one segment is allowed. 38 * One segment represents at most one sector of uncompressed data. 39 * 40 * 2.1 Segment header 41 * Fixed size. LZO_LEN (4) bytes long, LE32. 42 * Records the total size of the segment (not including the header). 43 * Segment header never crosses sector boundary, thus it's possible to 44 * have at most 3 padding zeros at the end of the sector. 45 * 46 * 2.2 Data Payload 47 * Variable size. Size up limit should be lzo1x_worst_compress(sectorsize) 48 * which is 4419 for a 4KiB sectorsize. 49 * 50 * Example with 4K sectorsize: 51 * Page 1: 52 * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10 53 * 0x0000 | Header | SegHdr 01 | Data payload 01 ... | 54 * ... 55 * 0x0ff0 | SegHdr N | Data payload N ... |00| 56 * ^^ padding zeros 57 * Page 2: 58 * 0x1000 | SegHdr N+1| Data payload N+1 ... | 59 */ 60 61 struct workspace { 62 void *mem; 63 void *buf; /* where decompressed data goes */ 64 void *cbuf; /* where compressed data goes */ 65 struct list_head list; 66 }; 67 68 static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info) 69 { 70 return lzo1x_worst_compress(fs_info->sectorsize); 71 } 72 static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info) 73 { 74 return lzo1x_worst_compress(fs_info->sectorsize); 75 } 76 77 void lzo_free_workspace(struct list_head *ws) 78 { 79 struct workspace *workspace = list_entry(ws, struct workspace, list); 80 81 kvfree(workspace->buf); 82 kvfree(workspace->cbuf); 83 kvfree(workspace->mem); 84 kfree(workspace); 85 } 86 87 struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info) 88 { 89 struct workspace *workspace; 90 91 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); 92 if (!workspace) 93 return ERR_PTR(-ENOMEM); 94 95 workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN); 96 workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); 97 workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); 98 if (!workspace->mem || !workspace->buf || !workspace->cbuf) 99 goto fail; 100 101 INIT_LIST_HEAD(&workspace->list); 102 103 return &workspace->list; 104 fail: 105 lzo_free_workspace(&workspace->list); 106 return ERR_PTR(-ENOMEM); 107 } 108 109 static inline void write_compress_length(char *buf, size_t len) 110 { 111 __le32 dlen; 112 113 dlen = cpu_to_le32(len); 114 memcpy(buf, &dlen, LZO_LEN); 115 } 116 117 static inline size_t read_compress_length(const char *buf) 118 { 119 __le32 dlen; 120 121 memcpy(&dlen, buf, LZO_LEN); 122 return le32_to_cpu(dlen); 123 } 124 125 /* 126 * Write data into @out_folio and queue it into @out_bio. 127 * 128 * Return 0 if everything is fine and @total_out will be increased. 129 * Return <0 for error. 130 * 131 * The @out_folio can be NULL after a full folio is queued. 132 * Thus the caller should check and allocate a new folio when needed. 133 */ 134 static int write_and_queue_folio(struct bio *out_bio, struct folio **out_folio, 135 u32 *total_out, u32 write_len) 136 { 137 const u32 fsize = folio_size(*out_folio); 138 const u32 foffset = offset_in_folio(*out_folio, *total_out); 139 140 ASSERT(out_folio && *out_folio); 141 /* Should not cross folio boundary. */ 142 ASSERT(foffset + write_len <= fsize); 143 144 /* We can not use bio_add_folio_nofail() which doesn't do any merge. */ 145 if (!bio_add_folio(out_bio, *out_folio, write_len, foffset)) { 146 /* 147 * We have allocated a bio that havs BTRFS_MAX_COMPRESSED_PAGES 148 * vecs, and all ranges inside the same folio should have been 149 * merged. If bio_add_folio() still failed, that means we have 150 * reached the bvec limits. 151 * 152 * This should only happen at the beginning of a folio, and 153 * caller is responsible for releasing the folio, since it's 154 * not yet queued into the bio. 155 */ 156 ASSERT(IS_ALIGNED(*total_out, fsize)); 157 return -E2BIG; 158 } 159 160 *total_out += write_len; 161 /* 162 * The full folio has been filled and queued, reset @out_folio to NULL, 163 * so that error handling is fully handled by the bio. 164 */ 165 if (IS_ALIGNED(*total_out, fsize)) 166 *out_folio = NULL; 167 return 0; 168 } 169 170 /* 171 * Copy compressed data to bio. 172 * 173 * @out_bio: The bio that will contain all the compressed data. 174 * @compressed_data: The compressed data of this segment. 175 * @compressed_size: The size of the compressed data. 176 * @out_folio: The current output folio, will be updated if a new 177 * folio is allocated. 178 * @total_out: The total bytes of current output. 179 * @max_out: The maximum size of the compressed data. 180 * 181 * Will do: 182 * 183 * - Write a segment header into the destination 184 * - Copy the compressed buffer into the destination 185 * - Make sure we have enough space in the last sector to fit a segment header 186 * If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros. 187 * - If a full folio is filled, it will be queued into @out_bio, and @out_folio 188 * will be updated. 189 * 190 * Will allocate new pages when needed. 191 */ 192 static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info, 193 struct bio *out_bio, 194 const char *compressed_data, 195 size_t compressed_size, 196 struct folio **out_folio, 197 u32 *total_out, u32 max_out) 198 { 199 const u32 sectorsize = fs_info->sectorsize; 200 const u32 sectorsize_bits = fs_info->sectorsize_bits; 201 const u32 fsize = btrfs_min_folio_size(fs_info); 202 const u32 old_size = out_bio->bi_iter.bi_size; 203 u32 copy_start; 204 u32 sector_bytes_left; 205 char *kaddr; 206 int ret; 207 208 ASSERT(out_folio); 209 210 /* There should be at least a lzo header queued. */ 211 ASSERT(old_size); 212 ASSERT(old_size == *total_out); 213 214 /* 215 * We never allow a segment header crossing sector boundary, previous 216 * run should ensure we have enough space left inside the sector. 217 */ 218 ASSERT((old_size >> sectorsize_bits) == (old_size + LZO_LEN - 1) >> sectorsize_bits); 219 220 if (!*out_folio) { 221 *out_folio = btrfs_alloc_compr_folio(fs_info); 222 if (!*out_folio) 223 return -ENOMEM; 224 } 225 226 /* Write the segment header first. */ 227 kaddr = kmap_local_folio(*out_folio, offset_in_folio(*out_folio, *total_out)); 228 write_compress_length(kaddr, compressed_size); 229 kunmap_local(kaddr); 230 ret = write_and_queue_folio(out_bio, out_folio, total_out, LZO_LEN); 231 if (ret < 0) 232 return ret; 233 234 copy_start = *total_out; 235 236 /* Copy compressed data. */ 237 while (*total_out - copy_start < compressed_size) { 238 u32 copy_len = min_t(u32, sectorsize - *total_out % sectorsize, 239 copy_start + compressed_size - *total_out); 240 u32 foffset = *total_out & (fsize - 1); 241 242 /* With the range copied, we're larger than the original range. */ 243 if (((*total_out + copy_len) >> sectorsize_bits) >= 244 max_out >> sectorsize_bits) 245 return -E2BIG; 246 247 if (!*out_folio) { 248 *out_folio = btrfs_alloc_compr_folio(fs_info); 249 if (!*out_folio) 250 return -ENOMEM; 251 } 252 253 kaddr = kmap_local_folio(*out_folio, foffset); 254 memcpy(kaddr, compressed_data + *total_out - copy_start, copy_len); 255 kunmap_local(kaddr); 256 ret = write_and_queue_folio(out_bio, out_folio, total_out, copy_len); 257 if (ret < 0) 258 return ret; 259 } 260 261 /* 262 * Check if we can fit the next segment header into the remaining space 263 * of the sector. 264 */ 265 sector_bytes_left = round_up(*total_out, sectorsize) - *total_out; 266 if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0) 267 return 0; 268 269 ASSERT(*out_folio); 270 271 /* The remaining size is not enough, pad it with zeros */ 272 folio_zero_range(*out_folio, offset_in_folio(*out_folio, *total_out), sector_bytes_left); 273 return write_and_queue_folio(out_bio, out_folio, total_out, sector_bytes_left); 274 } 275 276 int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb) 277 { 278 struct btrfs_inode *inode = cb->bbio.inode; 279 struct btrfs_fs_info *fs_info = inode->root->fs_info; 280 struct workspace *workspace = list_entry(ws, struct workspace, list); 281 struct bio *bio = &cb->bbio.bio; 282 const u64 start = cb->start; 283 const u32 len = cb->len; 284 const u32 sectorsize = fs_info->sectorsize; 285 const u32 min_folio_size = btrfs_min_folio_size(fs_info); 286 struct address_space *mapping = inode->vfs_inode.i_mapping; 287 struct folio *folio_in = NULL; 288 struct folio *folio_out = NULL; 289 char *sizes_ptr; 290 int ret = 0; 291 /* Points to the file offset of input data. */ 292 u64 cur_in = start; 293 /* Points to the current output byte. */ 294 u32 total_out = 0; 295 296 ASSERT(bio->bi_iter.bi_size == 0); 297 ASSERT(len); 298 299 folio_out = btrfs_alloc_compr_folio(fs_info); 300 if (!folio_out) 301 return -ENOMEM; 302 303 /* Queue a segment header first. */ 304 ret = write_and_queue_folio(bio, &folio_out, &total_out, LZO_LEN); 305 /* The first header should not fail. */ 306 ASSERT(ret == 0); 307 308 while (cur_in < start + len) { 309 char *data_in; 310 const u32 sectorsize_mask = sectorsize - 1; 311 u32 sector_off = (cur_in - start) & sectorsize_mask; 312 u32 in_len; 313 size_t out_len; 314 315 /* Get the input page first. */ 316 if (!folio_in) { 317 ret = btrfs_compress_filemap_get_folio(mapping, cur_in, &folio_in); 318 if (ret < 0) 319 goto out; 320 } 321 322 /* Compress at most one sector of data each time. */ 323 in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off); 324 ASSERT(in_len); 325 data_in = kmap_local_folio(folio_in, offset_in_folio(folio_in, cur_in)); 326 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len, 327 workspace->mem); 328 kunmap_local(data_in); 329 if (unlikely(ret < 0)) { 330 /* lzo1x_1_compress never fails. */ 331 ret = -EIO; 332 goto out; 333 } 334 335 ret = copy_compressed_data_to_bio(fs_info, bio, workspace->cbuf, out_len, 336 &folio_out, &total_out, len); 337 if (ret < 0) 338 goto out; 339 340 cur_in += in_len; 341 342 /* 343 * Check if we're making it bigger after two sectors. And if 344 * it is so, give up. 345 */ 346 if (cur_in - start > sectorsize * 2 && cur_in - start < total_out) { 347 ret = -E2BIG; 348 goto out; 349 } 350 351 /* Check if we have reached input folio boundary. */ 352 if (IS_ALIGNED(cur_in, min_folio_size)) { 353 folio_put(folio_in); 354 folio_in = NULL; 355 } 356 } 357 /* 358 * The last folio is already queued. Bio is responsible for freeing 359 * those folios now. 360 */ 361 folio_out = NULL; 362 363 /* Store the size of all chunks of compressed data */ 364 sizes_ptr = kmap_local_folio(bio_first_folio_all(bio), 0); 365 write_compress_length(sizes_ptr, total_out); 366 kunmap_local(sizes_ptr); 367 out: 368 /* 369 * We can only free the folio that has no part queued into the bio. 370 * 371 * As any folio that is already queued into bio will be released by 372 * the endio function of bio. 373 */ 374 if (folio_out && IS_ALIGNED(total_out, min_folio_size)) { 375 btrfs_free_compr_folio(folio_out); 376 folio_out = NULL; 377 } 378 if (folio_in) 379 folio_put(folio_in); 380 return ret; 381 } 382 383 static struct folio *get_current_folio(struct compressed_bio *cb, struct folio_iter *fi, 384 u32 *cur_folio_index, u32 cur_in) 385 { 386 struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 387 const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 388 389 ASSERT(cur_folio_index); 390 391 /* Need to switch to the next folio. */ 392 if (cur_in >> min_folio_shift != *cur_folio_index) { 393 /* We can only do the switch one folio a time. */ 394 ASSERT(cur_in >> min_folio_shift == *cur_folio_index + 1); 395 396 bio_next_folio(fi, &cb->bbio.bio); 397 (*cur_folio_index)++; 398 } 399 return fi->folio; 400 } 401 402 /* 403 * Copy the compressed segment payload into @dest. 404 * 405 * For the payload there will be no padding, just need to do page switching. 406 */ 407 static void copy_compressed_segment(struct compressed_bio *cb, 408 struct folio_iter *fi, u32 *cur_folio_index, 409 char *dest, u32 len, u32 *cur_in) 410 { 411 u32 orig_in = *cur_in; 412 413 while (*cur_in < orig_in + len) { 414 struct folio *cur_folio = get_current_folio(cb, fi, cur_folio_index, *cur_in); 415 u32 copy_len; 416 417 ASSERT(cur_folio); 418 copy_len = min_t(u32, orig_in + len - *cur_in, 419 folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in)); 420 ASSERT(copy_len); 421 422 memcpy_from_folio(dest + *cur_in - orig_in, cur_folio, 423 offset_in_folio(cur_folio, *cur_in), copy_len); 424 425 *cur_in += copy_len; 426 } 427 } 428 429 int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 430 { 431 struct workspace *workspace = list_entry(ws, struct workspace, list); 432 const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; 433 const u32 sectorsize = fs_info->sectorsize; 434 struct folio_iter fi; 435 char *kaddr; 436 int ret; 437 /* Compressed data length, can be unaligned */ 438 u32 len_in; 439 /* Offset inside the compressed data */ 440 u32 cur_in = 0; 441 /* Bytes decompressed so far */ 442 u32 cur_out = 0; 443 /* The current folio index number inside the bio. */ 444 u32 cur_folio_index = 0; 445 446 bio_first_folio(&fi, &cb->bbio.bio, 0); 447 /* There must be a compressed folio and matches the sectorsize. */ 448 if (unlikely(!fi.folio)) 449 return -EINVAL; 450 ASSERT(folio_size(fi.folio) == sectorsize); 451 kaddr = kmap_local_folio(fi.folio, 0); 452 len_in = read_compress_length(kaddr); 453 kunmap_local(kaddr); 454 cur_in += LZO_LEN; 455 456 /* 457 * LZO header length check 458 * 459 * The total length should not exceed the maximum extent length, 460 * and all sectors should be used. 461 * If this happens, it means the compressed extent is corrupted. 462 */ 463 if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) || 464 round_up(len_in, sectorsize) < cb->compressed_len)) { 465 struct btrfs_inode *inode = cb->bbio.inode; 466 467 btrfs_err(fs_info, 468 "lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u", 469 btrfs_root_id(inode->root), btrfs_ino(inode), 470 cb->start, len_in, cb->compressed_len); 471 return -EUCLEAN; 472 } 473 474 /* Go through each lzo segment */ 475 while (cur_in < len_in) { 476 struct folio *cur_folio; 477 /* Length of the compressed segment */ 478 u32 seg_len; 479 u32 sector_bytes_left; 480 size_t out_len = lzo1x_worst_compress(sectorsize); 481 482 /* 483 * We should always have enough space for one segment header 484 * inside current sector. 485 */ 486 ASSERT(cur_in / sectorsize == 487 (cur_in + LZO_LEN - 1) / sectorsize); 488 cur_folio = get_current_folio(cb, &fi, &cur_folio_index, cur_in); 489 ASSERT(cur_folio); 490 kaddr = kmap_local_folio(cur_folio, 0); 491 seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in)); 492 kunmap_local(kaddr); 493 cur_in += LZO_LEN; 494 495 if (unlikely(seg_len > workspace_cbuf_length(fs_info))) { 496 struct btrfs_inode *inode = cb->bbio.inode; 497 498 /* 499 * seg_len shouldn't be larger than we have allocated 500 * for workspace->cbuf 501 */ 502 btrfs_err(fs_info, 503 "lzo segment too big, root %llu inode %llu offset %llu len %u", 504 btrfs_root_id(inode->root), btrfs_ino(inode), 505 cb->start, seg_len); 506 return -EIO; 507 } 508 509 /* Copy the compressed segment payload into workspace */ 510 copy_compressed_segment(cb, &fi, &cur_folio_index, workspace->cbuf, 511 seg_len, &cur_in); 512 513 /* Decompress the data */ 514 ret = lzo1x_decompress_safe(workspace->cbuf, seg_len, 515 workspace->buf, &out_len); 516 if (unlikely(ret != LZO_E_OK)) { 517 struct btrfs_inode *inode = cb->bbio.inode; 518 519 btrfs_err(fs_info, 520 "lzo decompression failed, error %d root %llu inode %llu offset %llu", 521 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 522 cb->start); 523 return -EIO; 524 } 525 526 /* Copy the data into inode pages */ 527 ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out); 528 cur_out += out_len; 529 530 /* All data read, exit */ 531 if (ret == 0) 532 return 0; 533 ret = 0; 534 535 /* Check if the sector has enough space for a segment header */ 536 sector_bytes_left = sectorsize - (cur_in % sectorsize); 537 if (sector_bytes_left >= LZO_LEN) 538 continue; 539 540 /* Skip the padding zeros */ 541 cur_in += sector_bytes_left; 542 } 543 544 return 0; 545 } 546 547 int lzo_decompress(struct list_head *ws, const u8 *data_in, 548 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 549 size_t destlen) 550 { 551 struct workspace *workspace = list_entry(ws, struct workspace, list); 552 struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio); 553 const u32 sectorsize = fs_info->sectorsize; 554 size_t in_len; 555 size_t out_len; 556 size_t max_segment_len = workspace_buf_length(fs_info); 557 int ret; 558 559 if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)) 560 return -EUCLEAN; 561 562 in_len = read_compress_length(data_in); 563 if (unlikely(in_len != srclen)) 564 return -EUCLEAN; 565 data_in += LZO_LEN; 566 567 in_len = read_compress_length(data_in); 568 if (unlikely(in_len != srclen - LZO_LEN * 2)) 569 return -EUCLEAN; 570 data_in += LZO_LEN; 571 572 out_len = sectorsize; 573 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); 574 if (unlikely(ret != LZO_E_OK)) { 575 struct btrfs_inode *inode = folio_to_inode(dest_folio); 576 577 btrfs_err(fs_info, 578 "lzo decompression failed, error %d root %llu inode %llu offset %llu", 579 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 580 folio_pos(dest_folio)); 581 return -EIO; 582 } 583 584 ASSERT(out_len <= sectorsize); 585 memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, out_len); 586 /* Early end, considered as an error. */ 587 if (unlikely(out_len < destlen)) { 588 folio_zero_range(dest_folio, dest_pgoff + out_len, destlen - out_len); 589 return -EIO; 590 } 591 592 return 0; 593 } 594 595 const struct btrfs_compress_levels btrfs_lzo_compress = { 596 .max_level = 1, 597 .default_level = 1, 598 }; 599