1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008 Oracle. All rights reserved.
4 */
5
6 #include <linux/kernel.h>
7 #include <linux/slab.h>
8 #include <linux/mm.h>
9 #include <linux/init.h>
10 #include <linux/err.h>
11 #include <linux/sched.h>
12 #include <linux/pagemap.h>
13 #include <linux/bio.h>
14 #include <linux/lzo.h>
15 #include <linux/refcount.h>
16 #include "messages.h"
17 #include "compression.h"
18 #include "ctree.h"
19 #include "super.h"
20 #include "btrfs_inode.h"
21
22 #define LZO_LEN 4
23
24 /*
25 * Btrfs LZO compression format
26 *
27 * Regular and inlined LZO compressed data extents consist of:
28 *
29 * 1. Header
30 * Fixed size. LZO_LEN (4) bytes long, LE32.
31 * Records the total size (including the header) of compressed data.
32 *
33 * 2. Segment(s)
34 * Variable size. Each segment includes one segment header, followed by data
35 * payload.
36 * One regular LZO compressed extent can have one or more segments.
37 * For inlined LZO compressed extent, only one segment is allowed.
38 * One segment represents at most one sector of uncompressed data.
39 *
40 * 2.1 Segment header
41 * Fixed size. LZO_LEN (4) bytes long, LE32.
42 * Records the total size of the segment (not including the header).
43 * Segment header never crosses sector boundary, thus it's possible to
44 * have at most 3 padding zeros at the end of the sector.
45 *
46 * 2.2 Data Payload
 * Variable size. The upper size limit is lzo1x_worst_compress(sectorsize),
 * which is 4419 for a 4KiB sectorsize.
49 *
50 * Example with 4K sectorsize:
51 * Page 1:
52 * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
53 * 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
54 * ...
55 * 0x0ff0 | SegHdr N | Data payload N ... |00|
56 * ^^ padding zeros
57 * Page 2:
58 * 0x1000 | SegHdr N+1| Data payload N+1 ... |
59 */
60
61 struct workspace {
62 void *mem;
63 void *buf; /* where decompressed data goes */
64 void *cbuf; /* where compressed data goes */
65 struct list_head list;
66 };
67
workspace_buf_length(const struct btrfs_fs_info * fs_info)68 static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info)
69 {
70 return lzo1x_worst_compress(fs_info->sectorsize);
71 }
workspace_cbuf_length(const struct btrfs_fs_info * fs_info)72 static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info)
73 {
74 return lzo1x_worst_compress(fs_info->sectorsize);
75 }
76
lzo_free_workspace(struct list_head * ws)77 void lzo_free_workspace(struct list_head *ws)
78 {
79 struct workspace *workspace = list_entry(ws, struct workspace, list);
80
81 kvfree(workspace->buf);
82 kvfree(workspace->cbuf);
83 kvfree(workspace->mem);
84 kfree(workspace);
85 }
86
lzo_alloc_workspace(struct btrfs_fs_info * fs_info)87 struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info)
88 {
89 struct workspace *workspace;
90
91 workspace = kzalloc_obj(*workspace);
92 if (!workspace)
93 return ERR_PTR(-ENOMEM);
94
95 workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
96 workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
97 workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
98 if (!workspace->mem || !workspace->buf || !workspace->cbuf)
99 goto fail;
100
101 INIT_LIST_HEAD(&workspace->list);
102
103 return &workspace->list;
104 fail:
105 lzo_free_workspace(&workspace->list);
106 return ERR_PTR(-ENOMEM);
107 }
108
/* Store @len at @buf as a LZO_LEN bytes long little endian value. */
static inline void write_compress_length(char *buf, size_t len)
{
	const __le32 le_len = cpu_to_le32(len);

	memcpy(buf, &le_len, LZO_LEN);
}
116
read_compress_length(const char * buf)117 static inline size_t read_compress_length(const char *buf)
118 {
119 __le32 dlen;
120
121 memcpy(&dlen, buf, LZO_LEN);
122 return le32_to_cpu(dlen);
123 }
124
125 /*
126 * Write data into @out_folio and queue it into @out_bio.
127 *
128 * Return 0 if everything is fine and @total_out will be increased.
129 * Return <0 for error.
130 *
131 * The @out_folio can be NULL after a full folio is queued.
132 * Thus the caller should check and allocate a new folio when needed.
133 */
write_and_queue_folio(struct bio * out_bio,struct folio ** out_folio,u32 * total_out,u32 write_len)134 static int write_and_queue_folio(struct bio *out_bio, struct folio **out_folio,
135 u32 *total_out, u32 write_len)
136 {
137 const u32 fsize = folio_size(*out_folio);
138 const u32 foffset = offset_in_folio(*out_folio, *total_out);
139
140 ASSERT(out_folio && *out_folio);
141 /* Should not cross folio boundary. */
142 ASSERT(foffset + write_len <= fsize);
143
144 /* We can not use bio_add_folio_nofail() which doesn't do any merge. */
145 if (!bio_add_folio(out_bio, *out_folio, write_len, foffset)) {
146 /*
147 * We have allocated a bio that havs BTRFS_MAX_COMPRESSED_PAGES
148 * vecs, and all ranges inside the same folio should have been
149 * merged. If bio_add_folio() still failed, that means we have
150 * reached the bvec limits.
151 *
152 * This should only happen at the beginning of a folio, and
153 * caller is responsible for releasing the folio, since it's
154 * not yet queued into the bio.
155 */
156 ASSERT(IS_ALIGNED(*total_out, fsize));
157 return -E2BIG;
158 }
159
160 *total_out += write_len;
161 /*
162 * The full folio has been filled and queued, reset @out_folio to NULL,
163 * so that error handling is fully handled by the bio.
164 */
165 if (IS_ALIGNED(*total_out, fsize))
166 *out_folio = NULL;
167 return 0;
168 }
169
170 /*
171 * Copy compressed data to bio.
172 *
173 * @out_bio: The bio that will contain all the compressed data.
174 * @compressed_data: The compressed data of this segment.
175 * @compressed_size: The size of the compressed data.
176 * @out_folio: The current output folio, will be updated if a new
177 * folio is allocated.
178 * @total_out: The total bytes of current output.
179 * @max_out: The maximum size of the compressed data.
180 *
181 * Will do:
182 *
183 * - Write a segment header into the destination
184 * - Copy the compressed buffer into the destination
185 * - Make sure we have enough space in the last sector to fit a segment header
186 * If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros.
187 * - If a full folio is filled, it will be queued into @out_bio, and @out_folio
188 * will be updated.
189 *
190 * Will allocate new pages when needed.
191 */
copy_compressed_data_to_bio(struct btrfs_fs_info * fs_info,struct bio * out_bio,const char * compressed_data,size_t compressed_size,struct folio ** out_folio,u32 * total_out,u32 max_out)192 static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info,
193 struct bio *out_bio,
194 const char *compressed_data,
195 size_t compressed_size,
196 struct folio **out_folio,
197 u32 *total_out, u32 max_out)
198 {
199 const u32 sectorsize = fs_info->sectorsize;
200 const u32 sectorsize_bits = fs_info->sectorsize_bits;
201 const u32 fsize = btrfs_min_folio_size(fs_info);
202 const u32 old_size = out_bio->bi_iter.bi_size;
203 u32 copy_start;
204 u32 sector_bytes_left;
205 char *kaddr;
206 int ret;
207
208 ASSERT(out_folio);
209
210 /* There should be at least a lzo header queued. */
211 ASSERT(old_size);
212 ASSERT(old_size == *total_out);
213
214 /*
215 * We never allow a segment header crossing sector boundary, previous
216 * run should ensure we have enough space left inside the sector.
217 */
218 ASSERT((old_size >> sectorsize_bits) == (old_size + LZO_LEN - 1) >> sectorsize_bits);
219
220 if (!*out_folio) {
221 *out_folio = btrfs_alloc_compr_folio(fs_info);
222 if (!*out_folio)
223 return -ENOMEM;
224 }
225
226 /* Write the segment header first. */
227 kaddr = kmap_local_folio(*out_folio, offset_in_folio(*out_folio, *total_out));
228 write_compress_length(kaddr, compressed_size);
229 kunmap_local(kaddr);
230 ret = write_and_queue_folio(out_bio, out_folio, total_out, LZO_LEN);
231 if (ret < 0)
232 return ret;
233
234 copy_start = *total_out;
235
236 /* Copy compressed data. */
237 while (*total_out - copy_start < compressed_size) {
238 u32 copy_len = min_t(u32, sectorsize - *total_out % sectorsize,
239 copy_start + compressed_size - *total_out);
240 u32 foffset = *total_out & (fsize - 1);
241
242 /* With the range copied, we're larger than the original range. */
243 if (((*total_out + copy_len) >> sectorsize_bits) >=
244 max_out >> sectorsize_bits)
245 return -E2BIG;
246
247 if (!*out_folio) {
248 *out_folio = btrfs_alloc_compr_folio(fs_info);
249 if (!*out_folio)
250 return -ENOMEM;
251 }
252
253 kaddr = kmap_local_folio(*out_folio, foffset);
254 memcpy(kaddr, compressed_data + *total_out - copy_start, copy_len);
255 kunmap_local(kaddr);
256 ret = write_and_queue_folio(out_bio, out_folio, total_out, copy_len);
257 if (ret < 0)
258 return ret;
259 }
260
261 /*
262 * Check if we can fit the next segment header into the remaining space
263 * of the sector.
264 */
265 sector_bytes_left = round_up(*total_out, sectorsize) - *total_out;
266 if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0)
267 return 0;
268
269 ASSERT(*out_folio);
270
271 /* The remaining size is not enough, pad it with zeros */
272 folio_zero_range(*out_folio, offset_in_folio(*out_folio, *total_out), sector_bytes_left);
273 return write_and_queue_folio(out_bio, out_folio, total_out, sector_bytes_left);
274 }
275
lzo_compress_bio(struct list_head * ws,struct compressed_bio * cb)276 int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb)
277 {
278 struct btrfs_inode *inode = cb->bbio.inode;
279 struct btrfs_fs_info *fs_info = inode->root->fs_info;
280 struct workspace *workspace = list_entry(ws, struct workspace, list);
281 struct bio *bio = &cb->bbio.bio;
282 const u64 start = cb->start;
283 const u32 len = cb->len;
284 const u32 sectorsize = fs_info->sectorsize;
285 const u32 min_folio_size = btrfs_min_folio_size(fs_info);
286 struct address_space *mapping = inode->vfs_inode.i_mapping;
287 struct folio *folio_in = NULL;
288 struct folio *folio_out = NULL;
289 char *sizes_ptr;
290 int ret = 0;
291 /* Points to the file offset of input data. */
292 u64 cur_in = start;
293 /* Points to the current output byte. */
294 u32 total_out = 0;
295
296 ASSERT(bio->bi_iter.bi_size == 0);
297 ASSERT(len);
298
299 folio_out = btrfs_alloc_compr_folio(fs_info);
300 if (!folio_out)
301 return -ENOMEM;
302
303 /* Queue a segment header first. */
304 ret = write_and_queue_folio(bio, &folio_out, &total_out, LZO_LEN);
305 /* The first header should not fail. */
306 ASSERT(ret == 0);
307
308 while (cur_in < start + len) {
309 char *data_in;
310 const u32 sectorsize_mask = sectorsize - 1;
311 u32 sector_off = (cur_in - start) & sectorsize_mask;
312 u32 in_len;
313 size_t out_len;
314
315 /* Get the input page first. */
316 if (!folio_in) {
317 ret = btrfs_compress_filemap_get_folio(mapping, cur_in, &folio_in);
318 if (ret < 0)
319 goto out;
320 }
321
322 /* Compress at most one sector of data each time. */
323 in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
324 ASSERT(in_len);
325 data_in = kmap_local_folio(folio_in, offset_in_folio(folio_in, cur_in));
326 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len,
327 workspace->mem);
328 kunmap_local(data_in);
329 if (unlikely(ret < 0)) {
330 /* lzo1x_1_compress never fails. */
331 ret = -EIO;
332 goto out;
333 }
334
335 ret = copy_compressed_data_to_bio(fs_info, bio, workspace->cbuf, out_len,
336 &folio_out, &total_out, len);
337 if (ret < 0)
338 goto out;
339
340 cur_in += in_len;
341
342 /*
343 * Check if we're making it bigger after two sectors. And if
344 * it is so, give up.
345 */
346 if (cur_in - start > sectorsize * 2 && cur_in - start < total_out) {
347 ret = -E2BIG;
348 goto out;
349 }
350
351 /* Check if we have reached input folio boundary. */
352 if (IS_ALIGNED(cur_in, min_folio_size)) {
353 folio_put(folio_in);
354 folio_in = NULL;
355 }
356 }
357 /*
358 * The last folio is already queued. Bio is responsible for freeing
359 * those folios now.
360 */
361 folio_out = NULL;
362
363 /* Store the size of all chunks of compressed data */
364 sizes_ptr = kmap_local_folio(bio_first_folio_all(bio), 0);
365 write_compress_length(sizes_ptr, total_out);
366 kunmap_local(sizes_ptr);
367 out:
368 /*
369 * We can only free the folio that has no part queued into the bio.
370 *
371 * As any folio that is already queued into bio will be released by
372 * the endio function of bio.
373 */
374 if (folio_out && IS_ALIGNED(total_out, min_folio_size)) {
375 btrfs_free_compr_folio(folio_out);
376 folio_out = NULL;
377 }
378 if (folio_in)
379 folio_put(folio_in);
380 return ret;
381 }
382
/*
 * Return the folio of the compressed bio that contains offset @cur_in.
 *
 * @cb:			The compressed bio being read.
 * @fi:			The folio iterator over the bio of @cb.
 * @cur_folio_index:	The index of the folio @fi currently points to,
 *			incremented when the iterator is advanced.
 * @cur_in:		The offset inside the compressed data.
 *
 * The iterator can only step forward by one folio per call.
 */
static struct folio *get_current_folio(struct compressed_bio *cb, struct folio_iter *fi,
				       u32 *cur_folio_index, u32 cur_in)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
	const u32 wanted_index = cur_in >> min_folio_shift;

	ASSERT(cur_folio_index);

	/* Still inside the folio the iterator points to. */
	if (wanted_index == *cur_folio_index)
		return fi->folio;

	/* Otherwise the offset must be inside the very next folio. */
	ASSERT(wanted_index == *cur_folio_index + 1);

	bio_next_folio(fi, &cb->bbio.bio);
	(*cur_folio_index)++;
	return fi->folio;
}
401
402 /*
403 * Copy the compressed segment payload into @dest.
404 *
405 * For the payload there will be no padding, just need to do page switching.
406 */
copy_compressed_segment(struct compressed_bio * cb,struct folio_iter * fi,u32 * cur_folio_index,char * dest,u32 len,u32 * cur_in)407 static void copy_compressed_segment(struct compressed_bio *cb,
408 struct folio_iter *fi, u32 *cur_folio_index,
409 char *dest, u32 len, u32 *cur_in)
410 {
411 u32 orig_in = *cur_in;
412
413 while (*cur_in < orig_in + len) {
414 struct folio *cur_folio = get_current_folio(cb, fi, cur_folio_index, *cur_in);
415 u32 copy_len;
416
417 ASSERT(cur_folio);
418 copy_len = min_t(u32, orig_in + len - *cur_in,
419 folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in));
420 ASSERT(copy_len);
421
422 memcpy_from_folio(dest + *cur_in - orig_in, cur_folio,
423 offset_in_folio(cur_folio, *cur_in), copy_len);
424
425 *cur_in += copy_len;
426 }
427 }
428
lzo_decompress_bio(struct list_head * ws,struct compressed_bio * cb)429 int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
430 {
431 struct workspace *workspace = list_entry(ws, struct workspace, list);
432 const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info;
433 const u32 sectorsize = fs_info->sectorsize;
434 struct folio_iter fi;
435 char *kaddr;
436 int ret;
437 /* Compressed data length, can be unaligned */
438 u32 len_in;
439 /* Offset inside the compressed data */
440 u32 cur_in = 0;
441 /* Bytes decompressed so far */
442 u32 cur_out = 0;
443 /* The current folio index number inside the bio. */
444 u32 cur_folio_index = 0;
445
446 bio_first_folio(&fi, &cb->bbio.bio, 0);
447 /* There must be a compressed folio and matches the sectorsize. */
448 if (unlikely(!fi.folio))
449 return -EINVAL;
450 ASSERT(folio_size(fi.folio) == sectorsize);
451 kaddr = kmap_local_folio(fi.folio, 0);
452 len_in = read_compress_length(kaddr);
453 kunmap_local(kaddr);
454 cur_in += LZO_LEN;
455
456 /*
457 * LZO header length check
458 *
459 * The total length should not exceed the maximum extent length,
460 * and all sectors should be used.
461 * If this happens, it means the compressed extent is corrupted.
462 */
463 if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
464 round_up(len_in, sectorsize) < cb->compressed_len)) {
465 struct btrfs_inode *inode = cb->bbio.inode;
466
467 btrfs_err(fs_info,
468 "lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u",
469 btrfs_root_id(inode->root), btrfs_ino(inode),
470 cb->start, len_in, cb->compressed_len);
471 return -EUCLEAN;
472 }
473
474 /* Go through each lzo segment */
475 while (cur_in < len_in) {
476 struct folio *cur_folio;
477 /* Length of the compressed segment */
478 u32 seg_len;
479 u32 sector_bytes_left;
480 size_t out_len = lzo1x_worst_compress(sectorsize);
481
482 /*
483 * We should always have enough space for one segment header
484 * inside current sector.
485 */
486 ASSERT(cur_in / sectorsize ==
487 (cur_in + LZO_LEN - 1) / sectorsize);
488 cur_folio = get_current_folio(cb, &fi, &cur_folio_index, cur_in);
489 ASSERT(cur_folio);
490 kaddr = kmap_local_folio(cur_folio, 0);
491 seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in));
492 kunmap_local(kaddr);
493 cur_in += LZO_LEN;
494
495 if (unlikely(seg_len > workspace_cbuf_length(fs_info))) {
496 struct btrfs_inode *inode = cb->bbio.inode;
497
498 /*
499 * seg_len shouldn't be larger than we have allocated
500 * for workspace->cbuf
501 */
502 btrfs_err(fs_info,
503 "lzo segment too big, root %llu inode %llu offset %llu len %u",
504 btrfs_root_id(inode->root), btrfs_ino(inode),
505 cb->start, seg_len);
506 return -EIO;
507 }
508
509 /* Copy the compressed segment payload into workspace */
510 copy_compressed_segment(cb, &fi, &cur_folio_index, workspace->cbuf,
511 seg_len, &cur_in);
512
513 /* Decompress the data */
514 ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
515 workspace->buf, &out_len);
516 if (unlikely(ret != LZO_E_OK)) {
517 struct btrfs_inode *inode = cb->bbio.inode;
518
519 btrfs_err(fs_info,
520 "lzo decompression failed, error %d root %llu inode %llu offset %llu",
521 ret, btrfs_root_id(inode->root), btrfs_ino(inode),
522 cb->start);
523 return -EIO;
524 }
525
526 /* Copy the data into inode pages */
527 ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out);
528 cur_out += out_len;
529
530 /* All data read, exit */
531 if (ret == 0)
532 return 0;
533 ret = 0;
534
535 /* Check if the sector has enough space for a segment header */
536 sector_bytes_left = sectorsize - (cur_in % sectorsize);
537 if (sector_bytes_left >= LZO_LEN)
538 continue;
539
540 /* Skip the padding zeros */
541 cur_in += sector_bytes_left;
542 }
543
544 return 0;
545 }
546
/*
 * Decompress a single-segment LZO buffer into @dest_folio.
 *
 * @ws:		The list member of the workspace to use.
 * @data_in:	The compressed data: LZO header, segment header, payload.
 * @dest_folio:	The folio receiving the decompressed data.
 * @dest_pgoff:	The offset inside @dest_folio to copy the data to.
 * @srclen:	The size of @data_in in bytes.
 * @destlen:	The number of decompressed bytes the caller expects.
 *
 * Return 0 on success. Return -EUCLEAN if @srclen or either of the two
 * headers is inconsistent, and -EIO if the payload fails to decompress or
 * yields less than @destlen bytes. In the latter case the missing part of
 * the destination range is zeroed.
 */
int lzo_decompress(struct list_head *ws, const u8 *data_in,
		   struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		   size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio);
	const u32 sectorsize = fs_info->sectorsize;
	size_t in_len;
	size_t out_len;
	size_t max_segment_len = workspace_buf_length(fs_info);
	int ret;

	/*
	 * Both the LZO header and the segment header are read below, so the
	 * source must hold at least two LZO_LEN sized fields. Anything
	 * shorter would make the second read run past the source buffer.
	 */
	if (unlikely(srclen < LZO_LEN * 2 || srclen > max_segment_len + LZO_LEN * 2))
		return -EUCLEAN;

	/* The LZO header records the total size, including itself. */
	in_len = read_compress_length(data_in);
	if (unlikely(in_len != srclen))
		return -EUCLEAN;
	data_in += LZO_LEN;

	/* The segment header records the payload size, excluding both headers. */
	in_len = read_compress_length(data_in);
	if (unlikely(in_len != srclen - LZO_LEN * 2))
		return -EUCLEAN;
	data_in += LZO_LEN;

	out_len = sectorsize;
	ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
	if (unlikely(ret != LZO_E_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(fs_info,
		"lzo decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	ASSERT(out_len <= sectorsize);
	memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, out_len);
	/* Early end, considered as an error. */
	if (unlikely(out_len < destlen)) {
		folio_zero_range(dest_folio, dest_pgoff + out_len, destlen - out_len);
		return -EIO;
	}

	return 0;
}
594
595 const struct btrfs_compress_levels btrfs_lzo_compress = {
596 .max_level = 1,
597 .default_level = 1,
598 };
599