1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2016-present, Facebook, Inc.
4 * All rights reserved.
5 *
6 */
7
8 #include <linux/bio.h>
9 #include <linux/bitmap.h>
10 #include <linux/err.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/mm.h>
14 #include <linux/sched/mm.h>
15 #include <linux/pagemap.h>
16 #include <linux/refcount.h>
17 #include <linux/sched.h>
18 #include <linux/slab.h>
19 #include <linux/zstd.h>
20 #include "misc.h"
21 #include "fs.h"
22 #include "btrfs_inode.h"
23 #include "compression.h"
24 #include "super.h"
25
/* Upper bound (log2) on the zstd window size used for btrfs data. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input, in bytes, that is expected to be handed to zstd at once. */
#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
/* Parenthesized so the unary minus can never bind to a neighbouring token. */
#define ZSTD_BTRFS_MIN_LEVEL (-15)
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
33
zstd_get_btrfs_parameters(int level,size_t src_len)34 static zstd_parameters zstd_get_btrfs_parameters(int level,
35 size_t src_len)
36 {
37 zstd_parameters params = zstd_get_params(level, src_len);
38
39 if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
40 params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
41 WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
42 return params;
43 }
44
45 struct workspace {
46 void *mem;
47 size_t size;
48 char *buf;
49 int level;
50 int req_level;
51 unsigned long last_used; /* jiffies */
52 struct list_head list;
53 struct list_head lru_list;
54 zstd_in_buffer in_buf;
55 zstd_out_buffer out_buf;
56 zstd_parameters params;
57 };
58
59 /*
60 * Zstd Workspace Management
61 *
62 * Zstd workspaces have different memory requirements depending on the level.
63 * The zstd workspaces are managed by having individual lists for each level
64 * and a global lru. Forward progress is maintained by protecting a max level
65 * workspace.
66 *
67 * Getting a workspace is done by using the bitmap to identify the levels that
68 * have available workspaces and scans up. This lets us recycle higher level
69 * workspaces because of the monotonic memory guarantee. A workspace's
70 * last_used is only updated if it is being used by the corresponding memory
71 * level. Putting a workspace involves adding it back to the appropriate places
72 * and adding it back to the lru if necessary.
73 *
74 * A timer is used to reclaim workspaces if they have not been used for
75 * ZSTD_BTRFS_RECLAIM_JIFFIES. This helps keep only active workspaces around.
76 * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
77 */
78
79 struct zstd_workspace_manager {
80 spinlock_t lock;
81 struct list_head lru_list;
82 struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
83 unsigned long active_map;
84 wait_queue_head_t wait;
85 struct timer_list timer;
86 };
87
88 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
89
list_to_workspace(struct list_head * list)90 static inline struct workspace *list_to_workspace(struct list_head *list)
91 {
92 return container_of(list, struct workspace, list);
93 }
94
/*
 * Turn a compression level into an index for the per-level arrays.
 *
 * Level 1 and everything below it (0 and the negative levels) map to index 0,
 * any higher level maps to level - 1.
 */
static inline int clip_level(int level)
{
	if (level <= 1)
		return 0;

	return level - 1;
}
99
100 /*
101 * Timer callback to free unused workspaces.
102 *
103 * @t: timer
104 *
105 * This scans the lru_list and attempts to reclaim any workspace that hasn't
106 * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
107 *
108 * The context is softirq and does not need the _bh locking primitives.
109 */
zstd_reclaim_timer_fn(struct timer_list * timer)110 static void zstd_reclaim_timer_fn(struct timer_list *timer)
111 {
112 struct zstd_workspace_manager *zwsm =
113 container_of(timer, struct zstd_workspace_manager, timer);
114 unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
115 struct list_head *pos, *next;
116
117 spin_lock(&zwsm->lock);
118
119 if (list_empty(&zwsm->lru_list)) {
120 spin_unlock(&zwsm->lock);
121 return;
122 }
123
124 list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
125 struct workspace *victim = container_of(pos, struct workspace,
126 lru_list);
127 int level;
128
129 if (time_after(victim->last_used, reclaim_threshold))
130 break;
131
132 /* workspace is in use */
133 if (victim->req_level)
134 continue;
135
136 level = victim->level;
137 list_del(&victim->lru_list);
138 list_del(&victim->list);
139 zstd_free_workspace(&victim->list);
140
141 if (list_empty(&zwsm->idle_ws[level]))
142 clear_bit(level, &zwsm->active_map);
143
144 }
145
146 if (!list_empty(&zwsm->lru_list))
147 mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
148
149 spin_unlock(&zwsm->lock);
150 }
151
152 /*
153 * Calculate monotonic memory bounds.
154 *
155 * It is possible based on the level configurations that a higher level
156 * workspace uses less memory than a lower level workspace. In order to reuse
157 * workspaces, this must be made a monotonic relationship. This precomputes
158 * the required memory for each level and enforces the monotonicity between
159 * level and memory required.
160 */
zstd_calc_ws_mem_sizes(void)161 static void zstd_calc_ws_mem_sizes(void)
162 {
163 size_t max_size = 0;
164 int level;
165
166 for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
167 if (level == 0)
168 continue;
169 zstd_parameters params =
170 zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
171 size_t level_size =
172 max_t(size_t,
173 zstd_cstream_workspace_bound(¶ms.cParams),
174 zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
175
176 max_size = max_t(size_t, max_size, level_size);
177 /* Use level 1 workspace size for all the fast mode negative levels. */
178 zstd_ws_mem_sizes[clip_level(level)] = max_size;
179 }
180 }
181
zstd_alloc_workspace_manager(struct btrfs_fs_info * fs_info)182 int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
183 {
184 struct zstd_workspace_manager *zwsm;
185 struct list_head *ws;
186
187 ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
188 zwsm = kzalloc_obj(*zwsm);
189 if (!zwsm)
190 return -ENOMEM;
191 zstd_calc_ws_mem_sizes();
192 spin_lock_init(&zwsm->lock);
193 init_waitqueue_head(&zwsm->wait);
194 timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);
195
196 INIT_LIST_HEAD(&zwsm->lru_list);
197 for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
198 INIT_LIST_HEAD(&zwsm->idle_ws[i]);
199 fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;
200
201 ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
202 if (IS_ERR(ws)) {
203 btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
204 } else {
205 set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
206 list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
207 }
208 return 0;
209 }
210
zstd_free_workspace_manager(struct btrfs_fs_info * fs_info)211 void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
212 {
213 struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
214 struct workspace *workspace;
215
216 if (!zwsm)
217 return;
218 fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
219 spin_lock_bh(&zwsm->lock);
220 for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
221 while (!list_empty(&zwsm->idle_ws[i])) {
222 workspace = container_of(zwsm->idle_ws[i].next,
223 struct workspace, list);
224 list_del(&workspace->list);
225 list_del(&workspace->lru_list);
226 zstd_free_workspace(&workspace->list);
227 }
228 }
229 spin_unlock_bh(&zwsm->lock);
230 timer_delete_sync(&zwsm->timer);
231 kfree(zwsm);
232 }
233
234 /*
235 * Find workspace for given level.
236 *
237 * @level: compression level
238 *
239 * This iterates over the set bits in the active_map beginning at the requested
240 * compression level. This lets us utilize already allocated workspaces before
241 * allocating a new one. If the workspace is of a larger size, it is used, but
242 * the place in the lru_list and last_used times are not updated. This is to
243 * offer the opportunity to reclaim the workspace in favor of allocating an
244 * appropriately sized one in the future.
245 */
zstd_find_workspace(struct btrfs_fs_info * fs_info,int level)246 static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
247 {
248 struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
249 struct list_head *ws;
250 struct workspace *workspace;
251 int i = clip_level(level);
252
253 ASSERT(zwsm);
254 spin_lock_bh(&zwsm->lock);
255 for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
256 if (!list_empty(&zwsm->idle_ws[i])) {
257 ws = zwsm->idle_ws[i].next;
258 workspace = list_to_workspace(ws);
259 list_del_init(ws);
260 /* keep its place if it's a lower level using this */
261 workspace->req_level = level;
262 if (clip_level(level) == workspace->level)
263 list_del(&workspace->lru_list);
264 if (list_empty(&zwsm->idle_ws[i]))
265 clear_bit(i, &zwsm->active_map);
266 spin_unlock_bh(&zwsm->lock);
267 return ws;
268 }
269 }
270 spin_unlock_bh(&zwsm->lock);
271
272 return NULL;
273 }
274
275 /*
276 * Zstd get_workspace for level.
277 *
278 * @level: compression level
279 *
280 * If @level is 0, then any compression level can be used. Therefore, we begin
281 * scanning from 1. We first scan through possible workspaces and then after
282 * attempt to allocate a new workspace. If we fail to allocate one due to
283 * memory pressure, go to sleep waiting for the max level workspace to free up.
284 */
zstd_get_workspace(struct btrfs_fs_info * fs_info,int level)285 struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
286 {
287 struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
288 struct list_head *ws;
289 unsigned int nofs_flag;
290
291 ASSERT(zwsm);
292
293 /* level == 0 means we can use any workspace */
294 if (!level)
295 level = 1;
296
297 again:
298 ws = zstd_find_workspace(fs_info, level);
299 if (ws)
300 return ws;
301
302 nofs_flag = memalloc_nofs_save();
303 ws = zstd_alloc_workspace(fs_info, level);
304 memalloc_nofs_restore(nofs_flag);
305
306 if (IS_ERR(ws)) {
307 DEFINE_WAIT(wait);
308
309 prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
310 schedule();
311 finish_wait(&zwsm->wait, &wait);
312
313 goto again;
314 }
315
316 return ws;
317 }
318
319 /*
320 * Zstd put_workspace.
321 *
322 * @ws: list_head for the workspace
323 *
324 * When putting back a workspace, we only need to update the LRU if we are of
325 * the requested compression level. Here is where we continue to protect the
326 * max level workspace or update last_used accordingly. If the reclaim timer
327 * isn't set, it is also set here. Only the max level workspace tries and wakes
328 * up waiting workspaces.
329 */
zstd_put_workspace(struct btrfs_fs_info * fs_info,struct list_head * ws)330 void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws)
331 {
332 struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
333 struct workspace *workspace = list_to_workspace(ws);
334
335 ASSERT(zwsm);
336 spin_lock_bh(&zwsm->lock);
337
338 /* A node is only taken off the lru if we are the corresponding level */
339 if (clip_level(workspace->req_level) == workspace->level) {
340 /* Hide a max level workspace from reclaim */
341 if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
342 INIT_LIST_HEAD(&workspace->lru_list);
343 } else {
344 workspace->last_used = jiffies;
345 list_add(&workspace->lru_list, &zwsm->lru_list);
346 if (!timer_pending(&zwsm->timer))
347 mod_timer(&zwsm->timer,
348 jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
349 }
350 }
351
352 set_bit(workspace->level, &zwsm->active_map);
353 list_add(&workspace->list, &zwsm->idle_ws[workspace->level]);
354 workspace->req_level = 0;
355
356 spin_unlock_bh(&zwsm->lock);
357
358 if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
359 cond_wake_up(&zwsm->wait);
360 }
361
zstd_free_workspace(struct list_head * ws)362 void zstd_free_workspace(struct list_head *ws)
363 {
364 struct workspace *workspace = list_entry(ws, struct workspace, list);
365
366 kvfree(workspace->mem);
367 kfree(workspace->buf);
368 kfree(workspace);
369 }
370
zstd_alloc_workspace(struct btrfs_fs_info * fs_info,int level)371 struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level)
372 {
373 const u32 blocksize = fs_info->sectorsize;
374 struct workspace *workspace;
375
376 workspace = kzalloc_obj(*workspace);
377 if (!workspace)
378 return ERR_PTR(-ENOMEM);
379
380 /* Use level 1 workspace size for all the fast mode negative levels. */
381 workspace->size = zstd_ws_mem_sizes[clip_level(level)];
382 workspace->level = clip_level(level);
383 workspace->req_level = level;
384 workspace->last_used = jiffies;
385 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
386 workspace->buf = kmalloc(blocksize, GFP_KERNEL);
387 if (!workspace->mem || !workspace->buf)
388 goto fail;
389
390 INIT_LIST_HEAD(&workspace->list);
391 INIT_LIST_HEAD(&workspace->lru_list);
392
393 return &workspace->list;
394 fail:
395 zstd_free_workspace(&workspace->list);
396 return ERR_PTR(-ENOMEM);
397 }
398
zstd_compress_bio(struct list_head * ws,struct compressed_bio * cb)399 int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb)
400 {
401 struct btrfs_inode *inode = cb->bbio.inode;
402 struct btrfs_fs_info *fs_info = inode->root->fs_info;
403 struct workspace *workspace = list_entry(ws, struct workspace, list);
404 struct address_space *mapping = inode->vfs_inode.i_mapping;
405 struct bio *bio = &cb->bbio.bio;
406 zstd_cstream *stream;
407 int ret = 0;
408 /* The current folio to read. */
409 struct folio *in_folio = NULL;
410 /* The current folio to write to. */
411 struct folio *out_folio = NULL;
412 unsigned long tot_in = 0;
413 unsigned long tot_out = 0;
414 const u64 start = cb->start;
415 const u32 len = cb->len;
416 const u64 end = start + len;
417 const u32 blocksize = fs_info->sectorsize;
418 const u32 min_folio_size = btrfs_min_folio_size(fs_info);
419
420 workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
421
422 /* Initialize the stream. */
423 stream = zstd_init_cstream(&workspace->params, len, workspace->mem, workspace->size);
424 if (unlikely(!stream)) {
425 btrfs_err(fs_info,
426 "zstd compression init level %d failed, root %llu inode %llu offset %llu",
427 workspace->req_level, btrfs_root_id(inode->root),
428 btrfs_ino(inode), start);
429 ret = -EIO;
430 goto out;
431 }
432
433 /* Map in the first page of input data. */
434 ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
435 if (ret < 0)
436 goto out;
437 workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
438 workspace->in_buf.pos = 0;
439 workspace->in_buf.size = btrfs_calc_input_length(in_folio, end, start);
440
441 /* Allocate and map in the output buffer. */
442 out_folio = btrfs_alloc_compr_folio(fs_info);
443 if (out_folio == NULL) {
444 ret = -ENOMEM;
445 goto out;
446 }
447 workspace->out_buf.dst = folio_address(out_folio);
448 workspace->out_buf.pos = 0;
449 workspace->out_buf.size = min_folio_size;
450
451 while (1) {
452 size_t ret2;
453
454 ret2 = zstd_compress_stream(stream, &workspace->out_buf, &workspace->in_buf);
455 if (unlikely(zstd_is_error(ret2))) {
456 btrfs_warn(fs_info,
457 "zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
458 workspace->req_level, zstd_get_error_code(ret2),
459 btrfs_root_id(inode->root), btrfs_ino(inode),
460 start + tot_in);
461 ret = -EIO;
462 goto out;
463 }
464
465 /* Check to see if we are making it bigger. */
466 if (tot_in + workspace->in_buf.pos > blocksize * 2 &&
467 tot_in + workspace->in_buf.pos < tot_out + workspace->out_buf.pos) {
468 ret = -E2BIG;
469 goto out;
470 }
471
472 /* Check if we need more output space. */
473 if (workspace->out_buf.pos >= workspace->out_buf.size) {
474 tot_out += min_folio_size;
475 if (tot_out >= len) {
476 ret = -E2BIG;
477 goto out;
478 }
479 /* Queue the current foliot into the bio. */
480 if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
481 ret = -E2BIG;
482 goto out;
483 }
484
485 out_folio = btrfs_alloc_compr_folio(fs_info);
486 if (out_folio == NULL) {
487 ret = -ENOMEM;
488 goto out;
489 }
490 workspace->out_buf.dst = folio_address(out_folio);
491 workspace->out_buf.pos = 0;
492 workspace->out_buf.size = min_folio_size;
493 }
494
495 /* We've reached the end of the input. */
496 if (tot_in + workspace->in_buf.pos >= len) {
497 tot_in += workspace->in_buf.pos;
498 break;
499 }
500
501 /* Check if we need more input. */
502 if (workspace->in_buf.pos >= workspace->in_buf.size) {
503 u64 cur;
504
505 tot_in += workspace->in_buf.size;
506 cur = start + tot_in;
507
508 kunmap_local(workspace->in_buf.src);
509 workspace->in_buf.src = NULL;
510 folio_put(in_folio);
511
512 ret = btrfs_compress_filemap_get_folio(mapping, cur, &in_folio);
513 if (ret < 0)
514 goto out;
515 workspace->in_buf.src = kmap_local_folio(in_folio,
516 offset_in_folio(in_folio, cur));
517 workspace->in_buf.pos = 0;
518 workspace->in_buf.size = btrfs_calc_input_length(in_folio, end, cur);
519 }
520 }
521
522 while (1) {
523 size_t ret2;
524
525 ret2 = zstd_end_stream(stream, &workspace->out_buf);
526 if (unlikely(zstd_is_error(ret2))) {
527 btrfs_err(fs_info,
528 "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
529 workspace->req_level, zstd_get_error_code(ret2),
530 btrfs_root_id(inode->root), btrfs_ino(inode),
531 start + tot_in);
532 ret = -EIO;
533 goto out;
534 }
535 /* Queue the remaining part of the output folio into bio. */
536 if (ret2 == 0) {
537 tot_out += workspace->out_buf.pos;
538 if (tot_out >= len) {
539 ret = -E2BIG;
540 goto out;
541 }
542 if (!bio_add_folio(bio, out_folio, workspace->out_buf.pos, 0)) {
543 ret = -E2BIG;
544 goto out;
545 }
546 out_folio = NULL;
547 break;
548 }
549 tot_out += min_folio_size;
550 if (tot_out >= len) {
551 ret = -E2BIG;
552 goto out;
553 }
554 if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
555 ret = -E2BIG;
556 goto out;
557 }
558 out_folio = btrfs_alloc_compr_folio(fs_info);
559 if (out_folio == NULL) {
560 ret = -ENOMEM;
561 goto out;
562 }
563 workspace->out_buf.dst = folio_address(out_folio);
564 workspace->out_buf.pos = 0;
565 workspace->out_buf.size = min_folio_size;
566 }
567
568 if (tot_out >= tot_in) {
569 ret = -E2BIG;
570 goto out;
571 }
572
573 ret = 0;
574 ASSERT(tot_out == bio->bi_iter.bi_size);
575 out:
576 if (out_folio)
577 btrfs_free_compr_folio(out_folio);
578 if (workspace->in_buf.src) {
579 kunmap_local(workspace->in_buf.src);
580 folio_put(in_folio);
581 }
582 return ret;
583 }
584
zstd_decompress_bio(struct list_head * ws,struct compressed_bio * cb)585 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
586 {
587 struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
588 struct workspace *workspace = list_entry(ws, struct workspace, list);
589 struct folio_iter fi;
590 size_t srclen = cb->compressed_len;
591 zstd_dstream *stream;
592 int ret = 0;
593 const u32 blocksize = fs_info->sectorsize;
594 const unsigned int min_folio_size = btrfs_min_folio_size(fs_info);
595 unsigned long folio_in_index = 0;
596 unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
597 unsigned long buf_start;
598 unsigned long total_out = 0;
599
600 bio_first_folio(&fi, &cb->bbio.bio, 0);
601 if (unlikely(!fi.folio))
602 return -EINVAL;
603 ASSERT(folio_size(fi.folio) == blocksize);
604
605 stream = zstd_init_dstream(
606 ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
607 if (unlikely(!stream)) {
608 struct btrfs_inode *inode = cb->bbio.inode;
609
610 btrfs_err(inode->root->fs_info,
611 "zstd decompression init failed, root %llu inode %llu offset %llu",
612 btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
613 ret = -EIO;
614 goto done;
615 }
616
617 workspace->in_buf.src = kmap_local_folio(fi.folio, 0);
618 workspace->in_buf.pos = 0;
619 workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
620
621 workspace->out_buf.dst = workspace->buf;
622 workspace->out_buf.pos = 0;
623 workspace->out_buf.size = blocksize;
624
625 while (1) {
626 size_t ret2;
627
628 ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
629 &workspace->in_buf);
630 if (unlikely(zstd_is_error(ret2))) {
631 struct btrfs_inode *inode = cb->bbio.inode;
632
633 btrfs_err(inode->root->fs_info,
634 "zstd decompression failed, error %d root %llu inode %llu offset %llu",
635 zstd_get_error_code(ret2), btrfs_root_id(inode->root),
636 btrfs_ino(inode), cb->start);
637 ret = -EIO;
638 goto done;
639 }
640 buf_start = total_out;
641 total_out += workspace->out_buf.pos;
642 workspace->out_buf.pos = 0;
643
644 ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
645 total_out - buf_start, cb, buf_start);
646 if (ret == 0)
647 break;
648
649 if (workspace->in_buf.pos >= srclen)
650 break;
651
652 /* Check if we've hit the end of a frame */
653 if (ret2 == 0)
654 break;
655
656 if (workspace->in_buf.pos == workspace->in_buf.size) {
657 kunmap_local(workspace->in_buf.src);
658 folio_in_index++;
659 if (unlikely(folio_in_index >= total_folios_in)) {
660 workspace->in_buf.src = NULL;
661 ret = -EIO;
662 goto done;
663 }
664 srclen -= min_folio_size;
665 bio_next_folio(&fi, &cb->bbio.bio);
666 ASSERT(fi.folio);
667 workspace->in_buf.src = kmap_local_folio(fi.folio, 0);
668 workspace->in_buf.pos = 0;
669 workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
670 }
671 }
672 ret = 0;
673 done:
674 if (workspace->in_buf.src)
675 kunmap_local(workspace->in_buf.src);
676 return ret;
677 }
678
/*
 * Decompress a single buffer into a folio.
 *
 * @ws:         list_head of the workspace to use
 * @data_in:    compressed data
 * @dest_folio: folio receiving the decompressed data
 * @dest_pgoff: offset into @dest_folio where the data is written
 * @srclen:     number of bytes at @data_in
 * @destlen:    number of decompressed bytes the caller expects
 *
 * Decompression happens in a single zstd call with an output buffer of one
 * sector. If fewer than @destlen bytes are produced, the rest of the
 * destination range is zeroed.
 *
 * Return: 0 on success, -EIO if zstd fails or produces fewer than @destlen
 * bytes.
 */
int zstd_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
	const u32 sectorsize = fs_info->sectorsize;
	zstd_dstream *stream;
	/* zstd results are size_t, keep them apart from the errno in ret. */
	size_t ret2;
	int ret = 0;
	unsigned long to_copy = 0;

	stream = zstd_init_dstream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression init failed, root %llu inode %llu offset %llu",
			  btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = sectorsize;

	/*
	 * Since both input and output buffers should not exceed one sector,
	 * one call should end the decompression.
	 */
	ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
	if (unlikely(zstd_is_error(ret2))) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
			  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
			  btrfs_ino(inode), folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}
	to_copy = workspace->out_buf.pos;
	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
	/*
	 * @data_in belongs to the caller. Do not leave it behind in the
	 * workspace, where the bio based paths would take it for a mapping
	 * they still have to undo.
	 */
	workspace->in_buf.src = NULL;

	/* Error or early end. */
	if (unlikely(to_copy < destlen)) {
		ret = -EIO;
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	}
	return ret;
}
735
736 const struct btrfs_compress_levels btrfs_zstd_compress = {
737 .min_level = ZSTD_BTRFS_MIN_LEVEL,
738 .max_level = ZSTD_BTRFS_MAX_LEVEL,
739 .default_level = ZSTD_BTRFS_DEFAULT_LEVEL,
740 };
741