xref: /linux/fs/btrfs/zstd.c (revision 26902be0cd0997b34ef13593e35ef3501a3c70b5)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  */
7 
8 #include <linux/bio.h>
9 #include <linux/bitmap.h>
10 #include <linux/err.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/mm.h>
14 #include <linux/sched/mm.h>
15 #include <linux/pagemap.h>
16 #include <linux/refcount.h>
17 #include <linux/sched.h>
18 #include <linux/slab.h>
19 #include <linux/zstd.h>
20 #include "misc.h"
21 #include "fs.h"
22 #include "btrfs_inode.h"
23 #include "compression.h"
24 #include "super.h"
25 
/* Cap applied to the window log that zstd suggests for a level. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/*
 * 1 << 17 bytes (128 KiB).  Workspaces are sized for this much input and a
 * larger source length triggers a warning when parameters are fetched.
 */
#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
/* Level defaults and bounds, exported through btrfs_zstd_compress. */
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MIN_LEVEL -15
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
33 
34 static zstd_parameters zstd_get_btrfs_parameters(int level,
35 						 size_t src_len)
36 {
37 	zstd_parameters params = zstd_get_params(level, src_len);
38 
39 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
40 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
41 	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
42 	return params;
43 }
44 
/*
 * Per-user zstd state.  One workspace serves a single compression or
 * decompression at a time and is recycled through the workspace manager.
 */
struct workspace {
	/* Memory handed to the zstd stream init functions, @size bytes. */
	void *mem;
	size_t size;
	/* Block sized bounce buffer that receives decompressed output. */
	char *buf;
	/* Slot this workspace was sized for, i.e. clip_level() of its level. */
	int level;
	/* Level asked for by the current user, reset to 0 when put back. */
	int req_level;
	unsigned long last_used; /* jiffies */
	/* Link on the manager's idle_ws[level] list while idle. */
	struct list_head list;
	/* Link on the manager's lru_list, used by the reclaim timer. */
	struct list_head lru_list;
	/* Stream positions for the operation in progress. */
	zstd_in_buffer in_buf;
	zstd_out_buffer out_buf;
	/* Compression parameters, refreshed at the start of each compression. */
	zstd_parameters params;
};
58 
59 /*
60  * Zstd Workspace Management
61  *
62  * Zstd workspaces have different memory requirements depending on the level.
63  * The zstd workspaces are managed by having individual lists for each level
64  * and a global lru.  Forward progress is maintained by protecting a max level
65  * workspace.
66  *
 * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces and scanning up.  This lets us recycle higher level
69  * workspaces because of the monotonic memory guarantee.  A workspace's
70  * last_used is only updated if it is being used by the corresponding memory
71  * level.  Putting a workspace involves adding it back to the appropriate places
72  * and adding it back to the lru if necessary.
73  *
74  * A timer is used to reclaim workspaces if they have not been used for
75  * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
76  * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
77  */
78 
/* Per filesystem bookkeeping for idle zstd workspaces. */
struct zstd_workspace_manager {
	/* Taken around every access to the lists and active_map below. */
	spinlock_t lock;
	/* Idle workspaces eligible for reclaim, most recently put at the head. */
	struct list_head lru_list;
	/* Idle workspaces, one list per slot (see clip_level()). */
	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
	/* Bit i is set when idle_ws[i] may hold a workspace. */
	unsigned long active_map;
	/* Tasks sleeping in zstd_get_workspace() after a failed allocation. */
	wait_queue_head_t wait;
	/* Runs zstd_reclaim_timer_fn() to free workspaces that went unused. */
	struct timer_list timer;
};
87 
/*
 * Workspace memory size per slot, filled in by zstd_calc_ws_mem_sizes() so that
 * a higher slot never needs less memory than a lower one.
 */
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
89 
90 static inline struct workspace *list_to_workspace(struct list_head *list)
91 {
92 	return container_of(list, struct workspace, list);
93 }
94 
/*
 * Map a compression level to a 0-based workspace slot.
 *
 * Levels above 1 map to level - 1.  Level 1, level 0 and all negative levels
 * share slot 0.
 */
static inline int clip_level(int level)
{
	const int slot = level - 1;

	return slot > 0 ? slot : 0;
}
99 
100 /*
101  * Timer callback to free unused workspaces.
102  *
103  * @t: timer
104  *
105  * This scans the lru_list and attempts to reclaim any workspace that hasn't
106  * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
107  *
108  * The context is softirq and does not need the _bh locking primitives.
109  */
110 static void zstd_reclaim_timer_fn(struct timer_list *timer)
111 {
112 	struct zstd_workspace_manager *zwsm =
113 		container_of(timer, struct zstd_workspace_manager, timer);
114 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
115 	struct list_head *pos, *next;
116 
117 	spin_lock(&zwsm->lock);
118 
119 	if (list_empty(&zwsm->lru_list)) {
120 		spin_unlock(&zwsm->lock);
121 		return;
122 	}
123 
124 	list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
125 		struct workspace *victim = container_of(pos, struct workspace,
126 							lru_list);
127 		int level;
128 
129 		if (time_after(victim->last_used, reclaim_threshold))
130 			break;
131 
132 		/* workspace is in use */
133 		if (victim->req_level)
134 			continue;
135 
136 		level = victim->level;
137 		list_del(&victim->lru_list);
138 		list_del(&victim->list);
139 		zstd_free_workspace(&victim->list);
140 
141 		if (list_empty(&zwsm->idle_ws[level]))
142 			clear_bit(level, &zwsm->active_map);
143 
144 	}
145 
146 	if (!list_empty(&zwsm->lru_list))
147 		mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
148 
149 	spin_unlock(&zwsm->lock);
150 }
151 
152 /*
153  * Calculate monotonic memory bounds.
154  *
155  * It is possible based on the level configurations that a higher level
156  * workspace uses less memory than a lower level workspace.  In order to reuse
157  * workspaces, this must be made a monotonic relationship.  This precomputes
158  * the required memory for each level and enforces the monotonicity between
159  * level and memory required.
160  */
161 static void zstd_calc_ws_mem_sizes(void)
162 {
163 	size_t max_size = 0;
164 	int level;
165 
166 	for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
167 		if (level == 0)
168 			continue;
169 		zstd_parameters params =
170 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
171 		size_t level_size =
172 			max_t(size_t,
173 			      zstd_cstream_workspace_bound(&params.cParams),
174 			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
175 
176 		max_size = max_t(size_t, max_size, level_size);
177 		/* Use level 1 workspace size for all the fast mode negative levels. */
178 		zstd_ws_mem_sizes[clip_level(level)] = max_size;
179 	}
180 }
181 
182 int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
183 {
184 	struct zstd_workspace_manager *zwsm;
185 	struct list_head *ws;
186 
187 	ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
188 	zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL);
189 	if (!zwsm)
190 		return -ENOMEM;
191 	zstd_calc_ws_mem_sizes();
192 	spin_lock_init(&zwsm->lock);
193 	init_waitqueue_head(&zwsm->wait);
194 	timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);
195 
196 	INIT_LIST_HEAD(&zwsm->lru_list);
197 	for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
198 		INIT_LIST_HEAD(&zwsm->idle_ws[i]);
199 	fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;
200 
201 	ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
202 	if (IS_ERR(ws)) {
203 		btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
204 	} else {
205 		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
206 		list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
207 	}
208 	return 0;
209 }
210 
211 void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
212 {
213 	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
214 	struct workspace *workspace;
215 
216 	if (!zwsm)
217 		return;
218 	fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
219 	spin_lock_bh(&zwsm->lock);
220 	for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
221 		while (!list_empty(&zwsm->idle_ws[i])) {
222 			workspace = container_of(zwsm->idle_ws[i].next,
223 						 struct workspace, list);
224 			list_del(&workspace->list);
225 			list_del(&workspace->lru_list);
226 			zstd_free_workspace(&workspace->list);
227 		}
228 	}
229 	spin_unlock_bh(&zwsm->lock);
230 	timer_delete_sync(&zwsm->timer);
231 	kfree(zwsm);
232 }
233 
234 /*
235  * Find workspace for given level.
236  *
237  * @level: compression level
238  *
239  * This iterates over the set bits in the active_map beginning at the requested
240  * compression level.  This lets us utilize already allocated workspaces before
241  * allocating a new one.  If the workspace is of a larger size, it is used, but
242  * the place in the lru_list and last_used times are not updated.  This is to
243  * offer the opportunity to reclaim the workspace in favor of allocating an
244  * appropriately sized one in the future.
245  */
246 static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
247 {
248 	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
249 	struct list_head *ws;
250 	struct workspace *workspace;
251 	int i = clip_level(level);
252 
253 	ASSERT(zwsm);
254 	spin_lock_bh(&zwsm->lock);
255 	for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
256 		if (!list_empty(&zwsm->idle_ws[i])) {
257 			ws = zwsm->idle_ws[i].next;
258 			workspace = list_to_workspace(ws);
259 			list_del_init(ws);
260 			/* keep its place if it's a lower level using this */
261 			workspace->req_level = level;
262 			if (clip_level(level) == workspace->level)
263 				list_del(&workspace->lru_list);
264 			if (list_empty(&zwsm->idle_ws[i]))
265 				clear_bit(i, &zwsm->active_map);
266 			spin_unlock_bh(&zwsm->lock);
267 			return ws;
268 		}
269 	}
270 	spin_unlock_bh(&zwsm->lock);
271 
272 	return NULL;
273 }
274 
275 /*
276  * Zstd get_workspace for level.
277  *
278  * @level: compression level
279  *
280  * If @level is 0, then any compression level can be used.  Therefore, we begin
281  * scanning from 1.  We first scan through possible workspaces and then after
282  * attempt to allocate a new workspace.  If we fail to allocate one due to
283  * memory pressure, go to sleep waiting for the max level workspace to free up.
284  */
285 struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
286 {
287 	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
288 	struct list_head *ws;
289 	unsigned int nofs_flag;
290 
291 	ASSERT(zwsm);
292 
293 	/* level == 0 means we can use any workspace */
294 	if (!level)
295 		level = 1;
296 
297 again:
298 	ws = zstd_find_workspace(fs_info, level);
299 	if (ws)
300 		return ws;
301 
302 	nofs_flag = memalloc_nofs_save();
303 	ws = zstd_alloc_workspace(fs_info, level);
304 	memalloc_nofs_restore(nofs_flag);
305 
306 	if (IS_ERR(ws)) {
307 		DEFINE_WAIT(wait);
308 
309 		prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
310 		schedule();
311 		finish_wait(&zwsm->wait, &wait);
312 
313 		goto again;
314 	}
315 
316 	return ws;
317 }
318 
319 /*
320  * Zstd put_workspace.
321  *
322  * @ws: list_head for the workspace
323  *
324  * When putting back a workspace, we only need to update the LRU if we are of
325  * the requested compression level.  Here is where we continue to protect the
326  * max level workspace or update last_used accordingly.  If the reclaim timer
327  * isn't set, it is also set here.  Only the max level workspace tries and wakes
328  * up waiting workspaces.
329  */
330 void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws)
331 {
332 	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
333 	struct workspace *workspace = list_to_workspace(ws);
334 
335 	ASSERT(zwsm);
336 	spin_lock_bh(&zwsm->lock);
337 
338 	/* A node is only taken off the lru if we are the corresponding level */
339 	if (clip_level(workspace->req_level) == workspace->level) {
340 		/* Hide a max level workspace from reclaim */
341 		if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
342 			INIT_LIST_HEAD(&workspace->lru_list);
343 		} else {
344 			workspace->last_used = jiffies;
345 			list_add(&workspace->lru_list, &zwsm->lru_list);
346 			if (!timer_pending(&zwsm->timer))
347 				mod_timer(&zwsm->timer,
348 					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
349 		}
350 	}
351 
352 	set_bit(workspace->level, &zwsm->active_map);
353 	list_add(&workspace->list, &zwsm->idle_ws[workspace->level]);
354 	workspace->req_level = 0;
355 
356 	spin_unlock_bh(&zwsm->lock);
357 
358 	if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
359 		cond_wake_up(&zwsm->wait);
360 }
361 
362 void zstd_free_workspace(struct list_head *ws)
363 {
364 	struct workspace *workspace = list_entry(ws, struct workspace, list);
365 
366 	kvfree(workspace->mem);
367 	kfree(workspace->buf);
368 	kfree(workspace);
369 }
370 
371 struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level)
372 {
373 	const u32 blocksize = fs_info->sectorsize;
374 	struct workspace *workspace;
375 
376 	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
377 	if (!workspace)
378 		return ERR_PTR(-ENOMEM);
379 
380 	/* Use level 1 workspace size for all the fast mode negative levels. */
381 	workspace->size = zstd_ws_mem_sizes[clip_level(level)];
382 	workspace->level = clip_level(level);
383 	workspace->req_level = level;
384 	workspace->last_used = jiffies;
385 	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
386 	workspace->buf = kmalloc(blocksize, GFP_KERNEL);
387 	if (!workspace->mem || !workspace->buf)
388 		goto fail;
389 
390 	INIT_LIST_HEAD(&workspace->list);
391 	INIT_LIST_HEAD(&workspace->lru_list);
392 
393 	return &workspace->list;
394 fail:
395 	zstd_free_workspace(&workspace->list);
396 	return ERR_PTR(-ENOMEM);
397 }
398 
399 int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb)
400 {
401 	struct btrfs_inode *inode = cb->bbio.inode;
402 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
403 	struct workspace *workspace = list_entry(ws, struct workspace, list);
404 	struct address_space *mapping = inode->vfs_inode.i_mapping;
405 	struct bio *bio = &cb->bbio.bio;
406 	zstd_cstream *stream;
407 	int ret = 0;
408 	/* The current folio to read. */
409 	struct folio *in_folio = NULL;
410 	/* The current folio to write to. */
411 	struct folio *out_folio = NULL;
412 	unsigned long tot_in = 0;
413 	unsigned long tot_out = 0;
414 	const u64 start = cb->start;
415 	const u32 len = cb->len;
416 	const u64 end = start + len;
417 	const u32 blocksize = fs_info->sectorsize;
418 	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
419 
420 	workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
421 
422 	/* Initialize the stream. */
423 	stream = zstd_init_cstream(&workspace->params, len, workspace->mem, workspace->size);
424 	if (unlikely(!stream)) {
425 		btrfs_err(fs_info,
426 	"zstd compression init level %d failed, root %llu inode %llu offset %llu",
427 			  workspace->req_level, btrfs_root_id(inode->root),
428 			  btrfs_ino(inode), start);
429 		ret = -EIO;
430 		goto out;
431 	}
432 
433 	/* Map in the first page of input data. */
434 	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
435 	if (ret < 0)
436 		goto out;
437 	workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
438 	workspace->in_buf.pos = 0;
439 	workspace->in_buf.size = btrfs_calc_input_length(in_folio, end, start);
440 
441 	/* Allocate and map in the output buffer. */
442 	out_folio = btrfs_alloc_compr_folio(fs_info);
443 	if (out_folio == NULL) {
444 		ret = -ENOMEM;
445 		goto out;
446 	}
447 	workspace->out_buf.dst = folio_address(out_folio);
448 	workspace->out_buf.pos = 0;
449 	workspace->out_buf.size = min_folio_size;
450 
451 	while (1) {
452 		size_t ret2;
453 
454 		ret2 = zstd_compress_stream(stream, &workspace->out_buf, &workspace->in_buf);
455 		if (unlikely(zstd_is_error(ret2))) {
456 			btrfs_warn(fs_info,
457 "zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
458 				   workspace->req_level, zstd_get_error_code(ret2),
459 				   btrfs_root_id(inode->root), btrfs_ino(inode),
460 				   start + tot_in);
461 			ret = -EIO;
462 			goto out;
463 		}
464 
465 		/* Check to see if we are making it bigger. */
466 		if (tot_in + workspace->in_buf.pos > blocksize * 2 &&
467 		    tot_in + workspace->in_buf.pos < tot_out + workspace->out_buf.pos) {
468 			ret = -E2BIG;
469 			goto out;
470 		}
471 
472 		/* Check if we need more output space. */
473 		if (workspace->out_buf.pos >= workspace->out_buf.size) {
474 			tot_out += min_folio_size;
475 			if (tot_out >= len) {
476 				ret = -E2BIG;
477 				goto out;
478 			}
479 			/* Queue the current foliot into the bio. */
480 			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
481 				ret = -E2BIG;
482 				goto out;
483 			}
484 
485 			out_folio = btrfs_alloc_compr_folio(fs_info);
486 			if (out_folio == NULL) {
487 				ret = -ENOMEM;
488 				goto out;
489 			}
490 			workspace->out_buf.dst = folio_address(out_folio);
491 			workspace->out_buf.pos = 0;
492 			workspace->out_buf.size = min_folio_size;
493 		}
494 
495 		/* We've reached the end of the input. */
496 		if (tot_in + workspace->in_buf.pos >= len) {
497 			tot_in += workspace->in_buf.pos;
498 			break;
499 		}
500 
501 		/* Check if we need more input. */
502 		if (workspace->in_buf.pos >= workspace->in_buf.size) {
503 			u64 cur;
504 
505 			tot_in += workspace->in_buf.size;
506 			cur = start + tot_in;
507 
508 			kunmap_local(workspace->in_buf.src);
509 			workspace->in_buf.src = NULL;
510 			folio_put(in_folio);
511 
512 			ret = btrfs_compress_filemap_get_folio(mapping, cur, &in_folio);
513 			if (ret < 0)
514 				goto out;
515 			workspace->in_buf.src = kmap_local_folio(in_folio,
516 							 offset_in_folio(in_folio, cur));
517 			workspace->in_buf.pos = 0;
518 			workspace->in_buf.size = btrfs_calc_input_length(in_folio, end, cur);
519 		}
520 	}
521 
522 	while (1) {
523 		size_t ret2;
524 
525 		ret2 = zstd_end_stream(stream, &workspace->out_buf);
526 		if (unlikely(zstd_is_error(ret2))) {
527 			btrfs_err(fs_info,
528 "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
529 				  workspace->req_level, zstd_get_error_code(ret2),
530 				  btrfs_root_id(inode->root), btrfs_ino(inode),
531 				  start + tot_in);
532 			ret = -EIO;
533 			goto out;
534 		}
535 		/* Queue the remaining part of the output folio into bio. */
536 		if (ret2 == 0) {
537 			tot_out += workspace->out_buf.pos;
538 			if (tot_out >= len) {
539 				ret = -E2BIG;
540 				goto out;
541 			}
542 			if (!bio_add_folio(bio, out_folio, workspace->out_buf.pos, 0)) {
543 				ret = -E2BIG;
544 				goto out;
545 			}
546 			out_folio = NULL;
547 			break;
548 		}
549 		tot_out += min_folio_size;
550 		if (tot_out >= len) {
551 			ret = -E2BIG;
552 			goto out;
553 		}
554 		if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
555 			ret = -E2BIG;
556 			goto out;
557 		}
558 		out_folio = btrfs_alloc_compr_folio(fs_info);
559 		if (out_folio == NULL) {
560 			ret = -ENOMEM;
561 			goto out;
562 		}
563 		workspace->out_buf.dst = folio_address(out_folio);
564 		workspace->out_buf.pos = 0;
565 		workspace->out_buf.size = min_folio_size;
566 	}
567 
568 	if (tot_out >= tot_in) {
569 		ret = -E2BIG;
570 		goto out;
571 	}
572 
573 	ret = 0;
574 	ASSERT(tot_out == bio->bi_iter.bi_size);
575 out:
576 	if (out_folio)
577 		btrfs_free_compr_folio(out_folio);
578 	if (workspace->in_buf.src) {
579 		kunmap_local(workspace->in_buf.src);
580 		folio_put(in_folio);
581 	}
582 	return ret;
583 }
584 
585 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
586 {
587 	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
588 	struct workspace *workspace = list_entry(ws, struct workspace, list);
589 	struct folio_iter fi;
590 	size_t srclen = cb->compressed_len;
591 	zstd_dstream *stream;
592 	int ret = 0;
593 	const u32 blocksize = fs_info->sectorsize;
594 	const unsigned int min_folio_size = btrfs_min_folio_size(fs_info);
595 	unsigned long folio_in_index = 0;
596 	unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
597 	unsigned long buf_start;
598 	unsigned long total_out = 0;
599 
600 	bio_first_folio(&fi, &cb->bbio.bio, 0);
601 	if (unlikely(!fi.folio))
602 		return -EINVAL;
603 	ASSERT(folio_size(fi.folio) == blocksize);
604 
605 	stream = zstd_init_dstream(
606 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
607 	if (unlikely(!stream)) {
608 		struct btrfs_inode *inode = cb->bbio.inode;
609 
610 		btrfs_err(inode->root->fs_info,
611 		"zstd decompression init failed, root %llu inode %llu offset %llu",
612 			  btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
613 		ret = -EIO;
614 		goto done;
615 	}
616 
617 	workspace->in_buf.src = kmap_local_folio(fi.folio, 0);
618 	workspace->in_buf.pos = 0;
619 	workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
620 
621 	workspace->out_buf.dst = workspace->buf;
622 	workspace->out_buf.pos = 0;
623 	workspace->out_buf.size = blocksize;
624 
625 	while (1) {
626 		size_t ret2;
627 
628 		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
629 				&workspace->in_buf);
630 		if (unlikely(zstd_is_error(ret2))) {
631 			struct btrfs_inode *inode = cb->bbio.inode;
632 
633 			btrfs_err(inode->root->fs_info,
634 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
635 				  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
636 				  btrfs_ino(inode), cb->start);
637 			ret = -EIO;
638 			goto done;
639 		}
640 		buf_start = total_out;
641 		total_out += workspace->out_buf.pos;
642 		workspace->out_buf.pos = 0;
643 
644 		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
645 				total_out - buf_start, cb, buf_start);
646 		if (ret == 0)
647 			break;
648 
649 		if (workspace->in_buf.pos >= srclen)
650 			break;
651 
652 		/* Check if we've hit the end of a frame */
653 		if (ret2 == 0)
654 			break;
655 
656 		if (workspace->in_buf.pos == workspace->in_buf.size) {
657 			kunmap_local(workspace->in_buf.src);
658 			folio_in_index++;
659 			if (unlikely(folio_in_index >= total_folios_in)) {
660 				workspace->in_buf.src = NULL;
661 				ret = -EIO;
662 				goto done;
663 			}
664 			srclen -= min_folio_size;
665 			bio_next_folio(&fi, &cb->bbio.bio);
666 			ASSERT(fi.folio);
667 			workspace->in_buf.src = kmap_local_folio(fi.folio, 0);
668 			workspace->in_buf.pos = 0;
669 			workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
670 		}
671 	}
672 	ret = 0;
673 done:
674 	if (workspace->in_buf.src)
675 		kunmap_local(workspace->in_buf.src);
676 	return ret;
677 }
678 
679 int zstd_decompress(struct list_head *ws, const u8 *data_in,
680 		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
681 		size_t destlen)
682 {
683 	struct workspace *workspace = list_entry(ws, struct workspace, list);
684 	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
685 	const u32 sectorsize = fs_info->sectorsize;
686 	zstd_dstream *stream;
687 	int ret = 0;
688 	unsigned long to_copy = 0;
689 
690 	stream = zstd_init_dstream(
691 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
692 	if (unlikely(!stream)) {
693 		struct btrfs_inode *inode = folio_to_inode(dest_folio);
694 
695 		btrfs_err(inode->root->fs_info,
696 		"zstd decompression init failed, root %llu inode %llu offset %llu",
697 			  btrfs_root_id(inode->root), btrfs_ino(inode),
698 			  folio_pos(dest_folio));
699 		ret = -EIO;
700 		goto finish;
701 	}
702 
703 	workspace->in_buf.src = data_in;
704 	workspace->in_buf.pos = 0;
705 	workspace->in_buf.size = srclen;
706 
707 	workspace->out_buf.dst = workspace->buf;
708 	workspace->out_buf.pos = 0;
709 	workspace->out_buf.size = sectorsize;
710 
711 	/*
712 	 * Since both input and output buffers should not exceed one sector,
713 	 * one call should end the decompression.
714 	 */
715 	ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
716 	if (unlikely(zstd_is_error(ret))) {
717 		struct btrfs_inode *inode = folio_to_inode(dest_folio);
718 
719 		btrfs_err(inode->root->fs_info,
720 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
721 			  zstd_get_error_code(ret), btrfs_root_id(inode->root),
722 			  btrfs_ino(inode), folio_pos(dest_folio));
723 		goto finish;
724 	}
725 	to_copy = workspace->out_buf.pos;
726 	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
727 finish:
728 	/* Error or early end. */
729 	if (unlikely(to_copy < destlen)) {
730 		ret = -EIO;
731 		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
732 	}
733 	return ret;
734 }
735 
/* Range of zstd compression levels accepted by btrfs and the default one. */
const struct btrfs_compress_levels btrfs_zstd_compress = {
	.min_level	= ZSTD_BTRFS_MIN_LEVEL,
	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
};
740 };
741