// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016-present, Facebook, Inc.
 * All rights reserved.
 *
 */

#include <linux/bio.h>
#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/pagemap.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include "misc.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "compression.h"
#include "super.h"

#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)

static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
						 size_t src_len)
{
	zstd_parameters params = zstd_get_params(level, src_len);

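	/*
	 * Cap the window at ZSTD_BTRFS_MAX_WINDOWLOG (1 << 17 == 128K); the
	 * WARN_ON below checks that callers never pass in more input than that.
	 */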
	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
	return params;
}

struct workspace {
	void *mem;
	size_t size;
	char *buf;
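	/*
	 * level is the compression level this workspace was allocated for,
	 * req_level the level of the current user (0 while the workspace is
	 * idle).
	 */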
	unsigned int level;
	unsigned int req_level;
	unsigned long last_used; /* jiffies */
	struct list_head list;
	struct list_head lru_list;
	zstd_in_buffer in_buf;
	zstd_out_buffer out_buf;
};

/*
 * Zstd Workspace Management
 *
 * Zstd workspaces have different memory requirements depending on the level.
 * The zstd workspaces are managed by having individual lists for each level
 * and a global lru.  Forward progress is maintained by protecting a max level
 * workspace.
 *
 * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces and scanning upwards from the requested level.
 * This lets us recycle higher level workspaces because of the monotonic memory
 * guarantee.  A workspace's last_used is only updated if it is being used by
 * the corresponding memory level.  Putting a workspace involves adding it back
 * to the appropriate places and adding it back to the lru if necessary.
 *
 * A timer is used to reclaim workspaces if they have not been used for
 * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
 * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
 */

struct zstd_workspace_manager {
	const struct btrfs_compress_op *ops;
	spinlock_t lock;
	struct list_head lru_list;
	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
	unsigned long active_map;
	wait_queue_head_t wait;
	struct timer_list timer;
};

static struct zstd_workspace_manager wsm;

static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];

static inline struct workspace *list_to_workspace(struct list_head *list)
{
	return container_of(list, struct workspace, list);
}

void zstd_free_workspace(struct list_head *ws);
struct list_head *zstd_alloc_workspace(unsigned int level);

/*
 * Timer callback to free unused workspaces.
 *
 * @timer: timer
 *
 * This scans the lru_list and attempts to reclaim any workspace that hasn't
 * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
 *
 * The context is softirq and does not need the _bh locking primitives.
 */
static void zstd_reclaim_timer_fn(struct timer_list *timer)
{
	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
	struct list_head *pos, *next;

	spin_lock(&wsm.lock);

	if (list_empty(&wsm.lru_list)) {
		spin_unlock(&wsm.lock);
		return;
	}

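	/*
	 * Entries are added at the head of the lru, so walking backwards from
	 * the tail visits the least recently used workspaces first.
	 */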
	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
		struct workspace *victim = container_of(pos, struct workspace,
							lru_list);
		unsigned int level;

		if (time_after(victim->last_used, reclaim_threshold))
			break;

		/* workspace is in use */
		if (victim->req_level)
			continue;

		level = victim->level;
		list_del(&victim->lru_list);
		list_del(&victim->list);
		zstd_free_workspace(&victim->list);

		if (list_empty(&wsm.idle_ws[level - 1]))
			clear_bit(level - 1, &wsm.active_map);

	}

	if (!list_empty(&wsm.lru_list))
		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);

	spin_unlock(&wsm.lock);
}

/*
 * Calculate monotonic memory bounds.
 *
 * It is possible based on the level configurations that a higher level
 * workspace uses less memory than a lower level workspace.  In order to reuse
 * workspaces, this must be made a monotonic relationship.  This precomputes
 * the required memory for each level and enforces the monotonicity between
 * level and memory required.
 */
static void zstd_calc_ws_mem_sizes(void)
{
	size_t max_size = 0;
	unsigned int level;

	for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
		zstd_parameters params =
			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
		size_t level_size =
			max_t(size_t,
			      zstd_cstream_workspace_bound(&params.cParams),
			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));

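		/*
		 * Carry the running maximum forward so that a higher level
		 * never reports a smaller workspace size than a lower one.
		 */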
		max_size = max_t(size_t, max_size, level_size);
		zstd_ws_mem_sizes[level - 1] = max_size;
	}
}

void zstd_init_workspace_manager(void)
{
	struct list_head *ws;
	int i;

	zstd_calc_ws_mem_sizes();

	wsm.ops = &btrfs_zstd_compress;
	spin_lock_init(&wsm.lock);
	init_waitqueue_head(&wsm.wait);
	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);

	INIT_LIST_HEAD(&wsm.lru_list);
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&wsm.idle_ws[i]);

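	/*
	 * Preallocate one max level workspace so that at least one request can
	 * always make forward progress (see the workspace management comment
	 * above).
	 */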
	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
	if (IS_ERR(ws)) {
		pr_warn(
		"BTRFS: cannot preallocate zstd compression workspace\n");
	} else {
		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
	}
}

void zstd_cleanup_workspace_manager(void)
{
	struct workspace *workspace;
	int i;

	spin_lock_bh(&wsm.lock);
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
		while (!list_empty(&wsm.idle_ws[i])) {
			workspace = container_of(wsm.idle_ws[i].next,
						 struct workspace, list);
			list_del(&workspace->list);
			list_del(&workspace->lru_list);
			zstd_free_workspace(&workspace->list);
		}
	}
	spin_unlock_bh(&wsm.lock);

	del_timer_sync(&wsm.timer);
}

/*
 * Find workspace for given level.
 *
 * @level: compression level
 *
 * This iterates over the set bits in the active_map beginning at the requested
 * compression level.  This lets us utilize already allocated workspaces before
 * allocating a new one.  If the workspace is of a larger size, it is used, but
 * the place in the lru_list and last_used times are not updated.  This is to
 * offer the opportunity to reclaim the workspace in favor of allocating an
 * appropriately sized one in the future.
 */
static struct list_head *zstd_find_workspace(unsigned int level)
{
	struct list_head *ws;
	struct workspace *workspace;
	int i = level - 1;

	spin_lock_bh(&wsm.lock);
	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
		if (!list_empty(&wsm.idle_ws[i])) {
			ws = wsm.idle_ws[i].next;
			workspace = list_to_workspace(ws);
			list_del_init(ws);
			/* keep its place if it's a lower level using this */
			workspace->req_level = level;
			if (level == workspace->level)
				list_del(&workspace->lru_list);
			if (list_empty(&wsm.idle_ws[i]))
				clear_bit(i, &wsm.active_map);
			spin_unlock_bh(&wsm.lock);
			return ws;
		}
	}
	spin_unlock_bh(&wsm.lock);

	return NULL;
}

/*
 * Zstd get_workspace for level.
 *
 * @level: compression level
 *
 * If @level is 0, then any compression level can be used.  Therefore, we begin
 * scanning from 1.  We first scan through possible workspaces and only then
 * attempt to allocate a new workspace.  If we fail to allocate one due to
 * memory pressure, go to sleep waiting for the max level workspace to free up.
 */
struct list_head *zstd_get_workspace(unsigned int level)
{
	struct list_head *ws;
	unsigned int nofs_flag;

	/* level == 0 means we can use any workspace */
	if (!level)
		level = 1;

again:
	ws = zstd_find_workspace(level);
	if (ws)
		return ws;

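	/*
	 * Allocate under NOFS so that memory reclaim triggered by this
	 * allocation cannot recurse back into the filesystem.
	 */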
	nofs_flag = memalloc_nofs_save();
	ws = zstd_alloc_workspace(level);
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(ws)) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
		schedule();
		finish_wait(&wsm.wait, &wait);

		goto again;
	}

	return ws;
}

/*
 * Zstd put_workspace.
 *
 * @ws: list_head for the workspace
 *
 * When putting back a workspace, we only need to update the LRU if we are of
 * the requested compression level.  Here is where we continue to protect the
 * max level workspace or update last_used accordingly.  If the reclaim timer
 * isn't set, it is also set here.  Only putting back the max level workspace
 * wakes up waiters.
 */
void zstd_put_workspace(struct list_head *ws)
{
	struct workspace *workspace = list_to_workspace(ws);

	spin_lock_bh(&wsm.lock);

	/* A node is only taken off the lru if we are the corresponding level */
	if (workspace->req_level == workspace->level) {
		/* Hide a max level workspace from reclaim */
		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
			INIT_LIST_HEAD(&workspace->lru_list);
		} else {
			workspace->last_used = jiffies;
			list_add(&workspace->lru_list, &wsm.lru_list);
			if (!timer_pending(&wsm.timer))
				mod_timer(&wsm.timer,
					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
		}
	}

	set_bit(workspace->level - 1, &wsm.active_map);
	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
	workspace->req_level = 0;

	spin_unlock_bh(&wsm.lock);

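	/*
	 * A max level workspace can service a request of any level, so only
	 * its return is guaranteed to help a waiter; see zstd_get_workspace().
	 */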
	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
		cond_wake_up(&wsm.wait);
}

void zstd_free_workspace(struct list_head *ws)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);

	kvfree(workspace->mem);
	kfree(workspace->buf);
	kfree(workspace);
}

struct list_head *zstd_alloc_workspace(unsigned int level)
{
	struct workspace *workspace;

	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
	if (!workspace)
		return ERR_PTR(-ENOMEM);

	workspace->size = zstd_ws_mem_sizes[level - 1];
	workspace->level = level;
	workspace->req_level = level;
	workspace->last_used = jiffies;
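	/*
	 * The zstd workspace can be large, so use kvmalloc() and let callers
	 * handle the failure quietly (__GFP_NOWARN).
	 */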
	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
	workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!workspace->mem || !workspace->buf)
		goto fail;

	INIT_LIST_HEAD(&workspace->list);
	INIT_LIST_HEAD(&workspace->lru_list);

	return &workspace->list;
fail:
	zstd_free_workspace(&workspace->list);
	return ERR_PTR(-ENOMEM);
}

int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	zstd_cstream *stream;
	int ret = 0;
	int nr_folios = 0;
	struct folio *in_folio = NULL;  /* The current folio to read. */
	struct folio *out_folio = NULL; /* The current folio to write to. */
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long len = *total_out;
	const unsigned long nr_dest_folios = *out_folios;
	const u64 orig_end = start + len;
	unsigned long max_out = nr_dest_folios * PAGE_SIZE;
	unsigned int pg_off;
	unsigned int cur_len;
	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
							   len);

	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	/* Initialize the stream */
	stream = zstd_init_cstream(&params, len, workspace->mem,
			workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_err(inode->root->fs_info,
	"zstd compression init level %d failed, root %llu inode %llu offset %llu",
			  workspace->req_level, btrfs_root_id(inode->root),
			  btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	/* map in the first page of input data */
	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
	if (ret < 0)
		goto out;
	pg_off = offset_in_page(start);
	cur_len = btrfs_calc_input_length(orig_end, start);
	workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = cur_len;

	/* Allocate and map in the output buffer */
	out_folio = btrfs_alloc_compr_folio();
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	folios[nr_folios++] = out_folio;
	workspace->out_buf.dst = folio_address(out_folio);
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);

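	/*
	 * Feed the input through the streaming API one mapped chunk at a time,
	 * allocating a new output folio whenever out_buf fills up.
	 */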
	while (1) {
		size_t ret2;

		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_warn(inode->root->fs_info,
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
				   workspace->req_level, zstd_get_error_code(ret2),
				   btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			ret = -EIO;
			goto out;
		}

		/*
		 * Once more than 8K of input has been consumed, bail out if
		 * compression is making the data bigger instead of smaller.
		 */
		if (tot_in + workspace->in_buf.pos > 8192 &&
				tot_in + workspace->in_buf.pos <
				tot_out + workspace->out_buf.pos) {
			ret = -E2BIG;
			goto out;
		}

		/* We've reached the end of our output range */
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		/* Check if we need more output space */
		if (workspace->out_buf.pos == workspace->out_buf.size) {
			tot_out += PAGE_SIZE;
			max_out -= PAGE_SIZE;
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			folios[nr_folios++] = out_folio;
			workspace->out_buf.dst = folio_address(out_folio);
			workspace->out_buf.pos = 0;
			workspace->out_buf.size = min_t(size_t, max_out,
							PAGE_SIZE);
		}

		/* We've reached the end of the input */
		if (workspace->in_buf.pos >= len) {
			tot_in += workspace->in_buf.pos;
			break;
		}

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += PAGE_SIZE;
			kunmap_local(workspace->in_buf.src);
			workspace->in_buf.src = NULL;
			folio_put(in_folio);
			start += cur_len;
			len -= cur_len;
			ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
			if (ret < 0)
				goto out;
			pg_off = offset_in_page(start);
			cur_len = btrfs_calc_input_length(orig_end, start);
			workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = cur_len;
		}
	}
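	/*
	 * Flush and finish the frame: zstd_end_stream() returns the number of
	 * bytes still left to flush, or 0 once the frame is complete.
	 */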
	while (1) {
		size_t ret2;

		ret2 = zstd_end_stream(stream, &workspace->out_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_err(inode->root->fs_info,
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
				  workspace->req_level, zstd_get_error_code(ret2),
				  btrfs_root_id(inode->root), btrfs_ino(inode),
				  start);
			ret = -EIO;
			goto out;
		}
		if (ret2 == 0) {
			tot_out += workspace->out_buf.pos;
			break;
		}
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		tot_out += PAGE_SIZE;
		max_out -= PAGE_SIZE;
		if (nr_folios == nr_dest_folios) {
			ret = -E2BIG;
			goto out;
		}
		out_folio = btrfs_alloc_compr_folio();
		if (out_folio == NULL) {
			ret = -ENOMEM;
			goto out;
		}
		folios[nr_folios++] = out_folio;
		workspace->out_buf.dst = folio_address(out_folio);
		workspace->out_buf.pos = 0;
		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
	}

	if (tot_out >= tot_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_in = tot_in;
	*total_out = tot_out;
out:
	*out_folios = nr_folios;
	if (workspace->in_buf.src) {
		kunmap_local(workspace->in_buf.src);
		folio_put(in_folio);
	}
	return ret;
}

int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio **folios_in = cb->compressed_folios;
	size_t srclen = cb->compressed_len;
	zstd_dstream *stream;
	int ret = 0;
	unsigned long folio_in_index = 0;
	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
	unsigned long buf_start;
	unsigned long total_out = 0;

	stream = zstd_init_dstream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		btrfs_err(inode->root->fs_info,
		"zstd decompression init failed, root %llu inode %llu offset %llu",
			  btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		ret = -EIO;
		goto done;
	}

	workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = PAGE_SIZE;

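	/*
	 * Decompress into the PAGE_SIZE scratch buffer and let
	 * btrfs_decompress_buf2page() copy the data out to the bio pages.
	 */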
	while (1) {
		size_t ret2;

		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = cb->bbio.inode;

			btrfs_err(inode->root->fs_info,
		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
				  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
				  btrfs_ino(inode), cb->start);
			ret = -EIO;
			goto done;
		}
		buf_start = total_out;
		total_out += workspace->out_buf.pos;
		workspace->out_buf.pos = 0;

		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
				total_out - buf_start, cb, buf_start);
		if (ret == 0)
			break;

		if (workspace->in_buf.pos >= srclen)
			break;

		/* Check if we've hit the end of a frame */
		if (ret2 == 0)
			break;

		if (workspace->in_buf.pos == workspace->in_buf.size) {
			kunmap_local(workspace->in_buf.src);
			folio_in_index++;
			if (folio_in_index >= total_folios_in) {
				workspace->in_buf.src = NULL;
				ret = -EIO;
				goto done;
			}
			srclen -= PAGE_SIZE;
			workspace->in_buf.src =
				kmap_local_folio(folios_in[folio_in_index], 0);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
		}
	}
	ret = 0;
done:
	if (workspace->in_buf.src)
		kunmap_local(workspace->in_buf.src);
	return ret;
}

int zstd_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
	const u32 sectorsize = fs_info->sectorsize;
	zstd_dstream *stream;
	int ret = 0;
	unsigned long to_copy = 0;

	stream = zstd_init_dstream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression init failed, root %llu inode %llu offset %llu",
			  btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = sectorsize;

	/*
	 * Since both input and output buffers should not exceed one sector,
	 * one call should end the decompression.
	 */
	ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
	if (unlikely(zstd_is_error(ret))) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
			  zstd_get_error_code(ret), btrfs_root_id(inode->root),
			  btrfs_ino(inode), folio_pos(dest_folio));
		goto finish;
	}
	to_copy = workspace->out_buf.pos;
	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
	/* Error or early end. */
	if (unlikely(to_copy < destlen)) {
		ret = -EIO;
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	}
	return ret;
}

const struct btrfs_compress_op btrfs_zstd_compress = {
	/* ZSTD uses own workspace manager */
	.workspace_manager = NULL,
	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
};