xref: /linux/fs/btrfs/zstd.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  */
7 
8 #include <linux/bio.h>
9 #include <linux/bitmap.h>
10 #include <linux/err.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/mm.h>
14 #include <linux/sched/mm.h>
15 #include <linux/pagemap.h>
16 #include <linux/refcount.h>
17 #include <linux/sched.h>
18 #include <linux/slab.h>
19 #include <linux/zstd.h>
20 #include "misc.h"
21 #include "fs.h"
22 #include "btrfs_inode.h"
23 #include "compression.h"
24 #include "super.h"
25 
/* Largest zstd window log btrfs allows; larger values are clamped to it. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input handed to zstd in one go: 2^17 bytes (128KiB). */
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
/* Level reported as the default in btrfs_zstd_compress. */
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
/* Highest supported level; also the number of per-level idle lists. */
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
32 
zstd_get_btrfs_parameters(unsigned int level,size_t src_len)33 static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
34 						 size_t src_len)
35 {
36 	zstd_parameters params = zstd_get_params(level, src_len);
37 
38 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
39 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
40 	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
41 	return params;
42 }
43 
44 struct workspace {
45 	void *mem;
46 	size_t size;
47 	char *buf;
48 	unsigned int level;
49 	unsigned int req_level;
50 	unsigned long last_used; /* jiffies */
51 	struct list_head list;
52 	struct list_head lru_list;
53 	zstd_in_buffer in_buf;
54 	zstd_out_buffer out_buf;
55 };
56 
/*
 * Zstd Workspace Management
 *
 * Zstd workspaces have different memory requirements depending on the level.
 * The zstd workspaces are managed by having individual lists for each level
 * and a global lru.  Forward progress is maintained by protecting a max level
 * workspace.
 *
 * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces, scanning upwards from the requested level.  This
 * lets us recycle higher level workspaces because of the monotonic memory
 * guarantee.  A workspace's last_used is only updated if it is being used by
 * the corresponding memory level.  Putting a workspace involves adding it back
 * to the appropriate per-level list and adding it back to the lru if
 * necessary.
 *
 * A timer is used to reclaim workspaces if they have not been used for
 * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
 * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
 */
76 
77 struct zstd_workspace_manager {
78 	const struct btrfs_compress_op *ops;
79 	spinlock_t lock;
80 	struct list_head lru_list;
81 	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
82 	unsigned long active_map;
83 	wait_queue_head_t wait;
84 	struct timer_list timer;
85 };
86 
87 static struct zstd_workspace_manager wsm;
88 
89 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
90 
list_to_workspace(struct list_head * list)91 static inline struct workspace *list_to_workspace(struct list_head *list)
92 {
93 	return container_of(list, struct workspace, list);
94 }
95 
/* Declared up front: both are used before their definitions further down. */
struct list_head *zstd_alloc_workspace(unsigned int level);
void zstd_free_workspace(struct list_head *ws);
98 
99 /*
100  * Timer callback to free unused workspaces.
101  *
102  * @t: timer
103  *
104  * This scans the lru_list and attempts to reclaim any workspace that hasn't
105  * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
106  *
107  * The context is softirq and does not need the _bh locking primitives.
108  */
zstd_reclaim_timer_fn(struct timer_list * timer)109 static void zstd_reclaim_timer_fn(struct timer_list *timer)
110 {
111 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
112 	struct list_head *pos, *next;
113 
114 	ASSERT(timer == &wsm.timer);
115 
116 	spin_lock(&wsm.lock);
117 
118 	if (list_empty(&wsm.lru_list)) {
119 		spin_unlock(&wsm.lock);
120 		return;
121 	}
122 
123 	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
124 		struct workspace *victim = container_of(pos, struct workspace,
125 							lru_list);
126 		unsigned int level;
127 
128 		if (time_after(victim->last_used, reclaim_threshold))
129 			break;
130 
131 		/* workspace is in use */
132 		if (victim->req_level)
133 			continue;
134 
135 		level = victim->level;
136 		list_del(&victim->lru_list);
137 		list_del(&victim->list);
138 		zstd_free_workspace(&victim->list);
139 
140 		if (list_empty(&wsm.idle_ws[level - 1]))
141 			clear_bit(level - 1, &wsm.active_map);
142 
143 	}
144 
145 	if (!list_empty(&wsm.lru_list))
146 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
147 
148 	spin_unlock(&wsm.lock);
149 }
150 
151 /*
152  * Calculate monotonic memory bounds.
153  *
154  * It is possible based on the level configurations that a higher level
155  * workspace uses less memory than a lower level workspace.  In order to reuse
156  * workspaces, this must be made a monotonic relationship.  This precomputes
157  * the required memory for each level and enforces the monotonicity between
158  * level and memory required.
159  */
zstd_calc_ws_mem_sizes(void)160 static void zstd_calc_ws_mem_sizes(void)
161 {
162 	size_t max_size = 0;
163 	unsigned int level;
164 
165 	for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
166 		zstd_parameters params =
167 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
168 		size_t level_size =
169 			max_t(size_t,
170 			      zstd_cstream_workspace_bound(&params.cParams),
171 			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
172 
173 		max_size = max_t(size_t, max_size, level_size);
174 		zstd_ws_mem_sizes[level - 1] = max_size;
175 	}
176 }
177 
zstd_init_workspace_manager(void)178 void zstd_init_workspace_manager(void)
179 {
180 	struct list_head *ws;
181 	int i;
182 
183 	zstd_calc_ws_mem_sizes();
184 
185 	wsm.ops = &btrfs_zstd_compress;
186 	spin_lock_init(&wsm.lock);
187 	init_waitqueue_head(&wsm.wait);
188 	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
189 
190 	INIT_LIST_HEAD(&wsm.lru_list);
191 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
192 		INIT_LIST_HEAD(&wsm.idle_ws[i]);
193 
194 	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
195 	if (IS_ERR(ws)) {
196 		pr_warn(
197 		"BTRFS: cannot preallocate zstd compression workspace\n");
198 	} else {
199 		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
200 		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
201 	}
202 }
203 
zstd_cleanup_workspace_manager(void)204 void zstd_cleanup_workspace_manager(void)
205 {
206 	struct workspace *workspace;
207 	int i;
208 
209 	spin_lock_bh(&wsm.lock);
210 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
211 		while (!list_empty(&wsm.idle_ws[i])) {
212 			workspace = container_of(wsm.idle_ws[i].next,
213 						 struct workspace, list);
214 			list_del(&workspace->list);
215 			list_del(&workspace->lru_list);
216 			zstd_free_workspace(&workspace->list);
217 		}
218 	}
219 	spin_unlock_bh(&wsm.lock);
220 
221 	del_timer_sync(&wsm.timer);
222 }
223 
224 /*
225  * Find workspace for given level.
226  *
227  * @level: compression level
228  *
229  * This iterates over the set bits in the active_map beginning at the requested
230  * compression level.  This lets us utilize already allocated workspaces before
231  * allocating a new one.  If the workspace is of a larger size, it is used, but
232  * the place in the lru_list and last_used times are not updated.  This is to
233  * offer the opportunity to reclaim the workspace in favor of allocating an
234  * appropriately sized one in the future.
235  */
zstd_find_workspace(unsigned int level)236 static struct list_head *zstd_find_workspace(unsigned int level)
237 {
238 	struct list_head *ws;
239 	struct workspace *workspace;
240 	int i = level - 1;
241 
242 	spin_lock_bh(&wsm.lock);
243 	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
244 		if (!list_empty(&wsm.idle_ws[i])) {
245 			ws = wsm.idle_ws[i].next;
246 			workspace = list_to_workspace(ws);
247 			list_del_init(ws);
248 			/* keep its place if it's a lower level using this */
249 			workspace->req_level = level;
250 			if (level == workspace->level)
251 				list_del(&workspace->lru_list);
252 			if (list_empty(&wsm.idle_ws[i]))
253 				clear_bit(i, &wsm.active_map);
254 			spin_unlock_bh(&wsm.lock);
255 			return ws;
256 		}
257 	}
258 	spin_unlock_bh(&wsm.lock);
259 
260 	return NULL;
261 }
262 
263 /*
264  * Zstd get_workspace for level.
265  *
266  * @level: compression level
267  *
268  * If @level is 0, then any compression level can be used.  Therefore, we begin
269  * scanning from 1.  We first scan through possible workspaces and then after
270  * attempt to allocate a new workspace.  If we fail to allocate one due to
271  * memory pressure, go to sleep waiting for the max level workspace to free up.
272  */
zstd_get_workspace(unsigned int level)273 struct list_head *zstd_get_workspace(unsigned int level)
274 {
275 	struct list_head *ws;
276 	unsigned int nofs_flag;
277 
278 	/* level == 0 means we can use any workspace */
279 	if (!level)
280 		level = 1;
281 
282 again:
283 	ws = zstd_find_workspace(level);
284 	if (ws)
285 		return ws;
286 
287 	nofs_flag = memalloc_nofs_save();
288 	ws = zstd_alloc_workspace(level);
289 	memalloc_nofs_restore(nofs_flag);
290 
291 	if (IS_ERR(ws)) {
292 		DEFINE_WAIT(wait);
293 
294 		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
295 		schedule();
296 		finish_wait(&wsm.wait, &wait);
297 
298 		goto again;
299 	}
300 
301 	return ws;
302 }
303 
304 /*
305  * Zstd put_workspace.
306  *
307  * @ws: list_head for the workspace
308  *
309  * When putting back a workspace, we only need to update the LRU if we are of
310  * the requested compression level.  Here is where we continue to protect the
311  * max level workspace or update last_used accordingly.  If the reclaim timer
312  * isn't set, it is also set here.  Only the max level workspace tries and wakes
313  * up waiting workspaces.
314  */
zstd_put_workspace(struct list_head * ws)315 void zstd_put_workspace(struct list_head *ws)
316 {
317 	struct workspace *workspace = list_to_workspace(ws);
318 
319 	spin_lock_bh(&wsm.lock);
320 
321 	/* A node is only taken off the lru if we are the corresponding level */
322 	if (workspace->req_level == workspace->level) {
323 		/* Hide a max level workspace from reclaim */
324 		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
325 			INIT_LIST_HEAD(&workspace->lru_list);
326 		} else {
327 			workspace->last_used = jiffies;
328 			list_add(&workspace->lru_list, &wsm.lru_list);
329 			if (!timer_pending(&wsm.timer))
330 				mod_timer(&wsm.timer,
331 					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
332 		}
333 	}
334 
335 	set_bit(workspace->level - 1, &wsm.active_map);
336 	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
337 	workspace->req_level = 0;
338 
339 	spin_unlock_bh(&wsm.lock);
340 
341 	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
342 		cond_wake_up(&wsm.wait);
343 }
344 
zstd_free_workspace(struct list_head * ws)345 void zstd_free_workspace(struct list_head *ws)
346 {
347 	struct workspace *workspace = list_entry(ws, struct workspace, list);
348 
349 	kvfree(workspace->mem);
350 	kfree(workspace->buf);
351 	kfree(workspace);
352 }
353 
zstd_alloc_workspace(unsigned int level)354 struct list_head *zstd_alloc_workspace(unsigned int level)
355 {
356 	struct workspace *workspace;
357 
358 	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
359 	if (!workspace)
360 		return ERR_PTR(-ENOMEM);
361 
362 	workspace->size = zstd_ws_mem_sizes[level - 1];
363 	workspace->level = level;
364 	workspace->req_level = level;
365 	workspace->last_used = jiffies;
366 	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
367 	workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
368 	if (!workspace->mem || !workspace->buf)
369 		goto fail;
370 
371 	INIT_LIST_HEAD(&workspace->list);
372 	INIT_LIST_HEAD(&workspace->lru_list);
373 
374 	return &workspace->list;
375 fail:
376 	zstd_free_workspace(&workspace->list);
377 	return ERR_PTR(-ENOMEM);
378 }
379 
/*
 * Compress a file range from the page cache into newly allocated folios.
 *
 * @ws:         workspace handle obtained from zstd_get_workspace()
 * @mapping:    address space the input folios are looked up in
 * @start:      file offset of the first byte to compress
 * @folios:     array that receives the allocated output folios
 * @out_folios: in: capacity of @folios; out: number of folios stored in
 *              @folios (also set on failure)
 * @total_in:   out: number of input bytes consumed, 0 on failure
 * @total_out:  in: number of input bytes to compress; out: number of
 *              compressed bytes produced, 0 on failure
 *
 * Returns 0 on success, -E2BIG when the output would not be smaller than the
 * input or does not fit into @folios, -ENOMEM when an output folio cannot be
 * allocated, -EIO on a zstd failure, or the error returned by
 * btrfs_compress_filemap_get_folio().
 */
int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	zstd_cstream *stream;
	int ret = 0;
	int nr_folios = 0;
	struct folio *in_folio = NULL;  /* The current folio to read. */
	struct folio *out_folio = NULL; /* The current folio to write to. */
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long len = *total_out;
	const unsigned long nr_dest_folios = *out_folios;
	const u64 orig_end = start + len;
	unsigned long max_out = nr_dest_folios * PAGE_SIZE;
	unsigned int pg_off;
	unsigned int cur_len;
	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
							   len);

	/*
	 * Workspaces are reused, so in_buf.src may still hold a pointer left
	 * behind by a previous user.  Clear it so that the cleanup at "out"
	 * only ever unmaps a mapping established by this call.
	 */
	workspace->in_buf.src = NULL;

	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	/* Initialize the stream */
	stream = zstd_init_cstream(&params, len, workspace->mem,
			workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_err(inode->root->fs_info,
	"zstd compression init level %d failed, root %llu inode %llu offset %llu",
			  workspace->req_level, btrfs_root_id(inode->root),
			  btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	/* map in the first page of input data */
	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
	if (ret < 0)
		goto out;
	pg_off = offset_in_page(start);
	cur_len = btrfs_calc_input_length(orig_end, start);
	workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = cur_len;

	/* Allocate and map in the output buffer */
	out_folio = btrfs_alloc_compr_folio();
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	folios[nr_folios++] = out_folio;
	workspace->out_buf.dst = folio_address(out_folio);
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);

	/* Feed the input to zstd, one mapped chunk at a time. */
	while (1) {
		size_t ret2;

		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_warn(inode->root->fs_info,
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
				   workspace->req_level, zstd_get_error_code(ret2),
				   btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			ret = -EIO;
			goto out;
		}

		/* Check to see if we are making it bigger */
		if (tot_in + workspace->in_buf.pos > 8192 &&
				tot_in + workspace->in_buf.pos <
				tot_out + workspace->out_buf.pos) {
			ret = -E2BIG;
			goto out;
		}

		/* We've reached the end of our output range */
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		/* Check if we need more output space */
		if (workspace->out_buf.pos == workspace->out_buf.size) {
			tot_out += PAGE_SIZE;
			max_out -= PAGE_SIZE;
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			folios[nr_folios++] = out_folio;
			workspace->out_buf.dst = folio_address(out_folio);
			workspace->out_buf.pos = 0;
			workspace->out_buf.size = min_t(size_t, max_out,
							PAGE_SIZE);
		}

		/* We've reached the end of the input */
		if (workspace->in_buf.pos >= len) {
			tot_in += workspace->in_buf.pos;
			break;
		}

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += workspace->in_buf.size;
			kunmap_local(workspace->in_buf.src);
			workspace->in_buf.src = NULL;
			folio_put(in_folio);
			start += cur_len;
			len -= cur_len;
			ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
			if (ret < 0)
				goto out;
			pg_off = offset_in_page(start);
			cur_len = btrfs_calc_input_length(orig_end, start);
			workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = cur_len;
		}
	}

	/* All input consumed: flush what zstd still holds and end the frame. */
	while (1) {
		size_t ret2;

		ret2 = zstd_end_stream(stream, &workspace->out_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_err(inode->root->fs_info,
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
				  workspace->req_level, zstd_get_error_code(ret2),
				  btrfs_root_id(inode->root), btrfs_ino(inode),
				  start);
			ret = -EIO;
			goto out;
		}
		if (ret2 == 0) {
			tot_out += workspace->out_buf.pos;
			break;
		}
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		tot_out += PAGE_SIZE;
		max_out -= PAGE_SIZE;
		if (nr_folios == nr_dest_folios) {
			ret = -E2BIG;
			goto out;
		}
		out_folio = btrfs_alloc_compr_folio();
		if (out_folio == NULL) {
			ret = -ENOMEM;
			goto out;
		}
		folios[nr_folios++] = out_folio;
		workspace->out_buf.dst = folio_address(out_folio);
		workspace->out_buf.pos = 0;
		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
	}

	/* Compression that does not shrink the data is not worth keeping. */
	if (tot_out >= tot_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_in = tot_in;
	*total_out = tot_out;
out:
	/* Report the folios stored so far, on failure as well. */
	*out_folios = nr_folios;
	if (workspace->in_buf.src) {
		kunmap_local(workspace->in_buf.src);
		/* Do not leave a dangling mapping address in the workspace. */
		workspace->in_buf.src = NULL;
		folio_put(in_folio);
	}
	return ret;
}
574 
zstd_decompress_bio(struct list_head * ws,struct compressed_bio * cb)575 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
576 {
577 	struct workspace *workspace = list_entry(ws, struct workspace, list);
578 	struct folio **folios_in = cb->compressed_folios;
579 	size_t srclen = cb->compressed_len;
580 	zstd_dstream *stream;
581 	int ret = 0;
582 	unsigned long folio_in_index = 0;
583 	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
584 	unsigned long buf_start;
585 	unsigned long total_out = 0;
586 
587 	stream = zstd_init_dstream(
588 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
589 	if (unlikely(!stream)) {
590 		struct btrfs_inode *inode = cb->bbio.inode;
591 
592 		btrfs_err(inode->root->fs_info,
593 		"zstd decompression init failed, root %llu inode %llu offset %llu",
594 			  btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
595 		ret = -EIO;
596 		goto done;
597 	}
598 
599 	workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
600 	workspace->in_buf.pos = 0;
601 	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
602 
603 	workspace->out_buf.dst = workspace->buf;
604 	workspace->out_buf.pos = 0;
605 	workspace->out_buf.size = PAGE_SIZE;
606 
607 	while (1) {
608 		size_t ret2;
609 
610 		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
611 				&workspace->in_buf);
612 		if (unlikely(zstd_is_error(ret2))) {
613 			struct btrfs_inode *inode = cb->bbio.inode;
614 
615 			btrfs_err(inode->root->fs_info,
616 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
617 				  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
618 				  btrfs_ino(inode), cb->start);
619 			ret = -EIO;
620 			goto done;
621 		}
622 		buf_start = total_out;
623 		total_out += workspace->out_buf.pos;
624 		workspace->out_buf.pos = 0;
625 
626 		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
627 				total_out - buf_start, cb, buf_start);
628 		if (ret == 0)
629 			break;
630 
631 		if (workspace->in_buf.pos >= srclen)
632 			break;
633 
634 		/* Check if we've hit the end of a frame */
635 		if (ret2 == 0)
636 			break;
637 
638 		if (workspace->in_buf.pos == workspace->in_buf.size) {
639 			kunmap_local(workspace->in_buf.src);
640 			folio_in_index++;
641 			if (folio_in_index >= total_folios_in) {
642 				workspace->in_buf.src = NULL;
643 				ret = -EIO;
644 				goto done;
645 			}
646 			srclen -= PAGE_SIZE;
647 			workspace->in_buf.src =
648 				kmap_local_folio(folios_in[folio_in_index], 0);
649 			workspace->in_buf.pos = 0;
650 			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
651 		}
652 	}
653 	ret = 0;
654 done:
655 	if (workspace->in_buf.src)
656 		kunmap_local(workspace->in_buf.src);
657 	return ret;
658 }
659 
/*
 * Decompress a single buffer into a folio.
 *
 * @ws:         workspace handle obtained from zstd_get_workspace()
 * @data_in:    compressed input
 * @dest_folio: folio receiving the decompressed data
 * @dest_pgoff: offset inside @dest_folio to start writing at
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of decompressed bytes expected
 *
 * At most one sector is decompressed, with a single call into zstd.  If
 * fewer than @destlen bytes were produced, the remainder of the destination
 * range is zeroed and -EIO is returned.
 *
 * Returns 0 on success, -EIO on any failure.
 */
int zstd_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
	const u32 sectorsize = fs_info->sectorsize;
	zstd_dstream *stream;
	size_t ret2;
	int ret = 0;
	unsigned long to_copy = 0;

	stream = zstd_init_dstream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression init failed, root %llu inode %llu offset %llu",
			  btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = sectorsize;

	/*
	 * Since both input and output buffers should not exceed one sector,
	 * one call should end the decompression.
	 *
	 * The result is a size_t that is either a zstd error code or a hint
	 * about remaining input; keep it apart from the errno style return
	 * value so neither of them leaks to the caller.
	 */
	ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
	if (unlikely(zstd_is_error(ret2))) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
			  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
			  btrfs_ino(inode), folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}
	to_copy = workspace->out_buf.pos;
	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
	/*
	 * The workspace goes back to the pool; do not leave the caller's
	 * buffer address behind where a later user could mistake it for a
	 * mapping of its own.
	 */
	workspace->in_buf.src = NULL;

	/* Error or early end. */
	if (unlikely(to_copy < destlen)) {
		ret = -EIO;
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	}
	return ret;
}
716 
717 const struct btrfs_compress_op btrfs_zstd_compress = {
718 	/* ZSTD uses own workspace manager */
719 	.workspace_manager = NULL,
720 	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
721 	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
722 };
723