xref: /linux/fs/btrfs/zstd.c (revision a3a02a52bcfcbcc4a637d4b68bf1bc391c9fad02)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  */
7 
8 #include <linux/bio.h>
9 #include <linux/bitmap.h>
10 #include <linux/err.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/mm.h>
14 #include <linux/sched/mm.h>
15 #include <linux/pagemap.h>
16 #include <linux/refcount.h>
17 #include <linux/sched.h>
18 #include <linux/slab.h>
19 #include <linux/zstd.h>
20 #include "misc.h"
21 #include "fs.h"
22 #include "btrfs_inode.h"
23 #include "compression.h"
24 #include "super.h"
25 
/* Upper limit applied to the zstd window log chosen for a level. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input, in bytes, that matches the maximum window log. */
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
/* Level advertised as the default in btrfs_zstd_compress. */
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
/* Highest level supported; also the number of per-level idle lists. */
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
32 
33 static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
34 						 size_t src_len)
35 {
36 	zstd_parameters params = zstd_get_params(level, src_len);
37 
38 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
39 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
40 	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
41 	return params;
42 }
43 
44 struct workspace {
45 	void *mem;
46 	size_t size;
47 	char *buf;
48 	unsigned int level;
49 	unsigned int req_level;
50 	unsigned long last_used; /* jiffies */
51 	struct list_head list;
52 	struct list_head lru_list;
53 	zstd_in_buffer in_buf;
54 	zstd_out_buffer out_buf;
55 };
56 
57 /*
58  * Zstd Workspace Management
59  *
60  * Zstd workspaces have different memory requirements depending on the level.
61  * The zstd workspaces are managed by having individual lists for each level
62  * and a global lru.  Forward progress is maintained by protecting a max level
63  * workspace.
64  *
65  * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces and scanning up.  This lets us recycle higher level
67  * workspaces because of the monotonic memory guarantee.  A workspace's
68  * last_used is only updated if it is being used by the corresponding memory
69  * level.  Putting a workspace involves adding it back to the appropriate places
70  * and adding it back to the lru if necessary.
71  *
72  * A timer is used to reclaim workspaces if they have not been used for
73  * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
74  * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
75  */
76 
77 struct zstd_workspace_manager {
78 	const struct btrfs_compress_op *ops;
79 	spinlock_t lock;
80 	struct list_head lru_list;
81 	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
82 	unsigned long active_map;
83 	wait_queue_head_t wait;
84 	struct timer_list timer;
85 };
86 
87 static struct zstd_workspace_manager wsm;
88 
89 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
90 
91 static inline struct workspace *list_to_workspace(struct list_head *list)
92 {
93 	return container_of(list, struct workspace, list);
94 }
95 
/*
 * Forward declarations: both functions are defined further down but are
 * already called by the reclaim timer and the manager init code.
 */
void zstd_free_workspace(struct list_head *ws);
struct list_head *zstd_alloc_workspace(unsigned int level);
98 
99 /*
100  * Timer callback to free unused workspaces.
101  *
102  * @t: timer
103  *
104  * This scans the lru_list and attempts to reclaim any workspace that hasn't
105  * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
106  *
107  * The context is softirq and does not need the _bh locking primitives.
108  */
109 static void zstd_reclaim_timer_fn(struct timer_list *timer)
110 {
111 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
112 	struct list_head *pos, *next;
113 
114 	spin_lock(&wsm.lock);
115 
116 	if (list_empty(&wsm.lru_list)) {
117 		spin_unlock(&wsm.lock);
118 		return;
119 	}
120 
121 	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
122 		struct workspace *victim = container_of(pos, struct workspace,
123 							lru_list);
124 		unsigned int level;
125 
126 		if (time_after(victim->last_used, reclaim_threshold))
127 			break;
128 
129 		/* workspace is in use */
130 		if (victim->req_level)
131 			continue;
132 
133 		level = victim->level;
134 		list_del(&victim->lru_list);
135 		list_del(&victim->list);
136 		zstd_free_workspace(&victim->list);
137 
138 		if (list_empty(&wsm.idle_ws[level - 1]))
139 			clear_bit(level - 1, &wsm.active_map);
140 
141 	}
142 
143 	if (!list_empty(&wsm.lru_list))
144 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
145 
146 	spin_unlock(&wsm.lock);
147 }
148 
149 /*
150  * Calculate monotonic memory bounds.
151  *
152  * It is possible based on the level configurations that a higher level
153  * workspace uses less memory than a lower level workspace.  In order to reuse
154  * workspaces, this must be made a monotonic relationship.  This precomputes
155  * the required memory for each level and enforces the monotonicity between
156  * level and memory required.
157  */
158 static void zstd_calc_ws_mem_sizes(void)
159 {
160 	size_t max_size = 0;
161 	unsigned int level;
162 
163 	for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
164 		zstd_parameters params =
165 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
166 		size_t level_size =
167 			max_t(size_t,
168 			      zstd_cstream_workspace_bound(&params.cParams),
169 			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
170 
171 		max_size = max_t(size_t, max_size, level_size);
172 		zstd_ws_mem_sizes[level - 1] = max_size;
173 	}
174 }
175 
176 void zstd_init_workspace_manager(void)
177 {
178 	struct list_head *ws;
179 	int i;
180 
181 	zstd_calc_ws_mem_sizes();
182 
183 	wsm.ops = &btrfs_zstd_compress;
184 	spin_lock_init(&wsm.lock);
185 	init_waitqueue_head(&wsm.wait);
186 	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
187 
188 	INIT_LIST_HEAD(&wsm.lru_list);
189 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
190 		INIT_LIST_HEAD(&wsm.idle_ws[i]);
191 
192 	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
193 	if (IS_ERR(ws)) {
194 		pr_warn(
195 		"BTRFS: cannot preallocate zstd compression workspace\n");
196 	} else {
197 		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
198 		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
199 	}
200 }
201 
202 void zstd_cleanup_workspace_manager(void)
203 {
204 	struct workspace *workspace;
205 	int i;
206 
207 	spin_lock_bh(&wsm.lock);
208 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
209 		while (!list_empty(&wsm.idle_ws[i])) {
210 			workspace = container_of(wsm.idle_ws[i].next,
211 						 struct workspace, list);
212 			list_del(&workspace->list);
213 			list_del(&workspace->lru_list);
214 			zstd_free_workspace(&workspace->list);
215 		}
216 	}
217 	spin_unlock_bh(&wsm.lock);
218 
219 	del_timer_sync(&wsm.timer);
220 }
221 
222 /*
223  * Find workspace for given level.
224  *
225  * @level: compression level
226  *
227  * This iterates over the set bits in the active_map beginning at the requested
228  * compression level.  This lets us utilize already allocated workspaces before
229  * allocating a new one.  If the workspace is of a larger size, it is used, but
230  * the place in the lru_list and last_used times are not updated.  This is to
231  * offer the opportunity to reclaim the workspace in favor of allocating an
232  * appropriately sized one in the future.
233  */
234 static struct list_head *zstd_find_workspace(unsigned int level)
235 {
236 	struct list_head *ws;
237 	struct workspace *workspace;
238 	int i = level - 1;
239 
240 	spin_lock_bh(&wsm.lock);
241 	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
242 		if (!list_empty(&wsm.idle_ws[i])) {
243 			ws = wsm.idle_ws[i].next;
244 			workspace = list_to_workspace(ws);
245 			list_del_init(ws);
246 			/* keep its place if it's a lower level using this */
247 			workspace->req_level = level;
248 			if (level == workspace->level)
249 				list_del(&workspace->lru_list);
250 			if (list_empty(&wsm.idle_ws[i]))
251 				clear_bit(i, &wsm.active_map);
252 			spin_unlock_bh(&wsm.lock);
253 			return ws;
254 		}
255 	}
256 	spin_unlock_bh(&wsm.lock);
257 
258 	return NULL;
259 }
260 
261 /*
262  * Zstd get_workspace for level.
263  *
264  * @level: compression level
265  *
266  * If @level is 0, then any compression level can be used.  Therefore, we begin
267  * scanning from 1.  We first scan through possible workspaces and then after
268  * attempt to allocate a new workspace.  If we fail to allocate one due to
269  * memory pressure, go to sleep waiting for the max level workspace to free up.
270  */
271 struct list_head *zstd_get_workspace(unsigned int level)
272 {
273 	struct list_head *ws;
274 	unsigned int nofs_flag;
275 
276 	/* level == 0 means we can use any workspace */
277 	if (!level)
278 		level = 1;
279 
280 again:
281 	ws = zstd_find_workspace(level);
282 	if (ws)
283 		return ws;
284 
285 	nofs_flag = memalloc_nofs_save();
286 	ws = zstd_alloc_workspace(level);
287 	memalloc_nofs_restore(nofs_flag);
288 
289 	if (IS_ERR(ws)) {
290 		DEFINE_WAIT(wait);
291 
292 		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
293 		schedule();
294 		finish_wait(&wsm.wait, &wait);
295 
296 		goto again;
297 	}
298 
299 	return ws;
300 }
301 
302 /*
303  * Zstd put_workspace.
304  *
305  * @ws: list_head for the workspace
306  *
307  * When putting back a workspace, we only need to update the LRU if we are of
308  * the requested compression level.  Here is where we continue to protect the
309  * max level workspace or update last_used accordingly.  If the reclaim timer
310  * isn't set, it is also set here.  Only the max level workspace tries and wakes
311  * up waiting workspaces.
312  */
313 void zstd_put_workspace(struct list_head *ws)
314 {
315 	struct workspace *workspace = list_to_workspace(ws);
316 
317 	spin_lock_bh(&wsm.lock);
318 
319 	/* A node is only taken off the lru if we are the corresponding level */
320 	if (workspace->req_level == workspace->level) {
321 		/* Hide a max level workspace from reclaim */
322 		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
323 			INIT_LIST_HEAD(&workspace->lru_list);
324 		} else {
325 			workspace->last_used = jiffies;
326 			list_add(&workspace->lru_list, &wsm.lru_list);
327 			if (!timer_pending(&wsm.timer))
328 				mod_timer(&wsm.timer,
329 					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
330 		}
331 	}
332 
333 	set_bit(workspace->level - 1, &wsm.active_map);
334 	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
335 	workspace->req_level = 0;
336 
337 	spin_unlock_bh(&wsm.lock);
338 
339 	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
340 		cond_wake_up(&wsm.wait);
341 }
342 
343 void zstd_free_workspace(struct list_head *ws)
344 {
345 	struct workspace *workspace = list_entry(ws, struct workspace, list);
346 
347 	kvfree(workspace->mem);
348 	kfree(workspace->buf);
349 	kfree(workspace);
350 }
351 
352 struct list_head *zstd_alloc_workspace(unsigned int level)
353 {
354 	struct workspace *workspace;
355 
356 	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
357 	if (!workspace)
358 		return ERR_PTR(-ENOMEM);
359 
360 	workspace->size = zstd_ws_mem_sizes[level - 1];
361 	workspace->level = level;
362 	workspace->req_level = level;
363 	workspace->last_used = jiffies;
364 	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
365 	workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
366 	if (!workspace->mem || !workspace->buf)
367 		goto fail;
368 
369 	INIT_LIST_HEAD(&workspace->list);
370 	INIT_LIST_HEAD(&workspace->lru_list);
371 
372 	return &workspace->list;
373 fail:
374 	zstd_free_workspace(&workspace->list);
375 	return ERR_PTR(-ENOMEM);
376 }
377 
378 int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
379 			 u64 start, struct folio **folios, unsigned long *out_folios,
380 			 unsigned long *total_in, unsigned long *total_out)
381 {
382 	struct workspace *workspace = list_entry(ws, struct workspace, list);
383 	zstd_cstream *stream;
384 	int ret = 0;
385 	int nr_folios = 0;
386 	struct folio *in_folio = NULL;  /* The current folio to read. */
387 	struct folio *out_folio = NULL; /* The current folio to write to. */
388 	unsigned long tot_in = 0;
389 	unsigned long tot_out = 0;
390 	unsigned long len = *total_out;
391 	const unsigned long nr_dest_folios = *out_folios;
392 	unsigned long max_out = nr_dest_folios * PAGE_SIZE;
393 	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
394 							   len);
395 
396 	*out_folios = 0;
397 	*total_out = 0;
398 	*total_in = 0;
399 
400 	/* Initialize the stream */
401 	stream = zstd_init_cstream(&params, len, workspace->mem,
402 			workspace->size);
403 	if (unlikely(!stream)) {
404 		struct btrfs_inode *inode = BTRFS_I(mapping->host);
405 
406 		btrfs_err(inode->root->fs_info,
407 	"zstd compression init level %d failed, root %llu inode %llu offset %llu",
408 			  workspace->req_level, btrfs_root_id(inode->root),
409 			  btrfs_ino(inode), start);
410 		ret = -EIO;
411 		goto out;
412 	}
413 
414 	/* map in the first page of input data */
415 	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
416 	if (ret < 0)
417 		goto out;
418 	workspace->in_buf.src = kmap_local_folio(in_folio, 0);
419 	workspace->in_buf.pos = 0;
420 	workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
421 
422 	/* Allocate and map in the output buffer */
423 	out_folio = btrfs_alloc_compr_folio();
424 	if (out_folio == NULL) {
425 		ret = -ENOMEM;
426 		goto out;
427 	}
428 	folios[nr_folios++] = out_folio;
429 	workspace->out_buf.dst = folio_address(out_folio);
430 	workspace->out_buf.pos = 0;
431 	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
432 
433 	while (1) {
434 		size_t ret2;
435 
436 		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
437 				&workspace->in_buf);
438 		if (unlikely(zstd_is_error(ret2))) {
439 			struct btrfs_inode *inode = BTRFS_I(mapping->host);
440 
441 			btrfs_warn(inode->root->fs_info,
442 "zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
443 				   workspace->req_level, zstd_get_error_code(ret2),
444 				   btrfs_root_id(inode->root), btrfs_ino(inode),
445 				   start);
446 			ret = -EIO;
447 			goto out;
448 		}
449 
450 		/* Check to see if we are making it bigger */
451 		if (tot_in + workspace->in_buf.pos > 8192 &&
452 				tot_in + workspace->in_buf.pos <
453 				tot_out + workspace->out_buf.pos) {
454 			ret = -E2BIG;
455 			goto out;
456 		}
457 
458 		/* We've reached the end of our output range */
459 		if (workspace->out_buf.pos >= max_out) {
460 			tot_out += workspace->out_buf.pos;
461 			ret = -E2BIG;
462 			goto out;
463 		}
464 
465 		/* Check if we need more output space */
466 		if (workspace->out_buf.pos == workspace->out_buf.size) {
467 			tot_out += PAGE_SIZE;
468 			max_out -= PAGE_SIZE;
469 			if (nr_folios == nr_dest_folios) {
470 				ret = -E2BIG;
471 				goto out;
472 			}
473 			out_folio = btrfs_alloc_compr_folio();
474 			if (out_folio == NULL) {
475 				ret = -ENOMEM;
476 				goto out;
477 			}
478 			folios[nr_folios++] = out_folio;
479 			workspace->out_buf.dst = folio_address(out_folio);
480 			workspace->out_buf.pos = 0;
481 			workspace->out_buf.size = min_t(size_t, max_out,
482 							PAGE_SIZE);
483 		}
484 
485 		/* We've reached the end of the input */
486 		if (workspace->in_buf.pos >= len) {
487 			tot_in += workspace->in_buf.pos;
488 			break;
489 		}
490 
491 		/* Check if we need more input */
492 		if (workspace->in_buf.pos == workspace->in_buf.size) {
493 			tot_in += PAGE_SIZE;
494 			kunmap_local(workspace->in_buf.src);
495 			workspace->in_buf.src = NULL;
496 			folio_put(in_folio);
497 			start += PAGE_SIZE;
498 			len -= PAGE_SIZE;
499 			ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
500 			if (ret < 0)
501 				goto out;
502 			workspace->in_buf.src = kmap_local_folio(in_folio, 0);
503 			workspace->in_buf.pos = 0;
504 			workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
505 		}
506 	}
507 	while (1) {
508 		size_t ret2;
509 
510 		ret2 = zstd_end_stream(stream, &workspace->out_buf);
511 		if (unlikely(zstd_is_error(ret2))) {
512 			struct btrfs_inode *inode = BTRFS_I(mapping->host);
513 
514 			btrfs_err(inode->root->fs_info,
515 "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
516 				  workspace->req_level, zstd_get_error_code(ret2),
517 				  btrfs_root_id(inode->root), btrfs_ino(inode),
518 				  start);
519 			ret = -EIO;
520 			goto out;
521 		}
522 		if (ret2 == 0) {
523 			tot_out += workspace->out_buf.pos;
524 			break;
525 		}
526 		if (workspace->out_buf.pos >= max_out) {
527 			tot_out += workspace->out_buf.pos;
528 			ret = -E2BIG;
529 			goto out;
530 		}
531 
532 		tot_out += PAGE_SIZE;
533 		max_out -= PAGE_SIZE;
534 		if (nr_folios == nr_dest_folios) {
535 			ret = -E2BIG;
536 			goto out;
537 		}
538 		out_folio = btrfs_alloc_compr_folio();
539 		if (out_folio == NULL) {
540 			ret = -ENOMEM;
541 			goto out;
542 		}
543 		folios[nr_folios++] = out_folio;
544 		workspace->out_buf.dst = folio_address(out_folio);
545 		workspace->out_buf.pos = 0;
546 		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
547 	}
548 
549 	if (tot_out >= tot_in) {
550 		ret = -E2BIG;
551 		goto out;
552 	}
553 
554 	ret = 0;
555 	*total_in = tot_in;
556 	*total_out = tot_out;
557 out:
558 	*out_folios = nr_folios;
559 	if (workspace->in_buf.src) {
560 		kunmap_local(workspace->in_buf.src);
561 		folio_put(in_folio);
562 	}
563 	return ret;
564 }
565 
566 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
567 {
568 	struct workspace *workspace = list_entry(ws, struct workspace, list);
569 	struct folio **folios_in = cb->compressed_folios;
570 	size_t srclen = cb->compressed_len;
571 	zstd_dstream *stream;
572 	int ret = 0;
573 	unsigned long folio_in_index = 0;
574 	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
575 	unsigned long buf_start;
576 	unsigned long total_out = 0;
577 
578 	stream = zstd_init_dstream(
579 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
580 	if (unlikely(!stream)) {
581 		struct btrfs_inode *inode = cb->bbio.inode;
582 
583 		btrfs_err(inode->root->fs_info,
584 		"zstd decompression init failed, root %llu inode %llu offset %llu",
585 			  btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
586 		ret = -EIO;
587 		goto done;
588 	}
589 
590 	workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
591 	workspace->in_buf.pos = 0;
592 	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
593 
594 	workspace->out_buf.dst = workspace->buf;
595 	workspace->out_buf.pos = 0;
596 	workspace->out_buf.size = PAGE_SIZE;
597 
598 	while (1) {
599 		size_t ret2;
600 
601 		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
602 				&workspace->in_buf);
603 		if (unlikely(zstd_is_error(ret2))) {
604 			struct btrfs_inode *inode = cb->bbio.inode;
605 
606 			btrfs_err(inode->root->fs_info,
607 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
608 				  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
609 				  btrfs_ino(inode), cb->start);
610 			ret = -EIO;
611 			goto done;
612 		}
613 		buf_start = total_out;
614 		total_out += workspace->out_buf.pos;
615 		workspace->out_buf.pos = 0;
616 
617 		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
618 				total_out - buf_start, cb, buf_start);
619 		if (ret == 0)
620 			break;
621 
622 		if (workspace->in_buf.pos >= srclen)
623 			break;
624 
625 		/* Check if we've hit the end of a frame */
626 		if (ret2 == 0)
627 			break;
628 
629 		if (workspace->in_buf.pos == workspace->in_buf.size) {
630 			kunmap_local(workspace->in_buf.src);
631 			folio_in_index++;
632 			if (folio_in_index >= total_folios_in) {
633 				workspace->in_buf.src = NULL;
634 				ret = -EIO;
635 				goto done;
636 			}
637 			srclen -= PAGE_SIZE;
638 			workspace->in_buf.src =
639 				kmap_local_folio(folios_in[folio_in_index], 0);
640 			workspace->in_buf.pos = 0;
641 			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
642 		}
643 	}
644 	ret = 0;
645 done:
646 	if (workspace->in_buf.src)
647 		kunmap_local(workspace->in_buf.src);
648 	return ret;
649 }
650 
651 int zstd_decompress(struct list_head *ws, const u8 *data_in,
652 		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
653 		size_t destlen)
654 {
655 	struct workspace *workspace = list_entry(ws, struct workspace, list);
656 	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
657 	const u32 sectorsize = fs_info->sectorsize;
658 	zstd_dstream *stream;
659 	int ret = 0;
660 	unsigned long to_copy = 0;
661 
662 	stream = zstd_init_dstream(
663 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
664 	if (unlikely(!stream)) {
665 		struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
666 
667 		btrfs_err(inode->root->fs_info,
668 		"zstd decompression init failed, root %llu inode %llu offset %llu",
669 			  btrfs_root_id(inode->root), btrfs_ino(inode),
670 			  page_offset(dest_page));
671 		ret = -EIO;
672 		goto finish;
673 	}
674 
675 	workspace->in_buf.src = data_in;
676 	workspace->in_buf.pos = 0;
677 	workspace->in_buf.size = srclen;
678 
679 	workspace->out_buf.dst = workspace->buf;
680 	workspace->out_buf.pos = 0;
681 	workspace->out_buf.size = sectorsize;
682 
683 	/*
684 	 * Since both input and output buffers should not exceed one sector,
685 	 * one call should end the decompression.
686 	 */
687 	ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
688 	if (unlikely(zstd_is_error(ret))) {
689 		struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
690 
691 		btrfs_err(inode->root->fs_info,
692 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
693 			  zstd_get_error_code(ret), btrfs_root_id(inode->root),
694 			  btrfs_ino(inode), page_offset(dest_page));
695 		goto finish;
696 	}
697 	to_copy = workspace->out_buf.pos;
698 	memcpy_to_page(dest_page, dest_pgoff, workspace->out_buf.dst, to_copy);
699 finish:
700 	/* Error or early end. */
701 	if (unlikely(to_copy < destlen)) {
702 		ret = -EIO;
703 		memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
704 	}
705 	return ret;
706 }
707 
708 const struct btrfs_compress_op btrfs_zstd_compress = {
709 	/* ZSTD uses own workspace manager */
710 	.workspace_manager = NULL,
711 	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
712 	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
713 };
714