xref: /freebsd/sys/contrib/openzfs/module/zstd/zfs_zstd.c (revision e3aa18ad71782a73d3dd9dd3d526bbd2b607ca16)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * Redistribution and use in source and binary forms, with or without
5eda14cbcSMatt Macy  * modification, are permitted provided that the following conditions are met:
6eda14cbcSMatt Macy  *
7eda14cbcSMatt Macy  * 1. Redistributions of source code must retain the above copyright notice,
8eda14cbcSMatt Macy  * this list of conditions and the following disclaimer.
9eda14cbcSMatt Macy  *
10eda14cbcSMatt Macy  * 2. Redistributions in binary form must reproduce the above copyright notice,
11eda14cbcSMatt Macy  * this list of conditions and the following disclaimer in the documentation
12eda14cbcSMatt Macy  * and/or other materials provided with the distribution.
13eda14cbcSMatt Macy  *
14eda14cbcSMatt Macy  * 3. Neither the name of the copyright holder nor the names of its
15eda14cbcSMatt Macy  * contributors may be used to endorse or promote products derived from this
16eda14cbcSMatt Macy  * software without specific prior written permission.
17eda14cbcSMatt Macy  *
18eda14cbcSMatt Macy  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19eda14cbcSMatt Macy  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20eda14cbcSMatt Macy  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21eda14cbcSMatt Macy  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22eda14cbcSMatt Macy  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23eda14cbcSMatt Macy  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24eda14cbcSMatt Macy  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25eda14cbcSMatt Macy  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26eda14cbcSMatt Macy  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27eda14cbcSMatt Macy  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28eda14cbcSMatt Macy  * POSSIBILITY OF SUCH DAMAGE.
29eda14cbcSMatt Macy  */
30eda14cbcSMatt Macy 
31eda14cbcSMatt Macy /*
32eda14cbcSMatt Macy  * Copyright (c) 2016-2018, Klara Inc.
33eda14cbcSMatt Macy  * Copyright (c) 2016-2018, Allan Jude
34eda14cbcSMatt Macy  * Copyright (c) 2018-2020, Sebastian Gottschall
35eda14cbcSMatt Macy  * Copyright (c) 2019-2020, Michael Niewöhner
36eda14cbcSMatt Macy  * Copyright (c) 2020, The FreeBSD Foundation [1]
37eda14cbcSMatt Macy  *
38eda14cbcSMatt Macy  * [1] Portions of this software were developed by Allan Jude
39eda14cbcSMatt Macy  *     under sponsorship from the FreeBSD Foundation.
40eda14cbcSMatt Macy  */
41eda14cbcSMatt Macy 
42eda14cbcSMatt Macy #include <sys/param.h>
43eda14cbcSMatt Macy #include <sys/sysmacros.h>
44eda14cbcSMatt Macy #include <sys/zfs_context.h>
45eda14cbcSMatt Macy #include <sys/zio_compress.h>
46eda14cbcSMatt Macy #include <sys/spa.h>
47eda14cbcSMatt Macy #include <sys/zstd/zstd.h>
48eda14cbcSMatt Macy 
49eda14cbcSMatt Macy #define	ZSTD_STATIC_LINKING_ONLY
50eda14cbcSMatt Macy #include "lib/zstd.h"
51c03c5b1cSMartin Matuska #include "lib/common/zstd_errors.h"
52eda14cbcSMatt Macy 
53*e3aa18adSMartin Matuska #ifndef IN_LIBSA
54*e3aa18adSMartin Matuska static int zstd_earlyabort_pass = 1;
55*e3aa18adSMartin Matuska static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
56*e3aa18adSMartin Matuska static unsigned int zstd_abort_size = (128 * 1024);
57*e3aa18adSMartin Matuska #endif
58*e3aa18adSMartin Matuska 
59716fd348SMartin Matuska static kstat_t *zstd_ksp = NULL;
60eda14cbcSMatt Macy 
61eda14cbcSMatt Macy typedef struct zstd_stats {
62eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_alloc_fail;
63eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_alloc_fallback;
64eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_com_alloc_fail;
65eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_dec_alloc_fail;
66eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_com_inval;
67eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_dec_inval;
68eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_dec_header_inval;
69eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_com_fail;
70eda14cbcSMatt Macy 	kstat_named_t	zstd_stat_dec_fail;
71*e3aa18adSMartin Matuska 	/*
72*e3aa18adSMartin Matuska 	 * LZ4 first-pass early abort verdict
73*e3aa18adSMartin Matuska 	 */
74*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_lz4pass_allowed;
75*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_lz4pass_rejected;
76*e3aa18adSMartin Matuska 	/*
77*e3aa18adSMartin Matuska 	 * zstd-1 second-pass early abort verdict
78*e3aa18adSMartin Matuska 	 */
79*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_zstdpass_allowed;
80*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_zstdpass_rejected;
81*e3aa18adSMartin Matuska 	/*
82*e3aa18adSMartin Matuska 	 * We excluded this from early abort for some reason
83*e3aa18adSMartin Matuska 	 */
84*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_passignored;
85*e3aa18adSMartin Matuska 	kstat_named_t	zstd_stat_passignored_size;
864a58b4abSMateusz Guzik 	kstat_named_t	zstd_stat_buffers;
874a58b4abSMateusz Guzik 	kstat_named_t	zstd_stat_size;
88eda14cbcSMatt Macy } zstd_stats_t;
89eda14cbcSMatt Macy 
90eda14cbcSMatt Macy static zstd_stats_t zstd_stats = {
91eda14cbcSMatt Macy 	{ "alloc_fail",			KSTAT_DATA_UINT64 },
92eda14cbcSMatt Macy 	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
93eda14cbcSMatt Macy 	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
94eda14cbcSMatt Macy 	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
95eda14cbcSMatt Macy 	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
96eda14cbcSMatt Macy 	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
97eda14cbcSMatt Macy 	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
98eda14cbcSMatt Macy 	{ "compress_failed",		KSTAT_DATA_UINT64 },
99eda14cbcSMatt Macy 	{ "decompress_failed",		KSTAT_DATA_UINT64 },
100*e3aa18adSMartin Matuska 	{ "lz4pass_allowed",		KSTAT_DATA_UINT64 },
101*e3aa18adSMartin Matuska 	{ "lz4pass_rejected",		KSTAT_DATA_UINT64 },
102*e3aa18adSMartin Matuska 	{ "zstdpass_allowed",		KSTAT_DATA_UINT64 },
103*e3aa18adSMartin Matuska 	{ "zstdpass_rejected",		KSTAT_DATA_UINT64 },
104*e3aa18adSMartin Matuska 	{ "passignored",		KSTAT_DATA_UINT64 },
105*e3aa18adSMartin Matuska 	{ "passignored_size",		KSTAT_DATA_UINT64 },
1064a58b4abSMateusz Guzik 	{ "buffers",			KSTAT_DATA_UINT64 },
1074a58b4abSMateusz Guzik 	{ "size",			KSTAT_DATA_UINT64 },
108eda14cbcSMatt Macy };
109eda14cbcSMatt Macy 
#ifdef _KERNEL
/*
 * kstat update callback.
 *
 * A write (rw == KSTAT_WRITE) addressed to our own kstat resets every event
 * counter. The "buffers" and "size" counters are left alone. Any other call
 * changes nothing. Always returns 0.
 */
static int
kstat_zstd_update(kstat_t *ksp, int rw)
{
	ASSERT(ksp != NULL);

	/* Nothing to do unless this is a write aimed at the zstd kstat */
	if (rw != KSTAT_WRITE || ksp != zstd_ksp) {
		return (0);
	}

	ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
	ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_com_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
	ZSTDSTAT_ZERO(zstd_stat_com_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_fail);
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_passignored);
	ZSTDSTAT_ZERO(zstd_stat_passignored_size);

	return (0);
}
#endif
137*e3aa18adSMartin Matuska 
/* Allocator kinds recorded in the kmem_type member of struct zstd_kmem */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,
	/* Allocated directly with vmem_alloc, not owned by a pool slot */
	ZSTD_KMEM_DEFAULT = 1,
	/* Backed by a slot of one of the memory pools */
	ZSTD_KMEM_POOL = 2,
	/* Reserved fallback memory for decompression only */
	ZSTD_KMEM_DCTX = 3,
	/* Number of kinds above; not an allocator kind itself */
	ZSTD_KMEM_COUNT = 4,
};
149eda14cbcSMatt Macy 
150eda14cbcSMatt Macy /* Structure for pooled memory objects */
151eda14cbcSMatt Macy struct zstd_pool {
152eda14cbcSMatt Macy 	void *mem;
153eda14cbcSMatt Macy 	size_t size;
154eda14cbcSMatt Macy 	kmutex_t barrier;
155eda14cbcSMatt Macy 	hrtime_t timeout;
156eda14cbcSMatt Macy };
157eda14cbcSMatt Macy 
158eda14cbcSMatt Macy /* Global structure for handling memory allocations */
159eda14cbcSMatt Macy struct zstd_kmem {
160eda14cbcSMatt Macy 	enum zstd_kmem_type kmem_type;
161eda14cbcSMatt Macy 	size_t kmem_size;
162eda14cbcSMatt Macy 	struct zstd_pool *pool;
163eda14cbcSMatt Macy };
164eda14cbcSMatt Macy 
165eda14cbcSMatt Macy /* Fallback memory structure used for decompression only if memory runs out */
166eda14cbcSMatt Macy struct zstd_fallback_mem {
167eda14cbcSMatt Macy 	size_t mem_size;
168eda14cbcSMatt Macy 	void *mem;
169eda14cbcSMatt Macy 	kmutex_t barrier;
170eda14cbcSMatt Macy };
171eda14cbcSMatt Macy 
172eda14cbcSMatt Macy struct zstd_levelmap {
173eda14cbcSMatt Macy 	int16_t zstd_level;
174eda14cbcSMatt Macy 	enum zio_zstd_levels level;
175eda14cbcSMatt Macy };
176eda14cbcSMatt Macy 
177eda14cbcSMatt Macy /*
178eda14cbcSMatt Macy  * ZSTD memory handlers
179eda14cbcSMatt Macy  *
180eda14cbcSMatt Macy  * For decompression we use a different handler which also provides fallback
181eda14cbcSMatt Macy  * memory allocation in case memory runs out.
182eda14cbcSMatt Macy  *
183eda14cbcSMatt Macy  * The ZSTD handlers were split up for the most simplified implementation.
184eda14cbcSMatt Macy  */
185eda14cbcSMatt Macy static void *zstd_alloc(void *opaque, size_t size);
186eda14cbcSMatt Macy static void *zstd_dctx_alloc(void *opaque, size_t size);
187eda14cbcSMatt Macy static void zstd_free(void *opaque, void *ptr);
188eda14cbcSMatt Macy 
189eda14cbcSMatt Macy /* Compression memory handler */
190eda14cbcSMatt Macy static const ZSTD_customMem zstd_malloc = {
191eda14cbcSMatt Macy 	zstd_alloc,
192eda14cbcSMatt Macy 	zstd_free,
193eda14cbcSMatt Macy 	NULL,
194eda14cbcSMatt Macy };
195eda14cbcSMatt Macy 
196eda14cbcSMatt Macy /* Decompression memory handler */
197eda14cbcSMatt Macy static const ZSTD_customMem zstd_dctx_malloc = {
198eda14cbcSMatt Macy 	zstd_dctx_alloc,
199eda14cbcSMatt Macy 	zstd_free,
200eda14cbcSMatt Macy 	NULL,
201eda14cbcSMatt Macy };
202eda14cbcSMatt Macy 
203eda14cbcSMatt Macy /* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
204eda14cbcSMatt Macy static struct zstd_levelmap zstd_levels[] = {
205eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
206eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
207eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
208eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
209eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
210eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
211eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
212eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
213eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
214eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
215eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
216eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
217eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
218eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
219eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
220eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
221eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
222eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
223eda14cbcSMatt Macy 	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
224eda14cbcSMatt Macy 	{-1, ZIO_ZSTD_LEVEL_FAST_1},
225eda14cbcSMatt Macy 	{-2, ZIO_ZSTD_LEVEL_FAST_2},
226eda14cbcSMatt Macy 	{-3, ZIO_ZSTD_LEVEL_FAST_3},
227eda14cbcSMatt Macy 	{-4, ZIO_ZSTD_LEVEL_FAST_4},
228eda14cbcSMatt Macy 	{-5, ZIO_ZSTD_LEVEL_FAST_5},
229eda14cbcSMatt Macy 	{-6, ZIO_ZSTD_LEVEL_FAST_6},
230eda14cbcSMatt Macy 	{-7, ZIO_ZSTD_LEVEL_FAST_7},
231eda14cbcSMatt Macy 	{-8, ZIO_ZSTD_LEVEL_FAST_8},
232eda14cbcSMatt Macy 	{-9, ZIO_ZSTD_LEVEL_FAST_9},
233eda14cbcSMatt Macy 	{-10, ZIO_ZSTD_LEVEL_FAST_10},
234eda14cbcSMatt Macy 	{-20, ZIO_ZSTD_LEVEL_FAST_20},
235eda14cbcSMatt Macy 	{-30, ZIO_ZSTD_LEVEL_FAST_30},
236eda14cbcSMatt Macy 	{-40, ZIO_ZSTD_LEVEL_FAST_40},
237eda14cbcSMatt Macy 	{-50, ZIO_ZSTD_LEVEL_FAST_50},
238eda14cbcSMatt Macy 	{-60, ZIO_ZSTD_LEVEL_FAST_60},
239eda14cbcSMatt Macy 	{-70, ZIO_ZSTD_LEVEL_FAST_70},
240eda14cbcSMatt Macy 	{-80, ZIO_ZSTD_LEVEL_FAST_80},
241eda14cbcSMatt Macy 	{-90, ZIO_ZSTD_LEVEL_FAST_90},
242eda14cbcSMatt Macy 	{-100, ZIO_ZSTD_LEVEL_FAST_100},
243eda14cbcSMatt Macy 	{-500, ZIO_ZSTD_LEVEL_FAST_500},
244eda14cbcSMatt Macy 	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
245eda14cbcSMatt Macy };
246eda14cbcSMatt Macy 
/*
 * This variable represents the maximum count of the pool based on the number
 * of CPUs plus some buffer. We default to cpu count * 4, see init_zstd.
 * Until then the pools are limited to 16 slots.
 */
static int pool_count = 16;

/* Number of slots in each memory pool */
#define	ZSTD_POOL_MAX		pool_count
/*
 * Seconds an unused pool buffer is kept before it may be released.
 * The expansion is parenthesized so that the macro behaves as a single
 * value in any expression (e.g. x / ZSTD_POOL_TIMEOUT).
 */
#define	ZSTD_POOL_TIMEOUT	(60 * 2)
255eda14cbcSMatt Macy 
256eda14cbcSMatt Macy static struct zstd_fallback_mem zstd_dctx_fallback;
257eda14cbcSMatt Macy static struct zstd_pool *zstd_mempool_cctx;
258eda14cbcSMatt Macy static struct zstd_pool *zstd_mempool_dctx;
259eda14cbcSMatt Macy 
/*
 * The zstd library code expects the two functions below to exist whenever
 * ADDRESS_SANITIZER is defined. ASAN provides them, but KASAN defines the
 * macro without providing them. Rather than changing the external code, we
 * supply empty stand-ins for kernel builds.
 */
#if defined(ZFS_ASAN_ENABLED)
#define	ADDRESS_SANITIZER 1
#endif
#if defined(_KERNEL) && defined(ADDRESS_SANITIZER)
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
void __asan_poison_memory_region(void const volatile *addr, size_t size);

/* Intentionally does nothing */
void
__asan_unpoison_memory_region(void const volatile *addr, size_t size)
{
}

/* Intentionally does nothing */
void
__asan_poison_memory_region(void const volatile *addr, size_t size)
{
}
#endif
2742617128aSMartin Matuska 
2757877fdebSMatt Macy 
2767877fdebSMatt Macy static void
2777877fdebSMatt Macy zstd_mempool_reap(struct zstd_pool *zstd_mempool)
2787877fdebSMatt Macy {
2797877fdebSMatt Macy 	struct zstd_pool *pool;
2807877fdebSMatt Macy 
2817877fdebSMatt Macy 	if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) {
2827877fdebSMatt Macy 		return;
2837877fdebSMatt Macy 	}
2847877fdebSMatt Macy 
2857877fdebSMatt Macy 	/* free obsolete slots */
2867877fdebSMatt Macy 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
2877877fdebSMatt Macy 		pool = &zstd_mempool[i];
2887877fdebSMatt Macy 		if (pool->mem && mutex_tryenter(&pool->barrier)) {
2897877fdebSMatt Macy 			/* Free memory if unused object older than 2 minutes */
2907877fdebSMatt Macy 			if (pool->mem && gethrestime_sec() > pool->timeout) {
2917877fdebSMatt Macy 				vmem_free(pool->mem, pool->size);
2927877fdebSMatt Macy 				ZSTDSTAT_SUB(zstd_stat_buffers, 1);
2937877fdebSMatt Macy 				ZSTDSTAT_SUB(zstd_stat_size, pool->size);
2947877fdebSMatt Macy 				pool->mem = NULL;
2957877fdebSMatt Macy 				pool->size = 0;
2967877fdebSMatt Macy 				pool->timeout = 0;
2977877fdebSMatt Macy 			}
2987877fdebSMatt Macy 			mutex_exit(&pool->barrier);
2997877fdebSMatt Macy 		}
3007877fdebSMatt Macy 	}
3017877fdebSMatt Macy }
3027877fdebSMatt Macy 
303eda14cbcSMatt Macy /*
304eda14cbcSMatt Macy  * Try to get a cached allocated buffer from memory pool or allocate a new one
305eda14cbcSMatt Macy  * if necessary. If a object is older than 2 minutes and does not fit the
306eda14cbcSMatt Macy  * requested size, it will be released and a new cached entry will be allocated.
307eda14cbcSMatt Macy  * If other pooled objects are detected without being used for 2 minutes, they
308eda14cbcSMatt Macy  * will be released, too.
309eda14cbcSMatt Macy  *
310eda14cbcSMatt Macy  * The concept is that high frequency memory allocations of bigger objects are
311eda14cbcSMatt Macy  * expensive. So if a lot of work is going on, allocations will be kept for a
312eda14cbcSMatt Macy  * while and can be reused in that time frame.
313eda14cbcSMatt Macy  *
314eda14cbcSMatt Macy  * The scheduled release will be updated every time a object is reused.
315eda14cbcSMatt Macy  */
3167877fdebSMatt Macy 
317eda14cbcSMatt Macy static void *
318eda14cbcSMatt Macy zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
319eda14cbcSMatt Macy {
320eda14cbcSMatt Macy 	struct zstd_pool *pool;
321eda14cbcSMatt Macy 	struct zstd_kmem *mem = NULL;
322eda14cbcSMatt Macy 
323eda14cbcSMatt Macy 	if (!zstd_mempool) {
324eda14cbcSMatt Macy 		return (NULL);
325eda14cbcSMatt Macy 	}
326eda14cbcSMatt Macy 
327eda14cbcSMatt Macy 	/* Seek for preallocated memory slot and free obsolete slots */
328eda14cbcSMatt Macy 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
329eda14cbcSMatt Macy 		pool = &zstd_mempool[i];
330eda14cbcSMatt Macy 		/*
33116038816SMartin Matuska 		 * This lock is simply a marker for a pool object being in use.
332eda14cbcSMatt Macy 		 * If it's already hold, it will be skipped.
333eda14cbcSMatt Macy 		 *
334eda14cbcSMatt Macy 		 * We need to create it before checking it to avoid race
335eda14cbcSMatt Macy 		 * conditions caused by running in a threaded context.
336eda14cbcSMatt Macy 		 *
337eda14cbcSMatt Macy 		 * The lock is later released by zstd_mempool_free.
338eda14cbcSMatt Macy 		 */
339eda14cbcSMatt Macy 		if (mutex_tryenter(&pool->barrier)) {
340eda14cbcSMatt Macy 			/*
341eda14cbcSMatt Macy 			 * Check if objects fits the size, if so we take it and
342eda14cbcSMatt Macy 			 * update the timestamp.
343eda14cbcSMatt Macy 			 */
3447877fdebSMatt Macy 			if (pool->mem && size <= pool->size) {
345eda14cbcSMatt Macy 				pool->timeout = gethrestime_sec() +
346eda14cbcSMatt Macy 				    ZSTD_POOL_TIMEOUT;
347eda14cbcSMatt Macy 				mem = pool->mem;
3487877fdebSMatt Macy 				return (mem);
349eda14cbcSMatt Macy 			}
350eda14cbcSMatt Macy 			mutex_exit(&pool->barrier);
351eda14cbcSMatt Macy 		}
352eda14cbcSMatt Macy 	}
353eda14cbcSMatt Macy 
354eda14cbcSMatt Macy 	/*
355eda14cbcSMatt Macy 	 * If no preallocated slot was found, try to fill in a new one.
356eda14cbcSMatt Macy 	 *
357eda14cbcSMatt Macy 	 * We run a similar algorithm twice here to avoid pool fragmentation.
358eda14cbcSMatt Macy 	 * The first one may generate holes in the list if objects get released.
359eda14cbcSMatt Macy 	 * We always make sure that these holes get filled instead of adding new
360eda14cbcSMatt Macy 	 * allocations constantly at the end.
361eda14cbcSMatt Macy 	 */
362eda14cbcSMatt Macy 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
363eda14cbcSMatt Macy 		pool = &zstd_mempool[i];
364eda14cbcSMatt Macy 		if (mutex_tryenter(&pool->barrier)) {
365eda14cbcSMatt Macy 			/* Object is free, try to allocate new one */
366eda14cbcSMatt Macy 			if (!pool->mem) {
367eda14cbcSMatt Macy 				mem = vmem_alloc(size, KM_SLEEP);
3684a58b4abSMateusz Guzik 				if (mem) {
3694a58b4abSMateusz Guzik 					ZSTDSTAT_ADD(zstd_stat_buffers, 1);
3704a58b4abSMateusz Guzik 					ZSTDSTAT_ADD(zstd_stat_size, size);
371eda14cbcSMatt Macy 					pool->mem = mem;
3724a58b4abSMateusz Guzik 					pool->size = size;
373eda14cbcSMatt Macy 					/* Keep track for later release */
374eda14cbcSMatt Macy 					mem->pool = pool;
375eda14cbcSMatt Macy 					mem->kmem_type = ZSTD_KMEM_POOL;
376eda14cbcSMatt Macy 					mem->kmem_size = size;
377eda14cbcSMatt Macy 				}
378eda14cbcSMatt Macy 			}
379eda14cbcSMatt Macy 
380eda14cbcSMatt Macy 			if (size <= pool->size) {
381eda14cbcSMatt Macy 				/* Update timestamp */
382eda14cbcSMatt Macy 				pool->timeout = gethrestime_sec() +
383eda14cbcSMatt Macy 				    ZSTD_POOL_TIMEOUT;
384eda14cbcSMatt Macy 
385eda14cbcSMatt Macy 				return (pool->mem);
386eda14cbcSMatt Macy 			}
387eda14cbcSMatt Macy 
388eda14cbcSMatt Macy 			mutex_exit(&pool->barrier);
389eda14cbcSMatt Macy 		}
390eda14cbcSMatt Macy 	}
391eda14cbcSMatt Macy 
392eda14cbcSMatt Macy 	/*
393eda14cbcSMatt Macy 	 * If the pool is full or the allocation failed, try lazy allocation
394eda14cbcSMatt Macy 	 * instead.
395eda14cbcSMatt Macy 	 */
396eda14cbcSMatt Macy 	if (!mem) {
397eda14cbcSMatt Macy 		mem = vmem_alloc(size, KM_NOSLEEP);
398eda14cbcSMatt Macy 		if (mem) {
399eda14cbcSMatt Macy 			mem->pool = NULL;
400eda14cbcSMatt Macy 			mem->kmem_type = ZSTD_KMEM_DEFAULT;
401eda14cbcSMatt Macy 			mem->kmem_size = size;
402eda14cbcSMatt Macy 		}
403eda14cbcSMatt Macy 	}
404eda14cbcSMatt Macy 
405eda14cbcSMatt Macy 	return (mem);
406eda14cbcSMatt Macy }
407eda14cbcSMatt Macy 
408eda14cbcSMatt Macy /* Mark object as released by releasing the barrier mutex */
409eda14cbcSMatt Macy static void
410eda14cbcSMatt Macy zstd_mempool_free(struct zstd_kmem *z)
411eda14cbcSMatt Macy {
412eda14cbcSMatt Macy 	mutex_exit(&z->pool->barrier);
413eda14cbcSMatt Macy }
414eda14cbcSMatt Macy 
415eda14cbcSMatt Macy /* Convert ZFS internal enum to ZSTD level */
416eda14cbcSMatt Macy static int
417eda14cbcSMatt Macy zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
418eda14cbcSMatt Macy {
419eda14cbcSMatt Macy 	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
420eda14cbcSMatt Macy 		*zstd_level = zstd_levels[level - 1].zstd_level;
421eda14cbcSMatt Macy 		return (0);
422eda14cbcSMatt Macy 	}
423eda14cbcSMatt Macy 	if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
424eda14cbcSMatt Macy 	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
425eda14cbcSMatt Macy 		*zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
426eda14cbcSMatt Macy 		    + ZIO_ZSTD_LEVEL_19].zstd_level;
427eda14cbcSMatt Macy 		return (0);
428eda14cbcSMatt Macy 	}
429eda14cbcSMatt Macy 
430eda14cbcSMatt Macy 	/* Invalid/unknown zfs compression enum - this should never happen. */
431eda14cbcSMatt Macy 	return (1);
432eda14cbcSMatt Macy }
433eda14cbcSMatt Macy 
434*e3aa18adSMartin Matuska #ifndef IN_LIBSA
435*e3aa18adSMartin Matuska size_t
436*e3aa18adSMartin Matuska zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
437*e3aa18adSMartin Matuska     int level)
438*e3aa18adSMartin Matuska {
439*e3aa18adSMartin Matuska 	int16_t zstd_level;
440*e3aa18adSMartin Matuska 	if (zstd_enum_to_level(level, &zstd_level)) {
441*e3aa18adSMartin Matuska 		ZSTDSTAT_BUMP(zstd_stat_com_inval);
442*e3aa18adSMartin Matuska 		return (s_len);
443*e3aa18adSMartin Matuska 	}
444*e3aa18adSMartin Matuska 	/*
445*e3aa18adSMartin Matuska 	 * A zstd early abort heuristic.
446*e3aa18adSMartin Matuska 	 *
447*e3aa18adSMartin Matuska 	 * - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently
448*e3aa18adSMartin Matuska 	 *   128k), don't try any of this, just go.
449*e3aa18adSMartin Matuska 	 *   (because experimentally that was a reasonable cutoff for a perf win
450*e3aa18adSMartin Matuska 	 *   with tiny ratio change)
451*e3aa18adSMartin Matuska 	 * - First, we try LZ4 compression, and if it doesn't early abort, we
452*e3aa18adSMartin Matuska 	 *   jump directly to whatever compression level we intended to try.
453*e3aa18adSMartin Matuska 	 * - Second, we try zstd-1 - if that errors out (usually, but not
454*e3aa18adSMartin Matuska 	 *   exclusively, if it would overflow), we give up early.
455*e3aa18adSMartin Matuska 	 *
456*e3aa18adSMartin Matuska 	 *   If it works, instead we go on and compress anyway.
457*e3aa18adSMartin Matuska 	 *
458*e3aa18adSMartin Matuska 	 * Why two passes? LZ4 alone gets you a lot of the way, but on highly
459*e3aa18adSMartin Matuska 	 * compressible data, it was losing up to 8.5% of the compressed
460*e3aa18adSMartin Matuska 	 * savings versus no early abort, and all the zstd-fast levels are
461*e3aa18adSMartin Matuska 	 * worse indications on their own than LZ4, and don't improve the LZ4
462*e3aa18adSMartin Matuska 	 * pass noticably if stacked like this.
463*e3aa18adSMartin Matuska 	 */
464*e3aa18adSMartin Matuska 	size_t actual_abort_size = zstd_abort_size;
465*e3aa18adSMartin Matuska 	if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
466*e3aa18adSMartin Matuska 	    s_len >= actual_abort_size) {
467*e3aa18adSMartin Matuska 		int pass_len = 1;
468*e3aa18adSMartin Matuska 		pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
469*e3aa18adSMartin Matuska 		if (pass_len < d_len) {
470*e3aa18adSMartin Matuska 			ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
471*e3aa18adSMartin Matuska 			goto keep_trying;
472*e3aa18adSMartin Matuska 		}
473*e3aa18adSMartin Matuska 		ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);
474*e3aa18adSMartin Matuska 
475*e3aa18adSMartin Matuska 		pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
476*e3aa18adSMartin Matuska 		    ZIO_ZSTD_LEVEL_1);
477*e3aa18adSMartin Matuska 		if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) {
478*e3aa18adSMartin Matuska 			ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
479*e3aa18adSMartin Matuska 			return (s_len);
480*e3aa18adSMartin Matuska 		}
481*e3aa18adSMartin Matuska 		ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
482*e3aa18adSMartin Matuska 	} else {
483*e3aa18adSMartin Matuska 		ZSTDSTAT_BUMP(zstd_stat_passignored);
484*e3aa18adSMartin Matuska 		if (s_len < actual_abort_size) {
485*e3aa18adSMartin Matuska 			ZSTDSTAT_BUMP(zstd_stat_passignored_size);
486*e3aa18adSMartin Matuska 		}
487*e3aa18adSMartin Matuska 	}
488*e3aa18adSMartin Matuska keep_trying:
489*e3aa18adSMartin Matuska 	return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
490*e3aa18adSMartin Matuska 
491*e3aa18adSMartin Matuska }
492*e3aa18adSMartin Matuska #endif
49321b492edSMartin Matuska 
494eda14cbcSMatt Macy /* Compress block using zstd */
495eda14cbcSMatt Macy size_t
496eda14cbcSMatt Macy zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
497eda14cbcSMatt Macy     int level)
498eda14cbcSMatt Macy {
499eda14cbcSMatt Macy 	size_t c_len;
500eda14cbcSMatt Macy 	int16_t zstd_level;
501eda14cbcSMatt Macy 	zfs_zstdhdr_t *hdr;
502eda14cbcSMatt Macy 	ZSTD_CCtx *cctx;
503eda14cbcSMatt Macy 
504eda14cbcSMatt Macy 	hdr = (zfs_zstdhdr_t *)d_start;
505eda14cbcSMatt Macy 
506eda14cbcSMatt Macy 	/* Skip compression if the specified level is invalid */
507eda14cbcSMatt Macy 	if (zstd_enum_to_level(level, &zstd_level)) {
508eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_com_inval);
509eda14cbcSMatt Macy 		return (s_len);
510eda14cbcSMatt Macy 	}
511eda14cbcSMatt Macy 
512eda14cbcSMatt Macy 	ASSERT3U(d_len, >=, sizeof (*hdr));
513eda14cbcSMatt Macy 	ASSERT3U(d_len, <=, s_len);
514eda14cbcSMatt Macy 	ASSERT3U(zstd_level, !=, 0);
515eda14cbcSMatt Macy 
516eda14cbcSMatt Macy 	cctx = ZSTD_createCCtx_advanced(zstd_malloc);
517eda14cbcSMatt Macy 
518eda14cbcSMatt Macy 	/*
519eda14cbcSMatt Macy 	 * Out of kernel memory, gently fall through - this will disable
520eda14cbcSMatt Macy 	 * compression in zio_compress_data
521eda14cbcSMatt Macy 	 */
522eda14cbcSMatt Macy 	if (!cctx) {
523eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
524eda14cbcSMatt Macy 		return (s_len);
525eda14cbcSMatt Macy 	}
526eda14cbcSMatt Macy 
527eda14cbcSMatt Macy 	/* Set the compression level */
528eda14cbcSMatt Macy 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);
529eda14cbcSMatt Macy 
530eda14cbcSMatt Macy 	/* Use the "magicless" zstd header which saves us 4 header bytes */
531eda14cbcSMatt Macy 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
532eda14cbcSMatt Macy 
533eda14cbcSMatt Macy 	/*
534eda14cbcSMatt Macy 	 * Disable redundant checksum calculation and content size storage since
535eda14cbcSMatt Macy 	 * this is already done by ZFS itself.
536eda14cbcSMatt Macy 	 */
537eda14cbcSMatt Macy 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
538eda14cbcSMatt Macy 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);
539eda14cbcSMatt Macy 
540eda14cbcSMatt Macy 	c_len = ZSTD_compress2(cctx,
541eda14cbcSMatt Macy 	    hdr->data,
542eda14cbcSMatt Macy 	    d_len - sizeof (*hdr),
543eda14cbcSMatt Macy 	    s_start, s_len);
544eda14cbcSMatt Macy 
545eda14cbcSMatt Macy 	ZSTD_freeCCtx(cctx);
546eda14cbcSMatt Macy 
547eda14cbcSMatt Macy 	/* Error in the compression routine, disable compression. */
548eda14cbcSMatt Macy 	if (ZSTD_isError(c_len)) {
549eda14cbcSMatt Macy 		/*
550eda14cbcSMatt Macy 		 * If we are aborting the compression because the saves are
551eda14cbcSMatt Macy 		 * too small, that is not a failure. Everything else is a
552eda14cbcSMatt Macy 		 * failure, so increment the compression failure counter.
553eda14cbcSMatt Macy 		 */
554*e3aa18adSMartin Matuska 		int err = ZSTD_getErrorCode(c_len);
555*e3aa18adSMartin Matuska 		if (err != ZSTD_error_dstSize_tooSmall) {
556eda14cbcSMatt Macy 			ZSTDSTAT_BUMP(zstd_stat_com_fail);
557*e3aa18adSMartin Matuska 			dprintf("Error: %s", ZSTD_getErrorString(err));
558eda14cbcSMatt Macy 		}
559eda14cbcSMatt Macy 		return (s_len);
560eda14cbcSMatt Macy 	}
561eda14cbcSMatt Macy 
562eda14cbcSMatt Macy 	/*
563eda14cbcSMatt Macy 	 * Encode the compressed buffer size at the start. We'll need this in
564eda14cbcSMatt Macy 	 * decompression to counter the effects of padding which might be added
565eda14cbcSMatt Macy 	 * to the compressed buffer and which, if unhandled, would confuse the
566eda14cbcSMatt Macy 	 * hell out of our decompression function.
567eda14cbcSMatt Macy 	 */
568eda14cbcSMatt Macy 	hdr->c_len = BE_32(c_len);
569eda14cbcSMatt Macy 
570eda14cbcSMatt Macy 	/*
571eda14cbcSMatt Macy 	 * Check version for overflow.
572eda14cbcSMatt Macy 	 * The limit of 24 bits must not be exceeded. This allows a maximum
573eda14cbcSMatt Macy 	 * version 1677.72.15 which we don't expect to be ever reached.
574eda14cbcSMatt Macy 	 */
575eda14cbcSMatt Macy 	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);
576eda14cbcSMatt Macy 
577eda14cbcSMatt Macy 	/*
578eda14cbcSMatt Macy 	 * Encode the compression level as well. We may need to know the
579eda14cbcSMatt Macy 	 * original compression level if compressed_arc is disabled, to match
580eda14cbcSMatt Macy 	 * the compression settings to write this block to the L2ARC.
581eda14cbcSMatt Macy 	 *
582eda14cbcSMatt Macy 	 * Encode the actual level, so if the enum changes in the future, we
583eda14cbcSMatt Macy 	 * will be compatible.
584eda14cbcSMatt Macy 	 *
585eda14cbcSMatt Macy 	 * The upper 24 bits store the ZSTD version to be able to provide
586eda14cbcSMatt Macy 	 * future compatibility, since new versions might enhance the
587eda14cbcSMatt Macy 	 * compression algorithm in a way, where the compressed data will
588eda14cbcSMatt Macy 	 * change.
589eda14cbcSMatt Macy 	 *
590eda14cbcSMatt Macy 	 * As soon as such incompatibility occurs, handling code needs to be
591eda14cbcSMatt Macy 	 * added, differentiating between the versions.
592eda14cbcSMatt Macy 	 */
59321b492edSMartin Matuska 	zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
59421b492edSMartin Matuska 	zfs_set_hdrlevel(hdr, level);
595eda14cbcSMatt Macy 	hdr->raw_version_level = BE_32(hdr->raw_version_level);
596eda14cbcSMatt Macy 
597eda14cbcSMatt Macy 	return (c_len + sizeof (*hdr));
598eda14cbcSMatt Macy }
599eda14cbcSMatt Macy 
600eda14cbcSMatt Macy /* Decompress block using zstd and return its stored level */
601eda14cbcSMatt Macy int
602eda14cbcSMatt Macy zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
603eda14cbcSMatt Macy     size_t d_len, uint8_t *level)
604eda14cbcSMatt Macy {
605eda14cbcSMatt Macy 	ZSTD_DCtx *dctx;
606eda14cbcSMatt Macy 	size_t result;
607eda14cbcSMatt Macy 	int16_t zstd_level;
608eda14cbcSMatt Macy 	uint32_t c_len;
609eda14cbcSMatt Macy 	const zfs_zstdhdr_t *hdr;
610eda14cbcSMatt Macy 	zfs_zstdhdr_t hdr_copy;
611eda14cbcSMatt Macy 
612eda14cbcSMatt Macy 	hdr = (const zfs_zstdhdr_t *)s_start;
613eda14cbcSMatt Macy 	c_len = BE_32(hdr->c_len);
614eda14cbcSMatt Macy 
615eda14cbcSMatt Macy 	/*
616eda14cbcSMatt Macy 	 * Make a copy instead of directly converting the header, since we must
617eda14cbcSMatt Macy 	 * not modify the original data that may be used again later.
618eda14cbcSMatt Macy 	 */
619eda14cbcSMatt Macy 	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
62021b492edSMartin Matuska 	uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
621eda14cbcSMatt Macy 
622eda14cbcSMatt Macy 	/*
623eda14cbcSMatt Macy 	 * NOTE: We ignore the ZSTD version for now. As soon as any
62416038816SMartin Matuska 	 * incompatibility occurs, it has to be handled accordingly.
625eda14cbcSMatt Macy 	 * The version can be accessed via `hdr_copy.version`.
626eda14cbcSMatt Macy 	 */
627eda14cbcSMatt Macy 
628eda14cbcSMatt Macy 	/*
629eda14cbcSMatt Macy 	 * Convert and check the level
630eda14cbcSMatt Macy 	 * An invalid level is a strong indicator for data corruption! In such
631eda14cbcSMatt Macy 	 * case return an error so the upper layers can try to fix it.
632eda14cbcSMatt Macy 	 */
63321b492edSMartin Matuska 	if (zstd_enum_to_level(curlevel, &zstd_level)) {
634eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
635eda14cbcSMatt Macy 		return (1);
636eda14cbcSMatt Macy 	}
637eda14cbcSMatt Macy 
638eda14cbcSMatt Macy 	ASSERT3U(d_len, >=, s_len);
63921b492edSMartin Matuska 	ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
640eda14cbcSMatt Macy 
641eda14cbcSMatt Macy 	/* Invalid compressed buffer size encoded at start */
642eda14cbcSMatt Macy 	if (c_len + sizeof (*hdr) > s_len) {
643eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
644eda14cbcSMatt Macy 		return (1);
645eda14cbcSMatt Macy 	}
646eda14cbcSMatt Macy 
647eda14cbcSMatt Macy 	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
648eda14cbcSMatt Macy 	if (!dctx) {
649eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
650eda14cbcSMatt Macy 		return (1);
651eda14cbcSMatt Macy 	}
652eda14cbcSMatt Macy 
653eda14cbcSMatt Macy 	/* Set header type to "magicless" */
654eda14cbcSMatt Macy 	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);
655eda14cbcSMatt Macy 
656eda14cbcSMatt Macy 	/* Decompress the data and release the context */
657eda14cbcSMatt Macy 	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
658eda14cbcSMatt Macy 	ZSTD_freeDCtx(dctx);
659eda14cbcSMatt Macy 
660eda14cbcSMatt Macy 	/*
661eda14cbcSMatt Macy 	 * Returns 0 on success (decompression function returned non-negative)
662eda14cbcSMatt Macy 	 * and non-zero on failure (decompression function returned negative.
663eda14cbcSMatt Macy 	 */
664eda14cbcSMatt Macy 	if (ZSTD_isError(result)) {
665eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
666eda14cbcSMatt Macy 		return (1);
667eda14cbcSMatt Macy 	}
668eda14cbcSMatt Macy 
669eda14cbcSMatt Macy 	if (level) {
67021b492edSMartin Matuska 		*level = curlevel;
671eda14cbcSMatt Macy 	}
672eda14cbcSMatt Macy 
673eda14cbcSMatt Macy 	return (0);
674eda14cbcSMatt Macy }
675eda14cbcSMatt Macy 
676eda14cbcSMatt Macy /* Decompress datablock using zstd */
677eda14cbcSMatt Macy int
678eda14cbcSMatt Macy zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
679eda14cbcSMatt Macy     int level __maybe_unused)
680eda14cbcSMatt Macy {
681eda14cbcSMatt Macy 
682eda14cbcSMatt Macy 	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
683eda14cbcSMatt Macy 	    NULL));
684eda14cbcSMatt Macy }
685eda14cbcSMatt Macy 
686eda14cbcSMatt Macy /* Allocator for zstd compression context using mempool_allocator */
687eda14cbcSMatt Macy static void *
688eda14cbcSMatt Macy zstd_alloc(void *opaque __maybe_unused, size_t size)
689eda14cbcSMatt Macy {
690eda14cbcSMatt Macy 	size_t nbytes = sizeof (struct zstd_kmem) + size;
691eda14cbcSMatt Macy 	struct zstd_kmem *z = NULL;
692eda14cbcSMatt Macy 
693eda14cbcSMatt Macy 	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);
694eda14cbcSMatt Macy 
695eda14cbcSMatt Macy 	if (!z) {
696eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
697eda14cbcSMatt Macy 		return (NULL);
698eda14cbcSMatt Macy 	}
699eda14cbcSMatt Macy 
700eda14cbcSMatt Macy 	return ((void*)z + (sizeof (struct zstd_kmem)));
701eda14cbcSMatt Macy }
702eda14cbcSMatt Macy 
703eda14cbcSMatt Macy /*
704eda14cbcSMatt Macy  * Allocator for zstd decompression context using mempool_allocator with
705eda14cbcSMatt Macy  * fallback to reserved memory if allocation fails
706eda14cbcSMatt Macy  */
707eda14cbcSMatt Macy static void *
708eda14cbcSMatt Macy zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
709eda14cbcSMatt Macy {
710eda14cbcSMatt Macy 	size_t nbytes = sizeof (struct zstd_kmem) + size;
711eda14cbcSMatt Macy 	struct zstd_kmem *z = NULL;
712eda14cbcSMatt Macy 	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;
713eda14cbcSMatt Macy 
714eda14cbcSMatt Macy 	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
715eda14cbcSMatt Macy 	if (!z) {
716eda14cbcSMatt Macy 		/* Try harder, decompression shall not fail */
717eda14cbcSMatt Macy 		z = vmem_alloc(nbytes, KM_SLEEP);
718eda14cbcSMatt Macy 		if (z) {
719eda14cbcSMatt Macy 			z->pool = NULL;
720eda14cbcSMatt Macy 		}
721eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
722eda14cbcSMatt Macy 	} else {
723eda14cbcSMatt Macy 		return ((void*)z + (sizeof (struct zstd_kmem)));
724eda14cbcSMatt Macy 	}
725eda14cbcSMatt Macy 
726eda14cbcSMatt Macy 	/* Fallback if everything fails */
727eda14cbcSMatt Macy 	if (!z) {
728eda14cbcSMatt Macy 		/*
729eda14cbcSMatt Macy 		 * Barrier since we only can handle it in a single thread. All
730eda14cbcSMatt Macy 		 * other following threads need to wait here until decompression
731eda14cbcSMatt Macy 		 * is completed. zstd_free will release this barrier later.
732eda14cbcSMatt Macy 		 */
733eda14cbcSMatt Macy 		mutex_enter(&zstd_dctx_fallback.barrier);
734eda14cbcSMatt Macy 
735eda14cbcSMatt Macy 		z = zstd_dctx_fallback.mem;
736eda14cbcSMatt Macy 		type = ZSTD_KMEM_DCTX;
737eda14cbcSMatt Macy 		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
738eda14cbcSMatt Macy 	}
739eda14cbcSMatt Macy 
740eda14cbcSMatt Macy 	/* Allocation should always be successful */
741eda14cbcSMatt Macy 	if (!z) {
742eda14cbcSMatt Macy 		return (NULL);
743eda14cbcSMatt Macy 	}
744eda14cbcSMatt Macy 
745eda14cbcSMatt Macy 	z->kmem_type = type;
746eda14cbcSMatt Macy 	z->kmem_size = nbytes;
747eda14cbcSMatt Macy 
748eda14cbcSMatt Macy 	return ((void*)z + (sizeof (struct zstd_kmem)));
749eda14cbcSMatt Macy }
750eda14cbcSMatt Macy 
751eda14cbcSMatt Macy /* Free allocated memory by its specific type */
752eda14cbcSMatt Macy static void
753eda14cbcSMatt Macy zstd_free(void *opaque __maybe_unused, void *ptr)
754eda14cbcSMatt Macy {
755eda14cbcSMatt Macy 	struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
756eda14cbcSMatt Macy 	enum zstd_kmem_type type;
757eda14cbcSMatt Macy 
758eda14cbcSMatt Macy 	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
759eda14cbcSMatt Macy 	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);
760eda14cbcSMatt Macy 
761eda14cbcSMatt Macy 	type = z->kmem_type;
762eda14cbcSMatt Macy 	switch (type) {
763eda14cbcSMatt Macy 	case ZSTD_KMEM_DEFAULT:
764eda14cbcSMatt Macy 		vmem_free(z, z->kmem_size);
765eda14cbcSMatt Macy 		break;
766eda14cbcSMatt Macy 	case ZSTD_KMEM_POOL:
767eda14cbcSMatt Macy 		zstd_mempool_free(z);
768eda14cbcSMatt Macy 		break;
769eda14cbcSMatt Macy 	case ZSTD_KMEM_DCTX:
770eda14cbcSMatt Macy 		mutex_exit(&zstd_dctx_fallback.barrier);
771eda14cbcSMatt Macy 		break;
772eda14cbcSMatt Macy 	default:
773eda14cbcSMatt Macy 		break;
774eda14cbcSMatt Macy 	}
775eda14cbcSMatt Macy }
776eda14cbcSMatt Macy 
777eda14cbcSMatt Macy /* Allocate fallback memory to ensure safe decompression */
778eda14cbcSMatt Macy static void __init
779eda14cbcSMatt Macy create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
780eda14cbcSMatt Macy {
781eda14cbcSMatt Macy 	mem->mem_size = size;
782eda14cbcSMatt Macy 	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
783eda14cbcSMatt Macy 	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
784eda14cbcSMatt Macy }
785eda14cbcSMatt Macy 
786eda14cbcSMatt Macy /* Initialize memory pool barrier mutexes */
787eda14cbcSMatt Macy static void __init
788eda14cbcSMatt Macy zstd_mempool_init(void)
789eda14cbcSMatt Macy {
790eda14cbcSMatt Macy 	zstd_mempool_cctx = (struct zstd_pool *)
791eda14cbcSMatt Macy 	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
792eda14cbcSMatt Macy 	zstd_mempool_dctx = (struct zstd_pool *)
793eda14cbcSMatt Macy 	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
794eda14cbcSMatt Macy 
795eda14cbcSMatt Macy 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
796eda14cbcSMatt Macy 		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
797eda14cbcSMatt Macy 		    MUTEX_DEFAULT, NULL);
798eda14cbcSMatt Macy 		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
799eda14cbcSMatt Macy 		    MUTEX_DEFAULT, NULL);
800eda14cbcSMatt Macy 	}
801eda14cbcSMatt Macy }
802eda14cbcSMatt Macy 
803eda14cbcSMatt Macy /* Initialize zstd-related memory handling */
804eda14cbcSMatt Macy static int __init
805eda14cbcSMatt Macy zstd_meminit(void)
806eda14cbcSMatt Macy {
807eda14cbcSMatt Macy 	zstd_mempool_init();
808eda14cbcSMatt Macy 
809eda14cbcSMatt Macy 	/*
810eda14cbcSMatt Macy 	 * Estimate the size of the fallback decompression context.
811eda14cbcSMatt Macy 	 * The expected size on x64 with current ZSTD should be about 160 KB.
812eda14cbcSMatt Macy 	 */
813eda14cbcSMatt Macy 	create_fallback_mem(&zstd_dctx_fallback,
814eda14cbcSMatt Macy 	    P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
815eda14cbcSMatt Macy 	    PAGESIZE));
816eda14cbcSMatt Macy 
817eda14cbcSMatt Macy 	return (0);
818eda14cbcSMatt Macy }
819eda14cbcSMatt Macy 
820eda14cbcSMatt Macy /* Release object from pool and free memory */
821716fd348SMartin Matuska static void
822eda14cbcSMatt Macy release_pool(struct zstd_pool *pool)
823eda14cbcSMatt Macy {
824eda14cbcSMatt Macy 	mutex_destroy(&pool->barrier);
825eda14cbcSMatt Macy 	vmem_free(pool->mem, pool->size);
826eda14cbcSMatt Macy 	pool->mem = NULL;
827eda14cbcSMatt Macy 	pool->size = 0;
828eda14cbcSMatt Macy }
829eda14cbcSMatt Macy 
830eda14cbcSMatt Macy /* Release memory pool objects */
831716fd348SMartin Matuska static void
832eda14cbcSMatt Macy zstd_mempool_deinit(void)
833eda14cbcSMatt Macy {
834eda14cbcSMatt Macy 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
835eda14cbcSMatt Macy 		release_pool(&zstd_mempool_cctx[i]);
836eda14cbcSMatt Macy 		release_pool(&zstd_mempool_dctx[i]);
837eda14cbcSMatt Macy 	}
838eda14cbcSMatt Macy 
839eda14cbcSMatt Macy 	kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
840eda14cbcSMatt Macy 	kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
841eda14cbcSMatt Macy 	zstd_mempool_dctx = NULL;
842eda14cbcSMatt Macy 	zstd_mempool_cctx = NULL;
843eda14cbcSMatt Macy }
844eda14cbcSMatt Macy 
845c40487d4SMatt Macy /* release unused memory from pool */
846c40487d4SMatt Macy 
847c40487d4SMatt Macy void
848c40487d4SMatt Macy zfs_zstd_cache_reap_now(void)
849c40487d4SMatt Macy {
85036639c39SMateusz Guzik 
85136639c39SMateusz Guzik 	/*
85236639c39SMateusz Guzik 	 * Short-circuit if there are no buffers to begin with.
85336639c39SMateusz Guzik 	 */
85436639c39SMateusz Guzik 	if (ZSTDSTAT(zstd_stat_buffers) == 0)
85536639c39SMateusz Guzik 		return;
85636639c39SMateusz Guzik 
857c40487d4SMatt Macy 	/*
858c40487d4SMatt Macy 	 * calling alloc with zero size seeks
859c40487d4SMatt Macy 	 * and releases old unused objects
860c40487d4SMatt Macy 	 */
8617877fdebSMatt Macy 	zstd_mempool_reap(zstd_mempool_cctx);
8627877fdebSMatt Macy 	zstd_mempool_reap(zstd_mempool_dctx);
863c40487d4SMatt Macy }
864c40487d4SMatt Macy 
865eda14cbcSMatt Macy extern int __init
866eda14cbcSMatt Macy zstd_init(void)
867eda14cbcSMatt Macy {
868eda14cbcSMatt Macy 	/* Set pool size by using maximum sane thread count * 4 */
869eda14cbcSMatt Macy 	pool_count = (boot_ncpus * 4);
870eda14cbcSMatt Macy 	zstd_meminit();
871eda14cbcSMatt Macy 
872eda14cbcSMatt Macy 	/* Initialize kstat */
873eda14cbcSMatt Macy 	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
874eda14cbcSMatt Macy 	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
875eda14cbcSMatt Macy 	    KSTAT_FLAG_VIRTUAL);
876eda14cbcSMatt Macy 	if (zstd_ksp != NULL) {
877eda14cbcSMatt Macy 		zstd_ksp->ks_data = &zstd_stats;
878eda14cbcSMatt Macy 		kstat_install(zstd_ksp);
879*e3aa18adSMartin Matuska #ifdef _KERNEL
880*e3aa18adSMartin Matuska 		zstd_ksp->ks_update = kstat_zstd_update;
881*e3aa18adSMartin Matuska #endif
882eda14cbcSMatt Macy 	}
883eda14cbcSMatt Macy 
884eda14cbcSMatt Macy 	return (0);
885eda14cbcSMatt Macy }
886eda14cbcSMatt Macy 
887716fd348SMartin Matuska extern void
888eda14cbcSMatt Macy zstd_fini(void)
889eda14cbcSMatt Macy {
890eda14cbcSMatt Macy 	/* Deinitialize kstat */
891eda14cbcSMatt Macy 	if (zstd_ksp != NULL) {
892eda14cbcSMatt Macy 		kstat_delete(zstd_ksp);
893eda14cbcSMatt Macy 		zstd_ksp = NULL;
894eda14cbcSMatt Macy 	}
895eda14cbcSMatt Macy 
896eda14cbcSMatt Macy 	/* Release fallback memory */
897eda14cbcSMatt Macy 	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
898eda14cbcSMatt Macy 	mutex_destroy(&zstd_dctx_fallback.barrier);
899eda14cbcSMatt Macy 
900eda14cbcSMatt Macy 	/* Deinit memory pool */
901eda14cbcSMatt Macy 	zstd_mempool_deinit();
902eda14cbcSMatt Macy }
903eda14cbcSMatt Macy 
#ifdef _KERNEL
#if defined(__FreeBSD__)
/* Register the module entry and exit hooks (FreeBSD builds only) */
module_init(zstd_init);
module_exit(zstd_fini);
#endif /* __FreeBSD__ */

/* Read-write tunables for the zstd early-abort behavior */
ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW,
	"Enable early abort attempts when using zstd");
ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
	"Minimal size of block to attempt early abort");
#endif /* _KERNEL */
915