xref: /freebsd/sys/contrib/openzfs/module/zstd/zfs_zstd.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
/*
 * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016-2018, Klara Inc.
 * Copyright (c) 2016-2018, Allan Jude
 * Copyright (c) 2018-2020, Sebastian Gottschall
 * Copyright (c) 2019-2020, Michael Niewöhner
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/spa.h>
#include <sys/zstd/zstd.h>

#define	ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/zstd_errors.h"

kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
	kstat_named_t	zstd_stat_alloc_fail;
	kstat_named_t	zstd_stat_alloc_fallback;
	kstat_named_t	zstd_stat_com_alloc_fail;
	kstat_named_t	zstd_stat_dec_alloc_fail;
	kstat_named_t	zstd_stat_com_inval;
	kstat_named_t	zstd_stat_dec_inval;
	kstat_named_t	zstd_stat_dec_header_inval;
	kstat_named_t	zstd_stat_com_fail;
	kstat_named_t	zstd_stat_dec_fail;
} zstd_stats_t;

static zstd_stats_t zstd_stats = {
	{ "alloc_fail",			KSTAT_DATA_UINT64 },
	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
	{ "compress_failed",		KSTAT_DATA_UINT64 },
	{ "decompress_failed",		KSTAT_DATA_UINT64 },
};

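/*
 * The counters above are bumped via the ZSTDSTAT_BUMP() macro from
 * sys/zstd/zstd.h. A minimal sketch of the assumed macro shape follows;
 * it is not compiled here and the authoritative definition lives in
 * that header.
 */
#if 0
#define	ZSTDSTAT_BUMP(stat)	\
	atomic_add_64(&zstd_stats.stat.value.ui64, 1)
/* Usage, e.g. when an invalid compression level is requested: */
ZSTDSTAT_BUMP(zstd_stat_com_inval);
#endif
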
/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,
	/* Allocation type using kmem_vmalloc */
	ZSTD_KMEM_DEFAULT,
	/* Pool based allocation using mempool_alloc */
	ZSTD_KMEM_POOL,
	/* Reserved fallback memory for decompression only */
	ZSTD_KMEM_DCTX,
	ZSTD_KMEM_COUNT,
};

/* Structure for pooled memory objects */
struct zstd_pool {
	void *mem;
	size_t size;
	kmutex_t barrier;
	hrtime_t timeout;
};

/* Global structure for handling memory allocations */
struct zstd_kmem {
	enum zstd_kmem_type kmem_type;
	size_t kmem_size;
	struct zstd_pool *pool;
};

/* Fallback memory structure used for decompression only if memory runs out */
struct zstd_fallback_mem {
	size_t mem_size;
	void *mem;
	kmutex_t barrier;
};

struct zstd_levelmap {
	int16_t zstd_level;
	enum zio_zstd_levels level;
};

/*
 * ZSTD memory handlers
 *
 * For decompression we use a different handler which also provides fallback
 * memory allocation in case memory runs out.
 *
 * The ZSTD handlers were split up to keep the implementation as simple
 * as possible.
 */
static void *zstd_alloc(void *opaque, size_t size);
static void *zstd_dctx_alloc(void *opaque, size_t size);
static void zstd_free(void *opaque, void *ptr);

/* Compression memory handler */
static const ZSTD_customMem zstd_malloc = {
	zstd_alloc,
	zstd_free,
	NULL,
};

/* Decompression memory handler */
static const ZSTD_customMem zstd_dctx_malloc = {
	zstd_dctx_alloc,
	zstd_free,
	NULL,
};

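/*
 * Illustration: zstd routes every internal allocation through these
 * handlers. The callback signatures below come from zstd's public
 * "static linking only" API; the NULL third member is the opaque
 * pointer handed back to each callback (unused here).
 */
#if 0
typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size);
typedef void (*ZSTD_freeFunction)(void *opaque, void *address);
/*
 * So ZSTD_createCCtx_advanced(zstd_malloc) ends up calling
 * zstd_alloc(NULL, size) for the context itself and all of its
 * workspace buffers.
 */
#endif
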
/* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
static struct zstd_levelmap zstd_levels[] = {
	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
	{-1, ZIO_ZSTD_LEVEL_FAST_1},
	{-2, ZIO_ZSTD_LEVEL_FAST_2},
	{-3, ZIO_ZSTD_LEVEL_FAST_3},
	{-4, ZIO_ZSTD_LEVEL_FAST_4},
	{-5, ZIO_ZSTD_LEVEL_FAST_5},
	{-6, ZIO_ZSTD_LEVEL_FAST_6},
	{-7, ZIO_ZSTD_LEVEL_FAST_7},
	{-8, ZIO_ZSTD_LEVEL_FAST_8},
	{-9, ZIO_ZSTD_LEVEL_FAST_9},
	{-10, ZIO_ZSTD_LEVEL_FAST_10},
	{-20, ZIO_ZSTD_LEVEL_FAST_20},
	{-30, ZIO_ZSTD_LEVEL_FAST_30},
	{-40, ZIO_ZSTD_LEVEL_FAST_40},
	{-50, ZIO_ZSTD_LEVEL_FAST_50},
	{-60, ZIO_ZSTD_LEVEL_FAST_60},
	{-70, ZIO_ZSTD_LEVEL_FAST_70},
	{-80, ZIO_ZSTD_LEVEL_FAST_80},
	{-90, ZIO_ZSTD_LEVEL_FAST_90},
	{-100, ZIO_ZSTD_LEVEL_FAST_100},
	{-500, ZIO_ZSTD_LEVEL_FAST_500},
	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
};

/*
 * This variable represents the maximum number of pool slots, based on the
 * number of CPUs plus some headroom. It defaults to 16 and is set to
 * cpu count * 4 in zstd_init.
 */
static int pool_count = 16;

#define	ZSTD_POOL_MAX		pool_count
#define	ZSTD_POOL_TIMEOUT	60 * 2

static struct zstd_fallback_mem zstd_dctx_fallback;
static struct zstd_pool *zstd_mempool_cctx;
static struct zstd_pool *zstd_mempool_dctx;

/*
 * Try to get a cached, pre-allocated buffer from the memory pool, or allocate
 * a new one if necessary. If an object is older than 2 minutes and does not
 * fit the requested size, it is released and a new cached entry is allocated.
 * Any other pooled objects that have gone unused for 2 minutes are released
 * as well.
 *
 * The rationale is that high-frequency allocations of bigger objects are
 * expensive. So if a lot of work is going on, allocations are kept for a
 * while and can be reused in that time frame.
 *
 * The scheduled release is pushed back every time an object is reused.
 */
static void *
zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
{
	struct zstd_pool *pool;
	struct zstd_kmem *mem = NULL;

	if (!zstd_mempool) {
		return (NULL);
	}

	/* Seek for preallocated memory slot and free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		/*
		 * This lock is simply a marker for a pool object being in use.
		 * If it's already held, the slot is skipped.
		 *
		 * We need to take it before checking the slot to avoid race
		 * conditions caused by running in a threaded context.
		 *
		 * The lock is later released by zstd_mempool_free.
		 */
		if (mutex_tryenter(&pool->barrier)) {
			/*
			 * Check if the object fits the requested size; if so,
			 * take it and update the timestamp.
			 */
			if (!mem && pool->mem && size <= pool->size) {
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;
				mem = pool->mem;
				continue;
			}

			/* Free memory if unused object older than 2 minutes */
			if (pool->mem && gethrestime_sec() > pool->timeout) {
				vmem_free(pool->mem, pool->size);
				pool->mem = NULL;
				pool->size = 0;
				pool->timeout = 0;
			}

			mutex_exit(&pool->barrier);
		}
	}


	if (mem) {
		return (mem);
	}

	/*
	 * If no preallocated slot was found, try to fill in a new one.
	 *
	 * We run a similar algorithm twice here to avoid pool fragmentation.
	 * The first pass may leave holes in the list as objects get released.
	 * We make sure those holes get filled first instead of constantly
	 * appending new allocations at the end.
	 */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (mutex_tryenter(&pool->barrier)) {
			/* Object is free, try to allocate new one */
			if (!pool->mem) {
				mem = vmem_alloc(size, KM_SLEEP);
				pool->mem = mem;

				if (pool->mem) {
					/* Keep track for later release */
					mem->pool = pool;
					pool->size = size;
					mem->kmem_type = ZSTD_KMEM_POOL;
					mem->kmem_size = size;
				}
			}

			if (size <= pool->size) {
				/* Update timestamp */
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;

				return (pool->mem);
			}

			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If the pool is full or the allocation failed, fall back to a
	 * direct, non-pooled allocation instead.
	 */
	if (!mem) {
		mem = vmem_alloc(size, KM_NOSLEEP);
		if (mem) {
			mem->pool = NULL;
			mem->kmem_type = ZSTD_KMEM_DEFAULT;
			mem->kmem_size = size;
		}
	}

	return (mem);
}

/* Mark object as released by releasing the barrier mutex */
static void
zstd_mempool_free(struct zstd_kmem *z)
{
	mutex_exit(&z->pool->barrier);
}

/* Convert ZFS internal enum to ZSTD level */
static int
zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
{
	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
		*zstd_level = zstd_levels[level - 1].zstd_level;
		return (0);
	}
	if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
		*zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
		    + ZIO_ZSTD_LEVEL_19].zstd_level;
		return (0);
	}

	/* Invalid/unknown zfs compression enum - this should never happen. */
	return (1);
}

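/*
 * Worked example for the lookup above (assuming the zio_zstd_levels
 * enum values are contiguous, which the table layout requires): for
 * ZIO_ZSTD_LEVEL_FAST_10 the index is
 * (ZIO_ZSTD_LEVEL_FAST_10 - ZIO_ZSTD_LEVEL_FAST_1 + ZIO_ZSTD_LEVEL_19)
 * = 9 + 19 = 28, i.e. the tenth entry of the negative-level block of
 * zstd_levels[], which yields a zstd_level of -10.
 */
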
/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	size_t c_len;
	int16_t zstd_level;
	zfs_zstdhdr_t *hdr;
	ZSTD_CCtx *cctx;

	hdr = (zfs_zstdhdr_t *)d_start;

	/* Skip compression if the specified level is invalid */
	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	ASSERT3U(d_len, >=, sizeof (*hdr));
	ASSERT3U(d_len, <=, s_len);
	ASSERT3U(zstd_level, !=, 0);

	cctx = ZSTD_createCCtx_advanced(zstd_malloc);

	/*
	 * Out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (!cctx) {
		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
		return (s_len);
	}

	/* Set the compression level */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

	/* Use the "magicless" zstd header which saves us 4 header bytes */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);

	/*
	 * Disable redundant checksum calculation and content size storage
	 * since this is already done by ZFS itself.
	 */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);

	c_len = ZSTD_compress2(cctx,
	    hdr->data,
	    d_len - sizeof (*hdr),
	    s_start, s_len);

	ZSTD_freeCCtx(cctx);

	/* Error in the compression routine, disable compression. */
	if (ZSTD_isError(c_len)) {
		/*
		 * If we are aborting the compression because the space
		 * savings are too small, that is not a failure. Everything
		 * else is a failure, so increment the compression failure
		 * counter.
		 */
		if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
			ZSTDSTAT_BUMP(zstd_stat_com_fail);
		}
		return (s_len);
	}

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be added
	 * to the compressed buffer and which, if unhandled, would confuse the
	 * hell out of our decompression function.
	 */
	hdr->c_len = BE_32(c_len);

	/*
	 * Check version for overflow.
	 * The limit of 24 bits must not be exceeded. This allows a maximum
	 * version 1677.72.15 which we don't expect to be ever reached.
	 */
	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);

	/*
	 * Encode the compression level as well. We may need to know the
	 * original compression level if compressed_arc is disabled, to match
	 * the compression settings to write this block to the L2ARC.
	 *
	 * Encode the actual level, so if the enum changes in the future, we
	 * will be compatible.
	 *
	 * The upper 24 bits store the ZSTD version to be able to provide
	 * future compatibility, since new versions might enhance the
	 * compression algorithm in a way, where the compressed data will
	 * change.
	 *
	 * As soon as such incompatibility occurs, handling code needs to be
	 * added, differentiating between the versions.
	 */
	hdr->version = ZSTD_VERSION_NUMBER;
	hdr->level = level;
	hdr->raw_version_level = BE_32(hdr->raw_version_level);

	return (c_len + sizeof (*hdr));
}

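/*
 * For reference, a sketch of the on-disk header written above. This is
 * an assumed layout; the authoritative definition of zfs_zstdhdr_t
 * lives in sys/zstd/zstd.h. The union is what lets the code above set
 * version and level separately and then byte-swap both at once via
 * raw_version_level.
 */
#if 0
typedef struct zfs_zstdhdr {
	uint32_t c_len;			/* compressed size, stored BE */
	union {
		uint32_t raw_version_level;	/* stored BE on disk */
		struct {
			uint32_t version : 24;	/* ZSTD_VERSION_NUMBER */
			uint8_t level;		/* zio_zstd_levels enum */
		};
	};
	char data[];			/* compressed payload */
} zfs_zstdhdr_t;
#endif
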
/* Decompress block using zstd and return its stored level */
int
zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
    size_t d_len, uint8_t *level)
{
	ZSTD_DCtx *dctx;
	size_t result;
	int16_t zstd_level;
	uint32_t c_len;
	const zfs_zstdhdr_t *hdr;
	zfs_zstdhdr_t hdr_copy;

	hdr = (const zfs_zstdhdr_t *)s_start;
	c_len = BE_32(hdr->c_len);

	/*
	 * Make a copy instead of directly converting the header, since we must
	 * not modify the original data that may be used again later.
	 */
	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);

	/*
	 * NOTE: We ignore the ZSTD version for now. As soon as any
	 * incompatibility occurs, it has to be handled accordingly.
	 * The version can be accessed via `hdr_copy.version`.
	 */

	/*
	 * Convert and check the level.
	 * An invalid level is a strong indicator of data corruption! In such
	 * a case, return an error so the upper layers can try to fix it.
	 */
	if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
		return (1);
	}

	ASSERT3U(d_len, >=, s_len);
	ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);

	/* Invalid compressed buffer size encoded at start */
	if (c_len + sizeof (*hdr) > s_len) {
		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
		return (1);
	}

	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
	if (!dctx) {
		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
		return (1);
	}

	/* Set header type to "magicless" */
	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);

	/* Decompress the data and release the context */
	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
	ZSTD_freeDCtx(dctx);

	/*
	 * Returns 0 on success (the decompression function returned a valid
	 * size) and non-zero on failure (ZSTD_isError() reported an error
	 * code).
	 */
	if (ZSTD_isError(result)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
		return (1);
	}

	if (level) {
		*level = hdr_copy.level;
	}

	return (0);
}

/* Decompress datablock using zstd */
int
zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level __maybe_unused)
{
	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
	    NULL));
}

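/*
 * Hypothetical caller sketch (not part of this file): recovering the
 * stored compression level from a compressed block, e.g. to match the
 * original settings when rewriting the block to the L2ARC. The buffer
 * names are placeholders.
 */
#if 0
uint8_t stored_level;

if (zfs_zstd_decompress_level(src, dst, s_len, d_len,
    &stored_level) == 0) {
	/* stored_level now holds the ZIO_ZSTD_LEVEL_* enum value. */
}
#endif
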
/* Allocator for zstd compression context using mempool_allocator */
static void *
zstd_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);

	if (!z) {
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
		return (NULL);
	}

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

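/*
 * Note on the pointer arithmetic above: every buffer handed to zstd is
 * preceded by a struct zstd_kmem bookkeeping header, which is how
 * zstd_free() later recovers the allocation type and size from a bare
 * pointer. (Arithmetic on void * is a GNU C extension available in
 * kernel builds.) Layout:
 *
 *	| struct zstd_kmem | payload returned to zstd ... |
 *	^                  ^
 *	z                  (void *)z + sizeof (struct zstd_kmem)
 */
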
/*
 * Allocator for zstd decompression context using mempool_allocator with
 * fallback to reserved memory if allocation fails
 */
static void *
zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;
	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
	if (!z) {
		/* Try harder, decompression shall not fail */
		z = vmem_alloc(nbytes, KM_SLEEP);
		if (z) {
			z->pool = NULL;
		}
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
	} else {
		return ((void*)z + (sizeof (struct zstd_kmem)));
	}

	/* Fallback if everything fails */
	if (!z) {
		/*
		 * Barrier since we can only handle this in a single thread.
		 * All later threads need to wait here until decompression
		 * is completed. zstd_free will release this barrier later.
		 */
		mutex_enter(&zstd_dctx_fallback.barrier);

		z = zstd_dctx_fallback.mem;
		type = ZSTD_KMEM_DCTX;
		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
	}

	/* Allocation should always be successful */
	if (!z) {
		return (NULL);
	}

	z->kmem_type = type;
	z->kmem_size = nbytes;

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/* Free allocated memory by its specific type */
static void
zstd_free(void *opaque __maybe_unused, void *ptr)
{
	struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
	enum zstd_kmem_type type;

	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);

	type = z->kmem_type;
	switch (type) {
	case ZSTD_KMEM_DEFAULT:
		vmem_free(z, z->kmem_size);
		break;
	case ZSTD_KMEM_POOL:
		zstd_mempool_free(z);
		break;
	case ZSTD_KMEM_DCTX:
		mutex_exit(&zstd_dctx_fallback.barrier);
		break;
	default:
		break;
	}
}

/* Allocate fallback memory to ensure safe decompression */
static void __init
create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
{
	mem->mem_size = size;
	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
}

/* Initialize memory pool barrier mutexes */
static void __init
zstd_mempool_init(void)
{
	zstd_mempool_cctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
	zstd_mempool_dctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);

	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}

/* Initialize zstd-related memory handling */
static int __init
zstd_meminit(void)
{
	zstd_mempool_init();

	/*
	 * Estimate the size of the fallback decompression context.
	 * The expected size on x64 with current ZSTD should be about 160 KB.
	 */
	create_fallback_mem(&zstd_dctx_fallback,
	    P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
	    PAGESIZE));

	return (0);
}

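/*
 * For clarity: P2ROUNDUP(x, align) is the ZFS sysmacro (from
 * sys/sysmacros.h, included above) that rounds x up to the next
 * multiple of align, a power of two, so the fallback buffer always
 * spans whole pages. Illustrative numbers:
 * P2ROUNDUP(0x28003, 0x1000) == 0x29000.
 */
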
/* Release object from pool and free memory */
static void __exit
release_pool(struct zstd_pool *pool)
{
	mutex_destroy(&pool->barrier);
	vmem_free(pool->mem, pool->size);
	pool->mem = NULL;
	pool->size = 0;
}

/* Release memory pool objects */
static void __exit
zstd_mempool_deinit(void)
{
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		release_pool(&zstd_mempool_cctx[i]);
		release_pool(&zstd_mempool_dctx[i]);
	}

	kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	zstd_mempool_dctx = NULL;
	zstd_mempool_cctx = NULL;
}

extern int __init
zstd_init(void)
{
	/* Set the pool size to four times the maximum sane thread count */
	pool_count = (boot_ncpus * 4);
	zstd_meminit();

	/* Initialize kstat */
	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (zstd_ksp != NULL) {
		zstd_ksp->ks_data = &zstd_stats;
		kstat_install(zstd_ksp);
	}

	return (0);
}

extern void __exit
zstd_fini(void)
{
	/* Deinitialize kstat */
	if (zstd_ksp != NULL) {
		kstat_delete(zstd_ksp);
		zstd_ksp = NULL;
	}

	/* Release fallback memory */
	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
	mutex_destroy(&zstd_dctx_fallback.barrier);

	/* Deinit memory pool */
	zstd_mempool_deinit();
}

#if defined(_KERNEL)
module_init(zstd_init);
module_exit(zstd_fini);

ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("BSD");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
#endif