// SPDX-License-Identifier: BSD-3-Clause
/*
 * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016-2018, Klara Inc.
 * Copyright (c) 2016-2018, Allan Jude
 * Copyright (c) 2018-2020, Sebastian Gottschall
 * Copyright (c) 2019-2020, Michael Niewöhner
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/spa.h>
#include <sys/zstd/zstd.h>

#define ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"

#ifndef IN_LIBSA
static uint_t zstd_earlyabort_pass = 1;
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
static unsigned int zstd_abort_size = (128 * 1024);
#endif

#ifdef IN_BASE
int zfs_zstd_decompress_buf(void *, void *, size_t, size_t, int);
#endif

static kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
        kstat_named_t zstd_stat_alloc_fail;
        kstat_named_t zstd_stat_alloc_fallback;
        kstat_named_t zstd_stat_com_alloc_fail;
        kstat_named_t zstd_stat_dec_alloc_fail;
        kstat_named_t zstd_stat_com_inval;
        kstat_named_t zstd_stat_dec_inval;
        kstat_named_t zstd_stat_dec_header_inval;
        kstat_named_t zstd_stat_com_fail;
        kstat_named_t zstd_stat_dec_fail;
        /*
         * LZ4 first-pass early abort verdict
         */
        kstat_named_t zstd_stat_lz4pass_allowed;
        kstat_named_t zstd_stat_lz4pass_rejected;
        /*
         * zstd-1 second-pass early abort verdict
         */
        kstat_named_t zstd_stat_zstdpass_allowed;
        kstat_named_t zstd_stat_zstdpass_rejected;
        /*
         * We excluded this from early abort for some reason
         */
        kstat_named_t zstd_stat_passignored;
        kstat_named_t zstd_stat_passignored_size;
        kstat_named_t zstd_stat_buffers;
        kstat_named_t zstd_stat_size;
} zstd_stats_t;

static zstd_stats_t zstd_stats = {
        { "alloc_fail", KSTAT_DATA_UINT64 },
        { "alloc_fallback", KSTAT_DATA_UINT64 },
        { "compress_alloc_fail", KSTAT_DATA_UINT64 },
        { "decompress_alloc_fail", KSTAT_DATA_UINT64 },
        { "compress_level_invalid", KSTAT_DATA_UINT64 },
        { "decompress_level_invalid", KSTAT_DATA_UINT64 },
        { "decompress_header_invalid", KSTAT_DATA_UINT64 },
        { "compress_failed", KSTAT_DATA_UINT64 },
        { "decompress_failed", KSTAT_DATA_UINT64 },
        { "lz4pass_allowed", KSTAT_DATA_UINT64 },
        { "lz4pass_rejected", KSTAT_DATA_UINT64 },
        { "zstdpass_allowed", KSTAT_DATA_UINT64 },
        { "zstdpass_rejected", KSTAT_DATA_UINT64 },
        { "passignored", KSTAT_DATA_UINT64 },
        { "passignored_size", KSTAT_DATA_UINT64 },
        { "buffers", KSTAT_DATA_UINT64 },
        { "size", KSTAT_DATA_UINT64 },
};
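
/*
 * Editorial note, for illustration only: these counters are exported through
 * the kstat framework, so they would typically be readable at
 * /proc/spl/kstat/zfs/zstd on Linux, or via the kstat.zfs.misc.zstd sysctl
 * tree on FreeBSD.
 */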

#ifdef _KERNEL
static int
kstat_zstd_update(kstat_t *ksp, int rw)
{
        ASSERT(ksp != NULL);

        if (rw == KSTAT_WRITE && ksp == zstd_ksp) {
                ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
                ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
                ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
                ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);
                ZSTDSTAT_ZERO(zstd_stat_com_inval);
                ZSTDSTAT_ZERO(zstd_stat_dec_inval);
                ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
                ZSTDSTAT_ZERO(zstd_stat_com_fail);
                ZSTDSTAT_ZERO(zstd_stat_dec_fail);
                ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
                ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
                ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
                ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
                ZSTDSTAT_ZERO(zstd_stat_passignored);
                ZSTDSTAT_ZERO(zstd_stat_passignored_size);
        }

        return (0);
}
#endif
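
/*
 * Editorial note (an assumption about kstat plumbing, not from this file):
 * since kstat_zstd_update() zeroes the counters on a KSTAT_WRITE, writing to
 * the kstat node is what resets the statistics, e.g. on Linux something like:
 *
 *      echo 0 > /proc/spl/kstat/zfs/zstd
 */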

/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
        ZSTD_KMEM_UNKNOWN = 0,
        /* Allocation type using kmem_vmalloc */
        ZSTD_KMEM_DEFAULT,
        /* Pool based allocation using mempool_alloc */
        ZSTD_KMEM_POOL,
        /* Reserved fallback memory for decompression only */
        ZSTD_KMEM_DCTX,
        ZSTD_KMEM_COUNT,
};

/* Structure for pooled memory objects */
struct zstd_pool {
        void *mem;
        size_t size;
        kmutex_t barrier;
        hrtime_t timeout;
};

/* Global structure for handling memory allocations */
struct zstd_kmem {
        enum zstd_kmem_type kmem_type;
        size_t kmem_size;
        struct zstd_pool *pool;
};

/* Fallback memory structure used for decompression only if memory runs out */
struct zstd_fallback_mem {
        size_t mem_size;
        void *mem;
        kmutex_t barrier;
};

struct zstd_levelmap {
        int16_t zstd_level;
        enum zio_zstd_levels level;
};

/*
 * ZSTD memory handlers
 *
 * For decompression we use a different handler which also provides fallback
 * memory allocation in case memory runs out.
 *
 * The ZSTD handlers were split up to keep the implementation as simple as
 * possible.
 */
#ifndef IN_LIBSA
static void *zstd_alloc(void *opaque, size_t size);
#endif
static void *zstd_dctx_alloc(void *opaque, size_t size);
static void zstd_free(void *opaque, void *ptr);

#ifndef IN_LIBSA
/* Compression memory handler */
static const ZSTD_customMem zstd_malloc = {
        zstd_alloc,
        zstd_free,
        NULL,
};
#endif

/* Decompression memory handler */
static const ZSTD_customMem zstd_dctx_malloc = {
        zstd_dctx_alloc,
        zstd_free,
        NULL,
};

/* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
static struct zstd_levelmap zstd_levels[] = {
        {ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
        {ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
        {ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
        {ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
        {ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
        {ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
        {ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
        {ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
        {ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
        {ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
        {ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
        {ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
        {ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
        {ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
        {ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
        {ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
        {ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
        {ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
        {ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
        {-1, ZIO_ZSTD_LEVEL_FAST_1},
        {-2, ZIO_ZSTD_LEVEL_FAST_2},
        {-3, ZIO_ZSTD_LEVEL_FAST_3},
        {-4, ZIO_ZSTD_LEVEL_FAST_4},
        {-5, ZIO_ZSTD_LEVEL_FAST_5},
        {-6, ZIO_ZSTD_LEVEL_FAST_6},
        {-7, ZIO_ZSTD_LEVEL_FAST_7},
        {-8, ZIO_ZSTD_LEVEL_FAST_8},
        {-9, ZIO_ZSTD_LEVEL_FAST_9},
        {-10, ZIO_ZSTD_LEVEL_FAST_10},
        {-20, ZIO_ZSTD_LEVEL_FAST_20},
        {-30, ZIO_ZSTD_LEVEL_FAST_30},
        {-40, ZIO_ZSTD_LEVEL_FAST_40},
        {-50, ZIO_ZSTD_LEVEL_FAST_50},
        {-60, ZIO_ZSTD_LEVEL_FAST_60},
        {-70, ZIO_ZSTD_LEVEL_FAST_70},
        {-80, ZIO_ZSTD_LEVEL_FAST_80},
        {-90, ZIO_ZSTD_LEVEL_FAST_90},
        {-100, ZIO_ZSTD_LEVEL_FAST_100},
        {-500, ZIO_ZSTD_LEVEL_FAST_500},
        {-1000, ZIO_ZSTD_LEVEL_FAST_1000},
};

/*
 * This variable represents the maximum count of the pool based on the number
 * of CPUs plus some buffer. We default to cpu count * 4, see zstd_init.
 */
static int pool_count = 16;

#define ZSTD_POOL_MAX           pool_count
#define ZSTD_POOL_TIMEOUT       (60 * 2)

static struct zstd_fallback_mem zstd_dctx_fallback;
static struct zstd_pool *zstd_mempool_cctx;
static struct zstd_pool *zstd_mempool_dctx;

/*
 * The library zstd code expects these functions to exist whenever
 * ADDRESS_SANITIZER is defined. Userland ASAN provides them, but kernel
 * KASAN ends up with ADDRESS_SANITIZER defined without providing them, so
 * we supply no-op stubs here to avoid changing the external code.
 */
#if defined(ZFS_ASAN_ENABLED)
#define ADDRESS_SANITIZER 1
#endif
#if defined(_KERNEL) && defined(ADDRESS_SANITIZER)
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {};
void __asan_poison_memory_region(void const volatile *addr, size_t size) {};
#endif

static void
zstd_mempool_reap(struct zstd_pool *zstd_mempool)
{
        struct zstd_pool *pool;

        if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) {
                return;
        }

        /* free obsolete slots */
        for (int i = 0; i < ZSTD_POOL_MAX; i++) {
                pool = &zstd_mempool[i];
                if (pool->mem && mutex_tryenter(&pool->barrier)) {
                        /*
                         * Free memory if the unused object is older than
                         * 2 minutes.
                         */
                        if (pool->mem && gethrestime_sec() > pool->timeout) {
                                vmem_free(pool->mem, pool->size);
                                ZSTDSTAT_SUB(zstd_stat_buffers, 1);
                                ZSTDSTAT_SUB(zstd_stat_size, pool->size);
                                pool->mem = NULL;
                                pool->size = 0;
                                pool->timeout = 0;
                        }
                        mutex_exit(&pool->barrier);
                }
        }
}

/*
 * Try to get a cached allocated buffer from memory pool or allocate a new one
 * if necessary. If an object is older than 2 minutes and does not fit the
 * requested size, it will be released and a new cached entry will be
 * allocated. If other pooled objects are detected without being used for
 * 2 minutes, they will be released, too.
 *
 * The concept is that high frequency memory allocations of bigger objects are
 * expensive. So if a lot of work is going on, allocations will be kept for a
 * while and can be reused in that time frame.
 *
 * The scheduled release will be updated every time an object is reused.
 */

static void *
zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
{
        struct zstd_pool *pool;
        struct zstd_kmem *mem = NULL;

        if (!zstd_mempool) {
                return (NULL);
        }

        /* Seek for preallocated memory slot and free obsolete slots */
        for (int i = 0; i < ZSTD_POOL_MAX; i++) {
                pool = &zstd_mempool[i];
                /*
                 * This lock is simply a marker for a pool object being in use.
                 * If it's already held, it will be skipped.
                 *
                 * We need to create it before checking it to avoid race
                 * conditions caused by running in a threaded context.
                 *
                 * The lock is later released by zstd_mempool_free.
                 */
                if (mutex_tryenter(&pool->barrier)) {
                        /*
                         * Check if the object fits the size, if so we take
                         * it and update the timestamp.
                         */
                        if (pool->mem && size <= pool->size) {
                                pool->timeout = gethrestime_sec() +
                                    ZSTD_POOL_TIMEOUT;
                                mem = pool->mem;
                                return (mem);
                        }
                        mutex_exit(&pool->barrier);
                }
        }

        /*
         * If no preallocated slot was found, try to fill in a new one.
         *
         * We run a similar algorithm twice here to avoid pool fragmentation.
         * The first one may generate holes in the list if objects get released.
         * We always make sure that these holes get filled instead of adding new
         * allocations constantly at the end.
         */
        for (int i = 0; i < ZSTD_POOL_MAX; i++) {
                pool = &zstd_mempool[i];
                if (mutex_tryenter(&pool->barrier)) {
                        /* Object is free, try to allocate new one */
                        if (!pool->mem) {
                                mem = vmem_alloc(size, KM_SLEEP);
                                if (mem) {
                                        ZSTDSTAT_ADD(zstd_stat_buffers, 1);
                                        ZSTDSTAT_ADD(zstd_stat_size, size);
                                        pool->mem = mem;
                                        pool->size = size;
                                        /* Keep track for later release */
                                        mem->pool = pool;
                                        mem->kmem_type = ZSTD_KMEM_POOL;
                                        mem->kmem_size = size;
                                }
                        }

                        if (size <= pool->size) {
                                /* Update timestamp */
                                pool->timeout = gethrestime_sec() +
                                    ZSTD_POOL_TIMEOUT;

                                return (pool->mem);
                        }

                        mutex_exit(&pool->barrier);
                }
        }

        /*
         * If the pool is full or the allocation failed, try lazy allocation
         * instead.
         */
        if (!mem) {
                mem = vmem_alloc(size, KM_NOSLEEP);
                if (mem) {
                        mem->pool = NULL;
                        mem->kmem_type = ZSTD_KMEM_DEFAULT;
                        mem->kmem_size = size;
                }
        }

        return (mem);
}

/* Mark object as released by releasing the barrier mutex */
static void
zstd_mempool_free(struct zstd_kmem *z)
{
        mutex_exit(&z->pool->barrier);
}
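
/*
 * Editorial sketch of the pool object lifecycle implemented above (not
 * upstream code; use() is a hypothetical stand-in for handing the buffer to
 * zstd): the barrier mutex doubles as an ownership marker, so a successful
 * mutex_tryenter() in zstd_mempool_alloc() claims a slot and the matching
 * mutex_exit() in zstd_mempool_free() returns it:
 *
 *      struct zstd_kmem *z = zstd_mempool_alloc(zstd_mempool_cctx, nbytes);
 *      if (z != NULL) {
 *              use((void *)z + sizeof (struct zstd_kmem));
 *              zstd_mempool_free(z);   releases pool->barrier, slot reusable
 *      }
 */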

/* Convert ZFS internal enum to ZSTD level */
static int
zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
{
        if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
                *zstd_level = zstd_levels[level - 1].zstd_level;
                return (0);
        }
        if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
            level <= ZIO_ZSTD_LEVEL_FAST_1000) {
                *zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
                    + ZIO_ZSTD_LEVEL_19].zstd_level;
                return (0);
        }

        /* Invalid/unknown zfs compression enum - this should never happen. */
        return (1);
}
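
/*
 * Editorial examples, consistent with the lookups above:
 *
 *      int16_t l;
 *      zstd_enum_to_level(ZIO_ZSTD_LEVEL_9, &l);       sets l = 9
 *      zstd_enum_to_level(ZIO_ZSTD_LEVEL_FAST_5, &l);  sets l = -5
 *                                                      (zstd_levels[23])
 *
 * Any enum outside both ranges leaves *zstd_level untouched and returns 1.
 */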

#ifndef IN_LIBSA
/* Compress block using zstd */
static size_t
zfs_zstd_compress_impl(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int level)
{
        size_t c_len;
        int16_t zstd_level;
        zfs_zstdhdr_t *hdr;
        ZSTD_CCtx *cctx;

        hdr = (zfs_zstdhdr_t *)d_start;

        /* Skip compression if the specified level is invalid */
        if (zstd_enum_to_level(level, &zstd_level)) {
                ZSTDSTAT_BUMP(zstd_stat_com_inval);
                return (s_len);
        }

        ASSERT3U(d_len, >=, sizeof (*hdr));
        ASSERT3U(d_len, <=, s_len);
        ASSERT3U(zstd_level, !=, 0);

        cctx = ZSTD_createCCtx_advanced(zstd_malloc);

        /*
         * Out of kernel memory, gently fall through - this will disable
         * compression in zio_compress_data
         */
        if (!cctx) {
                ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
                return (s_len);
        }

        /* Set the compression level */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

        /* Use the "magicless" zstd header which saves us 4 header bytes */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);

        /*
         * Disable redundant checksum calculation and content size storage
         * since this is already done by ZFS itself.
         */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);

        c_len = ZSTD_compress2(cctx,
            hdr->data,
            d_len - sizeof (*hdr),
            s_start, s_len);

        ZSTD_freeCCtx(cctx);

        /* Error in the compression routine, disable compression. */
        if (ZSTD_isError(c_len)) {
                /*
                 * If we are aborting the compression because the savings are
                 * too small, that is not a failure. Everything else is a
                 * failure, so increment the compression failure counter.
                 */
                int err = ZSTD_getErrorCode(c_len);
                if (err != ZSTD_error_dstSize_tooSmall) {
                        ZSTDSTAT_BUMP(zstd_stat_com_fail);
                        dprintf("Error: %s", ZSTD_getErrorString(err));
                }
                return (s_len);
        }

        /*
         * Encode the compressed buffer size at the start. We'll need this in
         * decompression to counter the effects of padding which might be added
         * to the compressed buffer and which, if unhandled, would confuse the
         * hell out of our decompression function.
         */
        hdr->c_len = BE_32(c_len);

        /*
         * Check version for overflow.
         * The limit of 24 bits must not be exceeded. This allows a maximum
         * version 1677.72.15 which we don't expect to be ever reached.
         */
        ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);

        /*
         * Encode the compression level as well. We may need to know the
         * original compression level if compressed_arc is disabled, to match
         * the compression settings to write this block to the L2ARC.
         *
         * Encode the actual level, so if the enum changes in the future, we
         * will be compatible.
         *
         * The upper 24 bits store the ZSTD version to be able to provide
         * future compatibility, since new versions might enhance the
         * compression algorithm in a way, where the compressed data will
         * change.
         *
         * As soon as such incompatibility occurs, handling code needs to be
         * added, differentiating between the versions.
         */
        zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
        zfs_set_hdrlevel(hdr, level);
        hdr->raw_version_level = BE_32(hdr->raw_version_level);

        return (c_len + sizeof (*hdr));
}
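
/*
 * Editorial sketch of the on-disk layout produced above (derived from the
 * code and comments, not an authoritative format description):
 *
 *      +--------------+----------------------------------+---------------+
 *      | c_len (BE32) | raw_version_level (BE32):        | c_len bytes   |
 *      |              | upper 24 bits = ZSTD version,    | of magicless  |
 *      |              | low 8 bits = level enum          | zstd frame    |
 *      +--------------+----------------------------------+---------------+
 */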

static size_t
zfs_zstd_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int level)
{
        int16_t zstd_level;
        if (zstd_enum_to_level(level, &zstd_level)) {
                ZSTDSTAT_BUMP(zstd_stat_com_inval);
                return (s_len);
        }
        /*
         * A zstd early abort heuristic.
         *
         * - Zeroth, if this is below zstd-3 (zstd_cutoff_level), or smaller
         * than zstd_abort_size (currently 128k), don't try any of this, just
         * go. (because experimentally that was a reasonable cutoff for a
         * perf win with tiny ratio change)
         * - First, we try LZ4 compression, and if it doesn't early abort, we
         * jump directly to whatever compression level we intended to try.
         * - Second, we try zstd-1 - if that errors out (usually, but not
         * exclusively, if it would overflow), we give up early.
         *
         * If it works, instead we go on and compress anyway.
         *
         * Why two passes? LZ4 alone gets you a lot of the way, but on highly
         * compressible data, it was losing up to 8.5% of the compressed
         * savings versus no early abort, and all the zstd-fast levels are
         * worse indications on their own than LZ4, and don't improve the LZ4
         * pass noticeably if stacked like this.
         */
        size_t actual_abort_size = zstd_abort_size;
        if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
            s_len >= actual_abort_size) {
                int pass_len = 1;
                abd_t sabd, dabd;
                abd_get_from_buf_struct(&sabd, s_start, s_len);
                abd_get_from_buf_struct(&dabd, d_start, d_len);
                pass_len = zfs_lz4_compress(&sabd, &dabd, s_len, d_len, 0);
                abd_free(&dabd);
                abd_free(&sabd);
                if (pass_len < d_len) {
                        ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
                        goto keep_trying;
                }
                ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);

                pass_len = zfs_zstd_compress_impl(s_start, d_start, s_len,
                    d_len, ZIO_ZSTD_LEVEL_1);
                if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) {
                        ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
                        return (s_len);
                }
                ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
        } else {
                ZSTDSTAT_BUMP(zstd_stat_passignored);
                if (s_len < actual_abort_size) {
                        ZSTDSTAT_BUMP(zstd_stat_passignored_size);
                }
        }
keep_trying:
        return (zfs_zstd_compress_impl(s_start, d_start, s_len, d_len, level));

}
#endif

/* Decompress block using zstd and return its stored level */
static int
zfs_zstd_decompress_level_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, uint8_t *level)
{
        ZSTD_DCtx *dctx;
        size_t result;
        int16_t zstd_level;
        uint32_t c_len;
        const zfs_zstdhdr_t *hdr;
        zfs_zstdhdr_t hdr_copy;

        hdr = (const zfs_zstdhdr_t *)s_start;
        c_len = BE_32(hdr->c_len);

        /*
         * Make a copy instead of directly converting the header, since we must
         * not modify the original data that may be used again later.
         */
        hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
        uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);

        /*
         * NOTE: We ignore the ZSTD version for now. As soon as any
         * incompatibility occurs, it has to be handled accordingly.
         * The version can be accessed via `hdr_copy.version`.
         */

        /*
         * Convert and check the level.
         * An invalid level is a strong indicator of data corruption! In such
         * a case, return an error so the upper layers can try to fix it.
         */
        if (zstd_enum_to_level(curlevel, &zstd_level)) {
                ZSTDSTAT_BUMP(zstd_stat_dec_inval);
                return (1);
        }

        ASSERT3U(d_len, >=, s_len);
        ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);

        /* Invalid compressed buffer size encoded at start */
        if (c_len + sizeof (*hdr) > s_len) {
                ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
                return (1);
        }

        dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
        if (!dctx) {
                ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
                return (1);
        }

        /* Set header type to "magicless" */
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);

        /* Decompress the data and release the context */
        result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
        ZSTD_freeDCtx(dctx);

        /*
         * Returns 0 on success (decompression function returned non-negative)
         * and non-zero on failure (decompression function returned negative).
         */
        if (ZSTD_isError(result)) {
                ZSTDSTAT_BUMP(zstd_stat_dec_fail);
                return (1);
        }

        if (level) {
                *level = curlevel;
        }

        return (0);
}

/* Decompress datablock using zstd */
#ifdef IN_BASE
int
zfs_zstd_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int level __maybe_unused)
{

        return (zfs_zstd_decompress_level_buf(s_start, d_start, s_len, d_len,
            NULL));
}
#else
static int
zfs_zstd_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int level __maybe_unused)
{

        return (zfs_zstd_decompress_level_buf(s_start, d_start, s_len, d_len,
            NULL));
}
#endif

#ifndef IN_LIBSA
ZFS_COMPRESS_WRAP_DECL(zfs_zstd_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_zstd_decompress)
ZFS_DECOMPRESS_LEVEL_WRAP_DECL(zfs_zstd_decompress_level)
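
/*
 * Editorial note (an assumption about the ZFS_*_WRAP_DECL macros, which are
 * defined elsewhere, likely in sys/zio_compress.h): they are expected to
 * emit the ABD-based entry points zfs_zstd_compress(), zfs_zstd_decompress()
 * and zfs_zstd_decompress_level(), which borrow linear buffers from the ABDs
 * and forward to the *_buf functions defined above.
 */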

/* Allocator for zstd compression context using mempool_allocator */
static void *
zstd_alloc(void *opaque __maybe_unused, size_t size)
{
        size_t nbytes = sizeof (struct zstd_kmem) + size;
        struct zstd_kmem *z = NULL;

        z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);

        if (!z) {
                ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
                return (NULL);
        }

        return ((void*)z + (sizeof (struct zstd_kmem)));
}

#endif
/*
 * Allocator for zstd decompression context using mempool_allocator with
 * fallback to reserved memory if allocation fails
 */
static void *
zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
{
        size_t nbytes = sizeof (struct zstd_kmem) + size;
        struct zstd_kmem *z = NULL;
        enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;

        z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
        if (!z) {
                /* Try harder, decompression shall not fail */
                z = vmem_alloc(nbytes, KM_SLEEP);
                if (z) {
                        z->pool = NULL;
                }
                ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
        } else {
                return ((void*)z + (sizeof (struct zstd_kmem)));
        }

        /* Fallback if everything fails */
        if (!z) {
                /*
                 * Barrier since we can only handle it in a single thread. All
                 * other following threads need to wait here until
                 * decompression is completed. zstd_free will release this
                 * barrier later.
                 */
                mutex_enter(&zstd_dctx_fallback.barrier);

                z = zstd_dctx_fallback.mem;
                type = ZSTD_KMEM_DCTX;
                ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
        }

        /* Allocation should always be successful */
        if (!z) {
                return (NULL);
        }

        z->kmem_type = type;
        z->kmem_size = nbytes;

        return ((void*)z + (sizeof (struct zstd_kmem)));
}
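
/*
 * Editorial sketch of the allocation layout used by both allocators above
 * and undone by zstd_free() below: every allocation carries a bookkeeping
 * header, and only the memory after it is handed to zstd.
 *
 *      +--------------------+---------------------------------+
 *      | struct zstd_kmem   | size bytes returned to zstd     |
 *      +--------------------+---------------------------------+
 *      ^ z                  ^ (void *)z + sizeof (struct zstd_kmem)
 *
 * zstd_free() reverses the pointer arithmetic to recover the header and
 * dispatches on its kmem_type.
 */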

/* Free allocated memory by its specific type */
static void
zstd_free(void *opaque __maybe_unused, void *ptr)
{
        struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
        enum zstd_kmem_type type;

        ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
        ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);

        type = z->kmem_type;
        switch (type) {
        case ZSTD_KMEM_DEFAULT:
                vmem_free(z, z->kmem_size);
                break;
        case ZSTD_KMEM_POOL:
                zstd_mempool_free(z);
                break;
        case ZSTD_KMEM_DCTX:
                mutex_exit(&zstd_dctx_fallback.barrier);
                break;
        default:
                break;
        }
}

/* Allocate fallback memory to ensure safe decompression */
static void __init
create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
{
        mem->mem_size = size;
        mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
        mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
}

/* Initialize memory pool barrier mutexes */
static void __init
zstd_mempool_init(void)
{
        zstd_mempool_cctx =
            kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
        zstd_mempool_dctx =
            kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);

        for (int i = 0; i < ZSTD_POOL_MAX; i++) {
                mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
                    MUTEX_DEFAULT, NULL);
                mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
                    MUTEX_DEFAULT, NULL);
        }
}

/* Initialize zstd-related memory handling */
static int __init
zstd_meminit(void)
{
        zstd_mempool_init();

        /*
         * Estimate the size of the fallback decompression context.
         * The expected size on x64 with current ZSTD should be about 160 KB.
         */
        create_fallback_mem(&zstd_dctx_fallback,
            P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
            PAGESIZE));

        return (0);
}
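
/*
 * Editorial arithmetic example (assuming 4 KB pages): if
 * ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem) came to, say, 157000
 * bytes, P2ROUNDUP(157000, 4096) rounds up to 39 pages, i.e. 159744 bytes,
 * in line with the ~160 KB expectation noted above.
 */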

/* Release object from pool and free memory */
static void
release_pool(struct zstd_pool *pool)
{
        mutex_destroy(&pool->barrier);
        vmem_free(pool->mem, pool->size);
        pool->mem = NULL;
        pool->size = 0;
}

/* Release memory pool objects */
static void
zstd_mempool_deinit(void)
{
        for (int i = 0; i < ZSTD_POOL_MAX; i++) {
                release_pool(&zstd_mempool_cctx[i]);
                release_pool(&zstd_mempool_dctx[i]);
        }

        kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
        kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
        zstd_mempool_dctx = NULL;
        zstd_mempool_cctx = NULL;
}

/* release unused memory from pool */

void
zfs_zstd_cache_reap_now(void)
{

        /*
         * Short-circuit if there are no buffers to begin with.
         */
        if (ZSTDSTAT(zstd_stat_buffers) == 0)
                return;

        /*
         * Sweep both pools and release cached objects that have sat unused
         * past their timeout.
         */
        zstd_mempool_reap(zstd_mempool_cctx);
        zstd_mempool_reap(zstd_mempool_dctx);
}

extern int __init
zstd_init(void)
{
        /* Set pool size by using maximum sane thread count * 4 */
        pool_count = (boot_ncpus * 4);
        zstd_meminit();

        /* Initialize kstat */
        zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
            KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (zstd_ksp != NULL) {
                zstd_ksp->ks_data = &zstd_stats;
                kstat_install(zstd_ksp);
#ifdef _KERNEL
                zstd_ksp->ks_update = kstat_zstd_update;
#endif
        }

        return (0);
}

extern void
zstd_fini(void)
{
        /* Deinitialize kstat */
        if (zstd_ksp != NULL) {
                kstat_delete(zstd_ksp);
                zstd_ksp = NULL;
        }

        /* Release fallback memory */
        vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
        mutex_destroy(&zstd_dctx_fallback.barrier);

        /* Deinit memory pool */
        zstd_mempool_deinit();
}

#if defined(_KERNEL)
#ifdef __FreeBSD__
module_init(zstd_init);
module_exit(zstd_fini);
#endif

ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, UINT, ZMOD_RW,
        "Enable early abort attempts when using zstd");
ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
        "Minimal size of block to attempt early abort");
#endif
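
/*
 * Editorial note (an assumption about ZFS_MODULE_PARAM plumbing, names are
 * illustrative): the two tunables above would typically surface as the
 * zstd_earlyabort_pass and zstd_abort_size module parameters on Linux, and
 * under the vfs.zfs sysctl tree on FreeBSD, e.g.
 * sysctl vfs.zfs.zstd_earlyabort_pass=0.
 */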