/*
 * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016-2018, Klara Inc.
 * Copyright (c) 2016-2018, Allan Jude
 * Copyright (c) 2018-2020, Sebastian Gottschall
 * Copyright (c) 2019-2020, Michael Niewöhner
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/spa.h>
#include <sys/zstd/zstd.h>

#define	ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/zstd_errors.h"

kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
	kstat_named_t zstd_stat_alloc_fail;
	kstat_named_t zstd_stat_alloc_fallback;
	kstat_named_t zstd_stat_com_alloc_fail;
	kstat_named_t zstd_stat_dec_alloc_fail;
	kstat_named_t zstd_stat_com_inval;
	kstat_named_t zstd_stat_dec_inval;
	kstat_named_t zstd_stat_dec_header_inval;
	kstat_named_t zstd_stat_com_fail;
	kstat_named_t zstd_stat_dec_fail;
} zstd_stats_t;

static zstd_stats_t zstd_stats = {
	{ "alloc_fail",			KSTAT_DATA_UINT64 },
	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
	{ "compress_failed",		KSTAT_DATA_UINT64 },
	{ "decompress_failed",		KSTAT_DATA_UINT64 },
};

/* Enum describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,
	/* Allocation type using kmem_vmalloc */
	ZSTD_KMEM_DEFAULT,
	/* Pool based allocation using mempool_alloc */
	ZSTD_KMEM_POOL,
	/* Reserved fallback memory for decompression only */
	ZSTD_KMEM_DCTX,
	ZSTD_KMEM_COUNT,
};

/* Structure for pooled memory objects */
struct zstd_pool {
	void *mem;
	size_t size;
	kmutex_t barrier;
	hrtime_t timeout;
};

/* Global structure for handling memory allocations */
struct zstd_kmem {
	enum zstd_kmem_type kmem_type;
	size_t kmem_size;
	struct zstd_pool *pool;
};

/* Fallback memory structure used for decompression only if memory runs out */
struct zstd_fallback_mem {
	size_t mem_size;
	void *mem;
	kmutex_t barrier;
};

struct zstd_levelmap {
	int16_t zstd_level;
	enum zio_zstd_levels level;
};

/*
 * ZSTD memory handlers
 *
 * For decompression we use a different handler which also provides fallback
 * memory allocation in case memory runs out.
 *
 * The ZSTD handlers were split up for the simplest possible implementation.
 */
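
/*
 * Allocation layout sketch (derived from zstd_alloc()/zstd_free() below):
 * every buffer handed to the zstd library is prefixed with a struct
 * zstd_kmem bookkeeping header, and the library only ever sees the payload
 * that follows it:
 *
 *	+------------------+----------------------------+
 *	| struct zstd_kmem | <size> bytes used by zstd  |
 *	+------------------+----------------------------+
 *	^ start of the vmem/pool allocation
 *	                   ^ pointer returned to libzstd
 *
 * zstd_free() steps back by sizeof (struct zstd_kmem) to recover the header
 * and decide how the allocation has to be released.
 */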

static void *zstd_alloc(void *opaque, size_t size);
static void *zstd_dctx_alloc(void *opaque, size_t size);
static void zstd_free(void *opaque, void *ptr);

/* Compression memory handler */
static const ZSTD_customMem zstd_malloc = {
	zstd_alloc,
	zstd_free,
	NULL,
};

/* Decompression memory handler */
static const ZSTD_customMem zstd_dctx_malloc = {
	zstd_dctx_alloc,
	zstd_free,
	NULL,
};

/* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
static struct zstd_levelmap zstd_levels[] = {
	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
	{-1, ZIO_ZSTD_LEVEL_FAST_1},
	{-2, ZIO_ZSTD_LEVEL_FAST_2},
	{-3, ZIO_ZSTD_LEVEL_FAST_3},
	{-4, ZIO_ZSTD_LEVEL_FAST_4},
	{-5, ZIO_ZSTD_LEVEL_FAST_5},
	{-6, ZIO_ZSTD_LEVEL_FAST_6},
	{-7, ZIO_ZSTD_LEVEL_FAST_7},
	{-8, ZIO_ZSTD_LEVEL_FAST_8},
	{-9, ZIO_ZSTD_LEVEL_FAST_9},
	{-10, ZIO_ZSTD_LEVEL_FAST_10},
	{-20, ZIO_ZSTD_LEVEL_FAST_20},
	{-30, ZIO_ZSTD_LEVEL_FAST_30},
	{-40, ZIO_ZSTD_LEVEL_FAST_40},
	{-50, ZIO_ZSTD_LEVEL_FAST_50},
	{-60, ZIO_ZSTD_LEVEL_FAST_60},
	{-70, ZIO_ZSTD_LEVEL_FAST_70},
	{-80, ZIO_ZSTD_LEVEL_FAST_80},
	{-90, ZIO_ZSTD_LEVEL_FAST_90},
	{-100, ZIO_ZSTD_LEVEL_FAST_100},
	{-500, ZIO_ZSTD_LEVEL_FAST_500},
	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
};

/*
 * This variable represents the maximum count of the pool based on the number
 * of CPUs plus some buffer. We default to cpu count * 4, see zstd_init().
 */
static int pool_count = 16;

#define	ZSTD_POOL_MAX		pool_count
#define	ZSTD_POOL_TIMEOUT	(60 * 2)

static struct zstd_fallback_mem zstd_dctx_fallback;
static struct zstd_pool *zstd_mempool_cctx;
static struct zstd_pool *zstd_mempool_dctx;
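
/*
 * Sizing example (illustrative numbers, see zstd_init() and the timeout
 * handling in zstd_mempool_alloc() below): on a machine with
 * boot_ncpus == 8, zstd_init() sets pool_count = 8 * 4 = 32, so both the
 * compression and the decompression pool hold up to 32 cached buffers.
 * A cached buffer that has not been reused for ZSTD_POOL_TIMEOUT
 * (60 * 2 = 120 seconds) is freed the next time the pools are scanned for
 * an allocation.
 */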

/*
 * Try to get a cached allocated buffer from memory pool or allocate a new one
 * if necessary. If an object is older than 2 minutes and does not fit the
 * requested size, it will be released and a new cached entry will be
 * allocated. If other pooled objects are detected without being used for
 * 2 minutes, they will be released, too.
 *
 * The concept is that high frequency memory allocations of bigger objects are
 * expensive. So if a lot of work is going on, allocations will be kept for a
 * while and can be reused in that time frame.
 *
 * The scheduled release will be updated every time an object is reused.
 */
static void *
zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
{
	struct zstd_pool *pool;
	struct zstd_kmem *mem = NULL;

	if (!zstd_mempool) {
		return (NULL);
	}

	/* Look for a preallocated memory slot and free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		/*
		 * This lock is simply a marker for a pool object being in use.
		 * If it is already held, the slot will be skipped.
		 *
		 * We need to create it before checking it to avoid race
		 * conditions caused by running in a threaded context.
		 *
		 * The lock is later released by zstd_mempool_free.
		 */
		if (mutex_tryenter(&pool->barrier)) {
			/*
			 * Check if the object fits the requested size; if so,
			 * take it and update the timestamp.
			 */
			if (!mem && pool->mem && size <= pool->size) {
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;
				mem = pool->mem;
				continue;
			}

			/* Free memory if unused object older than 2 minutes */
			if (pool->mem && gethrestime_sec() > pool->timeout) {
				vmem_free(pool->mem, pool->size);
				pool->mem = NULL;
				pool->size = 0;
				pool->timeout = 0;
			}

			mutex_exit(&pool->barrier);
		}
	}

	if (mem) {
		return (mem);
	}

	/*
	 * If no preallocated slot was found, try to fill in a new one.
	 *
	 * We run a similar algorithm twice here to avoid pool fragmentation.
	 * The first one may generate holes in the list if objects get
	 * released. We always make sure that these holes get filled instead
	 * of adding new allocations constantly at the end.
	 */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (mutex_tryenter(&pool->barrier)) {
			/* Object is free, try to allocate new one */
			if (!pool->mem) {
				mem = vmem_alloc(size, KM_SLEEP);
				pool->mem = mem;

				if (pool->mem) {
					/* Keep track for later release */
					mem->pool = pool;
					pool->size = size;
					mem->kmem_type = ZSTD_KMEM_POOL;
					mem->kmem_size = size;
				}
			}

			if (size <= pool->size) {
				/* Update timestamp */
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;

				return (pool->mem);
			}

			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If the pool is full or the allocation failed, try lazy allocation
	 * instead.
	 */
	if (!mem) {
		mem = vmem_alloc(size, KM_NOSLEEP);
		if (mem) {
			mem->pool = NULL;
			mem->kmem_type = ZSTD_KMEM_DEFAULT;
			mem->kmem_size = size;
		}
	}

	return (mem);
}

/* Mark object as released by releasing the barrier mutex */
static void
zstd_mempool_free(struct zstd_kmem *z)
{
	mutex_exit(&z->pool->barrier);
}

/* Convert ZFS internal enum to ZSTD level */
static int
zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
{
	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
		*zstd_level = zstd_levels[level - 1].zstd_level;
		return (0);
	}
	if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
		*zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
		    + ZIO_ZSTD_LEVEL_19].zstd_level;
		return (0);
	}

	/* Invalid/unknown zfs compression enum - this should never happen. */
	return (1);
}
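
/*
 * Mapping example (follows directly from zstd_levels[] and
 * zstd_enum_to_level() above): the regular levels are an identity mapping,
 * e.g. ZIO_ZSTD_LEVEL_3 uses zstd_levels[3 - 1] and yields zstd level 3.
 * The negative "fast" levels are looked up behind the 19 regular entries,
 * e.g. ZIO_ZSTD_LEVEL_FAST_5 uses index
 * (ZIO_ZSTD_LEVEL_FAST_5 - ZIO_ZSTD_LEVEL_FAST_1 + ZIO_ZSTD_LEVEL_19) = 23
 * and yields zstd level -5.
 */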

/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	size_t c_len;
	int16_t zstd_level;
	zfs_zstdhdr_t *hdr;
	ZSTD_CCtx *cctx;

	hdr = (zfs_zstdhdr_t *)d_start;

	/* Skip compression if the specified level is invalid */
	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	ASSERT3U(d_len, >=, sizeof (*hdr));
	ASSERT3U(d_len, <=, s_len);
	ASSERT3U(zstd_level, !=, 0);

	cctx = ZSTD_createCCtx_advanced(zstd_malloc);

	/*
	 * Out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (!cctx) {
		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
		return (s_len);
	}

	/* Set the compression level */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

	/* Use the "magicless" zstd header which saves us 4 header bytes */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);

	/*
	 * Disable redundant checksum calculation and content size storage
	 * since this is already done by ZFS itself.
	 */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);

	c_len = ZSTD_compress2(cctx,
	    hdr->data,
	    d_len - sizeof (*hdr),
	    s_start, s_len);

	ZSTD_freeCCtx(cctx);

	/* Error in the compression routine, disable compression. */
	if (ZSTD_isError(c_len)) {
		/*
		 * If we are aborting the compression because the savings are
		 * too small, that is not a failure. Everything else is a
		 * failure, so increment the compression failure counter.
		 */
		if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
			ZSTDSTAT_BUMP(zstd_stat_com_fail);
		}
		return (s_len);
	}

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	hdr->c_len = BE_32(c_len);

	/*
	 * Check version for overflow.
	 * The limit of 24 bits must not be exceeded. This allows a maximum
	 * version 1677.72.15 which we don't expect to ever be reached.
	 */
	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);

	/*
	 * Encode the compression level as well. We may need to know the
	 * original compression level if compressed_arc is disabled, to match
	 * the compression settings to write this block to the L2ARC.
	 *
	 * Encode the actual level, so if the enum changes in the future, we
	 * will be compatible.
	 *
	 * The upper 24 bits store the ZSTD version to be able to provide
	 * future compatibility, since new versions might enhance the
	 * compression algorithm in a way where the compressed data will
	 * change.
	 *
	 * As soon as such an incompatibility occurs, handling code needs to
	 * be added, differentiating between the versions.
	 */
	hdr->version = ZSTD_VERSION_NUMBER;
	hdr->level = level;
	hdr->raw_version_level = BE_32(hdr->raw_version_level);

	return (c_len + sizeof (*hdr));
}
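
/*
 * Worked example for the header encoding above (illustrative; the exact
 * field layout of zfs_zstdhdr_t is defined in sys/zstd/zstd.h): with
 * zstd 1.4.5, ZSTD_VERSION_NUMBER is 1 * 10000 + 4 * 100 + 5 = 10405,
 * which easily fits the 24-bit limit of 0xFFFFFF (16777215, i.e. version
 * 1677.72.15). That number and the 8-bit compression level share the
 * 32-bit raw_version_level word, which is then stored big-endian, just
 * like c_len, so the on-disk block stays readable across platforms of
 * different endianness.
 */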

/* Decompress block using zstd and return its stored level */
int
zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
    size_t d_len, uint8_t *level)
{
	ZSTD_DCtx *dctx;
	size_t result;
	int16_t zstd_level;
	uint32_t c_len;
	const zfs_zstdhdr_t *hdr;
	zfs_zstdhdr_t hdr_copy;

	hdr = (const zfs_zstdhdr_t *)s_start;
	c_len = BE_32(hdr->c_len);

	/*
	 * Make a copy instead of directly converting the header, since we
	 * must not modify the original data that may be used again later.
	 */
	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);

	/*
	 * NOTE: We ignore the ZSTD version for now. As soon as any
	 * incompatibility occurs, it has to be handled accordingly.
	 * The version can be accessed via `hdr_copy.version`.
	 */

	/*
	 * Convert and check the level.
	 * An invalid level is a strong indicator of data corruption! In such
	 * a case return an error so the upper layers can try to fix it.
	 */
	if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
		return (1);
	}

	ASSERT3U(d_len, >=, s_len);
	ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);

	/* Invalid compressed buffer size encoded at start */
	if (c_len + sizeof (*hdr) > s_len) {
		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
		return (1);
	}

	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
	if (!dctx) {
		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
		return (1);
	}

	/* Set header type to "magicless" */
	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);

	/* Decompress the data and release the context */
	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
	ZSTD_freeDCtx(dctx);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	if (ZSTD_isError(result)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
		return (1);
	}

	if (level) {
		*level = hdr_copy.level;
	}

	return (0);
}
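
/*
 * Usage sketch (illustrative only; the real callers live in the generic
 * zio compression layer, and src/dst/out with their length variables are
 * hypothetical caller-side buffers): a caller compresses into a
 * destination buffer that already accounts for the zfs_zstdhdr_t header
 * and can later recover both the data and the stored level, e.g.:
 *
 *	uint8_t stored_level;
 *	size_t c_len = zfs_zstd_compress(src, dst, s_len, d_len,
 *	    ZIO_ZSTD_LEVEL_3);
 *	if (c_len < s_len) {
 *		VERIFY0(zfs_zstd_decompress_level(dst, out, c_len,
 *		    s_len, &stored_level));
 *	}
 *
 * On success c_len already includes sizeof (zfs_zstdhdr_t); a return
 * value of s_len means the block could not be compressed into d_len and
 * the caller keeps it uncompressed.
 */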

/* Decompress data block using zstd */
int
zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level __maybe_unused)
{
	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
	    NULL));
}

/* Allocator for zstd compression context using mempool_allocator */
static void *
zstd_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);

	if (!z) {
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
		return (NULL);
	}

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/*
 * Allocator for zstd decompression context using mempool_allocator with
 * fallback to reserved memory if allocation fails
 */
static void *
zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;
	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
	if (!z) {
		/* Try harder, decompression shall not fail */
		z = vmem_alloc(nbytes, KM_SLEEP);
		if (z) {
			z->pool = NULL;
		}
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
	} else {
		return ((void*)z + (sizeof (struct zstd_kmem)));
	}

	/* Fallback if everything fails */
	if (!z) {
		/*
		 * Barrier since we can only handle it in a single thread. All
		 * other threads need to wait here until decompression is
		 * completed. zstd_free will release this barrier later.
		 */
		mutex_enter(&zstd_dctx_fallback.barrier);

		z = zstd_dctx_fallback.mem;
		type = ZSTD_KMEM_DCTX;
		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
	}

	/* Allocation should always be successful */
	if (!z) {
		return (NULL);
	}

	z->kmem_type = type;
	z->kmem_size = nbytes;

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/* Free allocated memory by its specific type */
static void
zstd_free(void *opaque __maybe_unused, void *ptr)
{
	struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
	enum zstd_kmem_type type;

	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);

	type = z->kmem_type;
	switch (type) {
	case ZSTD_KMEM_DEFAULT:
		vmem_free(z, z->kmem_size);
		break;
	case ZSTD_KMEM_POOL:
		zstd_mempool_free(z);
		break;
	case ZSTD_KMEM_DCTX:
		mutex_exit(&zstd_dctx_fallback.barrier);
		break;
	default:
		break;
	}
}

/* Allocate fallback memory to ensure safe decompression */
static void __init
create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
{
	mem->mem_size = size;
	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
}

/* Initialize memory pool barrier mutexes */
static void __init
zstd_mempool_init(void)
{
	zstd_mempool_cctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
	zstd_mempool_dctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);

	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}

/* Initialize zstd-related memory handling */
static int __init
zstd_meminit(void)
{
	zstd_mempool_init();

	/*
	 * Estimate the size of the fallback decompression context.
	 * The expected size on x64 with current ZSTD should be about 160 KB.
	 */
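	/*
	 * Arithmetic sketch (assuming the ~160 KB estimate above and 4 KB
	 * pages): the estimated context size plus the struct zstd_kmem
	 * bookkeeping header is rounded up to a whole number of pages by
	 * P2ROUNDUP, i.e. roughly 41 pages (~164 KB), reserved once here
	 * and only released again in zstd_fini().
	 */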
	create_fallback_mem(&zstd_dctx_fallback,
	    P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
	    PAGESIZE));

	return (0);
}

/* Release object from pool and free memory */
static void __exit
release_pool(struct zstd_pool *pool)
{
	mutex_destroy(&pool->barrier);
	vmem_free(pool->mem, pool->size);
	pool->mem = NULL;
	pool->size = 0;
}

/* Release memory pool objects */
static void __exit
zstd_mempool_deinit(void)
{
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		release_pool(&zstd_mempool_cctx[i]);
		release_pool(&zstd_mempool_dctx[i]);
	}

	kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	zstd_mempool_dctx = NULL;
	zstd_mempool_cctx = NULL;
}

extern int __init
zstd_init(void)
{
	/* Set pool size by using maximum sane thread count * 4 */
	pool_count = (boot_ncpus * 4);
	zstd_meminit();

	/* Initialize kstat */
	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (zstd_ksp != NULL) {
		zstd_ksp->ks_data = &zstd_stats;
		kstat_install(zstd_ksp);
	}

	return (0);
}

extern void __exit
zstd_fini(void)
{
	/* Deinitialize kstat */
	if (zstd_ksp != NULL) {
		kstat_delete(zstd_ksp);
		zstd_ksp = NULL;
	}

	/* Release fallback memory */
	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
	mutex_destroy(&zstd_dctx_fallback.barrier);

	/* Deinit memory pool */
	zstd_mempool_deinit();
}

#if defined(_KERNEL)
module_init(zstd_init);
module_exit(zstd_fini);

ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("BSD");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
#endif