/*
 * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016-2018, Klara Inc.
 * Copyright (c) 2016-2018, Allan Jude
 * Copyright (c) 2018-2020, Sebastian Gottschall
 * Copyright (c) 2019-2020, Michael Niewöhner
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/spa.h>
#include <sys/zstd/zstd.h>

#define	ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/zstd_errors.h"

kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
	kstat_named_t	zstd_stat_alloc_fail;
	kstat_named_t	zstd_stat_alloc_fallback;
	kstat_named_t	zstd_stat_com_alloc_fail;
	kstat_named_t	zstd_stat_dec_alloc_fail;
	kstat_named_t	zstd_stat_com_inval;
	kstat_named_t	zstd_stat_dec_inval;
	kstat_named_t	zstd_stat_dec_header_inval;
	kstat_named_t	zstd_stat_com_fail;
	kstat_named_t	zstd_stat_dec_fail;
	kstat_named_t	zstd_stat_buffers;
	kstat_named_t	zstd_stat_size;
} zstd_stats_t;

static zstd_stats_t zstd_stats = {
	{ "alloc_fail",			KSTAT_DATA_UINT64 },
	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
	{ "compress_failed",		KSTAT_DATA_UINT64 },
	{ "decompress_failed",		KSTAT_DATA_UINT64 },
	{ "buffers",			KSTAT_DATA_UINT64 },
	{ "size",			KSTAT_DATA_UINT64 },
};

/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,
	/* Allocation type using kmem_vmalloc */
	ZSTD_KMEM_DEFAULT,
	/* Pool based allocation using mempool_alloc */
	ZSTD_KMEM_POOL,
	/* Reserved fallback memory for decompression only */
	ZSTD_KMEM_DCTX,
	ZSTD_KMEM_COUNT,
};

/* Structure for pooled memory objects */
struct zstd_pool {
	void *mem;
	size_t size;
	kmutex_t barrier;
	hrtime_t timeout;
};

/* Global structure for handling memory allocations */
struct zstd_kmem {
	enum zstd_kmem_type kmem_type;
	size_t kmem_size;
	struct zstd_pool *pool;
};

/* Fallback memory structure used for decompression only if memory runs out */
struct zstd_fallback_mem {
	size_t mem_size;
	void *mem;
	kmutex_t barrier;
};

struct zstd_levelmap {
	int16_t zstd_level;
	enum zio_zstd_levels level;
};
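
/*
 * Relationship between the structures above, as used by the code below:
 * struct zstd_pool describes one cached slot; its barrier mutex doubles as
 * an "in use" marker (taken via mutex_tryenter() and released again in
 * zstd_mempool_free()), and its timeout holds the absolute time in seconds
 * after which an idle slot may be reclaimed. struct zstd_kmem is the
 * bookkeeping header prepended to every buffer handed to the zstd library.
 */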

/*
 * ZSTD memory handlers
 *
 * For decompression we use a different handler which also provides fallback
 * memory allocation in case memory runs out.
 *
 * The handlers are split up to keep the implementation as simple as possible.
 */
static void *zstd_alloc(void *opaque, size_t size);
static void *zstd_dctx_alloc(void *opaque, size_t size);
static void zstd_free(void *opaque, void *ptr);

/* Compression memory handler */
static const ZSTD_customMem zstd_malloc = {
	zstd_alloc,
	zstd_free,
	NULL,
};

/* Decompression memory handler */
static const ZSTD_customMem zstd_dctx_malloc = {
	zstd_dctx_alloc,
	zstd_free,
	NULL,
};

/* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
static struct zstd_levelmap zstd_levels[] = {
	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
	{-1, ZIO_ZSTD_LEVEL_FAST_1},
	{-2, ZIO_ZSTD_LEVEL_FAST_2},
	{-3, ZIO_ZSTD_LEVEL_FAST_3},
	{-4, ZIO_ZSTD_LEVEL_FAST_4},
	{-5, ZIO_ZSTD_LEVEL_FAST_5},
	{-6, ZIO_ZSTD_LEVEL_FAST_6},
	{-7, ZIO_ZSTD_LEVEL_FAST_7},
	{-8, ZIO_ZSTD_LEVEL_FAST_8},
	{-9, ZIO_ZSTD_LEVEL_FAST_9},
	{-10, ZIO_ZSTD_LEVEL_FAST_10},
	{-20, ZIO_ZSTD_LEVEL_FAST_20},
	{-30, ZIO_ZSTD_LEVEL_FAST_30},
	{-40, ZIO_ZSTD_LEVEL_FAST_40},
	{-50, ZIO_ZSTD_LEVEL_FAST_50},
	{-60, ZIO_ZSTD_LEVEL_FAST_60},
	{-70, ZIO_ZSTD_LEVEL_FAST_70},
	{-80, ZIO_ZSTD_LEVEL_FAST_80},
	{-90, ZIO_ZSTD_LEVEL_FAST_90},
	{-100, ZIO_ZSTD_LEVEL_FAST_100},
	{-500, ZIO_ZSTD_LEVEL_FAST_500},
	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
};

/*
 * This variable represents the maximum count of the pool based on the number
 * of CPUs plus some buffer. We default to cpu count * 4, see zstd_init.
 */
static int pool_count = 16;

#define	ZSTD_POOL_MAX		pool_count
#define	ZSTD_POOL_TIMEOUT	60 * 2

static struct zstd_fallback_mem zstd_dctx_fallback;
static struct zstd_pool *zstd_mempool_cctx;
static struct zstd_pool *zstd_mempool_dctx;
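
/*
 * For reference: ZSTD_POOL_TIMEOUT is expressed in seconds (60 * 2, i.e.
 * the "2 minutes" referred to throughout this file), and ZSTD_POOL_MAX
 * expands to pool_count, which stays at its default of 16 until zstd_init
 * raises it to boot_ncpus * 4. Each of the two pools above (cctx and dctx)
 * can therefore cache up to pool_count buffers.
 */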

/*
 * The library zstd code expects these functions to exist whenever
 * ADDRESS_SANITIZER is defined. ASAN provides them, but KASAN only defines
 * __SANITIZE_ADDRESS__ and does not provide them, so we supply empty stubs
 * here to avoid changing the external zstd code.
 */
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define	ADDRESS_SANITIZER 1
#endif
#elif defined(__SANITIZE_ADDRESS__)
#define	ADDRESS_SANITIZER 1
#endif
#if defined(_KERNEL) && defined(ADDRESS_SANITIZER)
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {};
void __asan_poison_memory_region(void const volatile *addr, size_t size) {};
#endif

static void
zstd_mempool_reap(struct zstd_pool *zstd_mempool)
{
	struct zstd_pool *pool;

	if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) {
		return;
	}

	/* free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (pool->mem && mutex_tryenter(&pool->barrier)) {
			/* Free memory if unused object older than 2 minutes */
			if (pool->mem && gethrestime_sec() > pool->timeout) {
				vmem_free(pool->mem, pool->size);
				ZSTDSTAT_SUB(zstd_stat_buffers, 1);
				ZSTDSTAT_SUB(zstd_stat_size, pool->size);
				pool->mem = NULL;
				pool->size = 0;
				pool->timeout = 0;
			}
			mutex_exit(&pool->barrier);
		}
	}
}
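
/*
 * For orientation, zstd_mempool_alloc() below works in three stages:
 *
 *	1. scan the pool for an unlocked slot that already holds a buffer of
 *	   sufficient size and reuse it;
 *	2. otherwise scan again for an unlocked empty slot and back it with a
 *	   fresh vmem_alloc(size, KM_SLEEP) buffer;
 *	3. if the pool is exhausted or the allocation failed, fall back to a
 *	   plain vmem_alloc(size, KM_NOSLEEP) outside the pool.
 *
 * Buffers served from the pool get their timeout pushed out by
 * ZSTD_POOL_TIMEOUT seconds; non-pooled buffers are freed immediately in
 * zstd_free().
 */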

/*
 * Try to get a cached, allocated buffer from the memory pool, or allocate a
 * new one if necessary. If an object is older than 2 minutes and does not
 * fit the requested size, it will be released and a new cached entry will
 * be allocated. If other pooled objects are detected without being used
 * for 2 minutes, they will be released, too.
 *
 * The concept is that high-frequency memory allocations of larger objects
 * are expensive. So if a lot of work is going on, allocations will be kept
 * for a while and can be reused in that time frame.
 *
 * The scheduled release will be updated every time an object is reused.
 */

static void *
zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
{
	struct zstd_pool *pool;
	struct zstd_kmem *mem = NULL;

	if (!zstd_mempool) {
		return (NULL);
	}

	/* Seek for preallocated memory slot and free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		/*
		 * This lock is simply a marker for a pool object being in use.
		 * If it's already held, the slot will be skipped.
		 *
		 * We need to acquire it before checking the slot to avoid race
		 * conditions caused by running in a threaded context.
		 *
		 * The lock is later released by zstd_mempool_free.
		 */
		if (mutex_tryenter(&pool->barrier)) {
			/*
			 * Check if the object fits the requested size; if so,
			 * take it and update the timestamp.
			 */
			if (pool->mem && size <= pool->size) {
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;
				mem = pool->mem;
				return (mem);
			}
			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If no preallocated slot was found, try to fill in a new one.
	 *
	 * We run a similar algorithm twice here to avoid pool fragmentation.
	 * The first pass may generate holes in the list if objects get
	 * released. We always make sure that these holes get filled instead
	 * of adding new allocations constantly at the end.
	 */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (mutex_tryenter(&pool->barrier)) {
			/* Object is free, try to allocate new one */
			if (!pool->mem) {
				mem = vmem_alloc(size, KM_SLEEP);
				if (mem) {
					ZSTDSTAT_ADD(zstd_stat_buffers, 1);
					ZSTDSTAT_ADD(zstd_stat_size, size);
					pool->mem = mem;
					pool->size = size;
					/* Keep track for later release */
					mem->pool = pool;
					mem->kmem_type = ZSTD_KMEM_POOL;
					mem->kmem_size = size;
				}
			}

			if (size <= pool->size) {
				/* Update timestamp */
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;

				return (pool->mem);
			}

			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If the pool is full or the allocation failed, try lazy allocation
	 * instead.
	 */
	if (!mem) {
		mem = vmem_alloc(size, KM_NOSLEEP);
		if (mem) {
			mem->pool = NULL;
			mem->kmem_type = ZSTD_KMEM_DEFAULT;
			mem->kmem_size = size;
		}
	}

	return (mem);
}

/* Mark object as released by releasing the barrier mutex */
static void
zstd_mempool_free(struct zstd_kmem *z)
{
	mutex_exit(&z->pool->barrier);
}

/* Convert ZFS internal enum to ZSTD level */
static int
zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
{
	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
		*zstd_level = zstd_levels[level - 1].zstd_level;
		return (0);
	}
	if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
		*zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
		    + ZIO_ZSTD_LEVEL_19].zstd_level;
		return (0);
	}

	/* Invalid/unknown zfs compression enum - this should never happen. */
	return (1);
}
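
/*
 * Worked example of the conversion above, assuming (as the index arithmetic
 * already relies on) that the zio_zstd_levels enum values for levels 1-19
 * match their numeric levels and that the FAST_* values are consecutive:
 * ZIO_ZSTD_LEVEL_3 takes the first branch and resolves to
 * zstd_levels[2].zstd_level == 3, while ZIO_ZSTD_LEVEL_FAST_10 resolves to
 * index (ZIO_ZSTD_LEVEL_FAST_10 - ZIO_ZSTD_LEVEL_FAST_1) +
 * ZIO_ZSTD_LEVEL_19 = 9 + 19 = 28, i.e. the negative "fast" level -10.
 */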

/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	size_t c_len;
	int16_t zstd_level;
	zfs_zstdhdr_t *hdr;
	ZSTD_CCtx *cctx;

	hdr = (zfs_zstdhdr_t *)d_start;

	/* Skip compression if the specified level is invalid */
	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	ASSERT3U(d_len, >=, sizeof (*hdr));
	ASSERT3U(d_len, <=, s_len);
	ASSERT3U(zstd_level, !=, 0);

	cctx = ZSTD_createCCtx_advanced(zstd_malloc);

	/*
	 * Out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (!cctx) {
		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
		return (s_len);
	}

	/* Set the compression level */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

	/* Use the "magicless" zstd header which saves us 4 header bytes */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);

	/*
	 * Disable redundant checksum calculation and content size storage
	 * since this is already done by ZFS itself.
	 */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);

	c_len = ZSTD_compress2(cctx,
	    hdr->data,
	    d_len - sizeof (*hdr),
	    s_start, s_len);

	ZSTD_freeCCtx(cctx);

	/* Error in the compression routine, disable compression. */
	if (ZSTD_isError(c_len)) {
		/*
		 * If we are aborting the compression because the savings are
		 * too small, that is not a failure. Everything else is a
		 * failure, so increment the compression failure counter.
		 */
		if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
			ZSTDSTAT_BUMP(zstd_stat_com_fail);
		}
		return (s_len);
	}

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be added
	 * to the compressed buffer and which, if unhandled, would confuse the
	 * hell out of our decompression function.
	 */
	hdr->c_len = BE_32(c_len);

	/*
	 * Check version for overflow.
	 * The limit of 24 bits must not be exceeded. This allows a maximum
	 * version 1677.72.15, which we don't expect to ever be reached.
	 */
	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);

	/*
	 * Encode the compression level as well. We may need to know the
	 * original compression level if compressed_arc is disabled, to match
	 * the compression settings to write this block to the L2ARC.
	 *
	 * Encode the actual level, so if the enum changes in the future, we
	 * will be compatible.
	 *
	 * The upper 24 bits store the ZSTD version to be able to provide
	 * future compatibility, since new versions might enhance the
	 * compression algorithm in a way where the compressed data will
	 * change.
	 *
	 * As soon as such an incompatibility occurs, handling code needs to
	 * be added, differentiating between the versions.
	 */
	zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
	zfs_set_hdrlevel(hdr, level);
	hdr->raw_version_level = BE_32(hdr->raw_version_level);

	return (c_len + sizeof (*hdr));
}
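
/*
 * For reference, the header written above consists of c_len (the compressed
 * size) and a 32-bit word combining the 8-bit compression level with the
 * 24-bit ZSTD_VERSION_NUMBER; both words are stored big-endian (BE_32).
 * zstd encodes its version as major * 10000 + minor * 100 + release, so
 * e.g. zstd 1.4.5 would be stored as 10405, which is how the 24-bit limit
 * of 16777215 translates to the maximum version 1677.72.15 noted above.
 */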

/* Decompress block using zstd and return its stored level */
int
zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
    size_t d_len, uint8_t *level)
{
	ZSTD_DCtx *dctx;
	size_t result;
	int16_t zstd_level;
	uint32_t c_len;
	const zfs_zstdhdr_t *hdr;
	zfs_zstdhdr_t hdr_copy;

	hdr = (const zfs_zstdhdr_t *)s_start;
	c_len = BE_32(hdr->c_len);

	/*
	 * Make a copy instead of directly converting the header, since we must
	 * not modify the original data that may be used again later.
	 */
	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
	uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);

	/*
	 * NOTE: We ignore the ZSTD version for now. As soon as any
	 * incompatibility occurs, it has to be handled accordingly.
	 * The version can be accessed via `hdr_copy.version`.
	 */

	/*
	 * Convert and check the level.
	 * An invalid level is a strong indicator of data corruption! In such
	 * a case, return an error so the upper layers can try to fix it.
	 */
	if (zstd_enum_to_level(curlevel, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
		return (1);
	}

	ASSERT3U(d_len, >=, s_len);
	ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);

	/* Invalid compressed buffer size encoded at start */
	if (c_len + sizeof (*hdr) > s_len) {
		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
		return (1);
	}

	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
	if (!dctx) {
		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
		return (1);
	}

	/* Set header type to "magicless" */
	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);

	/* Decompress the data and release the context */
	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
	ZSTD_freeDCtx(dctx);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	if (ZSTD_isError(result)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
		return (1);
	}

	if (level) {
		*level = curlevel;
	}

	return (0);
}

/* Decompress datablock using zstd */
int
zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level __maybe_unused)
{

	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
	    NULL));
}

/* Allocator for zstd compression context using mempool_allocator */
static void *
zstd_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);

	if (!z) {
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
		return (NULL);
	}

	return ((void*)z + (sizeof (struct zstd_kmem)));
}
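
/*
 * Buffer layout shared by zstd_alloc() above and zstd_dctx_alloc() below:
 * the bookkeeping header sits in front of the memory the zstd library
 * actually sees, and zstd_free() steps back by sizeof (struct zstd_kmem)
 * to recover it.
 *
 *	+------------------+---------------------------------+
 *	| struct zstd_kmem | memory used by the zstd library |
 *	+------------------+---------------------------------+
 *	^                  ^
 *	allocation start   pointer returned to the library
 */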

/*
 * Allocator for zstd decompression context using mempool_allocator with
 * fallback to reserved memory if allocation fails
 */
static void *
zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;
	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
	if (!z) {
		/* Try harder, decompression shall not fail */
		z = vmem_alloc(nbytes, KM_SLEEP);
		if (z) {
			z->pool = NULL;
		}
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
	} else {
		return ((void*)z + (sizeof (struct zstd_kmem)));
	}

	/* Fallback if everything fails */
	if (!z) {
		/*
		 * Barrier since we can only handle it in a single thread. All
		 * other following threads need to wait here until
		 * decompression is completed. zstd_free will release this
		 * barrier later.
		 */
		mutex_enter(&zstd_dctx_fallback.barrier);

		z = zstd_dctx_fallback.mem;
		type = ZSTD_KMEM_DCTX;
		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
	}

	/* Allocation should always be successful */
	if (!z) {
		return (NULL);
	}

	z->kmem_type = type;
	z->kmem_size = nbytes;

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/* Free allocated memory by its specific type */
static void
zstd_free(void *opaque __maybe_unused, void *ptr)
{
	struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
	enum zstd_kmem_type type;

	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);

	type = z->kmem_type;
	switch (type) {
	case ZSTD_KMEM_DEFAULT:
		vmem_free(z, z->kmem_size);
		break;
	case ZSTD_KMEM_POOL:
		zstd_mempool_free(z);
		break;
	case ZSTD_KMEM_DCTX:
		mutex_exit(&zstd_dctx_fallback.barrier);
		break;
	default:
		break;
	}
}

/* Allocate fallback memory to ensure safe decompression */
static void __init
create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
{
	mem->mem_size = size;
	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
}

/* Initialize memory pool barrier mutexes */
static void __init
zstd_mempool_init(void)
{
	zstd_mempool_cctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
	zstd_mempool_dctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);

	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}

/* Initialize zstd-related memory handling */
static int __init
zstd_meminit(void)
{
	zstd_mempool_init();

	/*
	 * Estimate the size of the fallback decompression context.
	 * The expected size on x64 with current ZSTD should be about 160 KB.
	 */
	create_fallback_mem(&zstd_dctx_fallback,
	    P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
	    PAGESIZE));

	return (0);
}

/* Release object from pool and free memory */
static void __exit
release_pool(struct zstd_pool *pool)
{
	mutex_destroy(&pool->barrier);
	vmem_free(pool->mem, pool->size);
	pool->mem = NULL;
	pool->size = 0;
}

/* Release memory pool objects */
static void __exit
zstd_mempool_deinit(void)
{
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		release_pool(&zstd_mempool_cctx[i]);
		release_pool(&zstd_mempool_dctx[i]);
	}

	kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	zstd_mempool_dctx = NULL;
	zstd_mempool_cctx = NULL;
}
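
/*
 * zfs_zstd_cache_reap_now() below is the exported hook for pruning the
 * pools; it only frees cached buffers whose ZSTD_POOL_TIMEOUT deadline has
 * already expired. (Presumably it is driven periodically from the memory
 * reclaim path; the callers live outside this file.)
 */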

/* Release unused memory from pool */
void
zfs_zstd_cache_reap_now(void)
{

	/*
	 * Short-circuit if there are no buffers to begin with.
	 */
	if (ZSTDSTAT(zstd_stat_buffers) == 0)
		return;

	/*
	 * Reaping scans both pools and releases cached objects
	 * that have gone unused past their timeout.
	 */
	zstd_mempool_reap(zstd_mempool_cctx);
	zstd_mempool_reap(zstd_mempool_dctx);
}

extern int __init
zstd_init(void)
{
	/* Set pool size by using maximum sane thread count * 4 */
	pool_count = (boot_ncpus * 4);
	zstd_meminit();

	/* Initialize kstat */
	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (zstd_ksp != NULL) {
		zstd_ksp->ks_data = &zstd_stats;
		kstat_install(zstd_ksp);
	}

	return (0);
}

extern void __exit
zstd_fini(void)
{
	/* Deinitialize kstat */
	if (zstd_ksp != NULL) {
		kstat_delete(zstd_ksp);
		zstd_ksp = NULL;
	}

	/* Release fallback memory */
	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
	mutex_destroy(&zstd_dctx_fallback.barrier);

	/* Deinit memory pool */
	zstd_mempool_deinit();
}

#if defined(_KERNEL)
module_init(zstd_init);
module_exit(zstd_fini);

ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("Dual BSD/GPL");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
#endif
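
/*
 * The counters registered in zstd_init() are exported through the kstat
 * framework under the name "zstd". Depending on the platform's kstat glue
 * they are typically readable from /proc/spl/kstat/zfs/zstd on Linux or
 * via the kstat sysctl tree on FreeBSD; the exact location is outside the
 * scope of this file.
 */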