/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/mem.h"
#include "hist.h"                 /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"

/* ***************************************************************
*  Tuning parameters
*****************************************************************/
/*!
 * ZSTD_COMPRESS_HEAPMODE :
 * Select how the default compression function ZSTD_compress() allocates its context,
 * on stack (0, default), or into heap (1).
 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
 */
#ifndef ZSTD_COMPRESS_HEAPMODE
# define ZSTD_COMPRESS_HEAPMODE 0
#endif

/*!
 * ZSTD_HASHLOG3_MAX :
 * Maximum size of the hash table dedicated to finding 3-byte matches,
 * in log format, aka 17 => 1 << 17 == 128Ki positions.
 * This structure is only used in zstd_opt.
 * Since allocation is centralized for all strategies, it has to be known here.
 * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
 * so that zstd_opt.c doesn't need to know about this constant.
 */
#ifndef ZSTD_HASHLOG3_MAX
# define ZSTD_HASHLOG3_MAX 17
#endif

/*-*************************************
*  Helper functions
***************************************/
/* ZSTD_compressBound()
 * Note that the result from this function is only compatible with the "normal"
 * full-block strategy.
 * When there are many small blocks due to frequent flushes in streaming mode,
 * the overhead of block headers can make the compressed data larger than the
 * return value of ZSTD_compressBound().
 */
size_t ZSTD_compressBound(size_t srcSize) {
    return ZSTD_COMPRESSBOUND(srcSize);
}
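/* Worked example (per the ZSTD_COMPRESSBOUND() definition in zstd.h) :
 * for srcSize = 100 KB (102400 bytes), the bound is
 * 102400 + (102400>>8) + ((128 KB - 102400)>>11) = 102400 + 400 + 14 = 102814 bytes,
 * i.e. a worst-case expansion well under 1% for inputs below 128 KB. */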
/*-*************************************
*  Context memory management
***************************************/
struct ZSTD_CDict_s {
    const void* dictContent;
    size_t dictContentSize;
    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
    ZSTD_cwksp workspace;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;
    U32 dictID;
    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
    ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
                                           * row-based matchfinder. Unless the cdict is reloaded, we will use
                                           * the same greedy/lazy matchfinder at compression time.
                                           */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */

ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}

static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    assert(cctx != NULL);
    ZSTD_memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuSupportsBmi2();
    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
        assert(!ZSTD_isError(err));
        (void)err;
    }
}

ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_STATIC_ASSERT(zcss_init==0);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
        if (!cctx) return NULL;
        ZSTD_initCCtx(cctx, customMem);
        return cctx;
    }
}

ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
{
    ZSTD_cwksp ws;
    ZSTD_CCtx* cctx;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);

    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
    if (cctx == NULL) return NULL;

    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
    ZSTD_cwksp_move(&cctx->workspace, &ws);
    cctx->staticSize = workspaceSize;

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
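/* Usage sketch for ZSTD_initStaticCCtx() (illustrative; error handling elided,
 * `level` and the backing buffer come from the caller) :
 *
 *   size_t const wkspSize = ZSTD_estimateCCtxSize(level);
 *   void* const wksp = malloc(wkspSize);           // malloc() results are suitably aligned
 *   ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
 *   // use cctx for compression; never pass it to ZSTD_freeCCtx(),
 *   // simply free(wksp) when done.
 */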
/**
 * Clears and frees all of the dictionaries in the CCtx.
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;
}

static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
    return bufferSize + cdictSize;
}

static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");
    {   int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
        ZSTD_freeCCtxContent(cctx);
        if (!cctxInWorkspace) {
            ZSTD_customFree(cctx, cctx->customMem);
        }
    }
    return 0;
}


static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;
    return 0;
#endif
}


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support sizeof on NULL */
    /* cctx may be in the workspace */
    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
           + ZSTD_cwksp_sizeof(&cctx->workspace)
           + ZSTD_sizeof_localDict(cctx->localDict)
           + ZSTD_sizeof_mtctx(cctx);
}

size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);  /* same object */
}

/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }

/* Returns true if the strategy supports using a row based matchfinder */
static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
}

/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
 * for this compression.
 */
static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
    assert(mode != ZSTD_ps_auto);
    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
}

/* Returns row matchfinder usage given an initial mode and cParams */
static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
                                                         const ZSTD_compressionParameters* const cParams) {
#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
    int const kHasSIMD128 = 1;
#else
    int const kHasSIMD128 = 0;
#endif
    if (mode != ZSTD_ps_auto) return mode; /* if explicitly requested, use the row matchfinder even without SIMD */
    mode = ZSTD_ps_disable;
    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
    if (kHasSIMD128) {
        if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
    } else {
        if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
    }
    return mode;
}
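/* Resolution examples for ZSTD_ps_auto (illustrative) :
 * - strategy ZSTD_lazy2, windowLog 15, SIMD available -> ZSTD_ps_enable  (windowLog > 14)
 * - strategy ZSTD_lazy2, windowLog 15, no SIMD        -> ZSTD_ps_disable (needs windowLog > 17)
 * - strategy ZSTD_btopt, any windowLog                -> ZSTD_ps_disable (row matchfinder unsupported)
 */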
/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
                                                        const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
}

/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
                                   const ZSTD_paramSwitch_e useRowMatchFinder,
                                   const U32 forDDSDict) {
    assert(useRowMatchFinder != ZSTD_ps_auto);
    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
     */
    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
}

/* Returns ZSTD_ps_enable if compression parameters are such that we should
 * enable long distance matching (wlog >= 27, strategy >= btopt).
 * Returns ZSTD_ps_disable otherwise.
 */
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
                                                const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}

static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params cctxParams;
    /* should not matter, as all cParams are presumed properly defined */
    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
    cctxParams.cParams = cParams;

    /* Adjust advanced params according to cParams */
    cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
    if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
        assert(cctxParams.ldmParams.hashRateLog < 32);
    }
    cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
    assert(!ZSTD_checkCParams(cParams));
    return cctxParams;
}

static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params* params;
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
            sizeof(ZSTD_CCtx_params), customMem);
    if (!params) { return NULL; }
    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
    params->customMem = customMem;
    return params;
}

ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}

size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params == NULL) { return 0; }
    ZSTD_customFree(params, params->customMem);
    return 0;
}

size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}

size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}
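/* Typical parameter-object lifecycle (minimal sketch, error checks elided) :
 *
 *   ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
 *   ZSTD_CCtxParams_init(params, 3);                             // start from level-3 defaults
 *   ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
 *   // ... hand `params` to a CCtx via ZSTD_CCtx_setParametersUsingCCtxParams() ...
 *   ZSTD_freeCCtxParams(params);
 */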
#define ZSTD_NO_CLEVEL 0

/**
 * Initializes the cctxParams from params and compressionLevel.
 * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
 */
static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
{
    assert(!ZSTD_checkCParams(params->cParams));
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->cParams = params->cParams;
    cctxParams->fParams = params->fParams;
    /* Should not matter, as all cParams are presumed properly defined.
     * But, set it for tracing anyway.
     */
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
             cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}

size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
    return 0;
}

/**
 * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
 * @param params Validated zstd parameters.
 */
static void ZSTD_CCtxParams_setZstdParams(
        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
{
    assert(!ZSTD_checkCParams(params->cParams));
    cctxParams->cParams = params->cParams;
    cctxParams->fParams = params->fParams;
    /* Should not matter, as all cParams are presumed properly defined.
     * But, set it for tracing anyway.
     */
    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
}

ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;

    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_overlapLog:
#ifdef ZSTD_MULTITHREAD
        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
#else
        bounds.lowerBound = 0;
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_enableDedicatedDictSearch:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow :
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    case ZSTD_c_srcSizeHint:
        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
        return bounds;

    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
        bounds.lowerBound = (int)ZSTD_bm_buffered;
        bounds.upperBound = (int)ZSTD_bm_stable;
        return bounds;

    case ZSTD_c_blockDelimiters:
        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
        return bounds;

    case ZSTD_c_validateSequences:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_useBlockSplitter:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_useRowMatchFinder:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_deterministicRefPrefix:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    default:
        bounds.error = ERROR(parameter_unsupported);
        return bounds;
    }
}
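/* Example (minimal sketch) : query bounds before trusting a user-supplied value.
 *
 *   ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
 *   if (!ZSTD_isError(b.error)) {
 *       if (level < b.lowerBound) level = b.lowerBound;
 *       if (level > b.upperBound) level = b.upperBound;
 *   }
 *
 * This is exactly the clamping performed by ZSTD_cParam_clampBounds() below.
 */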
/* ZSTD_cParam_clampBounds:
 * Clamps the value into the bounded range.
 */
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return bounds.error;
    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
    if (*value > bounds.upperBound) *value = bounds.upperBound;
    return 0;
}

#define BOUNDCHECK(cParam, val) {                             \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),    \
                    parameter_outOfBound, "Param out of bounds"); \
}
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
    default:
        return 0;
    }
}

size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
        }
    }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
        break;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
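/* Example (minimal sketch, `cctx` freshly created or reset) :
 *
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *   // once streaming has started, only the parameters accepted by
 *   // ZSTD_isUpdateAuthorized() above may still be changed.
 */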
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format :
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel : {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        if (value == 0)
            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
        else
            CCtxParams->compressionLevel = value;
        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
        return 0;   /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = (U32)value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = (U32)value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = (U32)value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = (U32)value;
        return (size_t)value;

    case ZSTD_c_minMatch :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength :
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag :
        /* A 32-bit content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow :
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict : {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode : {
        const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        CCtxParams->nbWorkers = value;
        return CCtxParams->nbWorkers;
#endif

    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        /* Adjust to the minimum non-default value. */
        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
            value = ZSTDMT_JOBSIZE_MIN;
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        assert(value >= 0);
        CCtxParams->jobSize = value;
        return CCtxParams->jobSize;
#endif

    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
        CCtxParams->overlapLog = value;
        return CCtxParams->overlapLog;
#endif

    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
        CCtxParams->rsyncable = value;
        return CCtxParams->rsyncable;
#endif

    case ZSTD_c_enableDedicatedDictSearch :
        CCtxParams->enableDedicatedDictSearch = (value!=0);
        return CCtxParams->enableDedicatedDictSearch;

    case ZSTD_c_enableLongDistanceMatching :
        CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog :
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    case ZSTD_c_srcSizeHint :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
        CCtxParams->srcSizeHint = value;
        return CCtxParams->srcSizeHint;

    case ZSTD_c_stableInBuffer:
        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->inBufferMode;

    case ZSTD_c_stableOutBuffer:
        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->outBufferMode;

    case ZSTD_c_blockDelimiters:
        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
        return CCtxParams->blockDelimiters;
    case ZSTD_c_validateSequences:
        BOUNDCHECK(ZSTD_c_validateSequences, value);
        CCtxParams->validateSequences = value;
        return CCtxParams->validateSequences;

    case ZSTD_c_useBlockSplitter:
        BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
        CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useBlockSplitter;

    case ZSTD_c_useRowMatchFinder:
        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
        CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useRowMatchFinder;

    case ZSTD_c_deterministicRefPrefix:
        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
        CCtxParams->deterministicRefPrefix = !!value;
        return CCtxParams->deterministicRefPrefix;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}

size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
{
    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format :
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag :
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow :
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        assert(CCtxParams->nbWorkers == 0);
#endif
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        assert(CCtxParams->jobSize <= INT_MAX);
        *value = (int)CCtxParams->jobSize;
        break;
#endif
    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->overlapLog;
        break;
#endif
    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->rsyncable;
        break;
#endif
    case ZSTD_c_enableDedicatedDictSearch :
        *value = CCtxParams->enableDedicatedDictSearch;
        break;
    case ZSTD_c_enableLongDistanceMatching :
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize :
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint :
        *value = (int)CCtxParams->srcSizeHint;
        break;
    case ZSTD_c_stableInBuffer :
        *value = (int)CCtxParams->inBufferMode;
        break;
    case ZSTD_c_stableOutBuffer :
        *value = (int)CCtxParams->outBufferMode;
        break;
    case ZSTD_c_blockDelimiters :
        *value = (int)CCtxParams->blockDelimiters;
        break;
    case ZSTD_c_validateSequences :
        *value = (int)CCtxParams->validateSequences;
        break;
    case ZSTD_c_useBlockSplitter :
        *value = (int)CCtxParams->useBlockSplitter;
        break;
    case ZSTD_c_useRowMatchFinder :
        *value = (int)CCtxParams->useRowMatchFinder;
        break;
    case ZSTD_c_deterministicRefPrefix:
        *value = (int)CCtxParams->deterministicRefPrefix;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}

/** ZSTD_CCtx_setParametersUsingCCtxParams() :
 *  just applies `params` into `cctx`;
 *  no action is performed, parameters are merely stored.
 *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
 *    This is possible even if a compression is ongoing.
 *    In which case, new parameters will be applied on the fly, starting with next compression job.
 */
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "The context is in the wrong stage!");
    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
                    "Can't override parameters with cdict attached (some must "
                    "be inherited from the cdict).");

    cctx->requestedParams = *params;
    return 0;
}

size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't set pledgedSrcSize when not in init stage.");
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    return 0;
}
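/* Example (minimal sketch) : announce the total input size before streaming,
 * so the frame header can record it even though data arrives in chunks.
 *
 *   ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *   ZSTD_CCtx_setPledgedSrcSize(cctx, totalSrcSize);   // totalSrcSize known by the caller
 *   // ... then stream with ZSTD_compressStream2(), finishing with ZSTD_e_end.
 */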
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
        int const compressionLevel,
        size_t const dictSize);
static int ZSTD_dedicatedDictSearch_isSupported(
        const ZSTD_compressionParameters* cParams);
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams);

/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    dl->cdict = ZSTD_createCDict_advanced2(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            &cctx->requestedParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
    cctx->cdict = dl->cdict;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't load a dictionary when ctx is not in init stage.");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;
    } else {
        void* dictBuffer;
        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                        "no malloc for static CCtx");
        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
        ZSTD_memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_byReference(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}


size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a dict when ctx not in init stage.");
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}

size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a pool when ctx not in init stage.");
    cctx->pool = pool;
    return 0;
}

size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}

size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a prefix when ctx not in init stage.");
    ZSTD_clearAllDicts(cctx);
    if (prefix != NULL && prefixSize > 0) {
        cctx->prefixDict.dict = prefix;
        cctx->prefixDict.dictSize = prefixSize;
        cctx->prefixDict.dictContentType = dictContentType;
    }
    return 0;
}
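/* The three dictionary-referencing styles side by side (illustrative) :
 *
 *   ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictSize); // copies content; applies to all following frames
 *   ZSTD_CCtx_refCDict(cctx, cdict);                   // shares a pre-digested CDict, no copy
 *   ZSTD_CCtx_refPrefix(cctx, prefix, prefixSize);     // by reference, consumed by the next frame only
 */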
/*! ZSTD_CCtx_reset() :
 *  Also dumps dictionary */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        cctx->streamStage = zcss_init;
        cctx->pledgedSrcSizePlusOne = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                        "Can't reset parameters only when not in init stage.");
        ZSTD_clearAllDicts(cctx);
        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
    }
    return 0;
}


/** ZSTD_checkCParams() :
    control CParam values remain within authorized range.
    @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}

/** ZSTD_clampCParams() :
 *  make CParam values within valid range.
 *  @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
    return hashLog - btScale;
}
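/* e.g. ZSTD_cycleLog(20, ZSTD_lazy2) == 20, while ZSTD_cycleLog(20, ZSTD_btlazy2) == 19 :
 * binary-tree strategies store two table entries per position, halving the usable cycle. */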
/** ZSTD_dictAndWindowLog() :
 * Returns an adjusted window log that is large enough to fit the source and the dictionary.
 * The zstd format says that the entire dictionary is valid if one byte of the dictionary
 * is within the window. So the hashLog and chainLog should be large enough to reference both
 * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
 * the hashLog and windowLog.
 * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
 */
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
{
    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
    /* No dictionary ==> No change */
    if (dictSize == 0) {
        return windowLog;
    }
    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN);   /* Handled in ZSTD_adjustCParams_internal() */
    {
        U64 const windowSize = 1ULL << windowLog;
        U64 const dictAndWindowSize = dictSize + windowSize;
        /* If the window size is already large enough to fit both the source and the dictionary
         * then just use the window size. Otherwise adjust so that it fits the dictionary and
         * the window.
         */
        if (windowSize >= dictSize + srcSize) {
            return windowLog;   /* Window size large enough already */
        } else if (dictAndWindowSize >= maxWindowSize) {
            return ZSTD_WINDOWLOG_MAX;   /* Larger than max window log */
        } else {
            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
        }
    }
}
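/* Worked example : windowLog=20 (1 MiB window), srcSize=4 MiB, dictSize=112 KiB.
 * windowSize (1 MiB) < dictSize + srcSize, and dictAndWindowSize = 1 MiB + 112 KiB
 * stays below the max window, so the result is
 * ZSTD_highbit32(1163264 - 1) + 1 = 20 + 1 = 21. */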
/** ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
 *  note : `srcSize==0` means 0!
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize,
                            ZSTD_cParamMode_e mode)
{
    const U64 minSrcSize = 513; /* (1<<9) + 1 */
    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
        /* If we don't know the source size, don't make any
         * assumptions about it. We will already have selected
         * smaller parameters if a dictionary is in use.
         */
        break;
    case ZSTD_cpm_createCDict:
        /* Assume a small source size when creating a dictionary
         * with an unknown source size.
         */
        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            srcSize = minSrcSize;
        break;
    case ZSTD_cpm_attachDict:
        /* Dictionary has its own dedicated parameters which have
         * already been selected. We are selecting parameters
         * for only the source.
         */
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) ) {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
        if (cycleLog > dictAndWindowLog)
            cPar.chainLog -= (cycleLog - dictAndWindowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* minimum wlog required for valid frame header */

    return cPar;
}

ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
}

static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

static void ZSTD_overrideCParams(
              ZSTD_compressionParameters* cParams,
        const ZSTD_compressionParameters* overrides)
{
    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
}

ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    ZSTD_compressionParameters cParams;
    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
        srcSizeHint = CCtxParams->srcSizeHint;
    }
    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
    assert(!ZSTD_checkCParams(cParams));
    /* srcSizeHint == 0 means 0 */
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
}
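/* Worked example of the windowLog downsizing in ZSTD_adjustCParams_internal() :
 * srcSize = 10 KiB with no dictionary gives tSize = 10240, so
 * srcLog = ZSTD_highbit32(10239) + 1 = 14, and any requested windowLog above 14
 * is reduced to 14, shrinking every table derived from it. */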
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const ZSTD_paramSwitch_e useRowMatchFinder,
                       const U32 enableDedicatedDictSearch,
                       const U32 forCCtx)
{
    /* chain table size should be 0 for fast or row-hash strategies */
    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
                                 ? ((size_t)1 << cParams->chainLog)
                                 : 0;
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
     * surrounded by redzones in ASAN. */
    size_t const tableSpace = chainSize * sizeof(U32)
                            + hSize * sizeof(U32)
                            + h3Size * sizeof(U32);
    size_t const optPotentialSpace =
                    ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
                  + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
                  + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
                  + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
                  + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
                  + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
                                            : 0;
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    size_t const slackSpace = ZSTD_cwksp_slack_space_required();

    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
    assert(useRowMatchFinder != ZSTD_ps_auto);

    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}
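/* Sizing example (illustrative cParams) : hashLog=22, chainLog=22, minMatch=3,
 * windowLog>=17, in a CCtx with the chain table allocated :
 *   tableSpace = ((1<<22) + (1<<22) + (1<<17)) * sizeof(U32) = 32.5 MiB,
 * to which btopt+ strategies add optSpace on top. */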
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        const ZSTD_compressionParameters* cParams,
        const ldmParams_t* ldmParams,
        const int isStatic,
        const ZSTD_paramSwitch_e useRowMatchFinder,
        const size_t buffInSize,
        const size_t buffOutSize,
        const U64 pledgedSrcSize)
{
    size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
    size_t const maxNbSeq = blockSize / divider;
    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);

    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
    size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;


    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
                             + ZSTD_cwksp_alloc_size(buffOutSize);

    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;

    size_t const neededSpace =
        cctxSpace +
        entropySpace +
        blockStateSpace +
        ldmSpace +
        ldmSeqSpace +
        matchStateSize +
        tokenSpace +
        bufferSpace;

    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
    return neededSpace;
}

size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
                                                                               &cParams);

    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    /* estimateCCtxSize is for one-shot compression. So no buffers should
     * be needed. However, we still allocate two 0-sized buffers, which can
     * take space under ASAN. */
    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
}
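/* Example (minimal sketch) : pick the estimator matching the intended API.
 *
 *   size_t const oneShotSize = ZSTD_estimateCCtxSize(level);    // one-shot, ZSTD_compress2()-style usage
 *   size_t const streamSize  = ZSTD_estimateCStreamSize(level); // adds window + in/out buffers
 */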
1569 size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
1570 ? ZSTD_compressBound(blockSize) + 1
1571 : 0;
1572 ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
1573
1574 return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1575 &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
1576 ZSTD_CONTENTSIZE_UNKNOWN);
1577 }
1578 }
1579
1580 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
1581 {
1582 ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
1583 if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
1584 /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
1585 size_t noRowCCtxSize;
1586 size_t rowCCtxSize;
1587 initialParams.useRowMatchFinder = ZSTD_ps_disable;
1588 noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1589 initialParams.useRowMatchFinder = ZSTD_ps_enable;
1590 rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1591 return MAX(noRowCCtxSize, rowCCtxSize);
1592 } else {
1593 return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1594 }
1595 }
1596
1597 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
1598 {
1599 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1600 return ZSTD_estimateCStreamSize_usingCParams(cParams);
1601 }
1602
1603 size_t ZSTD_estimateCStreamSize(int compressionLevel)
1604 {
1605 int level;
1606 size_t memBudget = 0;
1607 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
1608 size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
1609 if (newMB > memBudget) memBudget = newMB;
1610 }
1611 return memBudget;
1612 }
1613
1614 /* ZSTD_getFrameProgression():
1615 * tells how much data has been consumed (input) and produced (output) for the current frame.
1616 * Also able to count progression inside worker threads (non-blocking mode).
1617 */
1618 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
1619 {
1620 #ifdef ZSTD_MULTITHREAD
1621 if (cctx->appliedParams.nbWorkers > 0) {
1622 return ZSTDMT_getFrameProgression(cctx->mtctx);
1623 }
1624 #endif
1625 { ZSTD_frameProgression fp;
1626 size_t const buffered = (cctx->inBuff == NULL) ? 0 :
1627 cctx->inBuffPos - cctx->inToCompress;
1628 if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
1629 assert(buffered <= ZSTD_BLOCKSIZE_MAX);
1630 fp.ingested = cctx->consumedSrcSize + buffered;
1631 fp.consumed = cctx->consumedSrcSize;
1632 fp.produced = cctx->producedCSize;
1633 fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */
1634 fp.currentJobID = 0;
1635 fp.nbActiveWorkers = 0;
1636 return fp;
1637 } }
1638
1639 /*! ZSTD_toFlushNow()
1640 * Only useful for multithreading scenarios currently (nbWorkers >= 1).
1641 */ 1642 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) 1643 { 1644 #ifdef ZSTD_MULTITHREAD 1645 if (cctx->appliedParams.nbWorkers > 0) { 1646 return ZSTDMT_toFlushNow(cctx->mtctx); 1647 } 1648 #endif 1649 (void)cctx; 1650 return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ 1651 } 1652 1653 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, 1654 ZSTD_compressionParameters cParams2) 1655 { 1656 (void)cParams1; 1657 (void)cParams2; 1658 assert(cParams1.windowLog == cParams2.windowLog); 1659 assert(cParams1.chainLog == cParams2.chainLog); 1660 assert(cParams1.hashLog == cParams2.hashLog); 1661 assert(cParams1.searchLog == cParams2.searchLog); 1662 assert(cParams1.minMatch == cParams2.minMatch); 1663 assert(cParams1.targetLength == cParams2.targetLength); 1664 assert(cParams1.strategy == cParams2.strategy); 1665 } 1666 1667 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) 1668 { 1669 int i; 1670 for (i = 0; i < ZSTD_REP_NUM; ++i) 1671 bs->rep[i] = repStartValue[i]; 1672 bs->entropy.huf.repeatMode = HUF_repeat_none; 1673 bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; 1674 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; 1675 bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; 1676 } 1677 1678 /*! ZSTD_invalidateMatchState() 1679 * Invalidate all the matches in the match finder tables. 1680 * Requires nextSrc and base to be set (can be NULL). 1681 */ 1682 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) 1683 { 1684 ZSTD_window_clear(&ms->window); 1685 1686 ms->nextToUpdate = ms->window.dictLimit; 1687 ms->loadedDictEnd = 0; 1688 ms->opt.litLengthSum = 0; /* force reset of btopt stats */ 1689 ms->dictMatchState = NULL; 1690 } 1691 1692 /** 1693 * Controls, for this matchState reset, whether the tables need to be cleared / 1694 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the 1695 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a 1696 * subsequent operation will overwrite the table space anyways (e.g., copying 1697 * the matchState contents in from a CDict). 1698 */ 1699 typedef enum { 1700 ZSTDcrp_makeClean, 1701 ZSTDcrp_leaveDirty 1702 } ZSTD_compResetPolicy_e; 1703 1704 /** 1705 * Controls, for this matchState reset, whether indexing can continue where it 1706 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero 1707 * (ZSTDirp_reset). 1708 */ 1709 typedef enum { 1710 ZSTDirp_continue, 1711 ZSTDirp_reset 1712 } ZSTD_indexResetPolicy_e; 1713 1714 typedef enum { 1715 ZSTD_resetTarget_CDict, 1716 ZSTD_resetTarget_CCtx 1717 } ZSTD_resetTarget_e; 1718 1719 1720 static size_t 1721 ZSTD_reset_matchState(ZSTD_matchState_t* ms, 1722 ZSTD_cwksp* ws, 1723 const ZSTD_compressionParameters* cParams, 1724 const ZSTD_paramSwitch_e useRowMatchFinder, 1725 const ZSTD_compResetPolicy_e crp, 1726 const ZSTD_indexResetPolicy_e forceResetIndex, 1727 const ZSTD_resetTarget_e forWho) 1728 { 1729 /* disable chain table allocation for fast or row-based strategies */ 1730 size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, 1731 ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) 1732 ? ((size_t)1 << cParams->chainLog) 1733 : 0; 1734 size_t const hSize = ((size_t)1) << cParams->hashLog; 1735 U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? 
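/* Example (illustrative) : with minMatch==3 and windowLog=24, hashLog3 is
 * capped at ZSTD_HASHLOG3_MAX, so h3Size = 1<<17 entries; with a small
 * window such as windowLog=10, it follows the window instead (h3Size = 1<<10).
 * Note the (forWho == ZSTD_resetTarget_CCtx) condition : a CDict never
 * allocates the 3-bytes table. */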
MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; 1736 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; 1737 1738 DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); 1739 assert(useRowMatchFinder != ZSTD_ps_auto); 1740 if (forceResetIndex == ZSTDirp_reset) { 1741 ZSTD_window_init(&ms->window); 1742 ZSTD_cwksp_mark_tables_dirty(ws); 1743 } 1744 1745 ms->hashLog3 = hashLog3; 1746 1747 ZSTD_invalidateMatchState(ms); 1748 1749 assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ 1750 1751 ZSTD_cwksp_clear_tables(ws); 1752 1753 DEBUGLOG(5, "reserving table space"); 1754 /* table Space */ 1755 ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); 1756 ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); 1757 ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); 1758 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, 1759 "failed a workspace allocation in ZSTD_reset_matchState"); 1760 1761 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); 1762 if (crp!=ZSTDcrp_leaveDirty) { 1763 /* reset tables only */ 1764 ZSTD_cwksp_clean_tables(ws); 1765 } 1766 1767 /* opt parser space */ 1768 if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { 1769 DEBUGLOG(4, "reserving optimal parser space"); 1770 ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); 1771 ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); 1772 ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); 1773 ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); 1774 ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); 1775 ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); 1776 } 1777 1778 if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { 1779 { /* Row match finder needs an additional table of hashes ("tags") */ 1780 size_t const tagTableSize = hSize*sizeof(U16); 1781 ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); 1782 if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); 1783 } 1784 { /* Switch to 32-entry rows if searchLog is 5 (or more) */ 1785 U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); 1786 assert(cParams->hashLog >= rowLog); 1787 ms->rowHashLog = cParams->hashLog - rowLog; 1788 } 1789 } 1790 1791 ms->cParams = *cParams; 1792 1793 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, 1794 "failed a workspace allocation in ZSTD_reset_matchState"); 1795 return 0; 1796 } 1797 1798 /* ZSTD_indexTooCloseToMax() : 1799 * minor optimization : prefer memset() rather than reduceIndex() 1800 * which is measurably slow in some circumstances (reported for Visual Studio). 1801 * Works when re-using a context for a lot of smallish inputs : 1802 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, 1803 * memset() will be triggered before reduceIndex(). 1804 */ 1805 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) 1806 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) 1807 { 1808 return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); 1809 } 1810 1811 /** ZSTD_dictTooBig(): 1812 * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in 1813 * one go generically. 
So we ensure that in that case we reset the tables to zero,
1814 * so that we can load as much of the dictionary as possible.
1815 */
1816 static int ZSTD_dictTooBig(size_t const loadedDictSize)
1817 {
1818 return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
1819 }
1820
1821 /*! ZSTD_resetCCtx_internal() :
1822 * @param loadedDictSize The size of the dictionary to be loaded
1823 * into the context, if any. If no dictionary is used, or the
1824 * dictionary is being attached / copied, then pass 0.
1825 * note : `params` are assumed fully validated at this stage.
1826 */
1827 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
1828 ZSTD_CCtx_params const* params,
1829 U64 const pledgedSrcSize,
1830 size_t const loadedDictSize,
1831 ZSTD_compResetPolicy_e const crp,
1832 ZSTD_buffered_policy_e const zbuff)
1833 {
1834 ZSTD_cwksp* const ws = &zc->workspace;
1835 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
1836 (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
1837 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
1838
1839 zc->isFirstBlock = 1;
1840
1841 /* Set applied params early so we can modify them for LDM,
1842 * and point params at the applied params.
1843 */
1844 zc->appliedParams = *params;
1845 params = &zc->appliedParams;
1846
1847 assert(params->useRowMatchFinder != ZSTD_ps_auto);
1848 assert(params->useBlockSplitter != ZSTD_ps_auto);
1849 assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
1850 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1851 /* Adjust long distance matching parameters */
1852 ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
1853 assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
1854 assert(params->ldmParams.hashRateLog < 32);
1855 }
1856
1857 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
1858 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1859 U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
1860 size_t const maxNbSeq = blockSize / divider;
1861 size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
1862 ? ZSTD_compressBound(blockSize) + 1
1863 : 0;
1864 size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
1865 ? windowSize + blockSize
1866 : 0;
1867 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
1868
1869 int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
1870 int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
1871 ZSTD_indexResetPolicy_e needsIndexReset =
1872 (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
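/* Illustrative scenarios forcing ZSTDirp_reset :
 * - first use of this context (!zc->initialized),
 * - indices within ZSTD_INDEXOVERFLOW_MARGIN of the U32 limit (indexTooClose),
 * - a dictionary larger than ZSTD_CHUNKSIZE_MAX (dictTooBig), which must be
 *   loaded in several chunks and therefore needs a fresh, predictable index.
 * In every other case, indexing continues and existing tables remain valid. */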
1873
1874 size_t const neededSpace =
1875 ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1876 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
1877 buffInSize, buffOutSize, pledgedSrcSize);
1878 int resizeWorkspace;
1879
1880 FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
1881
1882 if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
1883
1884 { /* Check if workspace is large enough, alloc a new one if needed */
1885 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
1886 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
1887 resizeWorkspace = workspaceTooSmall || workspaceWasteful;
1888 DEBUGLOG(4, "Need %zu B workspace", neededSpace);
1889 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1890
1891 if (resizeWorkspace) {
1892 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
1893 ZSTD_cwksp_sizeof(ws) >> 10,
1894 neededSpace >> 10);
1895
1896 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
1897
1898 needsIndexReset = ZSTDirp_reset;
1899
1900 ZSTD_cwksp_free(ws, zc->customMem);
1901 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
1902
1903 DEBUGLOG(5, "reserving object space");
1904 /* Statically sized space.
1905 * entropyWorkspace never moves,
1906 * though prev/next block swap places */
1907 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
1908 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1909 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
1910 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1911 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
1912 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
1913 RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
1914 } }
1915
1916 ZSTD_cwksp_clear(ws);
1917
1918 /* init params */
1919 zc->blockState.matchState.cParams = params->cParams;
1920 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1921 zc->consumedSrcSize = 0;
1922 zc->producedCSize = 0;
1923 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1924 zc->appliedParams.fParams.contentSizeFlag = 0;
1925 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1926 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1927 zc->blockSize = blockSize;
1928
1929 XXH64_reset(&zc->xxhState, 0);
1930 zc->stage = ZSTDcs_init;
1931 zc->dictID = 0;
1932 zc->dictContentSize = 0;
1933
1934 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1935
1936 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1937 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1938 */
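/* Illustrative consequence : ZSTD_wildcopy() copies in fixed-size chunks and
 * may write up to WILDCOPY_OVERLENGTH bytes past the last useful byte.
 * Oversizing keeps such overruns inside owned memory : for blockSize = 128 KB,
 * the literals buffer below reserves 128 KB + WILDCOPY_OVERLENGTH bytes. */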
1939 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
1940 zc->seqStore.maxNbLit = blockSize;
1941
1942 /* buffers */
1943 zc->bufferedPolicy = zbuff;
1944 zc->inBuffSize = buffInSize;
1945 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
1946 zc->outBuffSize = buffOutSize;
1947 zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
1948
1949 /* ldm bucketOffsets table */
1950 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1951 /* TODO: avoid memset? */
1952 size_t const numBuckets =
1953 ((size_t)1) << (params->ldmParams.hashLog -
1954 params->ldmParams.bucketSizeLog);
1955 zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
1956 ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
1957 }
1958
1959 /* sequences storage */
1960 ZSTD_referenceExternalSequences(zc, NULL, 0);
1961 zc->seqStore.maxNbSeq = maxNbSeq;
1962 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1963 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1964 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1965 zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
1966
1967 FORWARD_IF_ERROR(ZSTD_reset_matchState(
1968 &zc->blockState.matchState,
1969 ws,
1970 &params->cParams,
1971 params->useRowMatchFinder,
1972 crp,
1973 needsIndexReset,
1974 ZSTD_resetTarget_CCtx), "");
1975
1976 /* ldm hash table */
1977 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1978 /* TODO: avoid memset? */
1979 size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
1980 zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
1981 ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
1982 zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
1983 zc->maxNbLdmSequences = maxNbLdmSeq;
1984
1985 ZSTD_window_init(&zc->ldmState.window);
1986 zc->ldmState.loadedDictEnd = 0;
1987 }
1988
1989 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
1990 assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
1991
1992 zc->initialized = 1;
1993
1994 return 0;
1995 }
1996 }
1997
1998 /* ZSTD_invalidateRepCodes() :
1999 * ensures next compression will not use repcodes from previous block.
2000 * Note : only works with regular variant;
2001 * do not use with extDict variant ! */
2002 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
2003 int i;
2004 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
2005 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
2006 }
2007
2008 /* These are the approximate sizes for each strategy past which copying the
2009 * dictionary tables into the working context is faster than using them
2010 * in-place.
2011 */ 2012 static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { 2013 8 KB, /* unused */ 2014 8 KB, /* ZSTD_fast */ 2015 16 KB, /* ZSTD_dfast */ 2016 32 KB, /* ZSTD_greedy */ 2017 32 KB, /* ZSTD_lazy */ 2018 32 KB, /* ZSTD_lazy2 */ 2019 32 KB, /* ZSTD_btlazy2 */ 2020 32 KB, /* ZSTD_btopt */ 2021 8 KB, /* ZSTD_btultra */ 2022 8 KB /* ZSTD_btultra2 */ 2023 }; 2024 2025 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, 2026 const ZSTD_CCtx_params* params, 2027 U64 pledgedSrcSize) 2028 { 2029 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; 2030 int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; 2031 return dedicatedDictSearch 2032 || ( ( pledgedSrcSize <= cutoff 2033 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN 2034 || params->attachDictPref == ZSTD_dictForceAttach ) 2035 && params->attachDictPref != ZSTD_dictForceCopy 2036 && !params->forceWindow ); /* dictMatchState isn't correctly 2037 * handled in _enforceMaxDist */ 2038 } 2039 2040 static size_t 2041 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, 2042 const ZSTD_CDict* cdict, 2043 ZSTD_CCtx_params params, 2044 U64 pledgedSrcSize, 2045 ZSTD_buffered_policy_e zbuff) 2046 { 2047 DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", 2048 (unsigned long long)pledgedSrcSize); 2049 { 2050 ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; 2051 unsigned const windowLog = params.cParams.windowLog; 2052 assert(windowLog != 0); 2053 /* Resize working context table params for input only, since the dict 2054 * has its own tables. */ 2055 /* pledgedSrcSize == 0 means 0! */ 2056 2057 if (cdict->matchState.dedicatedDictSearch) { 2058 ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); 2059 } 2060 2061 params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 2062 cdict->dictContentSize, ZSTD_cpm_attachDict); 2063 params.cParams.windowLog = windowLog; 2064 params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ 2065 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, 2066 /* loadedDictSize */ 0, 2067 ZSTDcrp_makeClean, zbuff), ""); 2068 assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); 2069 } 2070 2071 { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc 2072 - cdict->matchState.window.base); 2073 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; 2074 if (cdictLen == 0) { 2075 /* don't even attach dictionaries with no contents */ 2076 DEBUGLOG(4, "skipping attaching empty dictionary"); 2077 } else { 2078 DEBUGLOG(4, "attaching dictionary into context"); 2079 cctx->blockState.matchState.dictMatchState = &cdict->matchState; 2080 2081 /* prep working match state so dict matches never have negative indices 2082 * when they are translated to the working context's index space. 
*/ 2083 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { 2084 cctx->blockState.matchState.window.nextSrc = 2085 cctx->blockState.matchState.window.base + cdictEnd; 2086 ZSTD_window_clear(&cctx->blockState.matchState.window); 2087 } 2088 /* loadedDictEnd is expressed within the referential of the active context */ 2089 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; 2090 } } 2091 2092 cctx->dictID = cdict->dictID; 2093 cctx->dictContentSize = cdict->dictContentSize; 2094 2095 /* copy block state */ 2096 ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 2097 2098 return 0; 2099 } 2100 2101 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, 2102 const ZSTD_CDict* cdict, 2103 ZSTD_CCtx_params params, 2104 U64 pledgedSrcSize, 2105 ZSTD_buffered_policy_e zbuff) 2106 { 2107 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; 2108 2109 assert(!cdict->matchState.dedicatedDictSearch); 2110 DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", 2111 (unsigned long long)pledgedSrcSize); 2112 2113 { unsigned const windowLog = params.cParams.windowLog; 2114 assert(windowLog != 0); 2115 /* Copy only compression parameters related to tables. */ 2116 params.cParams = *cdict_cParams; 2117 params.cParams.windowLog = windowLog; 2118 params.useRowMatchFinder = cdict->useRowMatchFinder; 2119 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, 2120 /* loadedDictSize */ 0, 2121 ZSTDcrp_leaveDirty, zbuff), ""); 2122 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); 2123 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); 2124 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); 2125 } 2126 2127 ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); 2128 assert(params.useRowMatchFinder != ZSTD_ps_auto); 2129 2130 /* copy tables */ 2131 { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) 2132 ? ((size_t)1 << cdict_cParams->chainLog) 2133 : 0; 2134 size_t const hSize = (size_t)1 << cdict_cParams->hashLog; 2135 2136 ZSTD_memcpy(cctx->blockState.matchState.hashTable, 2137 cdict->matchState.hashTable, 2138 hSize * sizeof(U32)); 2139 /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ 2140 if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { 2141 ZSTD_memcpy(cctx->blockState.matchState.chainTable, 2142 cdict->matchState.chainTable, 2143 chainSize * sizeof(U32)); 2144 } 2145 /* copy tag table */ 2146 if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { 2147 size_t const tagTableSize = hSize*sizeof(U16); 2148 ZSTD_memcpy(cctx->blockState.matchState.tagTable, 2149 cdict->matchState.tagTable, 2150 tagTableSize); 2151 } 2152 } 2153 2154 /* Zero the hashTable3, since the cdict never fills it */ 2155 { int const h3log = cctx->blockState.matchState.hashLog3; 2156 size_t const h3Size = h3log ? 
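/* Note (illustrative) : the assert below relies on CDicts being built with
 * hashLog3 == 0, so the working context's 3-bytes table, when present,
 * cannot be seeded from the dictionary and is simply cleared to a known state. */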
((size_t)1 << h3log) : 0; 2157 assert(cdict->matchState.hashLog3 == 0); 2158 ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); 2159 } 2160 2161 ZSTD_cwksp_mark_tables_clean(&cctx->workspace); 2162 2163 /* copy dictionary offsets */ 2164 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; 2165 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; 2166 dstMatchState->window = srcMatchState->window; 2167 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 2168 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 2169 } 2170 2171 cctx->dictID = cdict->dictID; 2172 cctx->dictContentSize = cdict->dictContentSize; 2173 2174 /* copy block state */ 2175 ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 2176 2177 return 0; 2178 } 2179 2180 /* We have a choice between copying the dictionary context into the working 2181 * context, or referencing the dictionary context from the working context 2182 * in-place. We decide here which strategy to use. */ 2183 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, 2184 const ZSTD_CDict* cdict, 2185 const ZSTD_CCtx_params* params, 2186 U64 pledgedSrcSize, 2187 ZSTD_buffered_policy_e zbuff) 2188 { 2189 2190 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", 2191 (unsigned)pledgedSrcSize); 2192 2193 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { 2194 return ZSTD_resetCCtx_byAttachingCDict( 2195 cctx, cdict, *params, pledgedSrcSize, zbuff); 2196 } else { 2197 return ZSTD_resetCCtx_byCopyingCDict( 2198 cctx, cdict, *params, pledgedSrcSize, zbuff); 2199 } 2200 } 2201 2202 /*! ZSTD_copyCCtx_internal() : 2203 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 2204 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 2205 * The "context", in this case, refers to the hash and chain tables, 2206 * entropy tables, and dictionary references. 2207 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. 2208 * @return : 0, or an error code */ 2209 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, 2210 const ZSTD_CCtx* srcCCtx, 2211 ZSTD_frameParameters fParams, 2212 U64 pledgedSrcSize, 2213 ZSTD_buffered_policy_e zbuff) 2214 { 2215 RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, 2216 "Can't copy a ctx that's not in init stage."); 2217 DEBUGLOG(5, "ZSTD_copyCCtx_internal"); 2218 ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); 2219 { ZSTD_CCtx_params params = dstCCtx->requestedParams; 2220 /* Copy only compression parameters related to tables. 
*/
2221 params.cParams = srcCCtx->appliedParams.cParams;
2222 assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
2223 assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
2224 assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
2225 params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
2226 params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
2227 params.ldmParams = srcCCtx->appliedParams.ldmParams;
2228 params.fParams = fParams;
2229 ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
2230 /* loadedDictSize */ 0,
2231 ZSTDcrp_leaveDirty, zbuff);
2232 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
2233 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
2234 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
2235 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
2236 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
2237 }
2238
2239 ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
2240
2241 /* copy tables */
2242 { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
2243 srcCCtx->appliedParams.useRowMatchFinder,
2244 0 /* forDDSDict */)
2245 ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
2246 : 0;
2247 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
2248 int const h3log = srcCCtx->blockState.matchState.hashLog3;
2249 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
2250
2251 ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
2252 srcCCtx->blockState.matchState.hashTable,
2253 hSize * sizeof(U32));
2254 ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
2255 srcCCtx->blockState.matchState.chainTable,
2256 chainSize * sizeof(U32));
2257 ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
2258 srcCCtx->blockState.matchState.hashTable3,
2259 h3Size * sizeof(U32));
2260 }
2261
2262 ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
2263
2264 /* copy dictionary offsets */
2265 {
2266 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
2267 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
2268 dstMatchState->window = srcMatchState->window;
2269 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
2270 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
2271 }
2272 dstCCtx->dictID = srcCCtx->dictID;
2273 dstCCtx->dictContentSize = srcCCtx->dictContentSize;
2274
2275 /* copy block state */
2276 ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
2277
2278 return 0;
2279 }
2280
2281 /*! ZSTD_copyCCtx() :
2282 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
2283 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
2284 * pledgedSrcSize==0 means "unknown".
2285 * @return : 0, or an error code */ 2286 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) 2287 { 2288 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 2289 ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; 2290 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); 2291 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; 2292 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); 2293 2294 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, 2295 fParams, pledgedSrcSize, 2296 zbuff); 2297 } 2298 2299 2300 #define ZSTD_ROWSIZE 16 2301 /*! ZSTD_reduceTable() : 2302 * reduce table indexes by `reducerValue`, or squash to zero. 2303 * PreserveMark preserves "unsorted mark" for btlazy2 strategy. 2304 * It must be set to a clear 0/1 value, to remove branch during inlining. 2305 * Presume table size is a multiple of ZSTD_ROWSIZE 2306 * to help auto-vectorization */ 2307 FORCE_INLINE_TEMPLATE void 2308 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) 2309 { 2310 int const nbRows = (int)size / ZSTD_ROWSIZE; 2311 int cellNb = 0; 2312 int rowNb; 2313 /* Protect special index values < ZSTD_WINDOW_START_INDEX. */ 2314 U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; 2315 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ 2316 assert(size < (1U<<31)); /* can be casted to int */ 2317 2318 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) 2319 /* To validate that the table re-use logic is sound, and that we don't 2320 * access table space that we haven't cleaned, we re-"poison" the table 2321 * space every time we mark it dirty. 2322 * 2323 * This function however is intended to operate on those dirty tables and 2324 * re-clean them. So when this function is used correctly, we can unpoison 2325 * the memory it operated on. This introduces a blind spot though, since 2326 * if we now try to operate on __actually__ poisoned memory, we will not 2327 * detect that. */ 2328 __msan_unpoison(table, size * sizeof(U32)); 2329 #endif 2330 2331 for (rowNb=0 ; rowNb < nbRows ; rowNb++) { 2332 int column; 2333 for (column=0; column<ZSTD_ROWSIZE; column++) { 2334 U32 newVal; 2335 if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) { 2336 /* This write is pointless, but is required(?) for the compiler 2337 * to auto-vectorize the loop. */ 2338 newVal = ZSTD_DUBT_UNSORTED_MARK; 2339 } else if (table[cellNb] < reducerThreshold) { 2340 newVal = 0; 2341 } else { 2342 newVal = table[cellNb] - reducerValue; 2343 } 2344 table[cellNb] = newVal; 2345 cellNb++; 2346 } } 2347 } 2348 2349 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) 2350 { 2351 ZSTD_reduceTable_internal(table, size, reducerValue, 0); 2352 } 2353 2354 static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) 2355 { 2356 ZSTD_reduceTable_internal(table, size, reducerValue, 1); 2357 } 2358 2359 /*! 
ZSTD_reduceIndex() : 2360 * rescale all indexes to avoid future overflow (indexes are U32) */ 2361 static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) 2362 { 2363 { U32 const hSize = (U32)1 << params->cParams.hashLog; 2364 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); 2365 } 2366 2367 if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { 2368 U32 const chainSize = (U32)1 << params->cParams.chainLog; 2369 if (params->cParams.strategy == ZSTD_btlazy2) 2370 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); 2371 else 2372 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); 2373 } 2374 2375 if (ms->hashLog3) { 2376 U32 const h3Size = (U32)1 << ms->hashLog3; 2377 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); 2378 } 2379 } 2380 2381 2382 /*-******************************************************* 2383 * Block entropic compression 2384 *********************************************************/ 2385 2386 /* See doc/zstd_compression_format.md for detailed format description */ 2387 2388 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 2389 { 2390 const seqDef* const sequences = seqStorePtr->sequencesStart; 2391 BYTE* const llCodeTable = seqStorePtr->llCode; 2392 BYTE* const ofCodeTable = seqStorePtr->ofCode; 2393 BYTE* const mlCodeTable = seqStorePtr->mlCode; 2394 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2395 U32 u; 2396 assert(nbSeq <= seqStorePtr->maxNbSeq); 2397 for (u=0; u<nbSeq; u++) { 2398 U32 const llv = sequences[u].litLength; 2399 U32 const mlv = sequences[u].mlBase; 2400 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); 2401 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase); 2402 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); 2403 } 2404 if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) 2405 llCodeTable[seqStorePtr->longLengthPos] = MaxLL; 2406 if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) 2407 mlCodeTable[seqStorePtr->longLengthPos] = MaxML; 2408 } 2409 2410 /* ZSTD_useTargetCBlockSize(): 2411 * Returns if target compressed block size param is being used. 2412 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. 2413 * Returns 1 if true, 0 otherwise. */ 2414 static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) 2415 { 2416 DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); 2417 return (cctxParams->targetCBlockSize != 0); 2418 } 2419 2420 /* ZSTD_blockSplitterEnabled(): 2421 * Returns if block splitting param is being used 2422 * If used, compression will do best effort to split a block in order to improve compression ratio. 2423 * At the time this function is called, the parameter must be finalized. 2424 * Returns 1 if true, 0 otherwise. */ 2425 static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) 2426 { 2427 DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); 2428 assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); 2429 return (cctxParams->useBlockSplitter == ZSTD_ps_enable); 2430 } 2431 2432 /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types 2433 * and size of the sequences statistics 2434 */ 2435 typedef struct { 2436 U32 LLtype; 2437 U32 Offtype; 2438 U32 MLtype; 2439 size_t size; 2440 size_t lastCountSize; /* Accounts for bug in 1.3.4. 
More detail in ZSTD_entropyCompressSeqStore_internal() */ 2441 } ZSTD_symbolEncodingTypeStats_t; 2442 2443 /* ZSTD_buildSequencesStatistics(): 2444 * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. 2445 * Modifies `nextEntropy` to have the appropriate values as a side effect. 2446 * nbSeq must be greater than 0. 2447 * 2448 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) 2449 */ 2450 static ZSTD_symbolEncodingTypeStats_t 2451 ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, 2452 const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, 2453 BYTE* dst, const BYTE* const dstEnd, 2454 ZSTD_strategy strategy, unsigned* countWorkspace, 2455 void* entropyWorkspace, size_t entropyWkspSize) { 2456 BYTE* const ostart = dst; 2457 const BYTE* const oend = dstEnd; 2458 BYTE* op = ostart; 2459 FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; 2460 FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; 2461 FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; 2462 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 2463 const BYTE* const llCodeTable = seqStorePtr->llCode; 2464 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 2465 ZSTD_symbolEncodingTypeStats_t stats; 2466 2467 stats.lastCountSize = 0; 2468 /* convert length/distances into codes */ 2469 ZSTD_seqToCodes(seqStorePtr); 2470 assert(op <= oend); 2471 assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ 2472 /* build CTable for Literal Lengths */ 2473 { unsigned max = MaxLL; 2474 size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2475 DEBUGLOG(5, "Building LL table"); 2476 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; 2477 stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, 2478 countWorkspace, max, mostFrequent, nbSeq, 2479 LLFSELog, prevEntropy->litlengthCTable, 2480 LL_defaultNorm, LL_defaultNormLog, 2481 ZSTD_defaultAllowed, strategy); 2482 assert(set_basic < set_compressed && set_rle < set_compressed); 2483 assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2484 { size_t const countSize = ZSTD_buildCTable( 2485 op, (size_t)(oend - op), 2486 CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, 2487 countWorkspace, max, llCodeTable, nbSeq, 2488 LL_defaultNorm, LL_defaultNormLog, MaxLL, 2489 prevEntropy->litlengthCTable, 2490 sizeof(prevEntropy->litlengthCTable), 2491 entropyWorkspace, entropyWkspSize); 2492 if (ZSTD_isError(countSize)) { 2493 DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); 2494 stats.size = countSize; 2495 return stats; 2496 } 2497 if (stats.LLtype == set_compressed) 2498 stats.lastCountSize = countSize; 2499 op += countSize; 2500 assert(op <= oend); 2501 } } 2502 /* build CTable for Offsets */ 2503 { unsigned max = MaxOff; 2504 size_t const mostFrequent = HIST_countFast_wksp( 2505 countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2506 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ 2507 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
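/* Rationale (illustrative) : the predefined offset distribution only covers
 * codes up to DefaultMaxOff (28). A block containing a larger offset code,
 * e.g. code 30 for an offset around 1 GB, cannot rely on the default table,
 * hence ZSTD_defaultDisallowed in that case. */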
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 2508 DEBUGLOG(5, "Building OF table"); 2509 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; 2510 stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, 2511 countWorkspace, max, mostFrequent, nbSeq, 2512 OffFSELog, prevEntropy->offcodeCTable, 2513 OF_defaultNorm, OF_defaultNormLog, 2514 defaultPolicy, strategy); 2515 assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2516 { size_t const countSize = ZSTD_buildCTable( 2517 op, (size_t)(oend - op), 2518 CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, 2519 countWorkspace, max, ofCodeTable, nbSeq, 2520 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 2521 prevEntropy->offcodeCTable, 2522 sizeof(prevEntropy->offcodeCTable), 2523 entropyWorkspace, entropyWkspSize); 2524 if (ZSTD_isError(countSize)) { 2525 DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); 2526 stats.size = countSize; 2527 return stats; 2528 } 2529 if (stats.Offtype == set_compressed) 2530 stats.lastCountSize = countSize; 2531 op += countSize; 2532 assert(op <= oend); 2533 } } 2534 /* build CTable for MatchLengths */ 2535 { unsigned max = MaxML; 2536 size_t const mostFrequent = HIST_countFast_wksp( 2537 countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2538 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); 2539 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; 2540 stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, 2541 countWorkspace, max, mostFrequent, nbSeq, 2542 MLFSELog, prevEntropy->matchlengthCTable, 2543 ML_defaultNorm, ML_defaultNormLog, 2544 ZSTD_defaultAllowed, strategy); 2545 assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2546 { size_t const countSize = ZSTD_buildCTable( 2547 op, (size_t)(oend - op), 2548 CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, 2549 countWorkspace, max, mlCodeTable, nbSeq, 2550 ML_defaultNorm, ML_defaultNormLog, MaxML, 2551 prevEntropy->matchlengthCTable, 2552 sizeof(prevEntropy->matchlengthCTable), 2553 entropyWorkspace, entropyWkspSize); 2554 if (ZSTD_isError(countSize)) { 2555 DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); 2556 stats.size = countSize; 2557 return stats; 2558 } 2559 if (stats.MLtype == set_compressed) 2560 stats.lastCountSize = countSize; 2561 op += countSize; 2562 assert(op <= oend); 2563 } } 2564 stats.size = (size_t)(op-ostart); 2565 return stats; 2566 } 2567 2568 /* ZSTD_entropyCompressSeqStore_internal(): 2569 * compresses both literals and sequences 2570 * Returns compressed size of block, or a zstd error. 
2571 */ 2572 #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 2573 MEM_STATIC size_t 2574 ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, 2575 const ZSTD_entropyCTables_t* prevEntropy, 2576 ZSTD_entropyCTables_t* nextEntropy, 2577 const ZSTD_CCtx_params* cctxParams, 2578 void* dst, size_t dstCapacity, 2579 void* entropyWorkspace, size_t entropyWkspSize, 2580 const int bmi2) 2581 { 2582 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 2583 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 2584 unsigned* count = (unsigned*)entropyWorkspace; 2585 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; 2586 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; 2587 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; 2588 const seqDef* const sequences = seqStorePtr->sequencesStart; 2589 const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 2590 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 2591 const BYTE* const llCodeTable = seqStorePtr->llCode; 2592 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 2593 BYTE* const ostart = (BYTE*)dst; 2594 BYTE* const oend = ostart + dstCapacity; 2595 BYTE* op = ostart; 2596 size_t lastCountSize; 2597 2598 entropyWorkspace = count + (MaxSeq + 1); 2599 entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); 2600 2601 DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); 2602 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 2603 assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); 2604 2605 /* Compress literals */ 2606 { const BYTE* const literals = seqStorePtr->litStart; 2607 size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; 2608 size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; 2609 /* Base suspicion of uncompressibility on ratio of literals to sequences */ 2610 unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); 2611 size_t const litSize = (size_t)(seqStorePtr->lit - literals); 2612 size_t const cSize = ZSTD_compressLiterals( 2613 &prevEntropy->huf, &nextEntropy->huf, 2614 cctxParams->cParams.strategy, 2615 ZSTD_literalsCompressionIsDisabled(cctxParams), 2616 op, dstCapacity, 2617 literals, litSize, 2618 entropyWorkspace, entropyWkspSize, 2619 bmi2, suspectUncompressible); 2620 FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); 2621 assert(cSize <= dstCapacity); 2622 op += cSize; 2623 } 2624 2625 /* Sequences Header */ 2626 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, 2627 dstSize_tooSmall, "Can't fit seq hdr in output buf!"); 2628 if (nbSeq < 128) { 2629 *op++ = (BYTE)nbSeq; 2630 } else if (nbSeq < LONGNBSEQ) { 2631 op[0] = (BYTE)((nbSeq>>8) + 0x80); 2632 op[1] = (BYTE)nbSeq; 2633 op+=2; 2634 } else { 2635 op[0]=0xFF; 2636 MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); 2637 op+=3; 2638 } 2639 assert(op <= oend); 2640 if (nbSeq==0) { 2641 /* Copy the old tables over as if we repeated them */ 2642 ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); 2643 return (size_t)(op - ostart); 2644 } 2645 { 2646 ZSTD_symbolEncodingTypeStats_t stats; 2647 BYTE* seqHead = op++; 2648 /* build stats for sequences */ 2649 stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, 2650 &prevEntropy->fse, &nextEntropy->fse, 2651 op, oend, 2652 strategy, count, 2653 entropyWorkspace, entropyWkspSize); 2654 FORWARD_IF_ERROR(stats.size, 
"ZSTD_buildSequencesStatistics failed!"); 2655 *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); 2656 lastCountSize = stats.lastCountSize; 2657 op += stats.size; 2658 } 2659 2660 { size_t const bitstreamSize = ZSTD_encodeSequences( 2661 op, (size_t)(oend - op), 2662 CTable_MatchLength, mlCodeTable, 2663 CTable_OffsetBits, ofCodeTable, 2664 CTable_LitLength, llCodeTable, 2665 sequences, nbSeq, 2666 longOffsets, bmi2); 2667 FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); 2668 op += bitstreamSize; 2669 assert(op <= oend); 2670 /* zstd versions <= 1.3.4 mistakenly report corruption when 2671 * FSE_readNCount() receives a buffer < 4 bytes. 2672 * Fixed by https://github.com/facebook/zstd/pull/1146. 2673 * This can happen when the last set_compressed table present is 2 2674 * bytes and the bitstream is only one byte. 2675 * In this exceedingly rare case, we will simply emit an uncompressed 2676 * block, since it isn't worth optimizing. 2677 */ 2678 if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { 2679 /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ 2680 assert(lastCountSize + bitstreamSize == 3); 2681 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " 2682 "emitting an uncompressed block."); 2683 return 0; 2684 } 2685 } 2686 2687 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); 2688 return (size_t)(op - ostart); 2689 } 2690 2691 MEM_STATIC size_t 2692 ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, 2693 const ZSTD_entropyCTables_t* prevEntropy, 2694 ZSTD_entropyCTables_t* nextEntropy, 2695 const ZSTD_CCtx_params* cctxParams, 2696 void* dst, size_t dstCapacity, 2697 size_t srcSize, 2698 void* entropyWorkspace, size_t entropyWkspSize, 2699 int bmi2) 2700 { 2701 size_t const cSize = ZSTD_entropyCompressSeqStore_internal( 2702 seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2703 dst, dstCapacity, 2704 entropyWorkspace, entropyWkspSize, bmi2); 2705 if (cSize == 0) return 0; 2706 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 2707 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
2708 */ 2709 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) 2710 return 0; /* block not compressed */ 2711 FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); 2712 2713 /* Check compressibility */ 2714 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); 2715 if (cSize >= maxCSize) return 0; /* block not compressed */ 2716 } 2717 DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); 2718 return cSize; 2719 } 2720 2721 /* ZSTD_selectBlockCompressor() : 2722 * Not static, but internal use only (used by long distance matcher) 2723 * assumption : strat is a valid strategy */ 2724 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) 2725 { 2726 static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { 2727 { ZSTD_compressBlock_fast /* default for 0 */, 2728 ZSTD_compressBlock_fast, 2729 ZSTD_compressBlock_doubleFast, 2730 ZSTD_compressBlock_greedy, 2731 ZSTD_compressBlock_lazy, 2732 ZSTD_compressBlock_lazy2, 2733 ZSTD_compressBlock_btlazy2, 2734 ZSTD_compressBlock_btopt, 2735 ZSTD_compressBlock_btultra, 2736 ZSTD_compressBlock_btultra2 }, 2737 { ZSTD_compressBlock_fast_extDict /* default for 0 */, 2738 ZSTD_compressBlock_fast_extDict, 2739 ZSTD_compressBlock_doubleFast_extDict, 2740 ZSTD_compressBlock_greedy_extDict, 2741 ZSTD_compressBlock_lazy_extDict, 2742 ZSTD_compressBlock_lazy2_extDict, 2743 ZSTD_compressBlock_btlazy2_extDict, 2744 ZSTD_compressBlock_btopt_extDict, 2745 ZSTD_compressBlock_btultra_extDict, 2746 ZSTD_compressBlock_btultra_extDict }, 2747 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, 2748 ZSTD_compressBlock_fast_dictMatchState, 2749 ZSTD_compressBlock_doubleFast_dictMatchState, 2750 ZSTD_compressBlock_greedy_dictMatchState, 2751 ZSTD_compressBlock_lazy_dictMatchState, 2752 ZSTD_compressBlock_lazy2_dictMatchState, 2753 ZSTD_compressBlock_btlazy2_dictMatchState, 2754 ZSTD_compressBlock_btopt_dictMatchState, 2755 ZSTD_compressBlock_btultra_dictMatchState, 2756 ZSTD_compressBlock_btultra_dictMatchState }, 2757 { NULL /* default for 0 */, 2758 NULL, 2759 NULL, 2760 ZSTD_compressBlock_greedy_dedicatedDictSearch, 2761 ZSTD_compressBlock_lazy_dedicatedDictSearch, 2762 ZSTD_compressBlock_lazy2_dedicatedDictSearch, 2763 NULL, 2764 NULL, 2765 NULL, 2766 NULL } 2767 }; 2768 ZSTD_blockCompressor selectedCompressor; 2769 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); 2770 2771 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 2772 DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); 2773 if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { 2774 static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { 2775 { ZSTD_compressBlock_greedy_row, 2776 ZSTD_compressBlock_lazy_row, 2777 ZSTD_compressBlock_lazy2_row }, 2778 { ZSTD_compressBlock_greedy_extDict_row, 2779 ZSTD_compressBlock_lazy_extDict_row, 2780 ZSTD_compressBlock_lazy2_extDict_row }, 2781 { ZSTD_compressBlock_greedy_dictMatchState_row, 2782 ZSTD_compressBlock_lazy_dictMatchState_row, 2783 ZSTD_compressBlock_lazy2_dictMatchState_row }, 2784 { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, 2785 ZSTD_compressBlock_lazy_dedicatedDictSearch_row, 2786 ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } 2787 }; 2788 DEBUGLOG(4, "Selecting a row-based matchfinder"); 2789 assert(useRowMatchFinder != ZSTD_ps_auto); 2790 selectedCompressor = 
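/* Indexing note (illustrative) : only greedy, lazy and lazy2 have row-based
 * variants, so the strategy is rebased on ZSTD_greedy :
 * e.g. strat==ZSTD_lazy2 selects column (ZSTD_lazy2 - ZSTD_greedy) == 2. */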
rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; 2791 } else { 2792 selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; 2793 } 2794 assert(selectedCompressor != NULL); 2795 return selectedCompressor; 2796 } 2797 2798 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, 2799 const BYTE* anchor, size_t lastLLSize) 2800 { 2801 ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); 2802 seqStorePtr->lit += lastLLSize; 2803 } 2804 2805 void ZSTD_resetSeqStore(seqStore_t* ssPtr) 2806 { 2807 ssPtr->lit = ssPtr->litStart; 2808 ssPtr->sequences = ssPtr->sequencesStart; 2809 ssPtr->longLengthType = ZSTD_llt_none; 2810 } 2811 2812 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; 2813 2814 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) 2815 { 2816 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 2817 DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); 2818 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 2819 /* Assert that we have correctly flushed the ctx params into the ms's copy */ 2820 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); 2821 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 2822 if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { 2823 ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); 2824 } else { 2825 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); 2826 } 2827 return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ 2828 } 2829 ZSTD_resetSeqStore(&(zc->seqStore)); 2830 /* required for optimal parser to read stats from dictionary */ 2831 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; 2832 /* tell the optimal parser how we expect to compress literals */ 2833 ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; 2834 /* a gap between an attached dict and the current window is not safe, 2835 * they must remain adjacent, 2836 * and when that stops being the case, the dict must be unset */ 2837 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); 2838 2839 /* limited update after a very long match */ 2840 { const BYTE* const base = ms->window.base; 2841 const BYTE* const istart = (const BYTE*)src; 2842 const U32 curr = (U32)(istart-base); 2843 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ 2844 if (curr > ms->nextToUpdate + 384) 2845 ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); 2846 } 2847 2848 /* select and store sequences */ 2849 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); 2850 size_t lastLLSize; 2851 { int i; 2852 for (i = 0; i < ZSTD_REP_NUM; ++i) 2853 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; 2854 } 2855 if (zc->externSeqStore.pos < zc->externSeqStore.size) { 2856 assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); 2857 /* Updates ldmSeqStore.pos */ 2858 lastLLSize = 2859 ZSTD_ldm_blockCompress(&zc->externSeqStore, 2860 ms, &zc->seqStore, 2861 zc->blockState.nextCBlock->rep, 2862 zc->appliedParams.useRowMatchFinder, 2863 src, srcSize); 2864 assert(zc->externSeqStore.pos <= zc->externSeqStore.size); 2865 } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { 2866 rawSeqStore_t ldmSeqStore = kNullRawSeqStore; 2867 2868 ldmSeqStore.seq = zc->ldmSequences; 2869 ldmSeqStore.capacity = zc->maxNbLdmSequences; 2870 /* Updates ldmSeqStore.size */ 2871 
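/* Flow sketch (illustrative) : long distance matching is a two-pass process.
 * ZSTD_ldm_generateSequences() first scans the block and records large-window
 * matches into ldmSeqStore; ZSTD_ldm_blockCompress() then replays those
 * sequences, letting the regular match finder compress the gaps between them. */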
FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, 2872 &zc->appliedParams.ldmParams, 2873 src, srcSize), ""); 2874 /* Updates ldmSeqStore.pos */ 2875 lastLLSize = 2876 ZSTD_ldm_blockCompress(&ldmSeqStore, 2877 ms, &zc->seqStore, 2878 zc->blockState.nextCBlock->rep, 2879 zc->appliedParams.useRowMatchFinder, 2880 src, srcSize); 2881 assert(ldmSeqStore.pos == ldmSeqStore.size); 2882 } else { /* not long range mode */ 2883 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, 2884 zc->appliedParams.useRowMatchFinder, 2885 dictMode); 2886 ms->ldmSeqStore = NULL; 2887 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); 2888 } 2889 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; 2890 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); 2891 } } 2892 return ZSTDbss_compress; 2893 } 2894 2895 static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) 2896 { 2897 const seqStore_t* seqStore = ZSTD_getSeqStore(zc); 2898 const seqDef* seqStoreSeqs = seqStore->sequencesStart; 2899 size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; 2900 size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); 2901 size_t literalsRead = 0; 2902 size_t lastLLSize; 2903 2904 ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; 2905 size_t i; 2906 repcodes_t updatedRepcodes; 2907 2908 assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); 2909 /* Ensure we have enough space for last literals "sequence" */ 2910 assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); 2911 ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 2912 for (i = 0; i < seqStoreSeqSize; ++i) { 2913 U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; 2914 outSeqs[i].litLength = seqStoreSeqs[i].litLength; 2915 outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; 2916 outSeqs[i].rep = 0; 2917 2918 if (i == seqStore->longLengthPos) { 2919 if (seqStore->longLengthType == ZSTD_llt_literalLength) { 2920 outSeqs[i].litLength += 0x10000; 2921 } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { 2922 outSeqs[i].matchLength += 0x10000; 2923 } 2924 } 2925 2926 if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { 2927 /* Derive the correct offset corresponding to a repcode */ 2928 outSeqs[i].rep = seqStoreSeqs[i].offBase; 2929 if (outSeqs[i].litLength != 0) { 2930 rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; 2931 } else { 2932 if (outSeqs[i].rep == 3) { 2933 rawOffset = updatedRepcodes.rep[0] - 1; 2934 } else { 2935 rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; 2936 } 2937 } 2938 } 2939 outSeqs[i].offset = rawOffset; 2940 /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode 2941 so we provide seqStoreSeqs[i].offset - 1 */ 2942 ZSTD_updateRep(updatedRepcodes.rep, 2943 seqStoreSeqs[i].offBase - 1, 2944 seqStoreSeqs[i].litLength == 0); 2945 literalsRead += outSeqs[i].litLength; 2946 } 2947 /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. 2948 * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker 2949 * for the block boundary, according to the API. 
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    size_t literalsRead = 0;
    size_t lastLLSize;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i;
    repcodes_t updatedRepcodes;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (i = 0; i < seqStoreSeqSize; ++i) {
        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
        outSeqs[i].rep = 0;

        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            outSeqs[i].rep = seqStoreSeqs[i].offBase;
            if (outSeqs[i].litLength != 0) {
                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
            } else {
                if (outSeqs[i].rep == 3) {
                    rawOffset = updatedRepcodes.rep[0] - 1;
                } else {
                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                }
            }
        }
        outSeqs[i].offset = rawOffset;
        /* seqStoreSeqs[i].offBase == offCode+1, and ZSTD_updateRep() expects offCode,
           so we provide seqStoreSeqs[i].offBase - 1 */
        ZSTD_updateRep(updatedRepcodes.rep,
                       seqStoreSeqs[i].offBase - 1,
                       seqStoreSeqs[i].litLength == 0);
        literalsRead += outSeqs[i].litLength;
    }
    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(seqStoreLiteralsSize >= literalsRead);
    lastLLSize = seqStoreLiteralsSize - literalsRead;
    outSeqs[i].litLength = (U32)lastLLSize;
    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    seqStoreSeqSize++;
    zc->seqCollector.seqIndex += seqStoreSeqSize;
}

size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                              size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    ZSTD_customFree(dst, ZSTD_defaultCMem);
    return zc->seqCollector.seqIndex;
}

size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize)
{
    size_t in = 0;
    size_t out = 0;
    for (; in < seqsSize; ++in) {
        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
            if (in != seqsSize - 1) {
                sequences[in+1].litLength += sequences[in].litLength;
            }
        } else {
            sequences[out] = sequences[in];
            ++out;
        }
    }
    return out;
}
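/* Usage sketch for the two public helpers above (illustrative only ; error
 * handling elided, and the capacity formula below is merely a safe guess,
 * not an official bound) :
 *
 *   ZSTD_CCtx* cctx = ZSTD_createCCtx();
 *   size_t cap = srcSize/3 + 16;    -- each match covers >= MINMATCH(3) bytes,
 *                                      plus slack for per-block delimiters
 *   ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(cap * sizeof(*seqs));
 *   size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, cap, src, srcSize);
 *   nbSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);  -- drop (of:0, ml:0) markers
 *   ...inspect seqs[0..nbSeqs)...
 *   free(seqs); ZSTD_freeCCtx(cctx);
 */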
/* Unrolled loop to read four size_t values of input at a time.
 * Returns 1 if the input is RLE, 0 if not. */
static int ZSTD_isRLE(const BYTE* src, size_t length)
{
    const BYTE* ip = src;
    const BYTE value = ip[0];
    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
    const size_t unrollSize = sizeof(size_t) * 4;
    const size_t unrollMask = unrollSize - 1;
    const size_t prefixLength = length & unrollMask;
    size_t i;
    size_t u;
    if (length == 1) return 1;
    /* Check if prefix is RLE first before using unrolled loop */
    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
        return 0;
    }
    for (i = prefixLength; i != length; i += unrollSize) {
        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
            if (MEM_readST(ip + i + u) != valueST) {
                return 0;
            }
        }
    }
    return 1;
}

/* Returns true if the given block may be RLE.
 * This is just a heuristic based on the compressibility.
 * It may return both false positives and false negatives.
 */
static int ZSTD_maybeRLE(seqStore_t const* seqStore)
{
    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);

    return nbSeqs < 4 && nbLits < 10;
}

static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
    bs->prevCBlock = bs->nextCBlock;
    bs->nextCBlock = tmp;
}

/* Writes the block header */
static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
{
    U32 const cBlockHeader = cSize == 1 ?
        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
    MEM_writeLE24(op, cBlockHeader);
    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
}
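/* Worked example (illustrative) : a last, compressed block with cSize == 100 yields
 *   1 (lastBlock) + ((bt_compressed == 2) << 1) + (100 << 3)  ==  1 + 4 + 800  ==  805,
 * written as the 3-byte little-endian value 0x000325.
 * Bit layout (RFC 8878) : bit 0 = lastBlock, bits 1-2 = blockType,
 * bits 3-23 = blockSize (for RLE/raw blocks) or cSize (for compressed blocks).
 */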
/** ZSTD_buildBlockEntropyStats_literals() :
 *  Builds entropy for the literals.
 *  Stores literals block type (raw, rle, compressed, repeat) and
 *  huffman description table to hufMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE workspace
 * @return : size of huffman description table, or an error code */
static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
                                            const ZSTD_hufCTables_t* prevHuf,
                                                  ZSTD_hufCTables_t* nextHuf,
                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
                                            const int literalsCompressionIsDisabled,
                                                  void* workspace, size_t wkspSize)
{
    BYTE* const wkspStart = (BYTE*)workspace;
    BYTE* const wkspEnd = wkspStart + wkspSize;
    BYTE* const countWkspStart = wkspStart;
    unsigned* const countWksp = (unsigned*)workspace;
    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
    BYTE* const nodeWksp = countWkspStart + countWkspSize;
    const size_t nodeWkspSize = wkspEnd - nodeWksp;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    HUF_repeat repeat = prevHuf->repeatMode;
    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);

    /* Prepare nextEntropy, assuming the existing table gets reused */
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (literalsCompressionIsDisabled) {
        DEBUGLOG(5, "set_basic - disabled");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* small ? don't even attempt compression (speed opt) */
#ifndef COMPRESS_LITERALS_SIZE_MIN
#define COMPRESS_LITERALS_SIZE_MIN 63
#endif
    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) {
            DEBUGLOG(5, "set_basic - too small");
            hufMetadata->hType = set_basic;
            return 0;
    }   }

    /* Scan input and build symbol stats */
    {   size_t const largest = HIST_count_wksp(countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
        if (largest == srcSize) {
            DEBUGLOG(5, "set_rle");
            hufMetadata->hType = set_rle;
            return 0;
        }
        if (largest <= (srcSize >> 7)+4) {
            DEBUGLOG(5, "set_basic - no gain");
            hufMetadata->hType = set_basic;
            return 0;
    }   }

    /* Validate the previous Huffman table */
    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
        repeat = HUF_repeat_none;
    }

    /* Build Huffman Tree */
    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
                                                    maxSymbolValue, huffLog,
                                                    nodeWksp, nodeWkspSize);
        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
        huffLog = (U32)maxBits;
        {   /* Build and write the CTable */
            size_t const newCSize = HUF_estimateCompressedSize(
                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
            size_t const hSize = HUF_writeCTable_wksp(
                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
                    nodeWksp, nodeWkspSize);
            /* Check against repeating the previous CTable */
            if (repeat != HUF_repeat_none) {
                size_t const oldCSize = HUF_estimateCompressedSize(
                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
                    DEBUGLOG(5, "set_repeat - smaller");
                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                    hufMetadata->hType = set_repeat;
                    return 0;
            }   }
            if (newCSize + hSize >= srcSize) {
                DEBUGLOG(5, "set_basic - no gains");
                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                hufMetadata->hType = set_basic;
                return 0;
            }
            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
            hufMetadata->hType = set_compressed;
            nextHuf->repeatMode = HUF_repeat_check;
            return hSize;
    }   }
}
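/* Worked example (illustrative) for the repeat-vs-rebuild decision above :
 * with srcSize == 1000 literals, suppose newCSize == 600, hSize == 50 and
 * oldCSize == 630. Then 630 < 1000 and 630 <= 600 + 50, so repeating the
 * previous table wins : the 50-byte table description is saved (set_repeat).
 * Had oldCSize been 700 instead, 700 > 650 and 50+12 < 1000, so a fresh
 * table is written (set_compressed), since 600 + 50 < 1000 still gains.
 */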
/* ZSTD_buildDummySequencesStatistics():
 * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
 * and updates nextEntropy to the appropriate repeatMode.
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
{
    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
    nextEntropy->litlength_repeatMode = FSE_repeat_none;
    nextEntropy->offcode_repeatMode = FSE_repeat_none;
    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
    return stats;
}

/** ZSTD_buildBlockEntropyStats_sequences() :
 *  Builds entropy for the sequences.
 *  Stores symbol compression modes and fse table to fseMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE wksp.
 * @return : size of fse tables, or an error code */
static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
                                              const ZSTD_fseCTables_t* prevEntropy,
                                                    ZSTD_fseCTables_t* nextEntropy,
                                              const ZSTD_CCtx_params* cctxParams,
                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                    void* workspace, size_t wkspSize)
{
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* const ostart = fseMetadata->fseTablesBuffer;
    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
    BYTE* op = ostart;
    unsigned* countWorkspace = (unsigned*)workspace;
    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
    ZSTD_symbolEncodingTypeStats_t stats;

    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                          prevEntropy, nextEntropy, op, oend,
                                          strategy, countWorkspace,
                                          entropyWorkspace, entropyWorkspaceSize)
                       : ZSTD_buildDummySequencesStatistics(nextEntropy);
    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
    fseMetadata->lastCountSize = stats.lastCountSize;
    return stats.size;
}


/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  Requires workspace size ENTROPY_WORKSPACE_SIZE
 *
 * @return : 0 on success, or an error code
 */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize)
{
    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
    entropyMetadata->hufMetadata.hufDesSize =
        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
                                            &prevEntropy->huf, &nextEntropy->huf,
                                            &entropyMetadata->hufMetadata,
                                            ZSTD_literalsCompressionIsDisabled(cctxParams),
                                            workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
    entropyMetadata->fseMetadata.fseTablesSize =
        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
                                              &prevEntropy->fse, &nextEntropy->fse,
                                              cctxParams,
                                              &entropyMetadata->fseMetadata,
                                              workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
    return 0;
}
/* Returns the size estimate for the literals section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
                                             const ZSTD_hufCTables_t* huf,
                                             const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                             void* workspace, size_t wkspSize,
                                             int writeEntropy)
{
    unsigned* const countWksp = (unsigned*)workspace;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
    U32 singleStream = litSize < 256;

    if (hufMetadata->hType == set_basic) return litSize;
    else if (hufMetadata->hType == set_rle) return 1;
    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
        size_t const largest = HIST_count_wksp(countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
        if (ZSTD_isError(largest)) return litSize;
        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
            return cLitSizeEstimate + literalSectionHeaderSize;
    }   }
    assert(0); /* impossible */
    return 0;
}
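/* Worked example (illustrative) for literalSectionHeaderSize above :
 *   litSize ==   500 -> 3 + 0 + 0 == 3 bytes
 *   litSize ==  2000 -> 3 + 1 + 0 == 4 bytes   (litSize >= 1 KB)
 *   litSize == 20000 -> 3 + 1 + 1 == 5 bytes   (litSize >= 16 KB)
 * Also, litSize < 256 selects single-stream mode, which avoids the 6-byte
 * jump table that the 4-stream variant prepends.
 */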
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
                        const FSE_CTable* fseCTable,
                        const U8* additionalBits,
                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                        void* workspace, size_t wkspSize)
{
    unsigned* const countWksp = (unsigned*)workspace;
    const BYTE* ctp = codeTable;
    const BYTE* const ctStart = ctp;
    const BYTE* const ctEnd = ctStart + nbSeq;
    size_t cSymbolTypeSizeEstimateInBits = 0;
    unsigned max = maxCode;

    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
    if (type == set_basic) {
        /* We selected this encoding type, so it must be valid. */
        assert(max <= defaultMax);
        (void)defaultMax;
        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
    } else if (type == set_rle) {
        cSymbolTypeSizeEstimateInBits = 0;
    } else if (type == set_compressed || type == set_repeat) {
        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
    }
    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
        return nbSeq * 10;
    }
    while (ctp < ctEnd) {
        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
        ctp++;
    }
    return cSymbolTypeSizeEstimateInBits >> 3;
}

/* Returns the size estimate for the sequences section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
                                               const BYTE* llCodeTable,
                                               const BYTE* mlCodeTable,
                                               size_t nbSeq,
                                               const ZSTD_fseCTables_t* fseTables,
                                               const ZSTD_fseCTablesMetadata_t* fseMetadata,
                                               void* workspace, size_t wkspSize,
                                               int writeEntropy)
{
    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
    size_t cSeqSizeEstimate = 0;
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
                                                          fseTables->offcodeCTable, NULL,
                                                          OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                                          workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
                                                          fseTables->litlengthCTable, LL_bits,
                                                          LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                                          workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
                                                          fseTables->matchlengthCTable, ML_bits,
                                                          ML_defaultNorm, ML_defaultNormLog, MaxML,
                                                          workspace, wkspSize);
    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
    return cSeqSizeEstimate + sequencesSectionHeaderSize;
}

/* Returns the size estimate for a given stream of literals, of, ll, ml */
static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
                                     const BYTE* ofCodeTable,
                                     const BYTE* llCodeTable,
                                     const BYTE* mlCodeTable,
                                     size_t nbSeq,
                                     const ZSTD_entropyCTables_t* entropy,
                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                     void* workspace, size_t wkspSize,
                                     int writeLitEntropy, int writeSeqEntropy)
{
    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
                                                         &entropy->huf, &entropyMetadata->hufMetadata,
                                                         workspace, wkspSize, writeLitEntropy);
    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                         workspace, wkspSize, writeSeqEntropy);
    return seqSize + literalsSize + ZSTD_blockHeaderSize;
}
/* Builds entropy statistics and uses them for blocksize estimation.
 *
 * Returns the estimated compressed size of the seqStore, or a zstd error.
 */
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
{
    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
    DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
                    &zc->blockState.prevCBlock->entropy,
                    &zc->blockState.nextCBlock->entropy,
                    &zc->appliedParams,
                    entropyMetadata,
                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}

/* Returns the number of literals bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
{
    size_t literalsBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        literalsBytes += seq.litLength;
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
            literalsBytes += 0x10000;
        }
    }
    return literalsBytes;
}

/* Returns the number of match bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
{
    size_t matchBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        matchBytes += seq.mlBase + MINMATCH;
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
            matchBytes += 0x10000;
        }
    }
    return matchBytes;
}

/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
 * Stores the result in resultSeqStore.
 */
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
                               const seqStore_t* originalSeqStore,
                                     size_t startIdx, size_t endIdx)
{
    BYTE* const litEnd = originalSeqStore->lit;
    size_t literalsBytes;
    size_t literalsBytesPreceding = 0;

    *resultSeqStore = *originalSeqStore;
    if (startIdx > 0) {
        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    }

    /* Move longLengthPos into the correct position if necessary */
    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
            resultSeqStore->longLengthType = ZSTD_llt_none;
        } else {
            resultSeqStore->longLengthPos -= (U32)startIdx;
        }
    }
    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    resultSeqStore->litStart += literalsBytesPreceding;
    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
        resultSeqStore->lit = litEnd;
    } else {
        resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
    }
    resultSeqStore->llCode += startIdx;
    resultSeqStore->mlCode += startIdx;
    resultSeqStore->ofCode += startIdx;
}
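/* Worked example (illustrative) : splitting a 1000-sequence seqStore into
 * [0, 500) and [500, 1000). The second chunk gets
 *   sequencesStart += 500, and litStart advanced by the literal bytes
 *   consumed by sequences 0..499 ;
 * only the chunk reaching endIdx == 1000 inherits the block's trailing
 * literals (resultSeqStore->lit = litEnd).
 */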
/**
 * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
 * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
 */
static U32
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
{
    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
    assert(STORED_IS_REPCODE(offCode));
    if (adjustedOffCode == ZSTD_REP_NUM) {
        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
        assert(rep[0] > 0);
        return rep[0] - 1;
    }
    return rep[adjustedOffCode];
}

/**
 * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
 * due to emission of RLE/raw blocks that disturb the offset history,
 * and replaces any repcodes within the seqStore that may be invalid.
 *
 * dRepcodes are updated as would be on the decompression side.
 * cRepcodes are updated exactly in accordance with the seqStore.
 *
 * Note : this function assumes seq->offBase respects the following numbering scheme :
 *        0 : invalid
 *        1-3 : repcode 1-3
 *        4+ : real_offset+3
 */
static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
                                          seqStore_t* const seqStore, U32 const nbSeq)
{
    U32 idx = 0;
    for (; idx < nbSeq; ++idx) {
        seqDef* const seq = seqStore->sequencesStart + idx;
        U32 const ll0 = (seq->litLength == 0);
        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
        assert(seq->offBase > 0);
        if (STORED_IS_REPCODE(offCode)) {
            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
             * the repcode with the offset it actually references, determined by the compression
             * repcode history.
             */
            if (dRawOffset != cRawOffset) {
                seq->offBase = cRawOffset + ZSTD_REP_NUM;
            }
        }
        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
         * Decompression repcode history may use the modified seq->offBase value taken from compression repcode history.
         */
        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
    }
}
/* ZSTD_compressSeqStore_singleBlock():
 * Compresses a seqStore into a block with a block header, into the buffer dst.
 *
 * Returns the total size of that block (including header), or a ZSTD error code.
 */
static size_t
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
                                  repcodes_t* const dRep, repcodes_t* const cRep,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  U32 lastBlock, U32 isPartition)
{
    const U32 rleMaxLength = 25;
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    size_t cSize;
    size_t cSeqsSize;

    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
    repcodes_t const dRepOriginal = *dRep;
    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
    if (isPartition)
        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));

    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
                &zc->appliedParams,
                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
                srcSize,
                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                zc->bmi2);
    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");

    if (!zc->isFirstBlock &&
        cSeqsSize < rleMaxLength &&
        ZSTD_isRLE((BYTE const*)src, srcSize)) {
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
         * This is only an issue for zstd <= v1.4.3
         */
        cSeqsSize = 1;
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    if (cSeqsSize == 0) {
        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else if (cSeqsSize == 1) {
        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
        cSize = ZSTD_blockHeaderSize + cSeqsSize;
        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
    }

    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}

/* Struct to keep track of where we are in our recursive calls. */
typedef struct {
    U32* splitLocations;    /* Array of split indices */
    size_t idx;             /* The current index within splitLocations being worked on */
} seqStoreSplits;

#define MIN_SEQUENCES_BLOCK_SPLITTING 300
/* Helper function to perform the recursive search for block splits.
 * Estimates the cost of the seqStore prior to split, and estimates the cost of splitting the sequences in half.
 * If advantageous to split, then we recurse down the two sub-blocks.
 * If not, or if an error occurred in estimation, then we do not recurse.
 *
 * Note: The recursion depth is capped by a heuristic minimum number of sequences,
 * defined by MIN_SEQUENCES_BLOCK_SPLITTING.
 * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
 * In practice, recursion depth usually doesn't go beyond 4.
 *
 * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
 * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current blockSize maximum of 128 KB,
 * this value is actually impossible to reach.
 */
static void
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
{
    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
    size_t estimatedOriginalSize;
    size_t estimatedFirstHalfSize;
    size_t estimatedSecondHalfSize;
    size_t midIdx = (startIdx + endIdx)/2;

    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
        return;
    }
    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
        return;
    }
    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
        splits->splitLocations[splits->idx] = (U32)midIdx;
        splits->idx++;
        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
    }
}

/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
 *
 * Returns the number of splits made (which equals the size of the partition table - 1).
 */
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
{
    seqStoreSplits splits = {partitions, 0};
    if (nbSeq <= 4) {
        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
        /* Refuse to try and split anything with less than 4 sequences */
        return 0;
    }
    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
    splits.splitLocations[splits.idx] = nbSeq;
    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
    return splits.idx;
}
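/* Worked example (illustrative) : for nbSeq == 1200, the helper first
 * compares the estimate for [0,1200) against [0,600) + [600,1200).
 * If the halves estimate smaller, it recurses into each half ; both halves
 * (600 sequences) are still >= MIN_SEQUENCES_BLOCK_SPLITTING == 300, so they
 * may split again. One possible outcome is
 *   partitions == { 300, 600, 900, 1200 },  numSplits == 3,
 * i.e. four sub-blocks compressed back to back by the caller below.
 */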
/* ZSTD_compressBlock_splitBlock():
 * Attempts to split a given block into multiple blocks to improve compression ratio.
 *
 * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
 */
static size_t
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
{
    size_t cSize = 0;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    size_t i = 0;
    size_t srcBytesTotal = 0;
    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);

    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
     * separate repcode histories that simulate repcode history on compression and decompression side,
     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
     *
     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
     *    a nocompress/RLE block.
     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
     *    the replacement offset value rather than the original repcode to update the repcode history.
     *    dRep also will be the final repcode history sent to the next block.
     *
     * See ZSTD_seqStore_resolveOffCodes() for more details.
     */
    repcodes_t dRep;
    repcodes_t cRep;
    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));

    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    if (numSplits == 0) {
        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
                                                                    &dRep, &cRep,
                                                                    op, dstCapacity,
                                                                    ip, blockSize,
                                                                    lastBlock, 0 /* isPartition */);
        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
        return cSizeSingleBlock;
    }

    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
    for (i = 0; i <= numSplits; ++i) {
        size_t srcBytes;
        size_t cSizeChunk;
        U32 const lastPartition = (i == numSplits);
        U32 lastBlockEntireSrc = 0;

        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
        srcBytesTotal += srcBytes;
        if (lastPartition) {
            /* This is the final partition, need to account for possible last literals */
            srcBytes += blockSize - srcBytesTotal;
            lastBlockEntireSrc = lastBlock;
        } else {
            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
        }

        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
                                                       &dRep, &cRep,
                                                       op, dstCapacity,
                                                       ip, srcBytes,
                                                       lastBlockEntireSrc, 1 /* isPartition */);
        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");

        ip += srcBytes;
        op += cSizeChunk;
        dstCapacity -= cSizeChunk;
        cSize += cSizeChunk;
        *currSeqStore = *nextSeqStore;
        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
    }
    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
     * for the next block.
     */
    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
    return cSize;
}
"Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); 3716 FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); 3717 3718 ip += srcBytes; 3719 op += cSizeChunk; 3720 dstCapacity -= cSizeChunk; 3721 cSize += cSizeChunk; 3722 *currSeqStore = *nextSeqStore; 3723 assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); 3724 } 3725 /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes 3726 * for the next block. 3727 */ 3728 ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); 3729 return cSize; 3730 } 3731 3732 static size_t 3733 ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, 3734 void* dst, size_t dstCapacity, 3735 const void* src, size_t srcSize, U32 lastBlock) 3736 { 3737 const BYTE* ip = (const BYTE*)src; 3738 BYTE* op = (BYTE*)dst; 3739 U32 nbSeq; 3740 size_t cSize; 3741 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); 3742 assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); 3743 3744 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 3745 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 3746 if (bss == ZSTDbss_noCompress) { 3747 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 3748 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 3749 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); 3750 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 3751 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); 3752 return cSize; 3753 } 3754 nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); 3755 } 3756 3757 cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); 3758 FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); 3759 return cSize; 3760 } 3761 3762 static size_t 3763 ZSTD_compressBlock_internal(ZSTD_CCtx* zc, 3764 void* dst, size_t dstCapacity, 3765 const void* src, size_t srcSize, U32 frame) 3766 { 3767 /* This the upper bound for the length of an rle block. 3768 * This isn't the actual upper bound. Finding the real threshold 3769 * needs further investigation. 3770 */ 3771 const U32 rleMaxLength = 25; 3772 size_t cSize; 3773 const BYTE* ip = (const BYTE*)src; 3774 BYTE* op = (BYTE*)dst; 3775 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 3776 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, 3777 (unsigned)zc->blockState.matchState.nextToUpdate); 3778 3779 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 3780 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 3781 if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } 3782 } 3783 3784 if (zc->seqCollector.collectSequences) { 3785 ZSTD_copyBlockSequences(zc); 3786 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3787 return 0; 3788 } 3789 3790 /* encode sequences and literals */ 3791 cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, 3792 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 3793 &zc->appliedParams, 3794 dst, dstCapacity, 3795 srcSize, 3796 zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 3797 zc->bmi2); 3798 3799 if (frame && 3800 /* We don't want to emit our first block as a RLE even if it qualifies because 3801 * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        if (/* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
             * This is only an issue for zstd <= v1.4.3
             */
            !zc->isFirstBlock &&
            ZSTD_maybeRLE(&zc->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize))
        {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }
        /* Attempt superblock compression.
         *
         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem, because even if we have
         * space now, taking an extra byte now could cause us to run out of space later
         * and violate ZSTD_compressBound().
         *
         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
         *
         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
         * uncompressed block in these cases:
         *   * cSize == 0: Return code for an uncompressed block.
         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
         *     output space.
         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
         *     emit an uncompressed block.
         */
        {   size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
            if (cSize != ERROR(dstSize_tooSmall)) {
                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                    return cSize;
                }
            }
        }
    }

    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
    /* Superblock compression failed, attempt to emit a single no compress block.
     * The decoder will be able to stream this block since it is uncompressed.
     */
    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
}

static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               U32 lastBlock)
{
    size_t cSize = 0;
    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");

    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");

    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
    U32 const maxDist = (U32)1 << params->cParams.windowLog;
    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  The frame is assumed already started (header already produced).
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;

    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
        ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize;
            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                assert(cSize > 0);
                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
            } else {
                cSize = ZSTD_compressBlock_internal(cctx,
                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                        ip, blockSize, 1 /* frame */);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");

                if (cSize == 0) {  /* block is not compressible */
                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                } else {
                    U32 const cBlockHeader = cSize == 1 ?
                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                    MEM_writeLE24(op, cBlockHeader);
                    cSize += ZSTD_blockHeaderSize;
                }
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{
    BYTE* const op = (BYTE*)dst;
    U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32 const checksumFlag = params->fParams.checksumFlag>0;
    U32 const windowSize = (U32)1 << params->cParams.windowLog;
    U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32 const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6));
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
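/* Worked example (illustrative) : windowLog == 20, no dictID, no checksum,
 * unknown content size :
 *   dictIDSizeCode == 0, checksumFlag == 0, singleSegment == 0, fcsCode == 0
 *   => frameHeaderDescriptionByte == 0 ;
 *   windowLogByte == (20 - ZSTD_WINDOWLOG_ABSOLUTEMIN(10)) << 3 == 0x50.
 * Total header : 4-byte magic + 1-byte FHD + 1-byte window descriptor == 6 bytes,
 * comfortably within the ZSTD_FRAMEHEADERSIZE_MAX bound checked above.
 */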
/* ZSTD_writeSkippableFrame_advanced() :
 * Writes out a skippable frame with the specified magic number variant (16 are supported),
 * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
 *
 * Returns the total number of bytes written, or a ZSTD error code.
 */
size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize, unsigned magicVariant)
{
    BYTE* op = (BYTE*)dst;
    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
                    dstSize_tooSmall, "Not enough room for skippable frame");
    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");

    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
    MEM_writeLE32(op+4, (U32)srcSize);
    ZSTD_memcpy(op+8, src, srcSize);
    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
}
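/* Usage sketch (illustrative only, error handling elided) : embedding a small
 * piece of application metadata with magic variant 0 :
 *
 *   const char meta[] = "app-metadata";   -- 13 bytes including the NUL
 *   size_t const r = ZSTD_writeSkippableFrame(dst, dstCapacity,
 *                                             meta, sizeof(meta), 0);
 *   -- on success, r == 13 + ZSTD_SKIPPABLEHEADERSIZE(8) == 21 bytes :
 *   --   4-byte LE magic 0x184D2A50, 4-byte LE content size, then the payload.
 * Conforming decoders simply skip over such frames.
 */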
/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
{
    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
                    "dst buf is too small to write frame trailer empty block.");
    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
        MEM_writeLE24(dst, cBlockHeader24);
        return ZSTD_blockHeaderSize;
    }
}

size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
                    parameter_unsupported,
                    "incompatible with ldm");
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.size = nbSeq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.posInSequence = 0;
    return 0;
}


static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize,
                              U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
        ms->forceNonContiguous = 0;
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}

size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}


size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
    assert(!ZSTD_checkCParams(cParams));
    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}

size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    {   size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
        RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block");  }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}

/*! ZSTD_loadDictionaryContent() :
 * @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ldmState_t* ls,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
         * Dictionaries right at the edge will immediately trigger overflow
         * correction, but I don't want to insert extra constraints here.
         */
        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
        /* We must have cleared our windows when our source is this large. */
        assert(ZSTD_window_isEmpty(ms->window));
        if (loadLdmDict)
            assert(ZSTD_window_isEmpty(ls->window));
        /* If the dictionary is too large, only load the suffix of the dictionary. */
        if (srcSize > maxDictSize) {
            ip = iend - maxDictSize;
            src = ip;
            srcSize = maxDictSize;
        }
    }
    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
    ms->forceNonContiguous = params->deterministicRefPrefix;

    if (loadLdmDict) {
        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
    }

    if (srcSize <= HASH_READ_SIZE) return 0;

    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);

    if (loadLdmDict)
        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);

    switch(params->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, dtlm);
        break;
    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        assert(srcSize >= HASH_READ_SIZE);
        if (ms->dedicatedDictSearch) {
            assert(ms->chainTable != NULL);
            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
        } else {
            assert(params->useRowMatchFinder != ZSTD_ps_auto);
            if (params->useRowMatchFinder == ZSTD_ps_enable) {
                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
                ZSTD_memset(ms->tagTable, 0, tagTableSize);
                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using row-based hash table for lazy dict");
            } else {
                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
            }
        }
        break;

    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        assert(srcSize >= HASH_READ_SIZE);
        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
        break;

    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}


/* Dictionaries that assign zero probability to symbols that do show up cause problems
 * during FSE encoding. Mark dictionaries with zero-probability symbols as FSE_repeat_check;
 * only dictionaries whose symbols are all valid can be assumed FSE_repeat_valid.
 */
/* Dictionaries that assign zero probability to symbols that do show up
 * cause problems when FSE encoding. Mark dictionaries with zero-probability
 * symbols as FSE_repeat_check; only dictionaries whose symbols all carry a
 * non-zero probability can be assumed FSE_repeat_valid.
 */
static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
{
    U32 s;
    if (dictMaxSymbolValue < maxSymbolValue) {
        return FSE_repeat_check;
    }
    for (s = 0; s <= maxSymbolValue; ++s) {
        if (normalizedCounter[s] == 0) {
            return FSE_repeat_check;
        }
    }
    return FSE_repeat_valid;
}

size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictPtr += 8;
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            dictEnd-dictPtr, &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }
dictionary_corrupted, ""); 4377 bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); 4378 dictPtr += litlengthHeaderSize; 4379 } 4380 4381 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); 4382 bs->rep[0] = MEM_readLE32(dictPtr+0); 4383 bs->rep[1] = MEM_readLE32(dictPtr+4); 4384 bs->rep[2] = MEM_readLE32(dictPtr+8); 4385 dictPtr += 12; 4386 4387 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 4388 U32 offcodeMax = MaxOff; 4389 if (dictContentSize <= ((U32)-1) - 128 KB) { 4390 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ 4391 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ 4392 } 4393 /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ 4394 bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); 4395 4396 /* All repCodes must be <= dictContentSize and != 0 */ 4397 { U32 u; 4398 for (u=0; u<3; u++) { 4399 RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); 4400 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); 4401 } } } 4402 4403 return dictPtr - (const BYTE*)dict; 4404 } 4405 4406 /* Dictionary format : 4407 * See : 4408 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format 4409 */ 4410 /*! ZSTD_loadZstdDictionary() : 4411 * @return : dictID, or an error code 4412 * assumptions : magic number supposed already checked 4413 * dictSize supposed >= 8 4414 */ 4415 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, 4416 ZSTD_matchState_t* ms, 4417 ZSTD_cwksp* ws, 4418 ZSTD_CCtx_params const* params, 4419 const void* dict, size_t dictSize, 4420 ZSTD_dictTableLoadMethod_e dtlm, 4421 void* workspace) 4422 { 4423 const BYTE* dictPtr = (const BYTE*)dict; 4424 const BYTE* const dictEnd = dictPtr + dictSize; 4425 size_t dictID; 4426 size_t eSize; 4427 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 4428 assert(dictSize >= 8); 4429 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); 4430 4431 dictID = params->fParams.noDictIDFlag ? 
/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 * @return : dictID, or an error code
 *  assumptions : magic number already checked by caller,
 *                and dictSize >= 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    size_t dictID;
    size_t eSize;
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize >= 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
    dictPtr += eSize;

    {
        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
    }
    return dictID;
}

/** ZSTD_compress_insertDictionary() :
*   @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                               ldmState_t* ls,
                               ZSTD_cwksp* ws,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        return 0;
    }

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(
                ms, ls, ws, params, dict, dictSize, dtlm);
        }
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(
        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}

#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)

/*! ZSTD_compressBegin_internal() :
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
#if ZSTD_TRACE
    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
#endif
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                     dictContentSize,
                                     ZSTDcrp_makeClean, zbuff) , "");
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                        cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
        cctx->dictContentSize = dictContentSize;
    }
    return 0;
}

size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}

/*! ZSTD_compressBegin_advanced() :
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}

size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;
    {
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
}
/*! ZSTD_writeEpilogue() :
 *  Ends a frame.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}

void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
#if ZSTD_TRACE
    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
        int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
        ZSTD_Trace trace;
        ZSTD_memset(&trace, 0, sizeof(trace));
        trace.version = ZSTD_VERSION_NUMBER;
        trace.streaming = streaming;
        trace.dictionaryID = cctx->dictID;
        trace.dictionarySize = cctx->dictContentSize;
        trace.uncompressedSize = cctx->consumedSrcSize;
        trace.compressedSize = cctx->producedCSize + extraCSize;
        trace.params = &cctx->appliedParams;
        trace.cctx = cctx;
        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
    }
    cctx->traceCtx = 0;
#else
    (void)cctx;
    (void)extraCSize;
#endif
}

size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    ZSTD_CCtx_trace(cctx, endResult);
    return cSize + endResult;
}
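/* Illustrative sketch (not part of the library) : the bufferless frame-level
 * loop built from ZSTD_compressBegin(), ZSTD_compressContinue() and
 * ZSTD_compressEnd(). Prior input must stay accessible and unmodified between
 * calls (bufferless API contract), which holds here since all chunks come
 * from one contiguous buffer.
 */
#if 0
static size_t example_compressFrame(ZSTD_CCtx* cctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    FORWARD_IF_ERROR(ZSTD_compressBegin(cctx, 3 /* level */), "init failed");
    {   size_t const chunkSize = ZSTD_getBlockSize(cctx);
        while (srcSize > chunkSize) {
            size_t const cSize = ZSTD_compressContinue(cctx, op, dstCapacity, ip, chunkSize);
            FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue failed");
            op += cSize; dstCapacity -= cSize;
            ip += chunkSize; srcSize -= chunkSize;
        }
        /* last chunk : also writes the epilogue (last block + optional checksum) */
        {   size_t const cSize = ZSTD_compressEnd(cctx, op, dstCapacity, ip, srcSize);
            FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
            op += cSize;
        }
    }
    return (size_t)(op - (BYTE*)dst);
}
#endif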
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}

/* Internal */
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict, size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) , "");
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    {
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
}

size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}

size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
#if ZSTD_COMPRESS_HEAPMODE
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(cctx);
#else
    ZSTD_CCtx ctxBody;
    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtxContent(&ctxBody);   /* can't free ctxBody itself, as it's on stack; free only heap content */
#endif
    return result;
}
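/* Illustrative sketch (not part of the library) : minimal one-shot use of the
 * simple API defined just above, with the usual error check from zstd.h.
 */
#if 0
static size_t example_oneShot(void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
{
    size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, ZSTD_CLEVEL_DEFAULT);
    if (ZSTD_isError(cSize)) return 0;  /* e.g. dstSize_tooSmall when dstCapacity < ZSTD_compressBound(srcSize) */
    return cSize;
}
#endif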
/* =====  Dictionary API  ===== */

/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
          * in case we are using DDS with row-hash. */
         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}

size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}

size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace */
    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
        + ZSTD_cwksp_sizeof(&cdict->workspace);
}
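/* Illustrative sketch (not from the original source) : the estimate above is
 * the sum of four cwksp allocations, so a caller can reserve the buffer for a
 * static CDict up front :
 */
#if 0
static void* example_reserveCDictBuffer(size_t dictSize, int level)
{
    size_t const neededSize = ZSTD_estimateCDictSize(dictSize, level);
    /* hand this 8-bytes-aligned buffer to ZSTD_initStaticCDict() (defined below) */
    return ZSTD_customMalloc(neededSize, ZSTD_defaultCMem);
}
#endif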
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        cdict->dictContent = dictBuffer;
    } else {
        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        cdict->dictContent = internalBuffer;
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);


    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &params.cParams,
        params.useRowMatchFinder,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict), "");
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}

static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_paramSwitch_e useRowMatchFinder,
                                      U32 enableDedicatedDictSearch,
                                      ZSTD_customMem customMem)
{
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) {
            ZSTD_customFree(workspace, customMem);
            return NULL;
        }

        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);

        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = ZSTD_NO_CLEVEL;   /* signals advanced API usage */
        cdict->useRowMatchFinder = useRowMatchFinder;
        return cdict;
    }
}

ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
    ZSTD_CCtxParams_init(&cctxParams, 0);
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
    return ZSTD_createCDict_advanced2(
        dictBuffer, dictSize,
        dictLoadMethod, dictContentType,
        &cctxParams, customMem);
}
ZSTD_CDict* ZSTD_createCDict_advanced2(
        const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod,
        ZSTD_dictContentType_e dictContentType,
        const ZSTD_CCtx_params* originalCctxParams,
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams = *originalCctxParams;
    ZSTD_compressionParameters cParams;
    ZSTD_CDict* cdict;

    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;

    if (cctxParams.enableDedicatedDictSearch) {
        cParams = ZSTD_dedicatedDictSearch_getCParams(
            cctxParams.compressionLevel, dictSize);
        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
    } else {
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
        /* Fall back to non-DDSS params */
        cctxParams.enableDedicatedDictSearch = 0;
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
    cctxParams.cParams = cParams;
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);

    cdict = ZSTD_createCDict_advanced_internal(dictSize,
                        dictLoadMethod, cctxParams.cParams,
                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                        customMem);
    if (!cdict) return NULL;   /* allocation failure : ZSTD_initCDict_internal() would dereference NULL */

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                    dict, dictSize,
                                    dictLoadMethod, dictContentType,
                                    cctxParams) )) {
        ZSTD_freeCDict(cdict);
        return NULL;
    }

    return cdict;
}

ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                        ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                                        cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}

ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                        ZSTD_dlm_byRef, ZSTD_dct_auto,
                                                        cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}

size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = cdict->customMem;
        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
        ZSTD_cwksp_free(&cdict->workspace, cMem);
        if (!cdictInWorkspace) {
            ZSTD_customFree(cdict, cMem);
        }
        return 0;
    }
}
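/* Illustrative sketch (not part of the library) : allocation-free CDict setup
 * through ZSTD_initStaticCDict(), defined just below. `buffer` stands for
 * caller-provided, 8-bytes-aligned storage.
 */
#if 0
static const ZSTD_CDict* example_staticCDict(void* buffer, size_t bufferSize,
                                             const void* dict, size_t dictSize)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
    assert(bufferSize >= ZSTD_estimateCDictSize(dictSize, 3));
    return ZSTD_initStaticCDict(buffer, bufferSize,
                                dict, dictSize,
                                ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                cParams);
}
#endif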
/*! ZSTD_initStaticCDict() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must be 8-bytes aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevant cParams.
 * @return : pointer to ZSTD_CDict, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {
        ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    ZSTD_CCtxParams_init(&params, 0);
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
    cdict->useRowMatchFinder = useRowMatchFinder;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              params) ))
        return NULL;

    return cdict;
}

ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
{
    assert(cdict != NULL);
    return cdict->matchState.cParams;
}

/*! ZSTD_getDictID_fromCDict() :
 *  Provides the dictID of the dictionary loaded into `cdict`.
 *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;
    return cdict->dictID;
}
/* ZSTD_compressBegin_usingCDict_internal() :
 * Implementation of various ZSTD_compressBegin_usingCDict* functions.
 */
static size_t ZSTD_compressBegin_usingCDict_internal(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
    /* Initialize the cctxParams from the cdict */
    {
        ZSTD_parameters params;
        params.fParams = fParams;
        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
                        || cdict->compressionLevel == 0 ) ?
                ZSTD_getCParamsFromCDict(cdict)
              : ZSTD_getCParams(cdict->compressionLevel,
                                pledgedSrcSize,
                                cdict->dictContentSize);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
    }
    /* Increase window log to fit the entire dictionary and source if the
     * source size is known. Limit the increase to 19, which is the
     * window log for compression level 1 with the largest source size.
     */
    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
    }
    return ZSTD_compressBegin_internal(cctx,
                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                        cdict,
                                        &cctxParams, pledgedSrcSize,
                                        ZSTDb_not_buffered);
}


/* ZSTD_compressBegin_usingCDict_advanced() :
 * This function is DEPRECATED.
 * cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict_advanced(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
}

/* ZSTD_compressBegin_usingCDict() :
 * cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
}

/*! ZSTD_compress_usingCDict_internal():
 * Implementation of various ZSTD_compress_usingCDict* functions.
 */
static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

/*! ZSTD_compress_usingCDict_advanced():
 * This function is DEPRECATED.
 */
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
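/* Illustrative sketch (not part of the library) : the intended pattern is to
 * create the CDict once and reuse it across many inputs; that is where the
 * startup saving mentioned below comes from.
 */
#if 0
static size_t example_compressWithSharedCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict,
                                              void* dst, size_t dstCapacity,
                                              const void* src, size_t srcSize)
{
    /* cdict was created once, e.g. ZSTD_createCDict(dict, dictSize, level),
     * and can be shared read-only by many contexts */
    return ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
}
#endif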
/*! ZSTD_compress_usingCDict() :
 *  Compression using a digested Dictionary.
 *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
 *  Note that compression parameters are decided at CDict creation time
 *  while frame parameters are hardcoded */
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}



/* ******************************************************************
 *  Streaming
 ********************************************************************/

ZSTD_CStream* ZSTD_createCStream(void)
{
    DEBUGLOG(3, "ZSTD_createCStream");
    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
}

ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
{
    return ZSTD_initStaticCCtx(workspace, workspaceSize);
}

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{   /* CStream and CCtx are now same object */
    return ZSTD_createCCtx_advanced(customMem);
}

size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    return ZSTD_freeCCtx(zcs);   /* same object */
}



/*======   Initialization   ======*/

size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }

size_t ZSTD_CStreamOutSize(void)
{
    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bit hash */ ;
}

static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
{
    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
        return ZSTD_cpm_attachDict;
    else
        return ZSTD_cpm_noAttachDict;
}

/* ZSTD_resetCStream():
 * pledgedSrcSize == 0 means "unknown" */
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}
/*! ZSTD_initCStream_internal() :
 *  Note : for lib/compress only. Used by zstdmt_compress.c.
 *  Assumption 1 : params are valid
 *  Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params,
                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_internal");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    zcs->requestedParams = *params;
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if (dict) {
        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    } else {
        /* Dictionary is cleared if !cdict */
        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    }
    return 0;
}

/* ZSTD_initCStream_usingCDict_advanced() :
 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                            const ZSTD_CDict* cdict,
                                            ZSTD_frameParameters fParams,
                                            unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    zcs->requestedParams.fParams = fParams;
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    return 0;
}

/* note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    return 0;
}
/* ZSTD_initCStream_advanced() :
 * pledgedSrcSize must be exact.
 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pss)
{
    /* for compatibility with older programs relying on this behavior.
     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
     * This line will be removed in the future.
     */
    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}

size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}

size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}

size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    return 0;
}

/*======   Compression   ======*/

static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
{
    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
    if (hintInSize==0) hintInSize = cctx->blockSize;
    return hintInSize;
}
/** ZSTD_compressStream_generic():
 *  internal function for all *compressStream*() variants
 * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                          ZSTD_outBuffer* output,
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective const flushMode)
{
    const char* const istart = (const char*)input->src;
    const char* const iend = input->size != 0 ? istart + input->size : istart;
    const char* ip = input->pos != 0 ? istart + input->pos : istart;
    char* const ostart = (char*)output->dst;
    char* const oend = output->size != 0 ? ostart + output->size : ostart;
    char* op = output->pos != 0 ? ostart + output->pos : ostart;
    U32 someMoreWork = 1;

    /* check expectations */
    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
        assert(zcs->inBuff != NULL);
        assert(zcs->inBuffSize > 0);
    }
    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
        assert(zcs->outBuff != NULL);
        assert(zcs->outBuffSize > 0);
    }
    assert(output->pos <= output->size);
    assert(input->pos <= input->size);
    assert((U32)flushMode <= (U32)ZSTD_e_end);

    while (someMoreWork) {
        switch(zcs->streamStage)
        {
        case zcss_init:
            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");

        case zcss_load:
            if ( (flushMode == ZSTD_e_end)
              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
              && (zcs->inBuffPos == 0) ) {
                /* shortcut to compression pass directly into output buffer */
                size_t const cSize = ZSTD_compressEnd(zcs,
                                                op, oend-op, ip, iend-ip);
                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
                ip = iend;
                op += cSize;
                zcs->frameEnded = 1;
                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                someMoreWork = 0; break;
            }
            /* complete loading into inBuffer in buffered mode */
            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(
                                        zcs->inBuff + zcs->inBuffPos, toLoad,
                                        ip, iend-ip);
                zcs->inBuffPos += loaded;
                if (loaded != 0)
                    ip += loaded;
                if ( (flushMode == ZSTD_e_continue)
                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
                    /* not enough input to fill full block : stop here */
                    someMoreWork = 0; break;
                }
                if ( (flushMode == ZSTD_e_flush)
                  && (zcs->inBuffPos == zcs->inToCompress) ) {
                    /* empty */
                    someMoreWork = 0; break;
                }
            }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
                void* cDst;
                size_t cSize;
                size_t oSize = oend-op;
                size_t const iSize = inputBuffered
                    ? zcs->inBuffPos - zcs->inToCompress
                    : MIN((size_t)(iend - ip), zcs->blockSize);
                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
                    cDst = op;   /* compress into output buffer, to skip flush stage */
                else
                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 5418 zcs->frameEnded = lastBlock; 5419 /* prepare next block */ 5420 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 5421 if (zcs->inBuffTarget > zcs->inBuffSize) 5422 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 5423 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 5424 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); 5425 if (!lastBlock) 5426 assert(zcs->inBuffTarget <= zcs->inBuffSize); 5427 zcs->inToCompress = zcs->inBuffPos; 5428 } else { 5429 unsigned const lastBlock = (ip + iSize == iend); 5430 assert(flushMode == ZSTD_e_end /* Already validated */); 5431 cSize = lastBlock ? 5432 ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : 5433 ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); 5434 /* Consume the input prior to error checking to mirror buffered mode. */ 5435 if (iSize > 0) 5436 ip += iSize; 5437 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 5438 zcs->frameEnded = lastBlock; 5439 if (lastBlock) 5440 assert(ip == iend); 5441 } 5442 if (cDst == op) { /* no need to flush */ 5443 op += cSize; 5444 if (zcs->frameEnded) { 5445 DEBUGLOG(5, "Frame completed directly in outBuffer"); 5446 someMoreWork = 0; 5447 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 5448 } 5449 break; 5450 } 5451 zcs->outBuffContentSize = cSize; 5452 zcs->outBuffFlushedSize = 0; 5453 zcs->streamStage = zcss_flush; /* pass-through to flush stage */ 5454 } 5455 ZSTD_FALLTHROUGH; 5456 case zcss_flush: 5457 DEBUGLOG(5, "flush stage"); 5458 assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); 5459 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; 5460 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), 5461 zcs->outBuff + zcs->outBuffFlushedSize, toFlush); 5462 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", 5463 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); 5464 if (flushed) 5465 op += flushed; 5466 zcs->outBuffFlushedSize += flushed; 5467 if (toFlush!=flushed) { 5468 /* flush not fully completed, presumably because dst is too small */ 5469 assert(op==oend); 5470 someMoreWork = 0; 5471 break; 5472 } 5473 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; 5474 if (zcs->frameEnded) { 5475 DEBUGLOG(5, "Frame completed on flush"); 5476 someMoreWork = 0; 5477 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 5478 break; 5479 } 5480 zcs->streamStage = zcss_load; 5481 break; 5482 } 5483 5484 default: /* impossible */ 5485 assert(0); 5486 } 5487 } 5488 5489 input->pos = ip - istart; 5490 output->pos = op - ostart; 5491 if (zcs->frameEnded) return 0; 5492 return ZSTD_nextInputSizeHint(zcs); 5493 } 5494 5495 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) 5496 { 5497 #ifdef ZSTD_MULTITHREAD 5498 if (cctx->appliedParams.nbWorkers >= 1) { 5499 assert(cctx->mtctx != NULL); 5500 return ZSTDMT_nextInputSizeHint(cctx->mtctx); 5501 } 5502 #endif 5503 return ZSTD_nextInputSizeHint(cctx); 5504 5505 } 5506 5507 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 5508 { 5509 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); 5510 return ZSTD_nextInputSizeHint_MTorST(zcs); 5511 } 5512 5513 /* After a compression call set the expected input/output buffer. 5514 * This is validated at the start of the next compression call. 
/* After a compression call set the expected input/output buffer.
 * This is validated at the start of the next compression call.
 */
static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
{
    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
        cctx->expectedInBuffer = *input;
    }
    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
        cctx->expectedOutBufferSize = output->size - output->pos;
    }
}

/* Validate that the input/output buffers match the expectations set by
 * ZSTD_setBufferExpectations.
 */
static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
                                        ZSTD_outBuffer const* output,
                                        ZSTD_inBuffer const* input,
                                        ZSTD_EndDirective endOp)
{
    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
        if (endOp != ZSTD_e_end)
            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
    }
    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
        size_t const outBufferSize = output->size - output->pos;
        if (cctx->expectedOutBufferSize != outBufferSize)
            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
    }
    return 0;
}

static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
                                             ZSTD_EndDirective endOp,
                                             size_t inSize)
{
    ZSTD_CCtx_params params = cctx->requestedParams;
    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , "");   /* Init the local dict if present. */
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
    assert(prefixDict.dict==NULL || cctx->cdict==NULL);   /* only one can be set */
    if (cctx->cdict && !cctx->localDict.cdict) {
        /* Let the cdict's compression level take priority over the requested params.
         * But do not take the cdict's compression level if the "cdict" is actually a localDict
         * generated from ZSTD_initLocalDict().
         */
        params.compressionLevel = cctx->cdict->compressionLevel;
    }
    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
    {
        size_t const dictSize = prefixDict.dict
                ? prefixDict.dictSize
                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &params, cctx->pledgedSrcSizePlusOne-1,
                dictSize, mode);
    }

    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);

#ifdef ZSTD_MULTITHREAD
    if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
        params.nbWorkers = 0;   /* do not invoke multi-threading when src size is too small */
    }
    if (params.nbWorkers > 0) {
#if ZSTD_TRACE
        cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
#endif
        /* mt context creation */
        if (cctx->mtctx == NULL) {
            DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
                        params.nbWorkers);
            cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
            RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
        }
        /* mt compression */
        DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
        FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
                    cctx->mtctx,
                    prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                    cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
        cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0;
        cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize;
        cctx->consumedSrcSize = 0;
        cctx->producedCSize = 0;
        cctx->streamStage = zcss_load;
        cctx->appliedParams = params;
    } else
#endif
    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
                cctx->cdict,
                &params, pledgedSrcSize,
                ZSTDb_buffered) , "");
        assert(cctx->appliedParams.nbWorkers == 0);
        cctx->inToCompress = 0;
        cctx->inBuffPos = 0;
        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
            /* for small input: avoid automatic flush on reaching end of block, since
             * it would require adding a 3-bytes null block to end the frame
             */
            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
        } else {
            cctx->inBuffTarget = 0;
        }
        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
        cctx->streamStage = zcss_load;
        cctx->frameEnded = 0;
    }
    return 0;
}

size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions */
    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
    assert(cctx != NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
        ZSTD_setBufferExpectations(cctx, output, input);   /* Set initial buffer expectations now that we've initialized */
    }
    /* end of transparent initialization stage */

    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        size_t flushMin;
        if (cctx->cParamsChanged) {
            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
            cctx->cParamsChanged = 0;
        }
        for (;;) {
            size_t const ipos = input->pos;
            size_t const opos = output->pos;
            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
            cctx->consumedSrcSize += (U64)(input->pos - ipos);
            cctx->producedCSize += (U64)(output->pos - opos);
            if ( ZSTD_isError(flushMin)
              || (endOp == ZSTD_e_end && flushMin == 0) ) {   /* compression completed */
                if (flushMin == 0)
                    ZSTD_CCtx_trace(cctx, 0);
                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
            }
            FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");

            if (endOp == ZSTD_e_continue) {
                /* We only require some progress with ZSTD_e_continue, not maximal progress.
                 * We're done if we've consumed or produced any bytes, or either buffer is
                 * full.
                 */
                if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size)
                    break;
            } else {
                assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
                /* We require maximal progress. We're done when the flush is complete or the
                 * output buffer is full.
                 */
                if (flushMin == 0 || output->pos == output->size)
                    break;
            }
        }
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
        /* Either we don't require maximum forward progress, we've finished the
         * flush, or we are out of output space.
         */
        assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);
        ZSTD_setBufferExpectations(cctx, output, input);
        return flushMin;
    }
#endif
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    ZSTD_setBufferExpectations(cctx, output, input);
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}

size_t ZSTD_compressStream2_simpleArgs (
                ZSTD_CCtx* cctx,
                void* dst, size_t dstCapacity, size_t* dstPos,
          const void* src, size_t srcSize, size_t* srcPos,
                ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return cErr;
}

size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    /* Enable stable input/output buffers. */
    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
    {   size_t oPos = 0;
        size_t iPos = 0;
        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
                                        dst, dstCapacity, &oPos,
                                        src, srcSize, &iPos,
                                        ZSTD_e_end);
typedef struct {
    U32 idx;             /* Index in array of ZSTD_Sequence */
    U32 posInSequence;   /* Position within sequence at idx */
    size_t posInSrc;     /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;

/* ZSTD_validateSequence() :
 * @offCode : is presumed to follow the format required by ZSTD_storeSeq()
 * @returns a ZSTD error code if the sequence is not valid
 */
static size_t
ZSTD_validateSequence(U32 offCode, U32 matchLength,
                      size_t posInSrc, U32 windowLog, size_t dictSize)
{
    U32 const windowSize = 1u << windowLog;
    /* posInSrc represents the amount of data the decoder would decode up to this point.
     * As long as the amount of data decoded is less than or equal to the window size, offsets may be
     * larger than the total length of decoded output, in order to reference the dictionary, even beyond
     * the window size. Once decoded output surpasses windowSize, offsets are limited to windowSize again.
     */
    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
    return 0;
}

/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
{
    U32 offCode = STORE_OFFSET(rawOffset);

    if (!ll0 && rawOffset == rep[0]) {
        offCode = STORE_REPCODE_1;
    } else if (rawOffset == rep[1]) {
        offCode = STORE_REPCODE(2 - ll0);
    } else if (rawOffset == rep[2]) {
        offCode = STORE_REPCODE(3 - ll0);
    } else if (ll0 && rawOffset == rep[0] - 1) {
        offCode = STORE_REPCODE_3;
    }
    return offCode;
}
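/* Worked example for ZSTD_finalizeOffCode() (illustrative values):
 * given the repcode history rep = { 8, 16, 24 } :
 *   rawOffset ==  8, litLength > 0  (ll0 == 0)  => STORE_REPCODE_1
 *   rawOffset == 16, litLength > 0               => STORE_REPCODE_2
 *   rawOffset == 16, litLength == 0 (ll0 == 1)   => STORE_REPCODE_1   (repcodes shift down when ll0)
 *   rawOffset ==  7, litLength == 0              => STORE_REPCODE_3   (the rep[0] - 1 special case)
 *   any other rawOffset                          => STORE_OFFSET(rawOffset)
 */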
/* Returns 0 on success, and a ZSTD error code otherwise. This function scans through an array of
 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
 */
static size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
                                               ZSTD_sequencePosition* seqPos,
                                               const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                               const void* src, size_t blockSize)
{
    U32 idx = seqPos->idx;
    BYTE const* ip = (BYTE const*)(src);
    const BYTE* const iend = ip + blockSize;
    repcodes_t updatedRepcodes;
    U32 dictSize;

    if (cctx->cdict) {
        dictSize = (U32)cctx->cdict->dictContentSize;
    } else if (cctx->prefixDict.dict) {
        dictSize = (U32)cctx->prefixDict.dictSize;
    } else {
        dictSize = 0;
    }
    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
    /* Note: check idx bounds before dereferencing inSeqs[idx] */
    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
        U32 const litLength = inSeqs[idx].litLength;
        U32 const ll0 = (litLength == 0);
        U32 const matchLength = inSeqs[idx].matchLength;
        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);

        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
        if (cctx->appliedParams.validateSequences) {
            seqPos->posInSrc += litLength + matchLength;
            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                             "Sequence validation failed");
        }
        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
        ip += matchLength + litLength;
    }
    RETURN_ERROR_IF(idx == inSeqsSize, corruption_detected, "Block delimiter not found!");
    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

    if (inSeqs[idx].litLength) {
        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
        ip += inSeqs[idx].litLength;
        seqPos->posInSrc += inSeqs[idx].litLength;
    }
    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
    seqPos->idx = idx+1;
    return 0;
}
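/* Illustrative sketch (hypothetical values): with ZSTD_sf_explicitBlockDelimiters,
 * the caller terminates each block with an (offset == 0, matchLength == 0) delimiter
 * whose litLength covers any trailing literals. For a 100-byte block whose second
 * half repeats the first:
 *
 *     const ZSTD_Sequence seqs[] = {
 *         // offset, litLength, matchLength, rep
 *         {     50,        50,          50,   0 },   // 50 literals, then a 50-byte match at distance 50
 *         {      0,         0,           0,   0 },   // block delimiter, no trailing literals
 *     };
 *     // ZSTD_copySequencesToSeqStoreExplicitBlockDelim(cctx, &seqPos, seqs, 2, src, 100)
 *     // stores one sequence, then leaves seqPos.idx just past the delimiter.
 */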
/* Returns the number of bytes to move the current read position back by.
 * This is only non-zero if we ended up splitting a sequence.
 * It may also return a ZSTD error code if something went wrong.
 *
 * This function will attempt to scan through blockSize bytes represented by the sequences
 * in inSeqs, storing any (partial) sequences.
 *
 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
 * avoid splitting a match, or to avoid splitting a match such that it would produce a match
 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
 */
static size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                         const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                         const void* src, size_t blockSize)
{
    U32 idx = seqPos->idx;
    U32 startPosInSequence = seqPos->posInSequence;
    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
    size_t dictSize;
    BYTE const* ip = (BYTE const*)(src);
    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
    repcodes_t updatedRepcodes;
    U32 bytesAdjustment = 0;
    U32 finalMatchSplit = 0;

    if (cctx->cdict) {
        dictSize = cctx->cdict->dictContentSize;
    } else if (cctx->prefixDict.dict) {
        dictSize = cctx->prefixDict.dictSize;
    } else {
        dictSize = 0;
    }
    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
        const ZSTD_Sequence currSeq = inSeqs[idx];
        U32 litLength = currSeq.litLength;
        U32 matchLength = currSeq.matchLength;
        U32 const rawOffset = currSeq.offset;
        U32 offCode;

        /* Modify the sequence depending on where endPosInSequence lies */
        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
            if (startPosInSequence >= litLength) {
                startPosInSequence -= litLength;
                litLength = 0;
                matchLength -= startPosInSequence;
            } else {
                litLength -= startPosInSequence;
            }
            /* Move to the next sequence */
            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
            startPosInSequence = 0;
            idx++;
        } else {
            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
             * does not reach the end of the match. So, we have to split the sequence */
            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
            if (endPosInSequence > litLength) {
                U32 firstHalfMatchLength;
                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
                    /* Only ever split the match if it is larger than the block size */
                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
                        /* Move endPosInSequence backward so that it creates a match of minMatch length */
                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        firstHalfMatchLength -= bytesAdjustment;
                    }
                    matchLength = firstHalfMatchLength;
                    /* Flag that we split the last match - after storing the sequence, exit the loop,
                     * but keep the value of endPosInSequence */
                    finalMatchSplit = 1;
                } else {
                    /* Move the position in sequence backwards so that we don't split a match, and break to store
                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
                     * would cause the first half of the match to be too small.
                     */
                    bytesAdjustment = endPosInSequence - currSeq.litLength;
                    endPosInSequence = currSeq.litLength;
                    break;
                }
            } else {
                /* This sequence ends inside the literals; break to store the last literals */
                break;
            }
        }
        /* Check if this offset can be represented with a repcode */
        {   U32 const ll0 = (litLength == 0);
            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
        }

        if (cctx->appliedParams.validateSequences) {
            seqPos->posInSrc += litLength + matchLength;
            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                             "Sequence validation failed");
        }
        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
        ip += matchLength + litLength;
    }
    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
    seqPos->idx = idx;
    seqPos->posInSequence = endPosInSequence;
    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

    iend -= bytesAdjustment;
    if (ip != iend) {
        /* Store any last literals */
        U32 lastLLSize = (U32)(iend - ip);
        assert(ip <= iend);
        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
        seqPos->posInSrc += lastLLSize;
    }

    return bytesAdjustment;
}
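/* Illustrative walk-through (hypothetical values: 128 KB blocks, minMatch = 4):
 * a single input sequence { offset = 1, litLength = 0, matchLength = 300 KB } under
 * ZSTD_sf_noBlockDelimiters is consumed across three consecutive calls:
 *   block 1 : match split at 128 KB, returns 0, posInSequence = 128 KB
 *   block 2 : match split again,     returns 0, posInSequence = 256 KB
 *   block 3 : remaining 44 KB stored, sequence exhausted, idx advances
 * If a split would leave a second half shorter than minMatch, endPosInSequence is
 * moved back instead and the shortfall is returned as bytesAdjustment, shrinking
 * the current block by that amount.
 */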
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                       const void* src, size_t blockSize);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
{
    ZSTD_sequenceCopier sequenceCopier = NULL;
    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
    } else if (mode == ZSTD_sf_noBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
    }
    assert(sequenceCopier != NULL);
    return sequenceCopier;
}
/* Compress, block-by-block, all of the sequences given.
 *
 * Returns the cumulative size of all compressed blocks (including their headers),
 * or a ZSTD error code.
 */
static size_t
ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                const void* src, size_t srcSize)
{
    size_t cSize = 0;
    U32 lastBlock;
    size_t blockSize;
    size_t compressedSeqsSize;
    size_t remaining = srcSize;
    ZSTD_sequencePosition seqPos = {0, 0, 0};

    BYTE const* ip = (BYTE const*)src;
    BYTE* op = (BYTE*)dst;
    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);

    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
    /* Special case: empty frame */
    if (remaining == 0) {
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
        cSize += ZSTD_blockHeaderSize;
    }

    while (remaining) {
        size_t cBlockSize;
        size_t additionalByteAdjustment;
        lastBlock = remaining <= cctx->blockSize;
        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
        ZSTD_resetSeqStore(&cctx->seqStore);
        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);

        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
        blockSize -= additionalByteAdjustment;

        /* If the block is too small, emit it as an uncompressed block */
        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
            cSize += cBlockSize;
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            continue;
        }

        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                &cctx->appliedParams,
                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
                                blockSize,
                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                                cctx->bmi2);
        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);

        if (!cctx->isFirstBlock &&
            ZSTD_maybeRLE(&cctx->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize)) {
            /* We don't want to emit our first block as an RLE block even if it qualifies,
             * because doing so would cause the decoder (cli only) to throw a
             * "should consume all input" error. This is only an issue for zstd <= v1.4.3.
             */
            compressedSeqsSize = 1;
        }

        if (compressedSeqsSize == 0) {
            /* ZSTD_noCompressBlock writes the block header as well */
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
        } else if (compressedSeqsSize == 1) {
            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
        } else {
            U32 cBlockHeader;
            /* Error checking and repcodes update */
            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

            /* Write block header into beginning of block */
            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
            MEM_writeLE24(op, cBlockHeader);
            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
        }

        cSize += cBlockSize;
        DEBUGLOG(4, "cSize running total: %zu", cSize);

        if (lastBlock) {
            break;
        } else {
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            cctx->isFirstBlock = 0;
        }
    }

    return cSize;
}
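/* Worked example for the compressed block header above (illustrative values):
 * with lastBlock = 1 and compressedSeqsSize = 1000 :
 *   cBlockHeader = 1 + (bt_compressed << 1) + (1000 << 3)
 *                = 1 + 4 + 8000 = 8005 = 0x001F45
 * MEM_writeLE24() then emits the 3 bytes 0x45 0x1F 0x00.
 */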
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        XXH64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}
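/* Usage sketch (illustrative only; both entry points are experimental and require
 * ZSTD_STATIC_LINKING_ONLY): round-tripping a buffer through ZSTD_generateSequences()
 * and ZSTD_compressSequences(). A real caller would more typically supply sequences
 * from its own match finder. The sequence-array capacity below is a generous
 * assumption for this sketch, not a documented bound.
 *
 *     #define ZSTD_STATIC_LINKING_ONLY
 *     #include <stdlib.h>
 *     #include <zstd.h>
 *
 *     // Returns the compressed size, a ZSTD error code, or 0 on allocation failure.
 *     static size_t compress_via_sequences(void* dst, size_t dstCapacity,
 *                                          const void* src, size_t srcSize)
 *     {
 *         ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *         ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
 *         size_t nbSeqs, cSize = 0;
 *         if (cctx == NULL || seqs == NULL) goto cleanup;
 *         nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
 *         if (ZSTD_isError(nbSeqs)) { cSize = nbSeqs; goto cleanup; }
 *         ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *         // generated sequences carry explicit block delimiters, so declare that format:
 *         ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
 *         ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
 *         cSize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize);
 *     cleanup:
 *         free(seqs);
 *         ZSTD_freeCCtx(cctx);
 *         return cSize;
 *     }
 */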
/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
 * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
}


size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
    /* single thread mode : attempt to calculate remaining to flush more precisely */
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
        return toFlush;
    }
}
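/* Usage sketch (illustrative only): finishing a frame when the output buffer may be
 * smaller than what remains to flush. zcs, outBuff and outSize are assumed to be set
 * up as in the earlier streaming sketch; ZSTD_endStream() is simply called again
 * until it reports 0 (frame fully written) or an error:
 *
 *     ZSTD_outBuffer out = { outBuff, outSize, 0 };
 *     size_t remaining;
 *     do {
 *         out.pos = 0;
 *         remaining = ZSTD_endStream(zcs, &out);
 *         if (ZSTD_isError(remaining)) return remaining;
 *         // hand out.dst[0 .. out.pos) to the destination here
 *     } while (remaining != 0);
 */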
/*-=====  Pre-defined compression levels  =====-*/
#include "clevels.h"

int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }

static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
    switch (cParams.strategy) {
        case ZSTD_fast:
        case ZSTD_dfast:
            break;
        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
            break;
        case ZSTD_btlazy2:
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            break;
    }
    return cParams;
}

static int ZSTD_dedicatedDictSearch_isSupported(
        ZSTD_compressionParameters const* cParams)
{
    return (cParams->strategy >= ZSTD_greedy)
        && (cParams->strategy <= ZSTD_lazy2)
        && (cParams->hashLog > cParams->chainLog)
        && (cParams->chainLog <= 24);
}

/**
 * Reverses the adjustment applied to cparams when enabling dedicated dict
 * search. This is used to recover the params set to be used in the working
 * context. (Otherwise, those tables would also grow.)
 */
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams) {
    switch (cParams->strategy) {
        case ZSTD_fast:
        case ZSTD_dfast:
            break;
        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
                cParams->hashLog = ZSTD_HASHLOG_MIN;
            }
            break;
        case ZSTD_btlazy2:
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            break;
    }
}

static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
    case ZSTD_cpm_createCDict:
        break;
    case ZSTD_cpm_attachDict:
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }
    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
    }
}

/*! ZSTD_getCParams_internal() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
 *        Use dictSize == 0 for unknown or unused.
 *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
    int row;
    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);

    /* row */
    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
    else row = compressionLevel;

    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
        /* acceleration factor */
        if (compressionLevel < 0) {
            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
            cp.targetLength = (unsigned)(-clampedCompressionLevel);
        }
        /* refine parameters based on srcSize & dictSize */
        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
    }
}
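/* Worked example for the tableID selection above (illustrative values):
 *   rSize =  10 KB : (1) + (1) + (1) = 3  -> table tuned for <= 16 KB sources
 *   rSize = 100 KB : (1) + (1) + (0) = 2  -> table tuned for <= 128 KB sources
 *   rSize =   1 MB : (0) + (0) + (0) = 0  -> default table for large or unknown sources
 * Smaller inputs are thus steered toward parameter tables tuned for small data.
 */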
/*! ZSTD_getCParams() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}

/*! ZSTD_getParams_internal() :
 * same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
    ZSTD_memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
    return params;
}

/*! ZSTD_getParams() :
 * same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
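/* Usage sketch (illustrative only; ZSTD_getParams() is part of the experimental API):
 * inspecting the parameters zstd would pick for a given level and source size,
 * e.g. to compare levels or pre-size a static workspace. print_params() is a
 * hypothetical helper name.
 *
 *     #define ZSTD_STATIC_LINKING_ONLY
 *     #include <stdio.h>
 *     #include <zstd.h>
 *
 *     static void print_params(int level, unsigned long long srcSize)
 *     {
 *         ZSTD_parameters const p = ZSTD_getParams(level, srcSize, 0);
 *         printf("level %d, srcSize %llu: wlog=%u clog=%u hlog=%u slog=%u mml=%u tlen=%u strat=%d\n",
 *                level, srcSize,
 *                p.cParams.windowLog, p.cParams.chainLog, p.cParams.hashLog,
 *                p.cParams.searchLog, p.cParams.minMatch, p.cParams.targetLength,
 *                (int)p.cParams.strategy);
 *     }
 */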