1 /* 2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /*-************************************* 12 * Dependencies 13 ***************************************/ 14 #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ 15 #include "../common/cpu.h" 16 #include "../common/mem.h" 17 #include "hist.h" /* HIST_countFast_wksp */ 18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ 19 #include "../common/fse.h" 20 #define HUF_STATIC_LINKING_ONLY 21 #include "../common/huf.h" 22 #include "zstd_compress_internal.h" 23 #include "zstd_compress_sequences.h" 24 #include "zstd_compress_literals.h" 25 #include "zstd_fast.h" 26 #include "zstd_double_fast.h" 27 #include "zstd_lazy.h" 28 #include "zstd_opt.h" 29 #include "zstd_ldm.h" 30 #include "zstd_compress_superblock.h" 31 32 /* *************************************************************** 33 * Tuning parameters 34 *****************************************************************/ 35 /*! 36 * COMPRESS_HEAPMODE : 37 * Select how default compression function ZSTD_compress() allocates its context, 38 * on stack (0, default), or into heap (1). 39 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. 40 */ 41 #ifndef ZSTD_COMPRESS_HEAPMODE 42 # define ZSTD_COMPRESS_HEAPMODE 0 43 #endif 44 45 46 /*-************************************* 47 * Helper functions 48 ***************************************/ 49 /* ZSTD_compressBound() 50 * Note that the result from this function is only compatible with the "normal" 51 * full-block strategy. 
52 * When there are a lot of small blocks due to frequent flush in streaming mode 53 * the overhead of headers can make the compressed data to be larger than the 54 * return value of ZSTD_compressBound(). 55 */ 56 size_t ZSTD_compressBound(size_t srcSize) { 57 return ZSTD_COMPRESSBOUND(srcSize); 58 } 59 60 61 /*-************************************* 62 * Context memory management 63 ***************************************/ 64 struct ZSTD_CDict_s { 65 const void* dictContent; 66 size_t dictContentSize; 67 ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ 68 U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ 69 ZSTD_cwksp workspace; 70 ZSTD_matchState_t matchState; 71 ZSTD_compressedBlockState_t cBlockState; 72 ZSTD_customMem customMem; 73 U32 dictID; 74 int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ 75 }; /* typedef'd to ZSTD_CDict within "zstd.h" */ 76 77 ZSTD_CCtx* ZSTD_createCCtx(void) 78 { 79 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); 80 } 81 82 static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) 83 { 84 assert(cctx != NULL); 85 ZSTD_memset(cctx, 0, sizeof(*cctx)); 86 cctx->customMem = memManager; 87 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); 88 { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); 89 assert(!ZSTD_isError(err)); 90 (void)err; 91 } 92 } 93 94 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) 95 { 96 ZSTD_STATIC_ASSERT(zcss_init==0); 97 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); 98 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 99 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); 100 if (!cctx) return NULL; 101 ZSTD_initCCtx(cctx, customMem); 102 return cctx; 103 } 104 } 105 106 ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) 107 { 108 ZSTD_cwksp ws; 109 ZSTD_CCtx* cctx; 110 if (workspaceSize 
<= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ 111 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ 112 ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); 113 114 cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); 115 if (cctx == NULL) return NULL; 116 117 ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); 118 ZSTD_cwksp_move(&cctx->workspace, &ws); 119 cctx->staticSize = workspaceSize; 120 121 /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ 122 if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; 123 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); 124 cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); 125 cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); 126 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); 127 return cctx; 128 } 129 130 /** 131 * Clears and frees all of the dictionaries in the CCtx. 132 */ 133 static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) 134 { 135 ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); 136 ZSTD_freeCDict(cctx->localDict.cdict); 137 ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); 138 ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); 139 cctx->cdict = NULL; 140 } 141 142 static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) 143 { 144 size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; 145 size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); 146 return bufferSize + cdictSize; 147 } 148 149 static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) 150 { 151 assert(cctx != NULL); 152 assert(cctx->staticSize == 0); 153 ZSTD_clearAllDicts(cctx); 154 #ifdef ZSTD_MULTITHREAD 155 ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; 156 #endif 157 ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); 158 } 159 160 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) 161 { 162 if (cctx==NULL) return 0; /* support free on NULL */ 163 RETURN_ERROR_IF(cctx->staticSize, memory_allocation, 164 "not compatible with static CCtx"); 165 { 166 int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); 167 ZSTD_freeCCtxContent(cctx); 168 if (!cctxInWorkspace) { 169 ZSTD_customFree(cctx, cctx->customMem); 170 } 171 } 172 return 0; 173 } 174 175 176 static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) 177 { 178 #ifdef ZSTD_MULTITHREAD 179 return ZSTDMT_sizeof_CCtx(cctx->mtctx); 180 #else 181 (void)cctx; 182 return 0; 183 #endif 184 } 185 186 187 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) 188 { 189 if (cctx==NULL) return 0; /* support sizeof on NULL */ 190 /* cctx may be in the workspace */ 191 return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) 192 + ZSTD_cwksp_sizeof(&cctx->workspace) 193 + ZSTD_sizeof_localDict(cctx->localDict) 194 + ZSTD_sizeof_mtctx(cctx); 195 } 196 197 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) 198 { 199 return ZSTD_sizeof_CCtx(zcs); /* same object */ 200 } 201 202 /* private API call, for dictBuilder only */ 203 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } 204 205 /* Returns 1 if compression parameters are such that we should 206 * enable long distance matching (wlog >= 27, strategy >= btopt). 207 * Returns 0 otherwise. 
208 */ 209 static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { 210 return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; 211 } 212 213 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( 214 ZSTD_compressionParameters cParams) 215 { 216 ZSTD_CCtx_params cctxParams; 217 /* should not matter, as all cParams are presumed properly defined */ 218 ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); 219 cctxParams.cParams = cParams; 220 221 if (ZSTD_CParams_shouldEnableLdm(&cParams)) { 222 DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); 223 cctxParams.ldmParams.enableLdm = 1; 224 /* LDM is enabled by default for optimal parser and window size >= 128MB */ 225 ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); 226 assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); 227 assert(cctxParams.ldmParams.hashRateLog < 32); 228 } 229 230 assert(!ZSTD_checkCParams(cParams)); 231 return cctxParams; 232 } 233 234 static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( 235 ZSTD_customMem customMem) 236 { 237 ZSTD_CCtx_params* params; 238 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 239 params = (ZSTD_CCtx_params*)ZSTD_customCalloc( 240 sizeof(ZSTD_CCtx_params), customMem); 241 if (!params) { return NULL; } 242 ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); 243 params->customMem = customMem; 244 return params; 245 } 246 247 ZSTD_CCtx_params* ZSTD_createCCtxParams(void) 248 { 249 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); 250 } 251 252 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) 253 { 254 if (params == NULL) { return 0; } 255 ZSTD_customFree(params, params->customMem); 256 return 0; 257 } 258 259 size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) 260 { 261 return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); 262 } 263 264 size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { 
265 RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); 266 ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); 267 cctxParams->compressionLevel = compressionLevel; 268 cctxParams->fParams.contentSizeFlag = 1; 269 return 0; 270 } 271 272 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) 273 { 274 RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); 275 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); 276 ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); 277 assert(!ZSTD_checkCParams(params.cParams)); 278 cctxParams->cParams = params.cParams; 279 cctxParams->fParams = params.fParams; 280 cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ 281 return 0; 282 } 283 284 /* ZSTD_assignParamsToCCtxParams() : 285 * params is presumed valid at this stage */ 286 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( 287 const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) 288 { 289 ZSTD_CCtx_params ret = *cctxParams; 290 assert(!ZSTD_checkCParams(params->cParams)); 291 ret.cParams = params->cParams; 292 ret.fParams = params->fParams; 293 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ 294 return ret; 295 } 296 297 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) 298 { 299 ZSTD_bounds bounds = { 0, 0, 0 }; 300 301 switch(param) 302 { 303 case ZSTD_c_compressionLevel: 304 bounds.lowerBound = ZSTD_minCLevel(); 305 bounds.upperBound = ZSTD_maxCLevel(); 306 return bounds; 307 308 case ZSTD_c_windowLog: 309 bounds.lowerBound = ZSTD_WINDOWLOG_MIN; 310 bounds.upperBound = ZSTD_WINDOWLOG_MAX; 311 return bounds; 312 313 case ZSTD_c_hashLog: 314 bounds.lowerBound = ZSTD_HASHLOG_MIN; 315 bounds.upperBound = ZSTD_HASHLOG_MAX; 316 return bounds; 317 318 case ZSTD_c_chainLog: 319 bounds.lowerBound = ZSTD_CHAINLOG_MIN; 320 bounds.upperBound = ZSTD_CHAINLOG_MAX; 321 return bounds; 
322 323 case ZSTD_c_searchLog: 324 bounds.lowerBound = ZSTD_SEARCHLOG_MIN; 325 bounds.upperBound = ZSTD_SEARCHLOG_MAX; 326 return bounds; 327 328 case ZSTD_c_minMatch: 329 bounds.lowerBound = ZSTD_MINMATCH_MIN; 330 bounds.upperBound = ZSTD_MINMATCH_MAX; 331 return bounds; 332 333 case ZSTD_c_targetLength: 334 bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; 335 bounds.upperBound = ZSTD_TARGETLENGTH_MAX; 336 return bounds; 337 338 case ZSTD_c_strategy: 339 bounds.lowerBound = ZSTD_STRATEGY_MIN; 340 bounds.upperBound = ZSTD_STRATEGY_MAX; 341 return bounds; 342 343 case ZSTD_c_contentSizeFlag: 344 bounds.lowerBound = 0; 345 bounds.upperBound = 1; 346 return bounds; 347 348 case ZSTD_c_checksumFlag: 349 bounds.lowerBound = 0; 350 bounds.upperBound = 1; 351 return bounds; 352 353 case ZSTD_c_dictIDFlag: 354 bounds.lowerBound = 0; 355 bounds.upperBound = 1; 356 return bounds; 357 358 case ZSTD_c_nbWorkers: 359 bounds.lowerBound = 0; 360 #ifdef ZSTD_MULTITHREAD 361 bounds.upperBound = ZSTDMT_NBWORKERS_MAX; 362 #else 363 bounds.upperBound = 0; 364 #endif 365 return bounds; 366 367 case ZSTD_c_jobSize: 368 bounds.lowerBound = 0; 369 #ifdef ZSTD_MULTITHREAD 370 bounds.upperBound = ZSTDMT_JOBSIZE_MAX; 371 #else 372 bounds.upperBound = 0; 373 #endif 374 return bounds; 375 376 case ZSTD_c_overlapLog: 377 #ifdef ZSTD_MULTITHREAD 378 bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; 379 bounds.upperBound = ZSTD_OVERLAPLOG_MAX; 380 #else 381 bounds.lowerBound = 0; 382 bounds.upperBound = 0; 383 #endif 384 return bounds; 385 386 case ZSTD_c_enableDedicatedDictSearch: 387 bounds.lowerBound = 0; 388 bounds.upperBound = 1; 389 return bounds; 390 391 case ZSTD_c_enableLongDistanceMatching: 392 bounds.lowerBound = 0; 393 bounds.upperBound = 1; 394 return bounds; 395 396 case ZSTD_c_ldmHashLog: 397 bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; 398 bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; 399 return bounds; 400 401 case ZSTD_c_ldmMinMatch: 402 bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; 403 
bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; 404 return bounds; 405 406 case ZSTD_c_ldmBucketSizeLog: 407 bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; 408 bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; 409 return bounds; 410 411 case ZSTD_c_ldmHashRateLog: 412 bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; 413 bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; 414 return bounds; 415 416 /* experimental parameters */ 417 case ZSTD_c_rsyncable: 418 bounds.lowerBound = 0; 419 bounds.upperBound = 1; 420 return bounds; 421 422 case ZSTD_c_forceMaxWindow : 423 bounds.lowerBound = 0; 424 bounds.upperBound = 1; 425 return bounds; 426 427 case ZSTD_c_format: 428 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); 429 bounds.lowerBound = ZSTD_f_zstd1; 430 bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ 431 return bounds; 432 433 case ZSTD_c_forceAttachDict: 434 ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); 435 bounds.lowerBound = ZSTD_dictDefaultAttach; 436 bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ 437 return bounds; 438 439 case ZSTD_c_literalCompressionMode: 440 ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); 441 bounds.lowerBound = ZSTD_lcm_auto; 442 bounds.upperBound = ZSTD_lcm_uncompressed; 443 return bounds; 444 445 case ZSTD_c_targetCBlockSize: 446 bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; 447 bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; 448 return bounds; 449 450 case ZSTD_c_srcSizeHint: 451 bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; 452 bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; 453 return bounds; 454 455 case ZSTD_c_stableInBuffer: 456 case ZSTD_c_stableOutBuffer: 457 bounds.lowerBound = (int)ZSTD_bm_buffered; 458 bounds.upperBound = (int)ZSTD_bm_stable; 459 return bounds; 460 461 case ZSTD_c_blockDelimiters: 462 bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; 463 bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; 464 return bounds; 465 466 case ZSTD_c_validateSequences: 467 bounds.lowerBound = 0; 468 bounds.upperBound = 1; 469 return bounds; 470 471 default: 472 bounds.error = ERROR(parameter_unsupported); 473 return bounds; 474 } 475 } 476 477 /* ZSTD_cParam_clampBounds: 478 * Clamps the value into the bounded range. 
479 */ 480 static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) 481 { 482 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); 483 if (ZSTD_isError(bounds.error)) return bounds.error; 484 if (*value < bounds.lowerBound) *value = bounds.lowerBound; 485 if (*value > bounds.upperBound) *value = bounds.upperBound; 486 return 0; 487 } 488 489 #define BOUNDCHECK(cParam, val) { \ 490 RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ 491 parameter_outOfBound, "Param out of bounds"); \ 492 } 493 494 495 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) 496 { 497 switch(param) 498 { 499 case ZSTD_c_compressionLevel: 500 case ZSTD_c_hashLog: 501 case ZSTD_c_chainLog: 502 case ZSTD_c_searchLog: 503 case ZSTD_c_minMatch: 504 case ZSTD_c_targetLength: 505 case ZSTD_c_strategy: 506 return 1; 507 508 case ZSTD_c_format: 509 case ZSTD_c_windowLog: 510 case ZSTD_c_contentSizeFlag: 511 case ZSTD_c_checksumFlag: 512 case ZSTD_c_dictIDFlag: 513 case ZSTD_c_forceMaxWindow : 514 case ZSTD_c_nbWorkers: 515 case ZSTD_c_jobSize: 516 case ZSTD_c_overlapLog: 517 case ZSTD_c_rsyncable: 518 case ZSTD_c_enableDedicatedDictSearch: 519 case ZSTD_c_enableLongDistanceMatching: 520 case ZSTD_c_ldmHashLog: 521 case ZSTD_c_ldmMinMatch: 522 case ZSTD_c_ldmBucketSizeLog: 523 case ZSTD_c_ldmHashRateLog: 524 case ZSTD_c_forceAttachDict: 525 case ZSTD_c_literalCompressionMode: 526 case ZSTD_c_targetCBlockSize: 527 case ZSTD_c_srcSizeHint: 528 case ZSTD_c_stableInBuffer: 529 case ZSTD_c_stableOutBuffer: 530 case ZSTD_c_blockDelimiters: 531 case ZSTD_c_validateSequences: 532 default: 533 return 0; 534 } 535 } 536 537 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) 538 { 539 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); 540 if (cctx->streamStage != zcss_init) { 541 if (ZSTD_isUpdateAuthorized(param)) { 542 cctx->cParamsChanged = 1; 543 } else { 544 RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); 
545 } } 546 547 switch(param) 548 { 549 case ZSTD_c_nbWorkers: 550 RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, 551 "MT not compatible with static alloc"); 552 break; 553 554 case ZSTD_c_compressionLevel: 555 case ZSTD_c_windowLog: 556 case ZSTD_c_hashLog: 557 case ZSTD_c_chainLog: 558 case ZSTD_c_searchLog: 559 case ZSTD_c_minMatch: 560 case ZSTD_c_targetLength: 561 case ZSTD_c_strategy: 562 case ZSTD_c_ldmHashRateLog: 563 case ZSTD_c_format: 564 case ZSTD_c_contentSizeFlag: 565 case ZSTD_c_checksumFlag: 566 case ZSTD_c_dictIDFlag: 567 case ZSTD_c_forceMaxWindow: 568 case ZSTD_c_forceAttachDict: 569 case ZSTD_c_literalCompressionMode: 570 case ZSTD_c_jobSize: 571 case ZSTD_c_overlapLog: 572 case ZSTD_c_rsyncable: 573 case ZSTD_c_enableDedicatedDictSearch: 574 case ZSTD_c_enableLongDistanceMatching: 575 case ZSTD_c_ldmHashLog: 576 case ZSTD_c_ldmMinMatch: 577 case ZSTD_c_ldmBucketSizeLog: 578 case ZSTD_c_targetCBlockSize: 579 case ZSTD_c_srcSizeHint: 580 case ZSTD_c_stableInBuffer: 581 case ZSTD_c_stableOutBuffer: 582 case ZSTD_c_blockDelimiters: 583 case ZSTD_c_validateSequences: 584 break; 585 586 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); 587 } 588 return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); 589 } 590 591 size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, 592 ZSTD_cParameter param, int value) 593 { 594 DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); 595 switch(param) 596 { 597 case ZSTD_c_format : 598 BOUNDCHECK(ZSTD_c_format, value); 599 CCtxParams->format = (ZSTD_format_e)value; 600 return (size_t)CCtxParams->format; 601 602 case ZSTD_c_compressionLevel : { 603 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); 604 if (value == 0) 605 CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ 606 else 607 CCtxParams->compressionLevel = value; 608 if (CCtxParams->compressionLevel >= 0) return 
(size_t)CCtxParams->compressionLevel; 609 return 0; /* return type (size_t) cannot represent negative values */ 610 } 611 612 case ZSTD_c_windowLog : 613 if (value!=0) /* 0 => use default */ 614 BOUNDCHECK(ZSTD_c_windowLog, value); 615 CCtxParams->cParams.windowLog = (U32)value; 616 return CCtxParams->cParams.windowLog; 617 618 case ZSTD_c_hashLog : 619 if (value!=0) /* 0 => use default */ 620 BOUNDCHECK(ZSTD_c_hashLog, value); 621 CCtxParams->cParams.hashLog = (U32)value; 622 return CCtxParams->cParams.hashLog; 623 624 case ZSTD_c_chainLog : 625 if (value!=0) /* 0 => use default */ 626 BOUNDCHECK(ZSTD_c_chainLog, value); 627 CCtxParams->cParams.chainLog = (U32)value; 628 return CCtxParams->cParams.chainLog; 629 630 case ZSTD_c_searchLog : 631 if (value!=0) /* 0 => use default */ 632 BOUNDCHECK(ZSTD_c_searchLog, value); 633 CCtxParams->cParams.searchLog = (U32)value; 634 return (size_t)value; 635 636 case ZSTD_c_minMatch : 637 if (value!=0) /* 0 => use default */ 638 BOUNDCHECK(ZSTD_c_minMatch, value); 639 CCtxParams->cParams.minMatch = value; 640 return CCtxParams->cParams.minMatch; 641 642 case ZSTD_c_targetLength : 643 BOUNDCHECK(ZSTD_c_targetLength, value); 644 CCtxParams->cParams.targetLength = value; 645 return CCtxParams->cParams.targetLength; 646 647 case ZSTD_c_strategy : 648 if (value!=0) /* 0 => use default */ 649 BOUNDCHECK(ZSTD_c_strategy, value); 650 CCtxParams->cParams.strategy = (ZSTD_strategy)value; 651 return (size_t)CCtxParams->cParams.strategy; 652 653 case ZSTD_c_contentSizeFlag : 654 /* Content size written in frame header _when known_ (default:1) */ 655 DEBUGLOG(4, "set content size flag = %u", (value!=0)); 656 CCtxParams->fParams.contentSizeFlag = value != 0; 657 return CCtxParams->fParams.contentSizeFlag; 658 659 case ZSTD_c_checksumFlag : 660 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ 661 CCtxParams->fParams.checksumFlag = value != 0; 662 return CCtxParams->fParams.checksumFlag; 663 664 case 
ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ 665 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); 666 CCtxParams->fParams.noDictIDFlag = !value; 667 return !CCtxParams->fParams.noDictIDFlag; 668 669 case ZSTD_c_forceMaxWindow : 670 CCtxParams->forceWindow = (value != 0); 671 return CCtxParams->forceWindow; 672 673 case ZSTD_c_forceAttachDict : { 674 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; 675 BOUNDCHECK(ZSTD_c_forceAttachDict, pref); 676 CCtxParams->attachDictPref = pref; 677 return CCtxParams->attachDictPref; 678 } 679 680 case ZSTD_c_literalCompressionMode : { 681 const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; 682 BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); 683 CCtxParams->literalCompressionMode = lcm; 684 return CCtxParams->literalCompressionMode; 685 } 686 687 case ZSTD_c_nbWorkers : 688 #ifndef ZSTD_MULTITHREAD 689 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); 690 return 0; 691 #else 692 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); 693 CCtxParams->nbWorkers = value; 694 return CCtxParams->nbWorkers; 695 #endif 696 697 case ZSTD_c_jobSize : 698 #ifndef ZSTD_MULTITHREAD 699 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); 700 return 0; 701 #else 702 /* Adjust to the minimum non-default value. 
*/ 703 if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) 704 value = ZSTDMT_JOBSIZE_MIN; 705 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); 706 assert(value >= 0); 707 CCtxParams->jobSize = value; 708 return CCtxParams->jobSize; 709 #endif 710 711 case ZSTD_c_overlapLog : 712 #ifndef ZSTD_MULTITHREAD 713 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); 714 return 0; 715 #else 716 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); 717 CCtxParams->overlapLog = value; 718 return CCtxParams->overlapLog; 719 #endif 720 721 case ZSTD_c_rsyncable : 722 #ifndef ZSTD_MULTITHREAD 723 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); 724 return 0; 725 #else 726 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); 727 CCtxParams->rsyncable = value; 728 return CCtxParams->rsyncable; 729 #endif 730 731 case ZSTD_c_enableDedicatedDictSearch : 732 CCtxParams->enableDedicatedDictSearch = (value!=0); 733 return CCtxParams->enableDedicatedDictSearch; 734 735 case ZSTD_c_enableLongDistanceMatching : 736 CCtxParams->ldmParams.enableLdm = (value!=0); 737 return CCtxParams->ldmParams.enableLdm; 738 739 case ZSTD_c_ldmHashLog : 740 if (value!=0) /* 0 ==> auto */ 741 BOUNDCHECK(ZSTD_c_ldmHashLog, value); 742 CCtxParams->ldmParams.hashLog = value; 743 return CCtxParams->ldmParams.hashLog; 744 745 case ZSTD_c_ldmMinMatch : 746 if (value!=0) /* 0 ==> default */ 747 BOUNDCHECK(ZSTD_c_ldmMinMatch, value); 748 CCtxParams->ldmParams.minMatchLength = value; 749 return CCtxParams->ldmParams.minMatchLength; 750 751 case ZSTD_c_ldmBucketSizeLog : 752 if (value!=0) /* 0 ==> default */ 753 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); 754 CCtxParams->ldmParams.bucketSizeLog = value; 755 return CCtxParams->ldmParams.bucketSizeLog; 756 757 case ZSTD_c_ldmHashRateLog : 758 RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, 759 parameter_outOfBound, "Param out of 
bounds!"); 760 CCtxParams->ldmParams.hashRateLog = value; 761 return CCtxParams->ldmParams.hashRateLog; 762 763 case ZSTD_c_targetCBlockSize : 764 if (value!=0) /* 0 ==> default */ 765 BOUNDCHECK(ZSTD_c_targetCBlockSize, value); 766 CCtxParams->targetCBlockSize = value; 767 return CCtxParams->targetCBlockSize; 768 769 case ZSTD_c_srcSizeHint : 770 if (value!=0) /* 0 ==> default */ 771 BOUNDCHECK(ZSTD_c_srcSizeHint, value); 772 CCtxParams->srcSizeHint = value; 773 return CCtxParams->srcSizeHint; 774 775 case ZSTD_c_stableInBuffer: 776 BOUNDCHECK(ZSTD_c_stableInBuffer, value); 777 CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; 778 return CCtxParams->inBufferMode; 779 780 case ZSTD_c_stableOutBuffer: 781 BOUNDCHECK(ZSTD_c_stableOutBuffer, value); 782 CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; 783 return CCtxParams->outBufferMode; 784 785 case ZSTD_c_blockDelimiters: 786 BOUNDCHECK(ZSTD_c_blockDelimiters, value); 787 CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; 788 return CCtxParams->blockDelimiters; 789 790 case ZSTD_c_validateSequences: 791 BOUNDCHECK(ZSTD_c_validateSequences, value); 792 CCtxParams->validateSequences = value; 793 return CCtxParams->validateSequences; 794 795 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); 796 } 797 } 798 799 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) 800 { 801 return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); 802 } 803 804 size_t ZSTD_CCtxParams_getParameter( 805 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) 806 { 807 switch(param) 808 { 809 case ZSTD_c_format : 810 *value = CCtxParams->format; 811 break; 812 case ZSTD_c_compressionLevel : 813 *value = CCtxParams->compressionLevel; 814 break; 815 case ZSTD_c_windowLog : 816 *value = (int)CCtxParams->cParams.windowLog; 817 break; 818 case ZSTD_c_hashLog : 819 *value = (int)CCtxParams->cParams.hashLog; 820 break; 821 case ZSTD_c_chainLog : 822 *value = 
(int)CCtxParams->cParams.chainLog; 823 break; 824 case ZSTD_c_searchLog : 825 *value = CCtxParams->cParams.searchLog; 826 break; 827 case ZSTD_c_minMatch : 828 *value = CCtxParams->cParams.minMatch; 829 break; 830 case ZSTD_c_targetLength : 831 *value = CCtxParams->cParams.targetLength; 832 break; 833 case ZSTD_c_strategy : 834 *value = (unsigned)CCtxParams->cParams.strategy; 835 break; 836 case ZSTD_c_contentSizeFlag : 837 *value = CCtxParams->fParams.contentSizeFlag; 838 break; 839 case ZSTD_c_checksumFlag : 840 *value = CCtxParams->fParams.checksumFlag; 841 break; 842 case ZSTD_c_dictIDFlag : 843 *value = !CCtxParams->fParams.noDictIDFlag; 844 break; 845 case ZSTD_c_forceMaxWindow : 846 *value = CCtxParams->forceWindow; 847 break; 848 case ZSTD_c_forceAttachDict : 849 *value = CCtxParams->attachDictPref; 850 break; 851 case ZSTD_c_literalCompressionMode : 852 *value = CCtxParams->literalCompressionMode; 853 break; 854 case ZSTD_c_nbWorkers : 855 #ifndef ZSTD_MULTITHREAD 856 assert(CCtxParams->nbWorkers == 0); 857 #endif 858 *value = CCtxParams->nbWorkers; 859 break; 860 case ZSTD_c_jobSize : 861 #ifndef ZSTD_MULTITHREAD 862 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); 863 #else 864 assert(CCtxParams->jobSize <= INT_MAX); 865 *value = (int)CCtxParams->jobSize; 866 break; 867 #endif 868 case ZSTD_c_overlapLog : 869 #ifndef ZSTD_MULTITHREAD 870 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); 871 #else 872 *value = CCtxParams->overlapLog; 873 break; 874 #endif 875 case ZSTD_c_rsyncable : 876 #ifndef ZSTD_MULTITHREAD 877 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); 878 #else 879 *value = CCtxParams->rsyncable; 880 break; 881 #endif 882 case ZSTD_c_enableDedicatedDictSearch : 883 *value = CCtxParams->enableDedicatedDictSearch; 884 break; 885 case ZSTD_c_enableLongDistanceMatching : 886 *value = CCtxParams->ldmParams.enableLdm; 887 break; 888 case ZSTD_c_ldmHashLog : 889 *value = 
CCtxParams->ldmParams.hashLog; 890 break; 891 case ZSTD_c_ldmMinMatch : 892 *value = CCtxParams->ldmParams.minMatchLength; 893 break; 894 case ZSTD_c_ldmBucketSizeLog : 895 *value = CCtxParams->ldmParams.bucketSizeLog; 896 break; 897 case ZSTD_c_ldmHashRateLog : 898 *value = CCtxParams->ldmParams.hashRateLog; 899 break; 900 case ZSTD_c_targetCBlockSize : 901 *value = (int)CCtxParams->targetCBlockSize; 902 break; 903 case ZSTD_c_srcSizeHint : 904 *value = (int)CCtxParams->srcSizeHint; 905 break; 906 case ZSTD_c_stableInBuffer : 907 *value = (int)CCtxParams->inBufferMode; 908 break; 909 case ZSTD_c_stableOutBuffer : 910 *value = (int)CCtxParams->outBufferMode; 911 break; 912 case ZSTD_c_blockDelimiters : 913 *value = (int)CCtxParams->blockDelimiters; 914 break; 915 case ZSTD_c_validateSequences : 916 *value = (int)CCtxParams->validateSequences; 917 break; 918 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); 919 } 920 return 0; 921 } 922 923 /** ZSTD_CCtx_setParametersUsingCCtxParams() : 924 * just applies `params` into `cctx` 925 * no action is performed, parameters are merely stored. 926 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. 927 * This is possible even if a compression is ongoing. 928 * In which case, new parameters will be applied on the fly, starting with next compression job. 
929 */ 930 size_t ZSTD_CCtx_setParametersUsingCCtxParams( 931 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) 932 { 933 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); 934 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 935 "The context is in the wrong stage!"); 936 RETURN_ERROR_IF(cctx->cdict, stage_wrong, 937 "Can't override parameters with cdict attached (some must " 938 "be inherited from the cdict)."); 939 940 cctx->requestedParams = *params; 941 return 0; 942 } 943 944 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) 945 { 946 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); 947 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 948 "Can't set pledgedSrcSize when not in init stage."); 949 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; 950 return 0; 951 } 952 953 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( 954 int const compressionLevel, 955 size_t const dictSize); 956 static int ZSTD_dedicatedDictSearch_isSupported( 957 const ZSTD_compressionParameters* cParams); 958 static void ZSTD_dedicatedDictSearch_revertCParams( 959 ZSTD_compressionParameters* cParams); 960 961 /** 962 * Initializes the local dict using the requested parameters. 963 * NOTE: This does not use the pledged src size, because it may be used for more 964 * than one compression. 965 */ 966 static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) 967 { 968 ZSTD_localDict* const dl = &cctx->localDict; 969 if (dl->dict == NULL) { 970 /* No local dictionary. */ 971 assert(dl->dictBuffer == NULL); 972 assert(dl->cdict == NULL); 973 assert(dl->dictSize == 0); 974 return 0; 975 } 976 if (dl->cdict != NULL) { 977 assert(cctx->cdict == dl->cdict); 978 /* Local dictionary already initialized. 
*/ 979 return 0; 980 } 981 assert(dl->dictSize > 0); 982 assert(cctx->cdict == NULL); 983 assert(cctx->prefixDict.dict == NULL); 984 985 dl->cdict = ZSTD_createCDict_advanced2( 986 dl->dict, 987 dl->dictSize, 988 ZSTD_dlm_byRef, 989 dl->dictContentType, 990 &cctx->requestedParams, 991 cctx->customMem); 992 RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); 993 cctx->cdict = dl->cdict; 994 return 0; 995 } 996 997 size_t ZSTD_CCtx_loadDictionary_advanced( 998 ZSTD_CCtx* cctx, const void* dict, size_t dictSize, 999 ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) 1000 { 1001 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1002 "Can't load a dictionary when ctx is not in init stage."); 1003 DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); 1004 ZSTD_clearAllDicts(cctx); /* in case one already exists */ 1005 if (dict == NULL || dictSize == 0) /* no dictionary mode */ 1006 return 0; 1007 if (dictLoadMethod == ZSTD_dlm_byRef) { 1008 cctx->localDict.dict = dict; 1009 } else { 1010 void* dictBuffer; 1011 RETURN_ERROR_IF(cctx->staticSize, memory_allocation, 1012 "no malloc for static CCtx"); 1013 dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); 1014 RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); 1015 ZSTD_memcpy(dictBuffer, dict, dictSize); 1016 cctx->localDict.dictBuffer = dictBuffer; 1017 cctx->localDict.dict = dictBuffer; 1018 } 1019 cctx->localDict.dictSize = dictSize; 1020 cctx->localDict.dictContentType = dictContentType; 1021 return 0; 1022 } 1023 1024 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( 1025 ZSTD_CCtx* cctx, const void* dict, size_t dictSize) 1026 { 1027 return ZSTD_CCtx_loadDictionary_advanced( 1028 cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); 1029 } 1030 1031 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) 1032 { 1033 return ZSTD_CCtx_loadDictionary_advanced( 
1034 cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); 1035 } 1036 1037 1038 size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 1039 { 1040 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1041 "Can't ref a dict when ctx not in init stage."); 1042 /* Free the existing local cdict (if any) to save memory. */ 1043 ZSTD_clearAllDicts(cctx); 1044 cctx->cdict = cdict; 1045 return 0; 1046 } 1047 1048 size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) 1049 { 1050 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1051 "Can't ref a pool when ctx not in init stage."); 1052 cctx->pool = pool; 1053 return 0; 1054 } 1055 1056 size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) 1057 { 1058 return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); 1059 } 1060 1061 size_t ZSTD_CCtx_refPrefix_advanced( 1062 ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) 1063 { 1064 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1065 "Can't ref a prefix when ctx not in init stage."); 1066 ZSTD_clearAllDicts(cctx); 1067 if (prefix != NULL && prefixSize > 0) { 1068 cctx->prefixDict.dict = prefix; 1069 cctx->prefixDict.dictSize = prefixSize; 1070 cctx->prefixDict.dictContentType = dictContentType; 1071 } 1072 return 0; 1073 } 1074 1075 /*! 
ZSTD_CCtx_reset() : 1076 * Also dumps dictionary */ 1077 size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) 1078 { 1079 if ( (reset == ZSTD_reset_session_only) 1080 || (reset == ZSTD_reset_session_and_parameters) ) { 1081 cctx->streamStage = zcss_init; 1082 cctx->pledgedSrcSizePlusOne = 0; 1083 } 1084 if ( (reset == ZSTD_reset_parameters) 1085 || (reset == ZSTD_reset_session_and_parameters) ) { 1086 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1087 "Can't reset parameters only when not in init stage."); 1088 ZSTD_clearAllDicts(cctx); 1089 return ZSTD_CCtxParams_reset(&cctx->requestedParams); 1090 } 1091 return 0; 1092 } 1093 1094 1095 /** ZSTD_checkCParams() : 1096 control CParam values remain within authorized range. 1097 @return : 0, or an error code if one value is beyond authorized range */ 1098 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) 1099 { 1100 BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); 1101 BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); 1102 BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); 1103 BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); 1104 BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); 1105 BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); 1106 BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); 1107 return 0; 1108 } 1109 1110 /** ZSTD_clampCParams() : 1111 * make CParam values within valid range. 
1112 * @return : valid CParams */ 1113 static ZSTD_compressionParameters 1114 ZSTD_clampCParams(ZSTD_compressionParameters cParams) 1115 { 1116 # define CLAMP_TYPE(cParam, val, type) { \ 1117 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ 1118 if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ 1119 else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ 1120 } 1121 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) 1122 CLAMP(ZSTD_c_windowLog, cParams.windowLog); 1123 CLAMP(ZSTD_c_chainLog, cParams.chainLog); 1124 CLAMP(ZSTD_c_hashLog, cParams.hashLog); 1125 CLAMP(ZSTD_c_searchLog, cParams.searchLog); 1126 CLAMP(ZSTD_c_minMatch, cParams.minMatch); 1127 CLAMP(ZSTD_c_targetLength,cParams.targetLength); 1128 CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); 1129 return cParams; 1130 } 1131 1132 /** ZSTD_cycleLog() : 1133 * condition for correct operation : hashLog > 1 */ 1134 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) 1135 { 1136 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); 1137 return hashLog - btScale; 1138 } 1139 1140 /** ZSTD_dictAndWindowLog() : 1141 * Returns an adjusted window log that is large enough to fit the source and the dictionary. 1142 * The zstd format says that the entire dictionary is valid if one byte of the dictionary 1143 * is within the window. So the hashLog and chainLog should be large enough to reference both 1144 * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing 1145 * the hashLog and windowLog. 1146 * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. 
1147 */ 1148 static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) 1149 { 1150 const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; 1151 /* No dictionary ==> No change */ 1152 if (dictSize == 0) { 1153 return windowLog; 1154 } 1155 assert(windowLog <= ZSTD_WINDOWLOG_MAX); 1156 assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ 1157 { 1158 U64 const windowSize = 1ULL << windowLog; 1159 U64 const dictAndWindowSize = dictSize + windowSize; 1160 /* If the window size is already large enough to fit both the source and the dictionary 1161 * then just use the window size. Otherwise adjust so that it fits the dictionary and 1162 * the window. 1163 */ 1164 if (windowSize >= dictSize + srcSize) { 1165 return windowLog; /* Window size large enough already */ 1166 } else if (dictAndWindowSize >= maxWindowSize) { 1167 return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ 1168 } else { 1169 return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; 1170 } 1171 } 1172 } 1173 1174 /** ZSTD_adjustCParams_internal() : 1175 * optimize `cPar` for a specified input (`srcSize` and `dictSize`). 1176 * mostly downsize to reduce memory consumption and initialization latency. 1177 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. 1178 * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. 1179 * note : `srcSize==0` means 0! 1180 * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). 
*/ 1181 static ZSTD_compressionParameters 1182 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, 1183 unsigned long long srcSize, 1184 size_t dictSize, 1185 ZSTD_cParamMode_e mode) 1186 { 1187 const U64 minSrcSize = 513; /* (1<<9) + 1 */ 1188 const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); 1189 assert(ZSTD_checkCParams(cPar)==0); 1190 1191 if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) 1192 srcSize = minSrcSize; 1193 1194 switch (mode) { 1195 case ZSTD_cpm_noAttachDict: 1196 case ZSTD_cpm_unknown: 1197 case ZSTD_cpm_createCDict: 1198 break; 1199 case ZSTD_cpm_attachDict: 1200 dictSize = 0; 1201 break; 1202 default: 1203 assert(0); 1204 break; 1205 } 1206 1207 /* resize windowLog if input is small enough, to use less memory */ 1208 if ( (srcSize < maxWindowResize) 1209 && (dictSize < maxWindowResize) ) { 1210 U32 const tSize = (U32)(srcSize + dictSize); 1211 static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; 1212 U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : 1213 ZSTD_highbit32(tSize-1) + 1; 1214 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; 1215 } 1216 { U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); 1217 U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); 1218 if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; 1219 if (cycleLog > dictAndWindowLog) 1220 cPar.chainLog -= (cycleLog - dictAndWindowLog); 1221 } 1222 1223 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) 1224 cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ 1225 1226 return cPar; 1227 } 1228 1229 ZSTD_compressionParameters 1230 ZSTD_adjustCParams(ZSTD_compressionParameters cPar, 1231 unsigned long long srcSize, 1232 size_t dictSize) 1233 { 1234 cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ 1235 if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; 1236 return 
ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); 1237 } 1238 1239 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 1240 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 1241 1242 static void ZSTD_overrideCParams( 1243 ZSTD_compressionParameters* cParams, 1244 const ZSTD_compressionParameters* overrides) 1245 { 1246 if (overrides->windowLog) cParams->windowLog = overrides->windowLog; 1247 if (overrides->hashLog) cParams->hashLog = overrides->hashLog; 1248 if (overrides->chainLog) cParams->chainLog = overrides->chainLog; 1249 if (overrides->searchLog) cParams->searchLog = overrides->searchLog; 1250 if (overrides->minMatch) cParams->minMatch = overrides->minMatch; 1251 if (overrides->targetLength) cParams->targetLength = overrides->targetLength; 1252 if (overrides->strategy) cParams->strategy = overrides->strategy; 1253 } 1254 1255 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 1256 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 1257 { 1258 ZSTD_compressionParameters cParams; 1259 if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { 1260 srcSizeHint = CCtxParams->srcSizeHint; 1261 } 1262 cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); 1263 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; 1264 ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); 1265 assert(!ZSTD_checkCParams(cParams)); 1266 /* srcSizeHint == 0 means 0 */ 1267 return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); 1268 } 1269 1270 static size_t 1271 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, 1272 const U32 forCCtx) 1273 { 1274 size_t const chainSize = (cParams->strategy 
== ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); 1275 size_t const hSize = ((size_t)1) << cParams->hashLog; 1276 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; 1277 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; 1278 /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't 1279 * surrounded by redzones in ASAN. */ 1280 size_t const tableSpace = chainSize * sizeof(U32) 1281 + hSize * sizeof(U32) 1282 + h3Size * sizeof(U32); 1283 size_t const optPotentialSpace = 1284 ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) 1285 + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) 1286 + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) 1287 + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32)) 1288 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) 1289 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); 1290 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) 1291 ? optPotentialSpace 1292 : 0; 1293 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", 1294 (U32)chainSize, (U32)hSize, (U32)h3Size); 1295 return tableSpace + optSpace; 1296 } 1297 1298 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1299 const ZSTD_compressionParameters* cParams, 1300 const ldmParams_t* ldmParams, 1301 const int isStatic, 1302 const size_t buffInSize, 1303 const size_t buffOutSize, 1304 const U64 pledgedSrcSize) 1305 { 1306 size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); 1307 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 1308 U32 const divider = (cParams->minMatch==3) ? 
3 : 4; 1309 size_t const maxNbSeq = blockSize / divider; 1310 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) 1311 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) 1312 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); 1313 size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); 1314 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); 1315 size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); 1316 1317 size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); 1318 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); 1319 size_t const ldmSeqSpace = ldmParams->enableLdm ? 1320 ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; 1321 1322 1323 size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) 1324 + ZSTD_cwksp_alloc_size(buffOutSize); 1325 1326 size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; 1327 1328 size_t const neededSpace = 1329 cctxSpace + 1330 entropySpace + 1331 blockStateSpace + 1332 ldmSpace + 1333 ldmSeqSpace + 1334 matchStateSize + 1335 tokenSpace + 1336 bufferSpace; 1337 1338 DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); 1339 return neededSpace; 1340 } 1341 1342 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) 1343 { 1344 ZSTD_compressionParameters const cParams = 1345 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1346 1347 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); 1348 /* estimateCCtxSize is for one-shot compression. So no buffers should 1349 * be needed. However, we still allocate two 0-sized buffers, which can 1350 * take space under ASAN. 
*/ 1351 return ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1352 &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); 1353 } 1354 1355 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) 1356 { 1357 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); 1358 return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); 1359 } 1360 1361 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) 1362 { 1363 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1364 return ZSTD_estimateCCtxSize_usingCParams(cParams); 1365 } 1366 1367 size_t ZSTD_estimateCCtxSize(int compressionLevel) 1368 { 1369 int level; 1370 size_t memBudget = 0; 1371 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { 1372 size_t const newMB = ZSTD_estimateCCtxSize_internal(level); 1373 if (newMB > memBudget) memBudget = newMB; 1374 } 1375 return memBudget; 1376 } 1377 1378 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) 1379 { 1380 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); 1381 { ZSTD_compressionParameters const cParams = 1382 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1383 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); 1384 size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) 1385 ? ((size_t)1 << cParams.windowLog) + blockSize 1386 : 0; 1387 size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) 1388 ? 
ZSTD_compressBound(blockSize) + 1 1389 : 0; 1390 1391 return ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1392 &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, 1393 ZSTD_CONTENTSIZE_UNKNOWN); 1394 } 1395 } 1396 1397 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) 1398 { 1399 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); 1400 return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); 1401 } 1402 1403 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) 1404 { 1405 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1406 return ZSTD_estimateCStreamSize_usingCParams(cParams); 1407 } 1408 1409 size_t ZSTD_estimateCStreamSize(int compressionLevel) 1410 { 1411 int level; 1412 size_t memBudget = 0; 1413 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { 1414 size_t const newMB = ZSTD_estimateCStreamSize_internal(level); 1415 if (newMB > memBudget) memBudget = newMB; 1416 } 1417 return memBudget; 1418 } 1419 1420 /* ZSTD_getFrameProgression(): 1421 * tells how much data has been consumed (input) and produced (output) for current frame. 1422 * able to count progression inside worker threads (non-blocking mode). 1423 */ 1424 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) 1425 { 1426 #ifdef ZSTD_MULTITHREAD 1427 if (cctx->appliedParams.nbWorkers > 0) { 1428 return ZSTDMT_getFrameProgression(cctx->mtctx); 1429 } 1430 #endif 1431 { ZSTD_frameProgression fp; 1432 size_t const buffered = (cctx->inBuff == NULL) ? 
0 : 1433 cctx->inBuffPos - cctx->inToCompress; 1434 if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); 1435 assert(buffered <= ZSTD_BLOCKSIZE_MAX); 1436 fp.ingested = cctx->consumedSrcSize + buffered; 1437 fp.consumed = cctx->consumedSrcSize; 1438 fp.produced = cctx->producedCSize; 1439 fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ 1440 fp.currentJobID = 0; 1441 fp.nbActiveWorkers = 0; 1442 return fp; 1443 } } 1444 1445 /*! ZSTD_toFlushNow() 1446 * Only useful for multithreading scenarios currently (nbWorkers >= 1). 1447 */ 1448 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) 1449 { 1450 #ifdef ZSTD_MULTITHREAD 1451 if (cctx->appliedParams.nbWorkers > 0) { 1452 return ZSTDMT_toFlushNow(cctx->mtctx); 1453 } 1454 #endif 1455 (void)cctx; 1456 return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ 1457 } 1458 1459 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, 1460 ZSTD_compressionParameters cParams2) 1461 { 1462 (void)cParams1; 1463 (void)cParams2; 1464 assert(cParams1.windowLog == cParams2.windowLog); 1465 assert(cParams1.chainLog == cParams2.chainLog); 1466 assert(cParams1.hashLog == cParams2.hashLog); 1467 assert(cParams1.searchLog == cParams2.searchLog); 1468 assert(cParams1.minMatch == cParams2.minMatch); 1469 assert(cParams1.targetLength == cParams2.targetLength); 1470 assert(cParams1.strategy == cParams2.strategy); 1471 } 1472 1473 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) 1474 { 1475 int i; 1476 for (i = 0; i < ZSTD_REP_NUM; ++i) 1477 bs->rep[i] = repStartValue[i]; 1478 bs->entropy.huf.repeatMode = HUF_repeat_none; 1479 bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; 1480 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; 1481 bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; 1482 } 1483 
1484 /*! ZSTD_invalidateMatchState() 1485 * Invalidate all the matches in the match finder tables. 1486 * Requires nextSrc and base to be set (can be NULL). 1487 */ 1488 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) 1489 { 1490 ZSTD_window_clear(&ms->window); 1491 1492 ms->nextToUpdate = ms->window.dictLimit; 1493 ms->loadedDictEnd = 0; 1494 ms->opt.litLengthSum = 0; /* force reset of btopt stats */ 1495 ms->dictMatchState = NULL; 1496 } 1497 1498 /** 1499 * Controls, for this matchState reset, whether the tables need to be cleared / 1500 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the 1501 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a 1502 * subsequent operation will overwrite the table space anyways (e.g., copying 1503 * the matchState contents in from a CDict). 1504 */ 1505 typedef enum { 1506 ZSTDcrp_makeClean, 1507 ZSTDcrp_leaveDirty 1508 } ZSTD_compResetPolicy_e; 1509 1510 /** 1511 * Controls, for this matchState reset, whether indexing can continue where it 1512 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero 1513 * (ZSTDirp_reset). 1514 */ 1515 typedef enum { 1516 ZSTDirp_continue, 1517 ZSTDirp_reset 1518 } ZSTD_indexResetPolicy_e; 1519 1520 typedef enum { 1521 ZSTD_resetTarget_CDict, 1522 ZSTD_resetTarget_CCtx 1523 } ZSTD_resetTarget_e; 1524 1525 static size_t 1526 ZSTD_reset_matchState(ZSTD_matchState_t* ms, 1527 ZSTD_cwksp* ws, 1528 const ZSTD_compressionParameters* cParams, 1529 const ZSTD_compResetPolicy_e crp, 1530 const ZSTD_indexResetPolicy_e forceResetIndex, 1531 const ZSTD_resetTarget_e forWho) 1532 { 1533 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); 1534 size_t const hSize = ((size_t)1) << cParams->hashLog; 1535 U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; 1536 size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; 1537 1538 DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); 1539 if (forceResetIndex == ZSTDirp_reset) { 1540 ZSTD_window_init(&ms->window); 1541 ZSTD_cwksp_mark_tables_dirty(ws); 1542 } 1543 1544 ms->hashLog3 = hashLog3; 1545 1546 ZSTD_invalidateMatchState(ms); 1547 1548 assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ 1549 1550 ZSTD_cwksp_clear_tables(ws); 1551 1552 DEBUGLOG(5, "reserving table space"); 1553 /* table Space */ 1554 ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); 1555 ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); 1556 ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); 1557 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, 1558 "failed a workspace allocation in ZSTD_reset_matchState"); 1559 1560 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); 1561 if (crp!=ZSTDcrp_leaveDirty) { 1562 /* reset tables only */ 1563 ZSTD_cwksp_clean_tables(ws); 1564 } 1565 1566 /* opt parser space */ 1567 if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { 1568 DEBUGLOG(4, "reserving optimal parser space"); 1569 ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); 1570 ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); 1571 ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); 1572 ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); 1573 ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); 1574 ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); 1575 } 1576 1577 ms->cParams = *cParams; 1578 1579 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, 1580 
"failed a workspace allocation in ZSTD_reset_matchState"); 1581 1582 return 0; 1583 } 1584 1585 /* ZSTD_indexTooCloseToMax() : 1586 * minor optimization : prefer memset() rather than reduceIndex() 1587 * which is measurably slow in some circumstances (reported for Visual Studio). 1588 * Works when re-using a context for a lot of smallish inputs : 1589 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, 1590 * memset() will be triggered before reduceIndex(). 1591 */ 1592 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) 1593 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) 1594 { 1595 return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); 1596 } 1597 1598 /*! ZSTD_resetCCtx_internal() : 1599 note : `params` are assumed fully validated at this stage */ 1600 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, 1601 ZSTD_CCtx_params params, 1602 U64 const pledgedSrcSize, 1603 ZSTD_compResetPolicy_e const crp, 1604 ZSTD_buffered_policy_e const zbuff) 1605 { 1606 ZSTD_cwksp* const ws = &zc->workspace; 1607 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", 1608 (U32)pledgedSrcSize, params.cParams.windowLog); 1609 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 1610 1611 zc->isFirstBlock = 1; 1612 1613 if (params.ldmParams.enableLdm) { 1614 /* Adjust long distance matching parameters */ 1615 ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); 1616 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); 1617 assert(params.ldmParams.hashRateLog < 32); 1618 zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); 1619 } 1620 1621 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); 1622 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 1623 U32 const divider = (params.cParams.minMatch==3) ? 
3 : 4; 1624 size_t const maxNbSeq = blockSize / divider; 1625 size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) 1626 ? ZSTD_compressBound(blockSize) + 1 1627 : 0; 1628 size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) 1629 ? windowSize + blockSize 1630 : 0; 1631 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); 1632 1633 int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); 1634 ZSTD_indexResetPolicy_e needsIndexReset = 1635 (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; 1636 1637 size_t const neededSpace = 1638 ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1639 ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, 1640 buffInSize, buffOutSize, pledgedSrcSize); 1641 FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); 1642 1643 if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); 1644 1645 /* Check if workspace is large enough, alloc a new one if needed */ 1646 { 1647 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; 1648 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); 1649 1650 DEBUGLOG(4, "Need %zu B workspace", neededSpace); 1651 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); 1652 1653 if (workspaceTooSmall || workspaceWasteful) { 1654 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", 1655 ZSTD_cwksp_sizeof(ws) >> 10, 1656 neededSpace >> 10); 1657 1658 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); 1659 1660 needsIndexReset = ZSTDirp_reset; 1661 1662 ZSTD_cwksp_free(ws, zc->customMem); 1663 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); 1664 1665 DEBUGLOG(5, "reserving object space"); 1666 /* Statically sized space. 
1667 * entropyWorkspace never moves, 1668 * though prev/next block swap places */ 1669 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); 1670 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); 1671 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); 1672 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); 1673 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); 1674 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); 1675 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); 1676 } } 1677 1678 ZSTD_cwksp_clear(ws); 1679 1680 /* init params */ 1681 zc->appliedParams = params; 1682 zc->blockState.matchState.cParams = params.cParams; 1683 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; 1684 zc->consumedSrcSize = 0; 1685 zc->producedCSize = 0; 1686 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) 1687 zc->appliedParams.fParams.contentSizeFlag = 0; 1688 DEBUGLOG(4, "pledged content size : %u ; flag : %u", 1689 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); 1690 zc->blockSize = blockSize; 1691 1692 XXH64_reset(&zc->xxhState, 0); 1693 zc->stage = ZSTDcs_init; 1694 zc->dictID = 0; 1695 1696 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); 1697 1698 /* ZSTD_wildcopy() is used to copy into the literals buffer, 1699 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
1700 */ 1701 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); 1702 zc->seqStore.maxNbLit = blockSize; 1703 1704 /* buffers */ 1705 zc->bufferedPolicy = zbuff; 1706 zc->inBuffSize = buffInSize; 1707 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); 1708 zc->outBuffSize = buffOutSize; 1709 zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); 1710 1711 /* ldm bucketOffsets table */ 1712 if (params.ldmParams.enableLdm) { 1713 /* TODO: avoid memset? */ 1714 size_t const ldmBucketSize = 1715 ((size_t)1) << (params.ldmParams.hashLog - 1716 params.ldmParams.bucketSizeLog); 1717 zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); 1718 ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); 1719 } 1720 1721 /* sequences storage */ 1722 ZSTD_referenceExternalSequences(zc, NULL, 0); 1723 zc->seqStore.maxNbSeq = maxNbSeq; 1724 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 1725 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 1726 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 1727 zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); 1728 1729 FORWARD_IF_ERROR(ZSTD_reset_matchState( 1730 &zc->blockState.matchState, 1731 ws, 1732 ¶ms.cParams, 1733 crp, 1734 needsIndexReset, 1735 ZSTD_resetTarget_CCtx), ""); 1736 1737 /* ldm hash table */ 1738 if (params.ldmParams.enableLdm) { 1739 /* TODO: avoid memset? 
*/ 1740 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; 1741 zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); 1742 ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); 1743 zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); 1744 zc->maxNbLdmSequences = maxNbLdmSeq; 1745 1746 ZSTD_window_init(&zc->ldmState.window); 1747 ZSTD_window_clear(&zc->ldmState.window); 1748 zc->ldmState.loadedDictEnd = 0; 1749 } 1750 1751 /* Due to alignment, when reusing a workspace, we can actually consume 1752 * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h 1753 */ 1754 assert(ZSTD_cwksp_used(ws) >= neededSpace && 1755 ZSTD_cwksp_used(ws) <= neededSpace + 3); 1756 1757 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); 1758 zc->initialized = 1; 1759 1760 return 0; 1761 } 1762 } 1763 1764 /* ZSTD_invalidateRepCodes() : 1765 * ensures next compression will not use repcodes from previous block. 1766 * Note : only works with regular variant; 1767 * do not use with extDict variant ! */ 1768 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { 1769 int i; 1770 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0; 1771 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); 1772 } 1773 1774 /* These are the approximate sizes for each strategy past which copying the 1775 * dictionary tables into the working context is faster than using them 1776 * in-place. 
1777 */ 1778 static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { 1779 8 KB, /* unused */ 1780 8 KB, /* ZSTD_fast */ 1781 16 KB, /* ZSTD_dfast */ 1782 32 KB, /* ZSTD_greedy */ 1783 32 KB, /* ZSTD_lazy */ 1784 32 KB, /* ZSTD_lazy2 */ 1785 32 KB, /* ZSTD_btlazy2 */ 1786 32 KB, /* ZSTD_btopt */ 1787 8 KB, /* ZSTD_btultra */ 1788 8 KB /* ZSTD_btultra2 */ 1789 }; 1790 1791 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, 1792 const ZSTD_CCtx_params* params, 1793 U64 pledgedSrcSize) 1794 { 1795 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; 1796 int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; 1797 return dedicatedDictSearch 1798 || ( ( pledgedSrcSize <= cutoff 1799 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN 1800 || params->attachDictPref == ZSTD_dictForceAttach ) 1801 && params->attachDictPref != ZSTD_dictForceCopy 1802 && !params->forceWindow ); /* dictMatchState isn't correctly 1803 * handled in _enforceMaxDist */ 1804 } 1805 1806 static size_t 1807 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, 1808 const ZSTD_CDict* cdict, 1809 ZSTD_CCtx_params params, 1810 U64 pledgedSrcSize, 1811 ZSTD_buffered_policy_e zbuff) 1812 { 1813 { 1814 ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; 1815 unsigned const windowLog = params.cParams.windowLog; 1816 assert(windowLog != 0); 1817 /* Resize working context table params for input only, since the dict 1818 * has its own tables. */ 1819 /* pledgedSrcSize == 0 means 0! 
*/ 1820 1821 if (cdict->matchState.dedicatedDictSearch) { 1822 ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); 1823 } 1824 1825 params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 1826 cdict->dictContentSize, ZSTD_cpm_attachDict); 1827 params.cParams.windowLog = windowLog; 1828 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 1829 ZSTDcrp_makeClean, zbuff), ""); 1830 assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); 1831 } 1832 1833 { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc 1834 - cdict->matchState.window.base); 1835 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; 1836 if (cdictLen == 0) { 1837 /* don't even attach dictionaries with no contents */ 1838 DEBUGLOG(4, "skipping attaching empty dictionary"); 1839 } else { 1840 DEBUGLOG(4, "attaching dictionary into context"); 1841 cctx->blockState.matchState.dictMatchState = &cdict->matchState; 1842 1843 /* prep working match state so dict matches never have negative indices 1844 * when they are translated to the working context's index space. 
*/ 1845 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { 1846 cctx->blockState.matchState.window.nextSrc = 1847 cctx->blockState.matchState.window.base + cdictEnd; 1848 ZSTD_window_clear(&cctx->blockState.matchState.window); 1849 } 1850 /* loadedDictEnd is expressed within the referential of the active context */ 1851 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; 1852 } } 1853 1854 cctx->dictID = cdict->dictID; 1855 1856 /* copy block state */ 1857 ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 1858 1859 return 0; 1860 } 1861 1862 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, 1863 const ZSTD_CDict* cdict, 1864 ZSTD_CCtx_params params, 1865 U64 pledgedSrcSize, 1866 ZSTD_buffered_policy_e zbuff) 1867 { 1868 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; 1869 1870 assert(!cdict->matchState.dedicatedDictSearch); 1871 1872 DEBUGLOG(4, "copying dictionary into context"); 1873 1874 { unsigned const windowLog = params.cParams.windowLog; 1875 assert(windowLog != 0); 1876 /* Copy only compression parameters related to tables. */ 1877 params.cParams = *cdict_cParams; 1878 params.cParams.windowLog = windowLog; 1879 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 1880 ZSTDcrp_leaveDirty, zbuff), ""); 1881 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); 1882 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); 1883 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); 1884 } 1885 1886 ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); 1887 1888 /* copy tables */ 1889 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cdict_cParams->chainLog); 1890 size_t const hSize = (size_t)1 << cdict_cParams->hashLog; 1891 1892 ZSTD_memcpy(cctx->blockState.matchState.hashTable, 1893 cdict->matchState.hashTable, 1894 hSize * sizeof(U32)); 1895 ZSTD_memcpy(cctx->blockState.matchState.chainTable, 1896 cdict->matchState.chainTable, 1897 chainSize * sizeof(U32)); 1898 } 1899 1900 /* Zero the hashTable3, since the cdict never fills it */ 1901 { int const h3log = cctx->blockState.matchState.hashLog3; 1902 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; 1903 assert(cdict->matchState.hashLog3 == 0); 1904 ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); 1905 } 1906 1907 ZSTD_cwksp_mark_tables_clean(&cctx->workspace); 1908 1909 /* copy dictionary offsets */ 1910 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; 1911 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; 1912 dstMatchState->window = srcMatchState->window; 1913 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 1914 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 1915 } 1916 1917 cctx->dictID = cdict->dictID; 1918 1919 /* copy block state */ 1920 ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 1921 1922 return 0; 1923 } 1924 1925 /* We have a choice between copying the dictionary context into the working 1926 * context, or referencing the dictionary context from the working context 1927 * in-place. We decide here which strategy to use. 
*/ 1928 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, 1929 const ZSTD_CDict* cdict, 1930 const ZSTD_CCtx_params* params, 1931 U64 pledgedSrcSize, 1932 ZSTD_buffered_policy_e zbuff) 1933 { 1934 1935 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", 1936 (unsigned)pledgedSrcSize); 1937 1938 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { 1939 return ZSTD_resetCCtx_byAttachingCDict( 1940 cctx, cdict, *params, pledgedSrcSize, zbuff); 1941 } else { 1942 return ZSTD_resetCCtx_byCopyingCDict( 1943 cctx, cdict, *params, pledgedSrcSize, zbuff); 1944 } 1945 } 1946 1947 /*! ZSTD_copyCCtx_internal() : 1948 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 1949 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 1950 * The "context", in this case, refers to the hash and chain tables, 1951 * entropy tables, and dictionary references. 1952 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. 1953 * @return : 0, or an error code */ 1954 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, 1955 const ZSTD_CCtx* srcCCtx, 1956 ZSTD_frameParameters fParams, 1957 U64 pledgedSrcSize, 1958 ZSTD_buffered_policy_e zbuff) 1959 { 1960 DEBUGLOG(5, "ZSTD_copyCCtx_internal"); 1961 RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, 1962 "Can't copy a ctx that's not in init stage."); 1963 1964 ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); 1965 { ZSTD_CCtx_params params = dstCCtx->requestedParams; 1966 /* Copy only compression parameters related to tables. 
*/ 1967 params.cParams = srcCCtx->appliedParams.cParams; 1968 params.fParams = fParams; 1969 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, 1970 ZSTDcrp_leaveDirty, zbuff); 1971 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); 1972 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); 1973 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); 1974 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); 1975 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); 1976 } 1977 1978 ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); 1979 1980 /* copy tables */ 1981 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); 1982 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; 1983 int const h3log = srcCCtx->blockState.matchState.hashLog3; 1984 size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; 1985 1986 ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, 1987 srcCCtx->blockState.matchState.hashTable, 1988 hSize * sizeof(U32)); 1989 ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, 1990 srcCCtx->blockState.matchState.chainTable, 1991 chainSize * sizeof(U32)); 1992 ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, 1993 srcCCtx->blockState.matchState.hashTable3, 1994 h3Size * sizeof(U32)); 1995 } 1996 1997 ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); 1998 1999 /* copy dictionary offsets */ 2000 { 2001 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; 2002 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; 2003 dstMatchState->window = srcMatchState->window; 2004 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 2005 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 2006 } 2007 dstCCtx->dictID = srcCCtx->dictID; 2008 2009 /* copy block state */ 2010 ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); 2011 2012 return 0; 2013 } 2014 2015 /*! ZSTD_copyCCtx() : 2016 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 2017 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 2018 * pledgedSrcSize==0 means "unknown". 
2019 * @return : 0, or an error code */ 2020 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) 2021 { 2022 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 2023 ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; 2024 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); 2025 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; 2026 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); 2027 2028 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, 2029 fParams, pledgedSrcSize, 2030 zbuff); 2031 } 2032 2033 2034 #define ZSTD_ROWSIZE 16 2035 /*! ZSTD_reduceTable() : 2036 * reduce table indexes by `reducerValue`, or squash to zero. 2037 * PreserveMark preserves "unsorted mark" for btlazy2 strategy. 2038 * It must be set to a clear 0/1 value, to remove branch during inlining. 2039 * Presume table size is a multiple of ZSTD_ROWSIZE 2040 * to help auto-vectorization */ 2041 FORCE_INLINE_TEMPLATE void 2042 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) 2043 { 2044 int const nbRows = (int)size / ZSTD_ROWSIZE; 2045 int cellNb = 0; 2046 int rowNb; 2047 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ 2048 assert(size < (1U<<31)); /* can be casted to int */ 2049 2050 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) 2051 /* To validate that the table re-use logic is sound, and that we don't 2052 * access table space that we haven't cleaned, we re-"poison" the table 2053 * space every time we mark it dirty. 2054 * 2055 * This function however is intended to operate on those dirty tables and 2056 * re-clean them. So when this function is used correctly, we can unpoison 2057 * the memory it operated on. This introduces a blind spot though, since 2058 * if we now try to operate on __actually__ poisoned memory, we will not 2059 * detect that. 
*/ 2060 __msan_unpoison(table, size * sizeof(U32)); 2061 #endif 2062 2063 for (rowNb=0 ; rowNb < nbRows ; rowNb++) { 2064 int column; 2065 for (column=0; column<ZSTD_ROWSIZE; column++) { 2066 if (preserveMark) { 2067 U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0; 2068 table[cellNb] += adder; 2069 } 2070 if (table[cellNb] < reducerValue) table[cellNb] = 0; 2071 else table[cellNb] -= reducerValue; 2072 cellNb++; 2073 } } 2074 } 2075 2076 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) 2077 { 2078 ZSTD_reduceTable_internal(table, size, reducerValue, 0); 2079 } 2080 2081 static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) 2082 { 2083 ZSTD_reduceTable_internal(table, size, reducerValue, 1); 2084 } 2085 2086 /*! ZSTD_reduceIndex() : 2087 * rescale all indexes to avoid future overflow (indexes are U32) */ 2088 static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) 2089 { 2090 { U32 const hSize = (U32)1 << params->cParams.hashLog; 2091 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); 2092 } 2093 2094 if (params->cParams.strategy != ZSTD_fast) { 2095 U32 const chainSize = (U32)1 << params->cParams.chainLog; 2096 if (params->cParams.strategy == ZSTD_btlazy2) 2097 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); 2098 else 2099 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); 2100 } 2101 2102 if (ms->hashLog3) { 2103 U32 const h3Size = (U32)1 << ms->hashLog3; 2104 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); 2105 } 2106 } 2107 2108 2109 /*-******************************************************* 2110 * Block entropic compression 2111 *********************************************************/ 2112 2113 /* See doc/zstd_compression_format.md for detailed format description */ 2114 2115 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 2116 { 2117 const seqDef* const 
sequences = seqStorePtr->sequencesStart; 2118 BYTE* const llCodeTable = seqStorePtr->llCode; 2119 BYTE* const ofCodeTable = seqStorePtr->ofCode; 2120 BYTE* const mlCodeTable = seqStorePtr->mlCode; 2121 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2122 U32 u; 2123 assert(nbSeq <= seqStorePtr->maxNbSeq); 2124 for (u=0; u<nbSeq; u++) { 2125 U32 const llv = sequences[u].litLength; 2126 U32 const mlv = sequences[u].matchLength; 2127 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); 2128 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); 2129 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); 2130 } 2131 if (seqStorePtr->longLengthID==1) 2132 llCodeTable[seqStorePtr->longLengthPos] = MaxLL; 2133 if (seqStorePtr->longLengthID==2) 2134 mlCodeTable[seqStorePtr->longLengthPos] = MaxML; 2135 } 2136 2137 /* ZSTD_useTargetCBlockSize(): 2138 * Returns if target compressed block size param is being used. 2139 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. 2140 * Returns 1 if true, 0 otherwise. 
*/ 2141 static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) 2142 { 2143 DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); 2144 return (cctxParams->targetCBlockSize != 0); 2145 } 2146 2147 /* ZSTD_entropyCompressSequences_internal(): 2148 * actually compresses both literals and sequences */ 2149 MEM_STATIC size_t 2150 ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, 2151 const ZSTD_entropyCTables_t* prevEntropy, 2152 ZSTD_entropyCTables_t* nextEntropy, 2153 const ZSTD_CCtx_params* cctxParams, 2154 void* dst, size_t dstCapacity, 2155 void* entropyWorkspace, size_t entropyWkspSize, 2156 const int bmi2) 2157 { 2158 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 2159 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 2160 unsigned* count = (unsigned*)entropyWorkspace; 2161 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; 2162 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; 2163 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; 2164 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 2165 const seqDef* const sequences = seqStorePtr->sequencesStart; 2166 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 2167 const BYTE* const llCodeTable = seqStorePtr->llCode; 2168 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 2169 BYTE* const ostart = (BYTE*)dst; 2170 BYTE* const oend = ostart + dstCapacity; 2171 BYTE* op = ostart; 2172 size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2173 BYTE* seqHead; 2174 BYTE* lastNCount = NULL; 2175 2176 entropyWorkspace = count + (MaxSeq + 1); 2177 entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); 2178 2179 DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); 2180 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 2181 assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); 2182 2183 /* Compress 
literals */ 2184 { const BYTE* const literals = seqStorePtr->litStart; 2185 size_t const litSize = (size_t)(seqStorePtr->lit - literals); 2186 size_t const cSize = ZSTD_compressLiterals( 2187 &prevEntropy->huf, &nextEntropy->huf, 2188 cctxParams->cParams.strategy, 2189 ZSTD_disableLiteralsCompression(cctxParams), 2190 op, dstCapacity, 2191 literals, litSize, 2192 entropyWorkspace, entropyWkspSize, 2193 bmi2); 2194 FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); 2195 assert(cSize <= dstCapacity); 2196 op += cSize; 2197 } 2198 2199 /* Sequences Header */ 2200 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, 2201 dstSize_tooSmall, "Can't fit seq hdr in output buf!"); 2202 if (nbSeq < 128) { 2203 *op++ = (BYTE)nbSeq; 2204 } else if (nbSeq < LONGNBSEQ) { 2205 op[0] = (BYTE)((nbSeq>>8) + 0x80); 2206 op[1] = (BYTE)nbSeq; 2207 op+=2; 2208 } else { 2209 op[0]=0xFF; 2210 MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); 2211 op+=3; 2212 } 2213 assert(op <= oend); 2214 if (nbSeq==0) { 2215 /* Copy the old tables over as if we repeated them */ 2216 ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); 2217 return (size_t)(op - ostart); 2218 } 2219 2220 /* seqHead : flags for FSE encoding type */ 2221 seqHead = op++; 2222 assert(op <= oend); 2223 2224 /* convert length/distances into codes */ 2225 ZSTD_seqToCodes(seqStorePtr); 2226 /* build CTable for Literal Lengths */ 2227 { unsigned max = MaxLL; 2228 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2229 DEBUGLOG(5, "Building LL table"); 2230 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; 2231 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, 2232 count, max, mostFrequent, nbSeq, 2233 LLFSELog, prevEntropy->fse.litlengthCTable, 2234 LL_defaultNorm, LL_defaultNormLog, 2235 ZSTD_defaultAllowed, strategy); 2236 assert(set_basic < set_compressed 
&& set_rle < set_compressed); 2237 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2238 { size_t const countSize = ZSTD_buildCTable( 2239 op, (size_t)(oend - op), 2240 CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, 2241 count, max, llCodeTable, nbSeq, 2242 LL_defaultNorm, LL_defaultNormLog, MaxLL, 2243 prevEntropy->fse.litlengthCTable, 2244 sizeof(prevEntropy->fse.litlengthCTable), 2245 entropyWorkspace, entropyWkspSize); 2246 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); 2247 if (LLtype == set_compressed) 2248 lastNCount = op; 2249 op += countSize; 2250 assert(op <= oend); 2251 } } 2252 /* build CTable for Offsets */ 2253 { unsigned max = MaxOff; 2254 size_t const mostFrequent = HIST_countFast_wksp( 2255 count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2256 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ 2257 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 2258 DEBUGLOG(5, "Building OF table"); 2259 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; 2260 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, 2261 count, max, mostFrequent, nbSeq, 2262 OffFSELog, prevEntropy->fse.offcodeCTable, 2263 OF_defaultNorm, OF_defaultNormLog, 2264 defaultPolicy, strategy); 2265 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2266 { size_t const countSize = ZSTD_buildCTable( 2267 op, (size_t)(oend - op), 2268 CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, 2269 count, max, ofCodeTable, nbSeq, 2270 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 2271 prevEntropy->fse.offcodeCTable, 2272 sizeof(prevEntropy->fse.offcodeCTable), 2273 entropyWorkspace, entropyWkspSize); 2274 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); 2275 if (Offtype == set_compressed) 2276 lastNCount = op; 2277 op += countSize; 2278 assert(op <= oend); 2279 } } 2280 /* build CTable for MatchLengths */ 2281 { unsigned max = MaxML; 2282 size_t const mostFrequent = HIST_countFast_wksp( 2283 count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2284 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); 2285 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; 2286 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, 2287 count, max, mostFrequent, nbSeq, 2288 MLFSELog, prevEntropy->fse.matchlengthCTable, 2289 ML_defaultNorm, ML_defaultNormLog, 2290 ZSTD_defaultAllowed, strategy); 2291 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2292 { size_t const countSize = ZSTD_buildCTable( 2293 op, (size_t)(oend - op), 2294 CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, 2295 count, max, 
mlCodeTable, nbSeq, 2296 ML_defaultNorm, ML_defaultNormLog, MaxML, 2297 prevEntropy->fse.matchlengthCTable, 2298 sizeof(prevEntropy->fse.matchlengthCTable), 2299 entropyWorkspace, entropyWkspSize); 2300 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); 2301 if (MLtype == set_compressed) 2302 lastNCount = op; 2303 op += countSize; 2304 assert(op <= oend); 2305 } } 2306 2307 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 2308 2309 { size_t const bitstreamSize = ZSTD_encodeSequences( 2310 op, (size_t)(oend - op), 2311 CTable_MatchLength, mlCodeTable, 2312 CTable_OffsetBits, ofCodeTable, 2313 CTable_LitLength, llCodeTable, 2314 sequences, nbSeq, 2315 longOffsets, bmi2); 2316 FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); 2317 op += bitstreamSize; 2318 assert(op <= oend); 2319 /* zstd versions <= 1.3.4 mistakenly report corruption when 2320 * FSE_readNCount() receives a buffer < 4 bytes. 2321 * Fixed by https://github.com/facebook/zstd/pull/1146. 2322 * This can happen when the last set_compressed table present is 2 2323 * bytes and the bitstream is only one byte. 2324 * In this exceedingly rare case, we will simply emit an uncompressed 2325 * block, since it isn't worth optimizing. 
2326 */ 2327 if (lastNCount && (op - lastNCount) < 4) { 2328 /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ 2329 assert(op - lastNCount == 3); 2330 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " 2331 "emitting an uncompressed block."); 2332 return 0; 2333 } 2334 } 2335 2336 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); 2337 return (size_t)(op - ostart); 2338 } 2339 2340 MEM_STATIC size_t 2341 ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, 2342 const ZSTD_entropyCTables_t* prevEntropy, 2343 ZSTD_entropyCTables_t* nextEntropy, 2344 const ZSTD_CCtx_params* cctxParams, 2345 void* dst, size_t dstCapacity, 2346 size_t srcSize, 2347 void* entropyWorkspace, size_t entropyWkspSize, 2348 int bmi2) 2349 { 2350 size_t const cSize = ZSTD_entropyCompressSequences_internal( 2351 seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2352 dst, dstCapacity, 2353 entropyWorkspace, entropyWkspSize, bmi2); 2354 if (cSize == 0) return 0; 2355 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 2356 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
2357 */ 2358 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) 2359 return 0; /* block not compressed */ 2360 FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); 2361 2362 /* Check compressibility */ 2363 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); 2364 if (cSize >= maxCSize) return 0; /* block not compressed */ 2365 } 2366 DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); 2367 return cSize; 2368 } 2369 2370 /* ZSTD_selectBlockCompressor() : 2371 * Not static, but internal use only (used by long distance matcher) 2372 * assumption : strat is a valid strategy */ 2373 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) 2374 { 2375 static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { 2376 { ZSTD_compressBlock_fast /* default for 0 */, 2377 ZSTD_compressBlock_fast, 2378 ZSTD_compressBlock_doubleFast, 2379 ZSTD_compressBlock_greedy, 2380 ZSTD_compressBlock_lazy, 2381 ZSTD_compressBlock_lazy2, 2382 ZSTD_compressBlock_btlazy2, 2383 ZSTD_compressBlock_btopt, 2384 ZSTD_compressBlock_btultra, 2385 ZSTD_compressBlock_btultra2 }, 2386 { ZSTD_compressBlock_fast_extDict /* default for 0 */, 2387 ZSTD_compressBlock_fast_extDict, 2388 ZSTD_compressBlock_doubleFast_extDict, 2389 ZSTD_compressBlock_greedy_extDict, 2390 ZSTD_compressBlock_lazy_extDict, 2391 ZSTD_compressBlock_lazy2_extDict, 2392 ZSTD_compressBlock_btlazy2_extDict, 2393 ZSTD_compressBlock_btopt_extDict, 2394 ZSTD_compressBlock_btultra_extDict, 2395 ZSTD_compressBlock_btultra_extDict }, 2396 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, 2397 ZSTD_compressBlock_fast_dictMatchState, 2398 ZSTD_compressBlock_doubleFast_dictMatchState, 2399 ZSTD_compressBlock_greedy_dictMatchState, 2400 ZSTD_compressBlock_lazy_dictMatchState, 2401 ZSTD_compressBlock_lazy2_dictMatchState, 2402 ZSTD_compressBlock_btlazy2_dictMatchState, 2403 
ZSTD_compressBlock_btopt_dictMatchState, 2404 ZSTD_compressBlock_btultra_dictMatchState, 2405 ZSTD_compressBlock_btultra_dictMatchState }, 2406 { NULL /* default for 0 */, 2407 NULL, 2408 NULL, 2409 ZSTD_compressBlock_greedy_dedicatedDictSearch, 2410 ZSTD_compressBlock_lazy_dedicatedDictSearch, 2411 ZSTD_compressBlock_lazy2_dedicatedDictSearch, 2412 NULL, 2413 NULL, 2414 NULL, 2415 NULL } 2416 }; 2417 ZSTD_blockCompressor selectedCompressor; 2418 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); 2419 2420 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 2421 selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; 2422 assert(selectedCompressor != NULL); 2423 return selectedCompressor; 2424 } 2425 2426 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, 2427 const BYTE* anchor, size_t lastLLSize) 2428 { 2429 ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); 2430 seqStorePtr->lit += lastLLSize; 2431 } 2432 2433 void ZSTD_resetSeqStore(seqStore_t* ssPtr) 2434 { 2435 ssPtr->lit = ssPtr->litStart; 2436 ssPtr->sequences = ssPtr->sequencesStart; 2437 ssPtr->longLengthID = 0; 2438 } 2439 2440 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; 2441 2442 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) 2443 { 2444 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 2445 DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); 2446 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 2447 /* Assert that we have correctly flushed the ctx params into the ms's copy */ 2448 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); 2449 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 2450 if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { 2451 ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); 2452 } else { 2453 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); 2454 } 2455 return ZSTDbss_noCompress; /* don't even attempt compression below a 
certain srcSize */ 2456 } 2457 ZSTD_resetSeqStore(&(zc->seqStore)); 2458 /* required for optimal parser to read stats from dictionary */ 2459 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; 2460 /* tell the optimal parser how we expect to compress literals */ 2461 ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; 2462 /* a gap between an attached dict and the current window is not safe, 2463 * they must remain adjacent, 2464 * and when that stops being the case, the dict must be unset */ 2465 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); 2466 2467 /* limited update after a very long match */ 2468 { const BYTE* const base = ms->window.base; 2469 const BYTE* const istart = (const BYTE*)src; 2470 const U32 curr = (U32)(istart-base); 2471 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ 2472 if (curr > ms->nextToUpdate + 384) 2473 ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); 2474 } 2475 2476 /* select and store sequences */ 2477 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); 2478 size_t lastLLSize; 2479 { int i; 2480 for (i = 0; i < ZSTD_REP_NUM; ++i) 2481 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; 2482 } 2483 if (zc->externSeqStore.pos < zc->externSeqStore.size) { 2484 assert(!zc->appliedParams.ldmParams.enableLdm); 2485 /* Updates ldmSeqStore.pos */ 2486 lastLLSize = 2487 ZSTD_ldm_blockCompress(&zc->externSeqStore, 2488 ms, &zc->seqStore, 2489 zc->blockState.nextCBlock->rep, 2490 src, srcSize); 2491 assert(zc->externSeqStore.pos <= zc->externSeqStore.size); 2492 } else if (zc->appliedParams.ldmParams.enableLdm) { 2493 rawSeqStore_t ldmSeqStore = kNullRawSeqStore; 2494 2495 ldmSeqStore.seq = zc->ldmSequences; 2496 ldmSeqStore.capacity = zc->maxNbLdmSequences; 2497 /* Updates ldmSeqStore.size */ 2498 FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, 2499 
&zc->appliedParams.ldmParams, 2500 src, srcSize), ""); 2501 /* Updates ldmSeqStore.pos */ 2502 lastLLSize = 2503 ZSTD_ldm_blockCompress(&ldmSeqStore, 2504 ms, &zc->seqStore, 2505 zc->blockState.nextCBlock->rep, 2506 src, srcSize); 2507 assert(ldmSeqStore.pos == ldmSeqStore.size); 2508 } else { /* not long range mode */ 2509 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); 2510 ms->ldmSeqStore = NULL; 2511 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); 2512 } 2513 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; 2514 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); 2515 } } 2516 return ZSTDbss_compress; 2517 } 2518 2519 static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) 2520 { 2521 const seqStore_t* seqStore = ZSTD_getSeqStore(zc); 2522 const seqDef* seqStoreSeqs = seqStore->sequencesStart; 2523 size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; 2524 size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); 2525 size_t literalsRead = 0; 2526 size_t lastLLSize; 2527 2528 ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; 2529 size_t i; 2530 repcodes_t updatedRepcodes; 2531 2532 assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); 2533 /* Ensure we have enough space for last literals "sequence" */ 2534 assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); 2535 ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 2536 for (i = 0; i < seqStoreSeqSize; ++i) { 2537 U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; 2538 outSeqs[i].litLength = seqStoreSeqs[i].litLength; 2539 outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; 2540 outSeqs[i].rep = 0; 2541 2542 if (i == seqStore->longLengthPos) { 2543 if (seqStore->longLengthID == 1) { 2544 outSeqs[i].litLength += 0x10000; 2545 } else 
if (seqStore->longLengthID == 2) { 2546 outSeqs[i].matchLength += 0x10000; 2547 } 2548 } 2549 2550 if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { 2551 /* Derive the correct offset corresponding to a repcode */ 2552 outSeqs[i].rep = seqStoreSeqs[i].offset; 2553 if (outSeqs[i].litLength != 0) { 2554 rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; 2555 } else { 2556 if (outSeqs[i].rep == 3) { 2557 rawOffset = updatedRepcodes.rep[0] - 1; 2558 } else { 2559 rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; 2560 } 2561 } 2562 } 2563 outSeqs[i].offset = rawOffset; 2564 /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode 2565 so we provide seqStoreSeqs[i].offset - 1 */ 2566 updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, 2567 seqStoreSeqs[i].offset - 1, 2568 seqStoreSeqs[i].litLength == 0); 2569 literalsRead += outSeqs[i].litLength; 2570 } 2571 /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. 2572 * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker 2573 * for the block boundary, according to the API. 
2574 */ 2575 assert(seqStoreLiteralsSize >= literalsRead); 2576 lastLLSize = seqStoreLiteralsSize - literalsRead; 2577 outSeqs[i].litLength = (U32)lastLLSize; 2578 outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; 2579 seqStoreSeqSize++; 2580 zc->seqCollector.seqIndex += seqStoreSeqSize; 2581 } 2582 2583 size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, 2584 size_t outSeqsSize, const void* src, size_t srcSize) 2585 { 2586 const size_t dstCapacity = ZSTD_compressBound(srcSize); 2587 void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); 2588 SeqCollector seqCollector; 2589 2590 RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); 2591 2592 seqCollector.collectSequences = 1; 2593 seqCollector.seqStart = outSeqs; 2594 seqCollector.seqIndex = 0; 2595 seqCollector.maxSequences = outSeqsSize; 2596 zc->seqCollector = seqCollector; 2597 2598 ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); 2599 ZSTD_customFree(dst, ZSTD_defaultCMem); 2600 return zc->seqCollector.seqIndex; 2601 } 2602 2603 size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { 2604 size_t in = 0; 2605 size_t out = 0; 2606 for (; in < seqsSize; ++in) { 2607 if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { 2608 if (in != seqsSize - 1) { 2609 sequences[in+1].litLength += sequences[in].litLength; 2610 } 2611 } else { 2612 sequences[out] = sequences[in]; 2613 ++out; 2614 } 2615 } 2616 return out; 2617 } 2618 2619 /* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. 
*/ 2620 static int ZSTD_isRLE(const BYTE* src, size_t length) { 2621 const BYTE* ip = src; 2622 const BYTE value = ip[0]; 2623 const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); 2624 const size_t unrollSize = sizeof(size_t) * 4; 2625 const size_t unrollMask = unrollSize - 1; 2626 const size_t prefixLength = length & unrollMask; 2627 size_t i; 2628 size_t u; 2629 if (length == 1) return 1; 2630 /* Check if prefix is RLE first before using unrolled loop */ 2631 if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { 2632 return 0; 2633 } 2634 for (i = prefixLength; i != length; i += unrollSize) { 2635 for (u = 0; u < unrollSize; u += sizeof(size_t)) { 2636 if (MEM_readST(ip + i + u) != valueST) { 2637 return 0; 2638 } 2639 } 2640 } 2641 return 1; 2642 } 2643 2644 /* Returns true if the given block may be RLE. 2645 * This is just a heuristic based on the compressibility. 2646 * It may return both false positives and false negatives. 2647 */ 2648 static int ZSTD_maybeRLE(seqStore_t const* seqStore) 2649 { 2650 size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); 2651 size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); 2652 2653 return nbSeqs < 4 && nbLits < 10; 2654 } 2655 2656 static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) 2657 { 2658 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; 2659 zc->blockState.prevCBlock = zc->blockState.nextCBlock; 2660 zc->blockState.nextCBlock = tmp; 2661 } 2662 2663 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, 2664 void* dst, size_t dstCapacity, 2665 const void* src, size_t srcSize, U32 frame) 2666 { 2667 /* This the upper bound for the length of an rle block. 2668 * This isn't the actual upper bound. Finding the real threshold 2669 * needs further investigation. 
2670 */ 2671 const U32 rleMaxLength = 25; 2672 size_t cSize; 2673 const BYTE* ip = (const BYTE*)src; 2674 BYTE* op = (BYTE*)dst; 2675 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 2676 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, 2677 (unsigned)zc->blockState.matchState.nextToUpdate); 2678 2679 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 2680 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 2681 if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } 2682 } 2683 2684 if (zc->seqCollector.collectSequences) { 2685 ZSTD_copyBlockSequences(zc); 2686 ZSTD_confirmRepcodesAndEntropyTables(zc); 2687 return 0; 2688 } 2689 2690 /* encode sequences and literals */ 2691 cSize = ZSTD_entropyCompressSequences(&zc->seqStore, 2692 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 2693 &zc->appliedParams, 2694 dst, dstCapacity, 2695 srcSize, 2696 zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 2697 zc->bmi2); 2698 2699 if (zc->seqCollector.collectSequences) { 2700 ZSTD_copyBlockSequences(zc); 2701 return 0; 2702 } 2703 2704 2705 if (frame && 2706 /* We don't want to emit our first block as a RLE even if it qualifies because 2707 * doing so will cause the decoder (cli only) to throw a "should consume all input error." 2708 * This is only an issue for zstd <= v1.4.3 2709 */ 2710 !zc->isFirstBlock && 2711 cSize < rleMaxLength && 2712 ZSTD_isRLE(ip, srcSize)) 2713 { 2714 cSize = 1; 2715 op[0] = ip[0]; 2716 } 2717 2718 out: 2719 if (!ZSTD_isError(cSize) && cSize > 1) { 2720 ZSTD_confirmRepcodesAndEntropyTables(zc); 2721 } 2722 /* We check that dictionaries have offset codes available for the first 2723 * block. After the first block, the offcode table might not have large 2724 * enough codes to represent the offsets in the data. 
2725 */ 2726 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 2727 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 2728 2729 return cSize; 2730 } 2731 2732 static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, 2733 void* dst, size_t dstCapacity, 2734 const void* src, size_t srcSize, 2735 const size_t bss, U32 lastBlock) 2736 { 2737 DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); 2738 if (bss == ZSTDbss_compress) { 2739 if (/* We don't want to emit our first block as a RLE even if it qualifies because 2740 * doing so will cause the decoder (cli only) to throw a "should consume all input error." 2741 * This is only an issue for zstd <= v1.4.3 2742 */ 2743 !zc->isFirstBlock && 2744 ZSTD_maybeRLE(&zc->seqStore) && 2745 ZSTD_isRLE((BYTE const*)src, srcSize)) 2746 { 2747 return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); 2748 } 2749 /* Attempt superblock compression. 2750 * 2751 * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the 2752 * standard ZSTD_compressBound(). This is a problem, because even if we have 2753 * space now, taking an extra byte now could cause us to run out of space later 2754 * and violate ZSTD_compressBound(). 2755 * 2756 * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. 2757 * 2758 * In order to respect ZSTD_compressBound() we must attempt to emit a raw 2759 * uncompressed block in these cases: 2760 * * cSize == 0: Return code for an uncompressed block. 2761 * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). 2762 * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of 2763 * output space. 2764 * * cSize >= blockBound(srcSize): We have expanded the block too much so 2765 * emit an uncompressed block. 
2766 */ 2767 { 2768 size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); 2769 if (cSize != ERROR(dstSize_tooSmall)) { 2770 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); 2771 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); 2772 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { 2773 ZSTD_confirmRepcodesAndEntropyTables(zc); 2774 return cSize; 2775 } 2776 } 2777 } 2778 } 2779 2780 DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); 2781 /* Superblock compression failed, attempt to emit a single no compress block. 2782 * The decoder will be able to stream this block since it is uncompressed. 2783 */ 2784 return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); 2785 } 2786 2787 static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, 2788 void* dst, size_t dstCapacity, 2789 const void* src, size_t srcSize, 2790 U32 lastBlock) 2791 { 2792 size_t cSize = 0; 2793 const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 2794 DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", 2795 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); 2796 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 2797 2798 cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); 2799 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); 2800 2801 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 2802 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 2803 2804 return cSize; 2805 } 2806 2807 static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, 2808 ZSTD_cwksp* ws, 2809 ZSTD_CCtx_params const* params, 2810 void const* ip, 2811 void const* iend) 2812 { 2813 if 
(ZSTD_window_needOverflowCorrection(ms->window, iend)) { 2814 U32 const maxDist = (U32)1 << params->cParams.windowLog; 2815 U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); 2816 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); 2817 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); 2818 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 2819 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 2820 ZSTD_cwksp_mark_tables_dirty(ws); 2821 ZSTD_reduceIndex(ms, params, correction); 2822 ZSTD_cwksp_mark_tables_clean(ws); 2823 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; 2824 else ms->nextToUpdate -= correction; 2825 /* invalidate dictionaries on overflow correction */ 2826 ms->loadedDictEnd = 0; 2827 ms->dictMatchState = NULL; 2828 } 2829 } 2830 2831 /*! ZSTD_compress_frameChunk() : 2832 * Compress a chunk of data into one or multiple blocks. 2833 * All blocks will be terminated, all input will be consumed. 2834 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
2835 * Frame is supposed already started (header already produced) 2836 * @return : compressed size, or an error code 2837 */ 2838 static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, 2839 void* dst, size_t dstCapacity, 2840 const void* src, size_t srcSize, 2841 U32 lastFrameChunk) 2842 { 2843 size_t blockSize = cctx->blockSize; 2844 size_t remaining = srcSize; 2845 const BYTE* ip = (const BYTE*)src; 2846 BYTE* const ostart = (BYTE*)dst; 2847 BYTE* op = ostart; 2848 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; 2849 2850 assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); 2851 2852 DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); 2853 if (cctx->appliedParams.fParams.checksumFlag && srcSize) 2854 XXH64_update(&cctx->xxhState, src, srcSize); 2855 2856 while (remaining) { 2857 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 2858 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); 2859 2860 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, 2861 dstSize_tooSmall, 2862 "not enough space to store compressed block"); 2863 if (remaining < blockSize) blockSize = remaining; 2864 2865 ZSTD_overflowCorrectIfNeeded( 2866 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); 2867 ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); 2868 2869 /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ 2870 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; 2871 2872 { size_t cSize; 2873 if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { 2874 cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); 2875 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); 2876 assert(cSize > 0); 2877 assert(cSize <= blockSize + ZSTD_blockHeaderSize); 2878 } else { 2879 cSize = ZSTD_compressBlock_internal(cctx, 2880 
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, 2881 ip, blockSize, 1 /* frame */); 2882 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); 2883 2884 if (cSize == 0) { /* block is not compressible */ 2885 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 2886 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 2887 } else { 2888 U32 const cBlockHeader = cSize == 1 ? 2889 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : 2890 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 2891 MEM_writeLE24(op, cBlockHeader); 2892 cSize += ZSTD_blockHeaderSize; 2893 } 2894 } 2895 2896 2897 ip += blockSize; 2898 assert(remaining >= blockSize); 2899 remaining -= blockSize; 2900 op += cSize; 2901 assert(dstCapacity >= cSize); 2902 dstCapacity -= cSize; 2903 cctx->isFirstBlock = 0; 2904 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", 2905 (unsigned)cSize); 2906 } } 2907 2908 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; 2909 return (size_t)(op-ostart); 2910 } 2911 2912 2913 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, 2914 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) 2915 { BYTE* const op = (BYTE*)dst; 2916 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ 2917 U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ 2918 U32 const checksumFlag = params->fParams.checksumFlag>0; 2919 U32 const windowSize = (U32)1 << params->cParams.windowLog; 2920 U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); 2921 BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); 2922 U32 const fcsCode = params->fParams.contentSizeFlag ? 
2923 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ 2924 BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); 2925 size_t pos=0; 2926 2927 assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); 2928 RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, 2929 "dst buf is too small to fit worst-case frame header size."); 2930 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", 2931 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); 2932 if (params->format == ZSTD_f_zstd1) { 2933 MEM_writeLE32(dst, ZSTD_MAGICNUMBER); 2934 pos = 4; 2935 } 2936 op[pos++] = frameHeaderDescriptionByte; 2937 if (!singleSegment) op[pos++] = windowLogByte; 2938 switch(dictIDSizeCode) 2939 { 2940 default: assert(0); /* impossible */ 2941 case 0 : break; 2942 case 1 : op[pos] = (BYTE)(dictID); pos++; break; 2943 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; 2944 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; 2945 } 2946 switch(fcsCode) 2947 { 2948 default: assert(0); /* impossible */ 2949 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; 2950 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; 2951 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; 2952 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; 2953 } 2954 return pos; 2955 } 2956 2957 /* ZSTD_writeLastEmptyBlock() : 2958 * output an empty Block with end-of-frame mark to complete a frame 2959 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 2960 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) 2961 */ 2962 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) 2963 { 2964 RETURN_ERROR_IF(dstCapacity < 
ZSTD_blockHeaderSize, dstSize_tooSmall, 2965 "dst buf is too small to write frame trailer empty block."); 2966 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ 2967 MEM_writeLE24(dst, cBlockHeader24); 2968 return ZSTD_blockHeaderSize; 2969 } 2970 } 2971 2972 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 2973 { 2974 RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, 2975 "wrong cctx stage"); 2976 RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, 2977 parameter_unsupported, 2978 "incompatible with ldm"); 2979 cctx->externSeqStore.seq = seq; 2980 cctx->externSeqStore.size = nbSeq; 2981 cctx->externSeqStore.capacity = nbSeq; 2982 cctx->externSeqStore.pos = 0; 2983 cctx->externSeqStore.posInSequence = 0; 2984 return 0; 2985 } 2986 2987 2988 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, 2989 void* dst, size_t dstCapacity, 2990 const void* src, size_t srcSize, 2991 U32 frame, U32 lastFrameChunk) 2992 { 2993 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 2994 size_t fhSize = 0; 2995 2996 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", 2997 cctx->stage, (unsigned)srcSize); 2998 RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, 2999 "missing init (ZSTD_compressBegin)"); 3000 3001 if (frame && (cctx->stage==ZSTDcs_init)) { 3002 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 3003 cctx->pledgedSrcSizePlusOne-1, cctx->dictID); 3004 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); 3005 assert(fhSize <= dstCapacity); 3006 dstCapacity -= fhSize; 3007 dst = (char*)dst + fhSize; 3008 cctx->stage = ZSTDcs_ongoing; 3009 } 3010 3011 if (!srcSize) return fhSize; /* do not generate an empty block if no input */ 3012 3013 if (!ZSTD_window_update(&ms->window, src, srcSize)) { 3014 ms->nextToUpdate = ms->window.dictLimit; 3015 } 3016 if (cctx->appliedParams.ldmParams.enableLdm) { 3017 
ZSTD_window_update(&cctx->ldmState.window, src, srcSize); 3018 } 3019 3020 if (!frame) { 3021 /* overflow check and correction for block mode */ 3022 ZSTD_overflowCorrectIfNeeded( 3023 ms, &cctx->workspace, &cctx->appliedParams, 3024 src, (BYTE const*)src + srcSize); 3025 } 3026 3027 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); 3028 { size_t const cSize = frame ? 3029 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : 3030 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); 3031 FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); 3032 cctx->consumedSrcSize += srcSize; 3033 cctx->producedCSize += (cSize + fhSize); 3034 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); 3035 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 3036 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); 3037 RETURN_ERROR_IF( 3038 cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, 3039 srcSize_wrong, 3040 "error : pledgedSrcSize = %u, while realSrcSize >= %u", 3041 (unsigned)cctx->pledgedSrcSizePlusOne-1, 3042 (unsigned)cctx->consumedSrcSize); 3043 } 3044 return cSize + fhSize; 3045 } 3046 } 3047 3048 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, 3049 void* dst, size_t dstCapacity, 3050 const void* src, size_t srcSize) 3051 { 3052 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); 3053 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); 3054 } 3055 3056 3057 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 3058 { 3059 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; 3060 assert(!ZSTD_checkCParams(cParams)); 3061 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); 3062 } 3063 3064 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, 
size_t dstCapacity, const void* src, size_t srcSize) 3065 { 3066 DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); 3067 { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); 3068 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } 3069 3070 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); 3071 } 3072 3073 /*! ZSTD_loadDictionaryContent() : 3074 * @return : 0, or an error code 3075 */ 3076 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, 3077 ldmState_t* ls, 3078 ZSTD_cwksp* ws, 3079 ZSTD_CCtx_params const* params, 3080 const void* src, size_t srcSize, 3081 ZSTD_dictTableLoadMethod_e dtlm) 3082 { 3083 const BYTE* ip = (const BYTE*) src; 3084 const BYTE* const iend = ip + srcSize; 3085 3086 ZSTD_window_update(&ms->window, src, srcSize); 3087 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); 3088 3089 if (params->ldmParams.enableLdm && ls != NULL) { 3090 ZSTD_window_update(&ls->window, src, srcSize); 3091 ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); 3092 } 3093 3094 /* Assert that we the ms params match the params we're being given */ 3095 ZSTD_assertEqualCParams(params->cParams, ms->cParams); 3096 3097 if (srcSize <= HASH_READ_SIZE) return 0; 3098 3099 while (iend - ip > HASH_READ_SIZE) { 3100 size_t const remaining = (size_t)(iend - ip); 3101 size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); 3102 const BYTE* const ichunk = ip + chunk; 3103 3104 ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); 3105 3106 if (params->ldmParams.enableLdm && ls != NULL) 3107 ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); 3108 3109 switch(params->cParams.strategy) 3110 { 3111 case ZSTD_fast: 3112 ZSTD_fillHashTable(ms, ichunk, dtlm); 3113 break; 3114 case ZSTD_dfast: 3115 ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); 3116 break; 3117 3118 case ZSTD_greedy: 3119 case ZSTD_lazy: 3120 case ZSTD_lazy2: 3121 if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { 3122 assert(chunk == remaining); /* must load everything in one go */ 3123 ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); 3124 } else if (chunk >= HASH_READ_SIZE) { 3125 ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); 3126 } 3127 break; 3128 3129 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ 3130 case ZSTD_btopt: 3131 case ZSTD_btultra: 3132 case ZSTD_btultra2: 3133 if (chunk >= HASH_READ_SIZE) 3134 ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); 3135 break; 3136 3137 default: 3138 assert(0); /* not possible : not a valid strategy id */ 3139 } 3140 3141 ip = ichunk; 3142 } 3143 3144 ms->nextToUpdate = (U32)(iend - ms->window.base); 3145 return 0; 3146 } 3147 3148 3149 /* Dictionaries that assign zero probability to symbols that show up causes problems 3150 * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check 3151 * and only dictionaries with 100% valid symbols can be assumed valid. 
3152 */ 3153 static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) 3154 { 3155 U32 s; 3156 if (dictMaxSymbolValue < maxSymbolValue) { 3157 return FSE_repeat_check; 3158 } 3159 for (s = 0; s <= maxSymbolValue; ++s) { 3160 if (normalizedCounter[s] == 0) { 3161 return FSE_repeat_check; 3162 } 3163 } 3164 return FSE_repeat_valid; 3165 } 3166 3167 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, 3168 const void* const dict, size_t dictSize) 3169 { 3170 short offcodeNCount[MaxOff+1]; 3171 unsigned offcodeMaxValue = MaxOff; 3172 const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ 3173 const BYTE* const dictEnd = dictPtr + dictSize; 3174 dictPtr += 8; 3175 bs->entropy.huf.repeatMode = HUF_repeat_check; 3176 3177 { unsigned maxSymbolValue = 255; 3178 unsigned hasZeroWeights = 1; 3179 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, 3180 dictEnd-dictPtr, &hasZeroWeights); 3181 3182 /* We only set the loaded table as valid if it contains all non-zero 3183 * weights. 
Otherwise, we set it to check */ 3184 if (!hasZeroWeights) 3185 bs->entropy.huf.repeatMode = HUF_repeat_valid; 3186 3187 RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); 3188 RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); 3189 dictPtr += hufHeaderSize; 3190 } 3191 3192 { unsigned offcodeLog; 3193 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); 3194 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); 3195 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); 3196 /* fill all offset symbols to avoid garbage at end of table */ 3197 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3198 bs->entropy.fse.offcodeCTable, 3199 offcodeNCount, MaxOff, offcodeLog, 3200 workspace, HUF_WORKSPACE_SIZE)), 3201 dictionary_corrupted, ""); 3202 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ 3203 dictPtr += offcodeHeaderSize; 3204 } 3205 3206 { short matchlengthNCount[MaxML+1]; 3207 unsigned matchlengthMaxValue = MaxML, matchlengthLog; 3208 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); 3209 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); 3210 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); 3211 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3212 bs->entropy.fse.matchlengthCTable, 3213 matchlengthNCount, matchlengthMaxValue, matchlengthLog, 3214 workspace, HUF_WORKSPACE_SIZE)), 3215 dictionary_corrupted, ""); 3216 bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); 3217 dictPtr += matchlengthHeaderSize; 3218 } 3219 3220 { short litlengthNCount[MaxLL+1]; 3221 unsigned litlengthMaxValue = MaxLL, litlengthLog; 3222 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, 
&litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); 3223 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); 3224 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); 3225 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3226 bs->entropy.fse.litlengthCTable, 3227 litlengthNCount, litlengthMaxValue, litlengthLog, 3228 workspace, HUF_WORKSPACE_SIZE)), 3229 dictionary_corrupted, ""); 3230 bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); 3231 dictPtr += litlengthHeaderSize; 3232 } 3233 3234 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); 3235 bs->rep[0] = MEM_readLE32(dictPtr+0); 3236 bs->rep[1] = MEM_readLE32(dictPtr+4); 3237 bs->rep[2] = MEM_readLE32(dictPtr+8); 3238 dictPtr += 12; 3239 3240 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 3241 U32 offcodeMax = MaxOff; 3242 if (dictContentSize <= ((U32)-1) - 128 KB) { 3243 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ 3244 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ 3245 } 3246 /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ 3247 bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); 3248 3249 /* All repCodes must be <= dictContentSize and != 0 */ 3250 { U32 u; 3251 for (u=0; u<3; u++) { 3252 RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); 3253 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); 3254 } } } 3255 3256 return dictPtr - (const BYTE*)dict; 3257 } 3258 3259 /* Dictionary format : 3260 * See : 3261 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format 3262 */ 3263 /*! 
ZSTD_loadZstdDictionary() : 3264 * @return : dictID, or an error code 3265 * assumptions : magic number supposed already checked 3266 * dictSize supposed >= 8 3267 */ 3268 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, 3269 ZSTD_matchState_t* ms, 3270 ZSTD_cwksp* ws, 3271 ZSTD_CCtx_params const* params, 3272 const void* dict, size_t dictSize, 3273 ZSTD_dictTableLoadMethod_e dtlm, 3274 void* workspace) 3275 { 3276 const BYTE* dictPtr = (const BYTE*)dict; 3277 const BYTE* const dictEnd = dictPtr + dictSize; 3278 size_t dictID; 3279 size_t eSize; 3280 3281 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 3282 assert(dictSize >= 8); 3283 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); 3284 3285 dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); 3286 eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); 3287 FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); 3288 dictPtr += eSize; 3289 3290 { 3291 size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 3292 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( 3293 ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); 3294 } 3295 return dictID; 3296 } 3297 3298 /** ZSTD_compress_insertDictionary() : 3299 * @return : dictID, or an error code */ 3300 static size_t 3301 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, 3302 ZSTD_matchState_t* ms, 3303 ldmState_t* ls, 3304 ZSTD_cwksp* ws, 3305 const ZSTD_CCtx_params* params, 3306 const void* dict, size_t dictSize, 3307 ZSTD_dictContentType_e dictContentType, 3308 ZSTD_dictTableLoadMethod_e dtlm, 3309 void* workspace) 3310 { 3311 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); 3312 if ((dict==NULL) || (dictSize<8)) { 3313 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); 3314 return 0; 3315 } 3316 3317 ZSTD_reset_compressedBlockState(bs); 3318 3319 /* dict restricted modes */ 3320 if (dictContentType == 
ZSTD_dct_rawContent) 3321 return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); 3322 3323 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { 3324 if (dictContentType == ZSTD_dct_auto) { 3325 DEBUGLOG(4, "raw content dictionary detected"); 3326 return ZSTD_loadDictionaryContent( 3327 ms, ls, ws, params, dict, dictSize, dtlm); 3328 } 3329 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); 3330 assert(0); /* impossible */ 3331 } 3332 3333 /* dict as full zstd dictionary */ 3334 return ZSTD_loadZstdDictionary( 3335 bs, ms, ws, params, dict, dictSize, dtlm, workspace); 3336 } 3337 3338 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) 3339 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) 3340 3341 /*! ZSTD_compressBegin_internal() : 3342 * @return : 0, or an error code */ 3343 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, 3344 const void* dict, size_t dictSize, 3345 ZSTD_dictContentType_e dictContentType, 3346 ZSTD_dictTableLoadMethod_e dtlm, 3347 const ZSTD_CDict* cdict, 3348 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, 3349 ZSTD_buffered_policy_e zbuff) 3350 { 3351 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); 3352 /* params are supposed to be fully validated at this point */ 3353 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); 3354 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 3355 if ( (cdict) 3356 && (cdict->dictContentSize > 0) 3357 && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF 3358 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER 3359 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN 3360 || cdict->compressionLevel == 0) 3361 && (params->attachDictPref != ZSTD_dictForceLoad) ) { 3362 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); 3363 } 3364 3365 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, 3366 ZSTDcrp_makeClean, 
zbuff) , ""); 3367 { size_t const dictID = cdict ? 3368 ZSTD_compress_insertDictionary( 3369 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 3370 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, 3371 cdict->dictContentSize, cdict->dictContentType, dtlm, 3372 cctx->entropyWorkspace) 3373 : ZSTD_compress_insertDictionary( 3374 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 3375 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, 3376 dictContentType, dtlm, cctx->entropyWorkspace); 3377 FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); 3378 assert(dictID <= UINT_MAX); 3379 cctx->dictID = (U32)dictID; 3380 } 3381 return 0; 3382 } 3383 3384 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, 3385 const void* dict, size_t dictSize, 3386 ZSTD_dictContentType_e dictContentType, 3387 ZSTD_dictTableLoadMethod_e dtlm, 3388 const ZSTD_CDict* cdict, 3389 const ZSTD_CCtx_params* params, 3390 unsigned long long pledgedSrcSize) 3391 { 3392 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); 3393 /* compression parameters verification and optimization */ 3394 FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); 3395 return ZSTD_compressBegin_internal(cctx, 3396 dict, dictSize, dictContentType, dtlm, 3397 cdict, 3398 params, pledgedSrcSize, 3399 ZSTDb_not_buffered); 3400 } 3401 3402 /*! 
ZSTD_compressBegin_advanced() : 3403 * @return : 0, or an error code */ 3404 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, 3405 const void* dict, size_t dictSize, 3406 ZSTD_parameters params, unsigned long long pledgedSrcSize) 3407 { 3408 ZSTD_CCtx_params const cctxParams = 3409 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); 3410 return ZSTD_compressBegin_advanced_internal(cctx, 3411 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, 3412 NULL /*cdict*/, 3413 &cctxParams, pledgedSrcSize); 3414 } 3415 3416 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 3417 { 3418 ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); 3419 ZSTD_CCtx_params const cctxParams = 3420 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); 3421 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); 3422 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, 3423 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); 3424 } 3425 3426 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) 3427 { 3428 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); 3429 } 3430 3431 3432 /*! ZSTD_writeEpilogue() : 3433 * Ends a frame. 
3434 * @return : nb of bytes written into dst (or an error code) */ 3435 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) 3436 { 3437 BYTE* const ostart = (BYTE*)dst; 3438 BYTE* op = ostart; 3439 size_t fhSize = 0; 3440 3441 DEBUGLOG(4, "ZSTD_writeEpilogue"); 3442 RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); 3443 3444 /* special case : empty frame */ 3445 if (cctx->stage == ZSTDcs_init) { 3446 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); 3447 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); 3448 dstCapacity -= fhSize; 3449 op += fhSize; 3450 cctx->stage = ZSTDcs_ongoing; 3451 } 3452 3453 if (cctx->stage != ZSTDcs_ending) { 3454 /* write one last empty block, make it the "last" block */ 3455 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; 3456 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); 3457 MEM_writeLE32(op, cBlockHeader24); 3458 op += ZSTD_blockHeaderSize; 3459 dstCapacity -= ZSTD_blockHeaderSize; 3460 } 3461 3462 if (cctx->appliedParams.fParams.checksumFlag) { 3463 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); 3464 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); 3465 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); 3466 MEM_writeLE32(op, checksum); 3467 op += 4; 3468 } 3469 3470 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ 3471 return op-ostart; 3472 } 3473 3474 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, 3475 void* dst, size_t dstCapacity, 3476 const void* src, size_t srcSize) 3477 { 3478 size_t endResult; 3479 size_t const cSize = ZSTD_compressContinue_internal(cctx, 3480 dst, dstCapacity, src, srcSize, 3481 1 /* frame mode */, 1 /* last chunk */); 3482 FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); 3483 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); 3484 
FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); 3485 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); 3486 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 3487 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); 3488 DEBUGLOG(4, "end of frame : controlling src size"); 3489 RETURN_ERROR_IF( 3490 cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, 3491 srcSize_wrong, 3492 "error : pledgedSrcSize = %u, while realSrcSize = %u", 3493 (unsigned)cctx->pledgedSrcSizePlusOne-1, 3494 (unsigned)cctx->consumedSrcSize); 3495 } 3496 return cSize + endResult; 3497 } 3498 3499 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, 3500 void* dst, size_t dstCapacity, 3501 const void* src, size_t srcSize, 3502 const void* dict,size_t dictSize, 3503 const ZSTD_parameters* params) 3504 { 3505 ZSTD_CCtx_params const cctxParams = 3506 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); 3507 DEBUGLOG(4, "ZSTD_compress_internal"); 3508 return ZSTD_compress_advanced_internal(cctx, 3509 dst, dstCapacity, 3510 src, srcSize, 3511 dict, dictSize, 3512 &cctxParams); 3513 } 3514 3515 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, 3516 void* dst, size_t dstCapacity, 3517 const void* src, size_t srcSize, 3518 const void* dict,size_t dictSize, 3519 ZSTD_parameters params) 3520 { 3521 DEBUGLOG(4, "ZSTD_compress_advanced"); 3522 FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); 3523 return ZSTD_compress_internal(cctx, 3524 dst, dstCapacity, 3525 src, srcSize, 3526 dict, dictSize, 3527 ¶ms); 3528 } 3529 3530 /* Internal */ 3531 size_t ZSTD_compress_advanced_internal( 3532 ZSTD_CCtx* cctx, 3533 void* dst, size_t dstCapacity, 3534 const void* src, size_t srcSize, 3535 const void* dict,size_t dictSize, 3536 const ZSTD_CCtx_params* params) 3537 { 3538 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); 3539 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, 3540 dict, 
dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, 3541 params, srcSize, ZSTDb_not_buffered) , ""); 3542 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 3543 } 3544 3545 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, 3546 void* dst, size_t dstCapacity, 3547 const void* src, size_t srcSize, 3548 const void* dict, size_t dictSize, 3549 int compressionLevel) 3550 { 3551 ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); 3552 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); 3553 DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); 3554 assert(params.fParams.contentSizeFlag == 1); 3555 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); 3556 } 3557 3558 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, 3559 void* dst, size_t dstCapacity, 3560 const void* src, size_t srcSize, 3561 int compressionLevel) 3562 { 3563 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); 3564 assert(cctx != NULL); 3565 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); 3566 } 3567 3568 size_t ZSTD_compress(void* dst, size_t dstCapacity, 3569 const void* src, size_t srcSize, 3570 int compressionLevel) 3571 { 3572 size_t result; 3573 #if ZSTD_COMPRESS_HEAPMODE 3574 ZSTD_CCtx* cctx = ZSTD_createCCtx(); 3575 RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); 3576 result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); 3577 ZSTD_freeCCtx(cctx); 3578 #else 3579 ZSTD_CCtx ctxBody; 3580 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); 3581 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); 3582 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ 3583 #endif 3584 return result; 3585 } 3586 3587 3588 /* ===== Dictionary API 
===== */ 3589 3590 /*! ZSTD_estimateCDictSize_advanced() : 3591 * Estimate amount of memory that will be needed to create a dictionary with following arguments */ 3592 size_t ZSTD_estimateCDictSize_advanced( 3593 size_t dictSize, ZSTD_compressionParameters cParams, 3594 ZSTD_dictLoadMethod_e dictLoadMethod) 3595 { 3596 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); 3597 return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) 3598 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) 3599 + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) 3600 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 3601 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); 3602 } 3603 3604 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) 3605 { 3606 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 3607 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); 3608 } 3609 3610 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) 3611 { 3612 if (cdict==NULL) return 0; /* support sizeof on NULL */ 3613 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); 3614 /* cdict may be in the workspace */ 3615 return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) 3616 + ZSTD_cwksp_sizeof(&cdict->workspace); 3617 } 3618 3619 static size_t ZSTD_initCDict_internal( 3620 ZSTD_CDict* cdict, 3621 const void* dictBuffer, size_t dictSize, 3622 ZSTD_dictLoadMethod_e dictLoadMethod, 3623 ZSTD_dictContentType_e dictContentType, 3624 ZSTD_CCtx_params params) 3625 { 3626 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); 3627 assert(!ZSTD_checkCParams(params.cParams)); 3628 cdict->matchState.cParams = params.cParams; 3629 cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; 3630 if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { 3631 cdict->matchState.dedicatedDictSearch = 0; 3632 } 3633 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { 3634 cdict->dictContent = dictBuffer; 3635 } else { 3636 void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); 3637 RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); 3638 cdict->dictContent = internalBuffer; 3639 ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); 3640 } 3641 cdict->dictContentSize = dictSize; 3642 cdict->dictContentType = dictContentType; 3643 3644 cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); 3645 3646 3647 /* Reset the state to no dictionary */ 3648 ZSTD_reset_compressedBlockState(&cdict->cBlockState); 3649 FORWARD_IF_ERROR(ZSTD_reset_matchState( 3650 &cdict->matchState, 3651 &cdict->workspace, 3652 ¶ms.cParams, 3653 ZSTDcrp_makeClean, 3654 ZSTDirp_reset, 3655 ZSTD_resetTarget_CDict), ""); 3656 /* (Maybe) load the dictionary 3657 * Skips loading the dictionary if it is < 8 bytes. 
3658 */ 3659 { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; 3660 params.fParams.contentSizeFlag = 1; 3661 { size_t const dictID = ZSTD_compress_insertDictionary( 3662 &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, 3663 ¶ms, cdict->dictContent, cdict->dictContentSize, 3664 dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); 3665 FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); 3666 assert(dictID <= (size_t)(U32)-1); 3667 cdict->dictID = (U32)dictID; 3668 } 3669 } 3670 3671 return 0; 3672 } 3673 3674 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, 3675 ZSTD_dictLoadMethod_e dictLoadMethod, 3676 ZSTD_compressionParameters cParams, ZSTD_customMem customMem) 3677 { 3678 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 3679 3680 { size_t const workspaceSize = 3681 ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + 3682 ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + 3683 ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + 3684 (dictLoadMethod == ZSTD_dlm_byRef ? 
0 3685 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); 3686 void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); 3687 ZSTD_cwksp ws; 3688 ZSTD_CDict* cdict; 3689 3690 if (!workspace) { 3691 ZSTD_customFree(workspace, customMem); 3692 return NULL; 3693 } 3694 3695 ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); 3696 3697 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); 3698 assert(cdict != NULL); 3699 ZSTD_cwksp_move(&cdict->workspace, &ws); 3700 cdict->customMem = customMem; 3701 cdict->compressionLevel = 0; /* signals advanced API usage */ 3702 3703 return cdict; 3704 } 3705 } 3706 3707 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, 3708 ZSTD_dictLoadMethod_e dictLoadMethod, 3709 ZSTD_dictContentType_e dictContentType, 3710 ZSTD_compressionParameters cParams, 3711 ZSTD_customMem customMem) 3712 { 3713 ZSTD_CCtx_params cctxParams; 3714 ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); 3715 ZSTD_CCtxParams_init(&cctxParams, 0); 3716 cctxParams.cParams = cParams; 3717 cctxParams.customMem = customMem; 3718 return ZSTD_createCDict_advanced2( 3719 dictBuffer, dictSize, 3720 dictLoadMethod, dictContentType, 3721 &cctxParams, customMem); 3722 } 3723 3724 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( 3725 const void* dict, size_t dictSize, 3726 ZSTD_dictLoadMethod_e dictLoadMethod, 3727 ZSTD_dictContentType_e dictContentType, 3728 const ZSTD_CCtx_params* originalCctxParams, 3729 ZSTD_customMem customMem) 3730 { 3731 ZSTD_CCtx_params cctxParams = *originalCctxParams; 3732 ZSTD_compressionParameters cParams; 3733 ZSTD_CDict* cdict; 3734 3735 DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); 3736 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 3737 3738 if (cctxParams.enableDedicatedDictSearch) { 3739 cParams = ZSTD_dedicatedDictSearch_getCParams( 3740 cctxParams.compressionLevel, dictSize); 3741 
ZSTD_overrideCParams(&cParams, &cctxParams.cParams); 3742 } else { 3743 cParams = ZSTD_getCParamsFromCCtxParams( 3744 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 3745 } 3746 3747 if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { 3748 /* Fall back to non-DDSS params */ 3749 cctxParams.enableDedicatedDictSearch = 0; 3750 cParams = ZSTD_getCParamsFromCCtxParams( 3751 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 3752 } 3753 3754 cctxParams.cParams = cParams; 3755 3756 cdict = ZSTD_createCDict_advanced_internal(dictSize, 3757 dictLoadMethod, cctxParams.cParams, 3758 customMem); 3759 3760 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 3761 dict, dictSize, 3762 dictLoadMethod, dictContentType, 3763 cctxParams) )) { 3764 ZSTD_freeCDict(cdict); 3765 return NULL; 3766 } 3767 3768 return cdict; 3769 } 3770 3771 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) 3772 { 3773 ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 3774 ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, 3775 ZSTD_dlm_byCopy, ZSTD_dct_auto, 3776 cParams, ZSTD_defaultCMem); 3777 if (cdict) 3778 cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; 3779 return cdict; 3780 } 3781 3782 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) 3783 { 3784 ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 3785 ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, 3786 ZSTD_dlm_byRef, ZSTD_dct_auto, 3787 cParams, ZSTD_defaultCMem); 3788 if (cdict) 3789 cdict->compressionLevel = (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel; 3790 return cdict; 3791 } 3792 3793 size_t ZSTD_freeCDict(ZSTD_CDict* cdict) 3794 { 3795 if (cdict==NULL) return 0; /* support free on NULL */ 3796 { ZSTD_customMem const cMem = cdict->customMem; 3797 int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); 3798 ZSTD_cwksp_free(&cdict->workspace, cMem); 3799 if (!cdictInWorkspace) { 3800 ZSTD_customFree(cdict, cMem); 3801 } 3802 return 0; 3803 } 3804 } 3805 3806 /*! ZSTD_initStaticCDict_advanced() : 3807 * Generate a digested dictionary in provided memory area. 3808 * workspace: The memory area to emplace the dictionary into. 3809 * Provided pointer must 8-bytes aligned. 3810 * It must outlive dictionary usage. 3811 * workspaceSize: Use ZSTD_estimateCDictSize() 3812 * to determine how large workspace must be. 3813 * cParams : use ZSTD_getCParams() to transform a compression level 3814 * into its relevants cParams. 3815 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) 3816 * Note : there is no corresponding "free" function. 3817 * Since workspace was allocated externally, it must be freed externally. 3818 */ 3819 const ZSTD_CDict* ZSTD_initStaticCDict( 3820 void* workspace, size_t workspaceSize, 3821 const void* dict, size_t dictSize, 3822 ZSTD_dictLoadMethod_e dictLoadMethod, 3823 ZSTD_dictContentType_e dictContentType, 3824 ZSTD_compressionParameters cParams) 3825 { 3826 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); 3827 size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) 3828 + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 3829 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) 3830 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) 3831 + matchStateSize; 3832 ZSTD_CDict* cdict; 3833 ZSTD_CCtx_params params; 3834 3835 if ((size_t)workspace & 7) return NULL; /* 8-aligned */ 3836 3837 { 3838 ZSTD_cwksp ws; 3839 ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); 3840 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); 3841 if (cdict == NULL) return NULL; 3842 ZSTD_cwksp_move(&cdict->workspace, &ws); 3843 } 3844 3845 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", 3846 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); 3847 if (workspaceSize < neededSize) return NULL; 3848 3849 ZSTD_CCtxParams_init(¶ms, 0); 3850 params.cParams = cParams; 3851 3852 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 3853 dict, dictSize, 3854 dictLoadMethod, dictContentType, 3855 params) )) 3856 return NULL; 3857 3858 return cdict; 3859 } 3860 3861 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) 3862 { 3863 assert(cdict != NULL); 3864 return cdict->matchState.cParams; 3865 } 3866 3867 /*! ZSTD_getDictID_fromCDict() : 3868 * Provides the dictID of the dictionary loaded into `cdict`. 3869 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 3870 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
*/ 3871 unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) 3872 { 3873 if (cdict==NULL) return 0; 3874 return cdict->dictID; 3875 } 3876 3877 3878 /* ZSTD_compressBegin_usingCDict_advanced() : 3879 * cdict must be != NULL */ 3880 size_t ZSTD_compressBegin_usingCDict_advanced( 3881 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, 3882 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) 3883 { 3884 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); 3885 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); 3886 { ZSTD_CCtx_params params = cctx->requestedParams; 3887 params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF 3888 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER 3889 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN 3890 || cdict->compressionLevel == 0 ) 3891 && (params.attachDictPref != ZSTD_dictForceLoad) ? 3892 ZSTD_getCParamsFromCDict(cdict) 3893 : ZSTD_getCParams(cdict->compressionLevel, 3894 pledgedSrcSize, 3895 cdict->dictContentSize); 3896 /* Increase window log to fit the entire dictionary and source if the 3897 * source size is known. Limit the increase to 19, which is the 3898 * window log for compression level 1 with the largest source size. 3899 */ 3900 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { 3901 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); 3902 U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; 3903 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); 3904 } 3905 params.fParams = fParams; 3906 return ZSTD_compressBegin_internal(cctx, 3907 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, 3908 cdict, 3909 ¶ms, pledgedSrcSize, 3910 ZSTDb_not_buffered); 3911 } 3912 } 3913 3914 /* ZSTD_compressBegin_usingCDict() : 3915 * pledgedSrcSize=0 means "unknown" 3916 * if pledgedSrcSize>0, it will enable contentSizeFlag */ 3917 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 3918 { 3919 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 3920 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); 3921 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); 3922 } 3923 3924 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 3925 void* dst, size_t dstCapacity, 3926 const void* src, size_t srcSize, 3927 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 3928 { 3929 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ 3930 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 3931 } 3932 3933 /*! ZSTD_compress_usingCDict() : 3934 * Compression using a digested Dictionary. 3935 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
3936 * Note that compression parameters are decided at CDict creation time 3937 * while frame parameters are hardcoded */ 3938 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, 3939 void* dst, size_t dstCapacity, 3940 const void* src, size_t srcSize, 3941 const ZSTD_CDict* cdict) 3942 { 3943 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 3944 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); 3945 } 3946 3947 3948 3949 /* ****************************************************************** 3950 * Streaming 3951 ********************************************************************/ 3952 3953 ZSTD_CStream* ZSTD_createCStream(void) 3954 { 3955 DEBUGLOG(3, "ZSTD_createCStream"); 3956 return ZSTD_createCStream_advanced(ZSTD_defaultCMem); 3957 } 3958 3959 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) 3960 { 3961 return ZSTD_initStaticCCtx(workspace, workspaceSize); 3962 } 3963 3964 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) 3965 { /* CStream and CCtx are now same object */ 3966 return ZSTD_createCCtx_advanced(customMem); 3967 } 3968 3969 size_t ZSTD_freeCStream(ZSTD_CStream* zcs) 3970 { 3971 return ZSTD_freeCCtx(zcs); /* same object */ 3972 } 3973 3974 3975 3976 /*====== Initialization ======*/ 3977 3978 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } 3979 3980 size_t ZSTD_CStreamOutSize(void) 3981 { 3982 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; 3983 } 3984 3985 static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) 3986 { 3987 if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) 3988 return ZSTD_cpm_attachDict; 3989 else 3990 return ZSTD_cpm_noAttachDict; 3991 } 3992 3993 /* ZSTD_resetCStream(): 3994 * pledgedSrcSize == 0 means "unknown" */ 3995 size_t 
ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) 3996 { 3997 /* temporary : 0 interpreted as "unknown" during transition period. 3998 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 3999 * 0 will be interpreted as "empty" in the future. 4000 */ 4001 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; 4002 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); 4003 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4004 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 4005 return 0; 4006 } 4007 4008 /*! ZSTD_initCStream_internal() : 4009 * Note : for lib/compress only. Used by zstdmt_compress.c. 4010 * Assumption 1 : params are valid 4011 * Assumption 2 : either dict, or cdict, is defined, not both */ 4012 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 4013 const void* dict, size_t dictSize, const ZSTD_CDict* cdict, 4014 const ZSTD_CCtx_params* params, 4015 unsigned long long pledgedSrcSize) 4016 { 4017 DEBUGLOG(4, "ZSTD_initCStream_internal"); 4018 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4019 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 4020 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); 4021 zcs->requestedParams = *params; 4022 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 4023 if (dict) { 4024 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); 4025 } else { 4026 /* Dictionary is cleared if !cdict */ 4027 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 4028 } 4029 return 0; 4030 } 4031 4032 /* ZSTD_initCStream_usingCDict_advanced() : 4033 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ 4034 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, 4035 const ZSTD_CDict* cdict, 4036 ZSTD_frameParameters fParams, 4037 unsigned long long pledgedSrcSize) 4038 { 4039 DEBUGLOG(4, 
"ZSTD_initCStream_usingCDict_advanced"); 4040 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4041 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 4042 zcs->requestedParams.fParams = fParams; 4043 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 4044 return 0; 4045 } 4046 4047 /* note : cdict must outlive compression session */ 4048 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) 4049 { 4050 DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); 4051 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4052 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 4053 return 0; 4054 } 4055 4056 4057 /* ZSTD_initCStream_advanced() : 4058 * pledgedSrcSize must be exact. 4059 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 4060 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ 4061 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, 4062 const void* dict, size_t dictSize, 4063 ZSTD_parameters params, unsigned long long pss) 4064 { 4065 /* for compatibility with older programs relying on this behavior. 4066 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. 4067 * This line will be removed in the future. 4068 */ 4069 U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; 4070 DEBUGLOG(4, "ZSTD_initCStream_advanced"); 4071 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4072 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 4073 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); 4074 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); 4075 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); 4076 return 0; 4077 } 4078 4079 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) 4080 { 4081 DEBUGLOG(4, "ZSTD_initCStream_usingDict"); 4082 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4083 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); 4084 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); 4085 return 0; 4086 } 4087 4088 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) 4089 { 4090 /* temporary : 0 interpreted as "unknown" during transition period. 4091 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 4092 * 0 will be interpreted as "empty" in the future. 4093 */ 4094 U64 const pledgedSrcSize = (pss==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; 4095 DEBUGLOG(4, "ZSTD_initCStream_srcSize"); 4096 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4097 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); 4098 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); 4099 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 4100 return 0; 4101 } 4102 4103 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) 4104 { 4105 DEBUGLOG(4, "ZSTD_initCStream"); 4106 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 4107 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); 4108 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); 4109 return 0; 4110 } 4111 4112 /*====== Compression ======*/ 4113 4114 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) 4115 { 4116 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; 4117 if (hintInSize==0) hintInSize = cctx->blockSize; 4118 return hintInSize; 4119 } 4120 4121 /** ZSTD_compressStream_generic(): 4122 * internal function for all *compressStream*() variants 4123 * non-static, because can be called from zstdmt_compress.c 4124 * @return : hint size for next input */ 4125 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, 4126 ZSTD_outBuffer* output, 4127 ZSTD_inBuffer* input, 4128 ZSTD_EndDirective const flushMode) 4129 { 4130 const char* const istart = (const char*)input->src; 4131 const char* const iend = input->size != 0 ? istart + input->size : istart; 4132 const char* ip = input->pos != 0 ? istart + input->pos : istart; 4133 char* const ostart = (char*)output->dst; 4134 char* const oend = output->size != 0 ? ostart + output->size : ostart; 4135 char* op = output->pos != 0 ? 
ostart + output->pos : ostart; 4136 U32 someMoreWork = 1; 4137 4138 /* check expectations */ 4139 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); 4140 if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { 4141 assert(zcs->inBuff != NULL); 4142 assert(zcs->inBuffSize > 0); 4143 } 4144 if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { 4145 assert(zcs->outBuff != NULL); 4146 assert(zcs->outBuffSize > 0); 4147 } 4148 assert(output->pos <= output->size); 4149 assert(input->pos <= input->size); 4150 assert((U32)flushMode <= (U32)ZSTD_e_end); 4151 4152 while (someMoreWork) { 4153 switch(zcs->streamStage) 4154 { 4155 case zcss_init: 4156 RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); 4157 4158 case zcss_load: 4159 if ( (flushMode == ZSTD_e_end) 4160 && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ 4161 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ 4162 && (zcs->inBuffPos == 0) ) { 4163 /* shortcut to compression pass directly into output buffer */ 4164 size_t const cSize = ZSTD_compressEnd(zcs, 4165 op, oend-op, ip, iend-ip); 4166 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); 4167 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); 4168 ip = iend; 4169 op += cSize; 4170 zcs->frameEnded = 1; 4171 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4172 someMoreWork = 0; break; 4173 } 4174 /* complete loading into inBuffer in buffered mode */ 4175 if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { 4176 size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; 4177 size_t const loaded = ZSTD_limitCopy( 4178 zcs->inBuff + zcs->inBuffPos, toLoad, 4179 ip, iend-ip); 4180 zcs->inBuffPos += loaded; 4181 if (loaded != 0) 4182 ip += loaded; 4183 if ( (flushMode == ZSTD_e_continue) 4184 && (zcs->inBuffPos < zcs->inBuffTarget) ) { 4185 /* not enough input to fill full block : stop here */ 4186 someMoreWork = 0; break; 4187 
} 4188 if ( (flushMode == ZSTD_e_flush) 4189 && (zcs->inBuffPos == zcs->inToCompress) ) { 4190 /* empty */ 4191 someMoreWork = 0; break; 4192 } 4193 } 4194 /* compress current block (note : this stage cannot be stopped in the middle) */ 4195 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); 4196 { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); 4197 void* cDst; 4198 size_t cSize; 4199 size_t oSize = oend-op; 4200 size_t const iSize = inputBuffered 4201 ? zcs->inBuffPos - zcs->inToCompress 4202 : MIN((size_t)(iend - ip), zcs->blockSize); 4203 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) 4204 cDst = op; /* compress into output buffer, to skip flush stage */ 4205 else 4206 cDst = zcs->outBuff, oSize = zcs->outBuffSize; 4207 if (inputBuffered) { 4208 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); 4209 cSize = lastBlock ? 4210 ZSTD_compressEnd(zcs, cDst, oSize, 4211 zcs->inBuff + zcs->inToCompress, iSize) : 4212 ZSTD_compressContinue(zcs, cDst, oSize, 4213 zcs->inBuff + zcs->inToCompress, iSize); 4214 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 4215 zcs->frameEnded = lastBlock; 4216 /* prepare next block */ 4217 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 4218 if (zcs->inBuffTarget > zcs->inBuffSize) 4219 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 4220 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 4221 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); 4222 if (!lastBlock) 4223 assert(zcs->inBuffTarget <= zcs->inBuffSize); 4224 zcs->inToCompress = zcs->inBuffPos; 4225 } else { 4226 unsigned const lastBlock = (ip + iSize == iend); 4227 assert(flushMode == ZSTD_e_end /* Already validated */); 4228 cSize = lastBlock ? 
4229 ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : 4230 ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); 4231 /* Consume the input prior to error checking to mirror buffered mode. */ 4232 if (iSize > 0) 4233 ip += iSize; 4234 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 4235 zcs->frameEnded = lastBlock; 4236 if (lastBlock) 4237 assert(ip == iend); 4238 } 4239 if (cDst == op) { /* no need to flush */ 4240 op += cSize; 4241 if (zcs->frameEnded) { 4242 DEBUGLOG(5, "Frame completed directly in outBuffer"); 4243 someMoreWork = 0; 4244 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4245 } 4246 break; 4247 } 4248 zcs->outBuffContentSize = cSize; 4249 zcs->outBuffFlushedSize = 0; 4250 zcs->streamStage = zcss_flush; /* pass-through to flush stage */ 4251 } 4252 /* fall-through */ 4253 case zcss_flush: 4254 DEBUGLOG(5, "flush stage"); 4255 assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); 4256 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; 4257 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), 4258 zcs->outBuff + zcs->outBuffFlushedSize, toFlush); 4259 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", 4260 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); 4261 if (flushed) 4262 op += flushed; 4263 zcs->outBuffFlushedSize += flushed; 4264 if (toFlush!=flushed) { 4265 /* flush not fully completed, presumably because dst is too small */ 4266 assert(op==oend); 4267 someMoreWork = 0; 4268 break; 4269 } 4270 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; 4271 if (zcs->frameEnded) { 4272 DEBUGLOG(5, "Frame completed on flush"); 4273 someMoreWork = 0; 4274 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4275 break; 4276 } 4277 zcs->streamStage = zcss_load; 4278 break; 4279 } 4280 4281 default: /* impossible */ 4282 assert(0); 4283 } 4284 } 4285 4286 input->pos = ip - istart; 4287 output->pos = op - ostart; 4288 if (zcs->frameEnded) return 0; 4289 
return ZSTD_nextInputSizeHint(zcs); 4290 } 4291 4292 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) 4293 { 4294 #ifdef ZSTD_MULTITHREAD 4295 if (cctx->appliedParams.nbWorkers >= 1) { 4296 assert(cctx->mtctx != NULL); 4297 return ZSTDMT_nextInputSizeHint(cctx->mtctx); 4298 } 4299 #endif 4300 return ZSTD_nextInputSizeHint(cctx); 4301 4302 } 4303 4304 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 4305 { 4306 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); 4307 return ZSTD_nextInputSizeHint_MTorST(zcs); 4308 } 4309 4310 /* After a compression call set the expected input/output buffer. 4311 * This is validated at the start of the next compression call. 4312 */ 4313 static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) 4314 { 4315 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 4316 cctx->expectedInBuffer = *input; 4317 } 4318 if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { 4319 cctx->expectedOutBufferSize = output->size - output->pos; 4320 } 4321 } 4322 4323 /* Validate that the input/output buffers match the expectations set by 4324 * ZSTD_setBufferExpectations. 
4325 */ 4326 static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, 4327 ZSTD_outBuffer const* output, 4328 ZSTD_inBuffer const* input, 4329 ZSTD_EndDirective endOp) 4330 { 4331 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 4332 ZSTD_inBuffer const expect = cctx->expectedInBuffer; 4333 if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) 4334 RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); 4335 if (endOp != ZSTD_e_end) 4336 RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); 4337 } 4338 if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { 4339 size_t const outBufferSize = output->size - output->pos; 4340 if (cctx->expectedOutBufferSize != outBufferSize) 4341 RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); 4342 } 4343 return 0; 4344 } 4345 4346 static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, 4347 ZSTD_EndDirective endOp, 4348 size_t inSize) { 4349 ZSTD_CCtx_params params = cctx->requestedParams; 4350 ZSTD_prefixDict const prefixDict = cctx->prefixDict; 4351 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ 4352 ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ 4353 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ 4354 if (cctx->cdict) 4355 params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ 4356 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); 4357 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ 4358 { 4359 size_t const dictSize = prefixDict.dict 4360 ? prefixDict.dictSize 4361 : (cctx->cdict ? 
cctx->cdict->dictContentSize : 0); 4362 ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); 4363 params.cParams = ZSTD_getCParamsFromCCtxParams( 4364 ¶ms, cctx->pledgedSrcSizePlusOne-1, 4365 dictSize, mode); 4366 } 4367 4368 if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { 4369 /* Enable LDM by default for optimal parser and window size >= 128MB */ 4370 DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); 4371 params.ldmParams.enableLdm = 1; 4372 } 4373 4374 #ifdef ZSTD_MULTITHREAD 4375 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { 4376 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ 4377 } 4378 if (params.nbWorkers > 0) { 4379 /* mt context creation */ 4380 if (cctx->mtctx == NULL) { 4381 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", 4382 params.nbWorkers); 4383 cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); 4384 RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); 4385 } 4386 /* mt compression */ 4387 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); 4388 FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( 4389 cctx->mtctx, 4390 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, 4391 cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); 4392 cctx->streamStage = zcss_load; 4393 cctx->appliedParams = params; 4394 } else 4395 #endif 4396 { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; 4397 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 4398 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, 4399 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, 4400 cctx->cdict, 4401 ¶ms, pledgedSrcSize, 4402 ZSTDb_buffered) , ""); 4403 assert(cctx->appliedParams.nbWorkers == 0); 4404 cctx->inToCompress = 0; 4405 cctx->inBuffPos = 0; 4406 if 
(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { 4407 /* for small input: avoid automatic flush on reaching end of block, since 4408 * it would require to add a 3-bytes null block to end frame 4409 */ 4410 cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); 4411 } else { 4412 cctx->inBuffTarget = 0; 4413 } 4414 cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; 4415 cctx->streamStage = zcss_load; 4416 cctx->frameEnded = 0; 4417 } 4418 return 0; 4419 } 4420 4421 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, 4422 ZSTD_outBuffer* output, 4423 ZSTD_inBuffer* input, 4424 ZSTD_EndDirective endOp) 4425 { 4426 DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); 4427 /* check conditions */ 4428 RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); 4429 RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); 4430 RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); 4431 assert(cctx != NULL); 4432 4433 /* transparent initialization stage */ 4434 if (cctx->streamStage == zcss_init) { 4435 FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); 4436 ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ 4437 } 4438 /* end of transparent initialization stage */ 4439 4440 FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); 4441 /* compression stage */ 4442 #ifdef ZSTD_MULTITHREAD 4443 if (cctx->appliedParams.nbWorkers > 0) { 4444 size_t flushMin; 4445 if (cctx->cParamsChanged) { 4446 ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); 4447 cctx->cParamsChanged = 0; 4448 } 4449 for (;;) { 4450 size_t const ipos = input->pos; 4451 size_t const opos = output->pos; 4452 flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); 4453 if ( 
ZSTD_isError(flushMin) 4454 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ 4455 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); 4456 } 4457 FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); 4458 4459 if (endOp == ZSTD_e_continue) { 4460 /* We only require some progress with ZSTD_e_continue, not maximal progress. 4461 * We're done if we've consumed or produced any bytes, or either buffer is 4462 * full. 4463 */ 4464 if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) 4465 break; 4466 } else { 4467 assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); 4468 /* We require maximal progress. We're done when the flush is complete or the 4469 * output buffer is full. 4470 */ 4471 if (flushMin == 0 || output->pos == output->size) 4472 break; 4473 } 4474 } 4475 DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); 4476 /* Either we don't require maximum forward progress, we've finished the 4477 * flush, or we are out of output space. 
4478 */ 4479 assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); 4480 ZSTD_setBufferExpectations(cctx, output, input); 4481 return flushMin; 4482 } 4483 #endif 4484 FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); 4485 DEBUGLOG(5, "completed ZSTD_compressStream2"); 4486 ZSTD_setBufferExpectations(cctx, output, input); 4487 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ 4488 } 4489 4490 size_t ZSTD_compressStream2_simpleArgs ( 4491 ZSTD_CCtx* cctx, 4492 void* dst, size_t dstCapacity, size_t* dstPos, 4493 const void* src, size_t srcSize, size_t* srcPos, 4494 ZSTD_EndDirective endOp) 4495 { 4496 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; 4497 ZSTD_inBuffer input = { src, srcSize, *srcPos }; 4498 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ 4499 size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); 4500 *dstPos = output.pos; 4501 *srcPos = input.pos; 4502 return cErr; 4503 } 4504 4505 size_t ZSTD_compress2(ZSTD_CCtx* cctx, 4506 void* dst, size_t dstCapacity, 4507 const void* src, size_t srcSize) 4508 { 4509 ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; 4510 ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; 4511 DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); 4512 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); 4513 /* Enable stable input/output buffers. */ 4514 cctx->requestedParams.inBufferMode = ZSTD_bm_stable; 4515 cctx->requestedParams.outBufferMode = ZSTD_bm_stable; 4516 { size_t oPos = 0; 4517 size_t iPos = 0; 4518 size_t const result = ZSTD_compressStream2_simpleArgs(cctx, 4519 dst, dstCapacity, &oPos, 4520 src, srcSize, &iPos, 4521 ZSTD_e_end); 4522 /* Reset to the original values. 
*/ 4523 cctx->requestedParams.inBufferMode = originalInBufferMode; 4524 cctx->requestedParams.outBufferMode = originalOutBufferMode; 4525 FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); 4526 if (result != 0) { /* compression not completed, due to lack of output space */ 4527 assert(oPos == dstCapacity); 4528 RETURN_ERROR(dstSize_tooSmall, ""); 4529 } 4530 assert(iPos == srcSize); /* all input is expected consumed */ 4531 return oPos; 4532 } 4533 } 4534 4535 typedef struct { 4536 U32 idx; /* Index in array of ZSTD_Sequence */ 4537 U32 posInSequence; /* Position within sequence at idx */ 4538 size_t posInSrc; /* Number of bytes given by sequences provided so far */ 4539 } ZSTD_sequencePosition; 4540 4541 /* Returns a ZSTD error code if sequence is not valid */ 4542 static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, 4543 size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { 4544 size_t offsetBound; 4545 U32 windowSize = 1 << windowLog; 4546 /* posInSrc represents the amount of data the the decoder would decode up to this point. 4547 * As long as the amount of data decoded is less than or equal to window size, offsets may be 4548 * larger than the total length of output decoded in order to reference the dict, even larger than 4549 * window size. After output surpasses windowSize, we're limited to windowSize offsets again. 4550 */ 4551 offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize; 4552 RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); 4553 RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); 4554 return 0; 4555 } 4556 4557 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ 4558 static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { 4559 U32 offCode = rawOffset + ZSTD_REP_MOVE; 4560 U32 repCode = 0; 4561 4562 if (!ll0 && rawOffset == rep[0]) { 4563 repCode = 1; 4564 } else if (rawOffset == rep[1]) { 4565 repCode = 2 - ll0; 4566 } else if (rawOffset == rep[2]) { 4567 repCode = 3 - ll0; 4568 } else if (ll0 && rawOffset == rep[0] - 1) { 4569 repCode = 3; 4570 } 4571 if (repCode) { 4572 /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ 4573 offCode = repCode - 1; 4574 } 4575 return offCode; 4576 } 4577 4578 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of 4579 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
4580 */ 4581 static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, 4582 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 4583 const void* src, size_t blockSize) { 4584 U32 idx = seqPos->idx; 4585 BYTE const* ip = (BYTE const*)(src); 4586 const BYTE* const iend = ip + blockSize; 4587 repcodes_t updatedRepcodes; 4588 U32 dictSize; 4589 U32 litLength; 4590 U32 matchLength; 4591 U32 ll0; 4592 U32 offCode; 4593 4594 if (cctx->cdict) { 4595 dictSize = (U32)cctx->cdict->dictContentSize; 4596 } else if (cctx->prefixDict.dict) { 4597 dictSize = (U32)cctx->prefixDict.dictSize; 4598 } else { 4599 dictSize = 0; 4600 } 4601 ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); 4602 for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { 4603 litLength = inSeqs[idx].litLength; 4604 matchLength = inSeqs[idx].matchLength; 4605 ll0 = litLength == 0; 4606 offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); 4607 updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); 4608 4609 DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 4610 if (cctx->appliedParams.validateSequences) { 4611 seqPos->posInSrc += litLength + matchLength; 4612 FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, 4613 cctx->appliedParams.cParams.windowLog, dictSize, 4614 cctx->appliedParams.cParams.minMatch), 4615 "Sequence validation failed"); 4616 } 4617 RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, 4618 "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); 4619 ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); 4620 ip += matchLength + litLength; 4621 } 4622 ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); 4623 4624 if (inSeqs[idx].litLength) { 4625 DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); 4626 ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); 4627 ip += inSeqs[idx].litLength; 4628 seqPos->posInSrc += inSeqs[idx].litLength; 4629 } 4630 RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); 4631 seqPos->idx = idx+1; 4632 return 0; 4633 } 4634 4635 /* Returns the number of bytes to move the current read position back by. Only non-zero 4636 * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something 4637 * went wrong. 4638 * 4639 * This function will attempt to scan through blockSize bytes represented by the sequences 4640 * in inSeqs, storing any (partial) sequences. 4641 * 4642 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to 4643 * avoid splitting a match, or to avoid splitting a match such that it would produce a match 4644 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
4645 */ 4646 static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, 4647 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 4648 const void* src, size_t blockSize) { 4649 U32 idx = seqPos->idx; 4650 U32 startPosInSequence = seqPos->posInSequence; 4651 U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; 4652 size_t dictSize; 4653 BYTE const* ip = (BYTE const*)(src); 4654 BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ 4655 repcodes_t updatedRepcodes; 4656 U32 bytesAdjustment = 0; 4657 U32 finalMatchSplit = 0; 4658 U32 litLength; 4659 U32 matchLength; 4660 U32 rawOffset; 4661 U32 offCode; 4662 4663 if (cctx->cdict) { 4664 dictSize = cctx->cdict->dictContentSize; 4665 } else if (cctx->prefixDict.dict) { 4666 dictSize = cctx->prefixDict.dictSize; 4667 } else { 4668 dictSize = 0; 4669 } 4670 DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); 4671 DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); 4672 ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); 4673 while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { 4674 const ZSTD_Sequence currSeq = inSeqs[idx]; 4675 litLength = currSeq.litLength; 4676 matchLength = currSeq.matchLength; 4677 rawOffset = currSeq.offset; 4678 4679 /* Modify the sequence depending on where endPosInSequence lies */ 4680 if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { 4681 if (startPosInSequence >= litLength) { 4682 startPosInSequence -= litLength; 4683 litLength = 0; 4684 matchLength -= startPosInSequence; 4685 } else { 4686 litLength -= startPosInSequence; 4687 } 4688 /* Move to the next sequence */ 4689 endPosInSequence -= currSeq.litLength + currSeq.matchLength; 4690 startPosInSequence = 0; 4691 idx++; 4692 } else { 
4693 /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence 4694 does not reach the end of the match. So, we have to split the sequence */ 4695 DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", 4696 currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); 4697 if (endPosInSequence > litLength) { 4698 U32 firstHalfMatchLength; 4699 litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; 4700 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; 4701 if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { 4702 /* Only ever split the match if it is larger than the block size */ 4703 U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; 4704 if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { 4705 /* Move the endPosInSequence backward so that it creates match of minMatch length */ 4706 endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; 4707 bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; 4708 firstHalfMatchLength -= bytesAdjustment; 4709 } 4710 matchLength = firstHalfMatchLength; 4711 /* Flag that we split the last match - after storing the sequence, exit the loop, 4712 but keep the value of endPosInSequence */ 4713 finalMatchSplit = 1; 4714 } else { 4715 /* Move the position in sequence backwards so that we don't split match, and break to store 4716 * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence 4717 * should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so 4718 * would cause the first half of the match to be too small 4719 */ 4720 bytesAdjustment = endPosInSequence - currSeq.litLength; 4721 endPosInSequence = currSeq.litLength; 4722 break; 4723 } 4724 } else { 4725 /* This sequence ends inside the literals, break to store the last literals */ 4726 break; 4727 } 4728 } 4729 /* Check if this offset can be represented with a repcode */ 4730 { U32 ll0 = (litLength == 0); 4731 offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); 4732 updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); 4733 } 4734 4735 if (cctx->appliedParams.validateSequences) { 4736 seqPos->posInSrc += litLength + matchLength; 4737 FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, 4738 cctx->appliedParams.cParams.windowLog, dictSize, 4739 cctx->appliedParams.cParams.minMatch), 4740 "Sequence validation failed"); 4741 } 4742 DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 4743 RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, 4744 "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); 4745 ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); 4746 ip += matchLength + litLength; 4747 } 4748 DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); 4749 assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); 4750 seqPos->idx = idx; 4751 seqPos->posInSequence = endPosInSequence; 4752 ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); 4753 4754 iend -= bytesAdjustment; 4755 if (ip != iend) { 4756 /* Store any last literals */ 4757 U32 lastLLSize = (U32)(iend - ip); 4758 assert(ip <= iend); 4759 DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); 4760 ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); 4761 seqPos->posInSrc += lastLLSize; 4762 } 4763 4764 return bytesAdjustment; 4765 } 4766 4767 typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, 4768 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 4769 const void* src, size_t blockSize); 4770 static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { 4771 ZSTD_sequenceCopier sequenceCopier = NULL; 4772 assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); 4773 if (mode == ZSTD_sf_explicitBlockDelimiters) { 4774 return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; 4775 } else if (mode == ZSTD_sf_noBlockDelimiters) { 4776 return ZSTD_copySequencesToSeqStoreNoBlockDelim; 4777 } 4778 assert(sequenceCopier != NULL); 4779 return sequenceCopier; 4780 } 4781 4782 /* Compress, block-by-block, all of the sequences given. 4783 * 4784 * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. 
4785 */ 4786 static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, 4787 void* dst, size_t dstCapacity, 4788 const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 4789 const void* src, size_t srcSize) { 4790 size_t cSize = 0; 4791 U32 lastBlock; 4792 size_t blockSize; 4793 size_t compressedSeqsSize; 4794 size_t remaining = srcSize; 4795 ZSTD_sequencePosition seqPos = {0, 0, 0}; 4796 4797 BYTE const* ip = (BYTE const*)src; 4798 BYTE* op = (BYTE*)dst; 4799 ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); 4800 4801 DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); 4802 /* Special case: empty frame */ 4803 if (remaining == 0) { 4804 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); 4805 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); 4806 MEM_writeLE32(op, cBlockHeader24); 4807 op += ZSTD_blockHeaderSize; 4808 dstCapacity -= ZSTD_blockHeaderSize; 4809 cSize += ZSTD_blockHeaderSize; 4810 } 4811 4812 while (remaining) { 4813 size_t cBlockSize; 4814 size_t additionalByteAdjustment; 4815 lastBlock = remaining <= cctx->blockSize; 4816 blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; 4817 ZSTD_resetSeqStore(&cctx->seqStore); 4818 DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize); 4819 4820 additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); 4821 FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); 4822 blockSize -= additionalByteAdjustment; 4823 4824 /* If blocks are too small, emit as a nocompress block */ 4825 if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 4826 cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 4827 FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); 4828 DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); 4829 cSize += cBlockSize; 4830 ip += blockSize; 4831 op += cBlockSize; 4832 remaining -= blockSize; 4833 dstCapacity -= cBlockSize; 4834 continue; 4835 } 4836 4837 compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, 4838 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, 4839 &cctx->appliedParams, 4840 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, 4841 blockSize, 4842 cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 4843 cctx->bmi2); 4844 FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); 4845 DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); 4846 4847 if (!cctx->isFirstBlock && 4848 ZSTD_maybeRLE(&cctx->seqStore) && 4849 ZSTD_isRLE((BYTE const*)src, srcSize)) { 4850 /* We don't want to emit our first block as a RLE even if it qualifies because 4851 * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
4852 * This is only an issue for zstd <= v1.4.3 4853 */ 4854 compressedSeqsSize = 1; 4855 } 4856 4857 if (compressedSeqsSize == 0) { 4858 /* ZSTD_noCompressBlock writes the block header as well */ 4859 cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 4860 FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); 4861 DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); 4862 } else if (compressedSeqsSize == 1) { 4863 cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); 4864 FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); 4865 DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); 4866 } else { 4867 U32 cBlockHeader; 4868 /* Error checking and repcodes update */ 4869 ZSTD_confirmRepcodesAndEntropyTables(cctx); 4870 if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 4871 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 4872 4873 /* Write block header into beginning of block*/ 4874 cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); 4875 MEM_writeLE24(op, cBlockHeader); 4876 cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; 4877 DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); 4878 } 4879 4880 cSize += cBlockSize; 4881 DEBUGLOG(4, "cSize running total: %zu", cSize); 4882 4883 if (lastBlock) { 4884 break; 4885 } else { 4886 ip += blockSize; 4887 op += cBlockSize; 4888 remaining -= blockSize; 4889 dstCapacity -= cBlockSize; 4890 cctx->isFirstBlock = 0; 4891 } 4892 } 4893 4894 return cSize; 4895 } 4896 4897 size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, 4898 const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 4899 const void* src, size_t srcSize) { 4900 BYTE* op = (BYTE*)dst; 4901 size_t cSize = 0; 4902 size_t compressedBlocksSize = 0; 4903 size_t frameHeaderSize = 0; 4904 4905 /* Transparent initialization stage, same as 
compressStream2() */ 4906 DEBUGLOG(3, "ZSTD_compressSequences()"); 4907 assert(cctx != NULL); 4908 FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); 4909 /* Begin writing output, starting with frame header */ 4910 frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); 4911 op += frameHeaderSize; 4912 dstCapacity -= frameHeaderSize; 4913 cSize += frameHeaderSize; 4914 if (cctx->appliedParams.fParams.checksumFlag && srcSize) { 4915 XXH64_update(&cctx->xxhState, src, srcSize); 4916 } 4917 /* cSize includes block header size and compressed sequences size */ 4918 compressedBlocksSize = ZSTD_compressSequences_internal(cctx, 4919 op, dstCapacity, 4920 inSeqs, inSeqsSize, 4921 src, srcSize); 4922 FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); 4923 cSize += compressedBlocksSize; 4924 dstCapacity -= compressedBlocksSize; 4925 4926 if (cctx->appliedParams.fParams.checksumFlag) { 4927 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); 4928 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); 4929 DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); 4930 MEM_writeLE32((char*)dst + cSize, checksum); 4931 cSize += 4; 4932 } 4933 4934 DEBUGLOG(3, "Final compressed size: %zu", cSize); 4935 return cSize; 4936 } 4937 4938 /*====== Finalize ======*/ 4939 4940 /*! 
ZSTD_flushStream() : 4941 * @return : amount of data remaining to flush */ 4942 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 4943 { 4944 ZSTD_inBuffer input = { NULL, 0, 0 }; 4945 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); 4946 } 4947 4948 4949 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 4950 { 4951 ZSTD_inBuffer input = { NULL, 0, 0 }; 4952 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); 4953 FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); 4954 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ 4955 /* single thread mode : attempt to calculate remaining to flush more precisely */ 4956 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; 4957 size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); 4958 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; 4959 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); 4960 return toFlush; 4961 } 4962 } 4963 4964 4965 /*-===== Pre-defined compression levels =====-*/ 4966 4967 #define ZSTD_MAX_CLEVEL 22 4968 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } 4969 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } 4970 4971 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { 4972 { /* "default" - for any srcSize > 256 KB */ 4973 /* W, C, H, S, L, TL, strat */ 4974 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ 4975 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ 4976 { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ 4977 { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ 4978 { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ 4979 { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ 4980 { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ 4981 { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* 
level 7 */ 4982 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ 4983 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ 4984 { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ 4985 { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ 4986 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ 4987 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ 4988 { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ 4989 { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ 4990 { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ 4991 { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ 4992 { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ 4993 { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ 4994 { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ 4995 { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ 4996 { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ 4997 }, 4998 { /* for srcSize <= 256 KB */ 4999 /* W, C, H, S, L, T, strat */ 5000 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ 5001 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ 5002 { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ 5003 { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ 5004 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ 5005 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ 5006 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ 5007 { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ 5008 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ 5009 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ 5010 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ 5011 { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ 5012 { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ 5013 { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ 5014 { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ 5015 { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ 5016 { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ 5017 { 18, 19, 19, 8, 3,256, 
ZSTD_btultra }, /* level 17.*/ 5018 { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ 5019 { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ 5020 { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ 5021 { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ 5022 { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ 5023 }, 5024 { /* for srcSize <= 128 KB */ 5025 /* W, C, H, S, L, T, strat */ 5026 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ 5027 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ 5028 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ 5029 { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ 5030 { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ 5031 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ 5032 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ 5033 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ 5034 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ 5035 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ 5036 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ 5037 { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ 5038 { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ 5039 { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ 5040 { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ 5041 { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ 5042 { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ 5043 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ 5044 { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ 5045 { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ 5046 { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ 5047 { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ 5048 { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ 5049 }, 5050 { /* for srcSize <= 16 KB */ 5051 /* W, C, H, S, L, T, strat */ 5052 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ 5053 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ 5054 { 14, 14, 15, 1, 4, 0, ZSTD_fast 
}, /* level 2 */ 5055 { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ 5056 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ 5057 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ 5058 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ 5059 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ 5060 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ 5061 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ 5062 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ 5063 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ 5064 { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ 5065 { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ 5066 { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ 5067 { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ 5068 { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ 5069 { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ 5070 { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ 5071 { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ 5072 { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ 5073 { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ 5074 { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ 5075 }, 5076 }; 5077 5078 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) 5079 { 5080 ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); 5081 switch (cParams.strategy) { 5082 case ZSTD_fast: 5083 case ZSTD_dfast: 5084 break; 5085 case ZSTD_greedy: 5086 case ZSTD_lazy: 5087 case ZSTD_lazy2: 5088 cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; 5089 break; 5090 case ZSTD_btlazy2: 5091 case ZSTD_btopt: 5092 case ZSTD_btultra: 5093 case ZSTD_btultra2: 5094 break; 5095 } 5096 return cParams; 5097 } 5098 5099 static int ZSTD_dedicatedDictSearch_isSupported( 5100 ZSTD_compressionParameters const* cParams) 5101 { 5102 return (cParams->strategy >= ZSTD_greedy) && 
(cParams->strategy <= ZSTD_lazy2); 5103 } 5104 5105 /** 5106 * Reverses the adjustment applied to cparams when enabling dedicated dict 5107 * search. This is used to recover the params set to be used in the working 5108 * context. (Otherwise, those tables would also grow.) 5109 */ 5110 static void ZSTD_dedicatedDictSearch_revertCParams( 5111 ZSTD_compressionParameters* cParams) { 5112 switch (cParams->strategy) { 5113 case ZSTD_fast: 5114 case ZSTD_dfast: 5115 break; 5116 case ZSTD_greedy: 5117 case ZSTD_lazy: 5118 case ZSTD_lazy2: 5119 cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; 5120 break; 5121 case ZSTD_btlazy2: 5122 case ZSTD_btopt: 5123 case ZSTD_btultra: 5124 case ZSTD_btultra2: 5125 break; 5126 } 5127 } 5128 5129 static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 5130 { 5131 switch (mode) { 5132 case ZSTD_cpm_unknown: 5133 case ZSTD_cpm_noAttachDict: 5134 case ZSTD_cpm_createCDict: 5135 break; 5136 case ZSTD_cpm_attachDict: 5137 dictSize = 0; 5138 break; 5139 default: 5140 assert(0); 5141 break; 5142 } 5143 { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; 5144 size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; 5145 return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; 5146 } 5147 } 5148 5149 /*! ZSTD_getCParams_internal() : 5150 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 5151 * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. 5152 * Use dictSize == 0 for unknown or unused. 5153 * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. 
*/ 5154 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 5155 { 5156 U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); 5157 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); 5158 int row; 5159 DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); 5160 5161 /* row */ 5162 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ 5163 else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ 5164 else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; 5165 else row = compressionLevel; 5166 5167 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; 5168 /* acceleration factor */ 5169 if (compressionLevel < 0) { 5170 int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); 5171 cp.targetLength = (unsigned)(-clampedCompressionLevel); 5172 } 5173 /* refine parameters based on srcSize & dictSize */ 5174 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); 5175 } 5176 } 5177 5178 /*! ZSTD_getCParams() : 5179 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 5180 * Size values are optional, provide 0 if not known or unused */ 5181 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) 5182 { 5183 if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; 5184 return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); 5185 } 5186 5187 /*! ZSTD_getParams() : 5188 * same idea as ZSTD_getCParams() 5189 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
5190 * Fields of `ZSTD_frameParameters` are set to default values */ 5191 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { 5192 ZSTD_parameters params; 5193 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); 5194 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); 5195 ZSTD_memset(¶ms, 0, sizeof(params)); 5196 params.cParams = cParams; 5197 params.fParams.contentSizeFlag = 1; 5198 return params; 5199 } 5200 5201 /*! ZSTD_getParams() : 5202 * same idea as ZSTD_getCParams() 5203 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 5204 * Fields of `ZSTD_frameParameters` are set to default values */ 5205 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { 5206 if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; 5207 return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); 5208 } 5209