/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/
#include <limits.h>         /* INT_MAX */
#include <string.h>         /* memset */
#include "cpu.h"
#include "mem.h"
#include "hist.h"           /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"


/*-*************************************
*  Helper functions
***************************************/
size_t ZSTD_compressBound(size_t srcSize) {
    return ZSTD_COMPRESSBOUND(srcSize);
}


/*-*************************************
*  Context memory management
***************************************/
struct ZSTD_CDict_s {
    const void* dictContent;
    size_t dictContentSize;
    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
    ZSTD_cwksp workspace;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;
    U32 dictID;
    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */

ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}

static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    assert(cctx != NULL);
    memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
        assert(!ZSTD_isError(err));
        (void)err;
    }
}

ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_STATIC_ASSERT(zcss_init==0);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
        if (!cctx) return NULL;
        ZSTD_initCCtx(cctx, customMem);
        return cctx;
    }
}

ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_cwksp ws;
    ZSTD_CCtx* cctx;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    ZSTD_cwksp_init(&ws, workspace, workspaceSize);

    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
    if (cctx == NULL) {
        return NULL;
    }
    memset(cctx, 0, sizeof(ZSTD_CCtx));
    ZSTD_cwksp_move(&cctx->workspace, &ws);
    cctx->staticSize = workspaceSize;

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(
        &cctx->workspace, HUF_WORKSPACE_SIZE);
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
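/* Usage sketch (illustrative only, not part of the library) :
 * a CCtx can be carved out of a caller-provided buffer, with no further allocation.
 * Sizing through ZSTD_estimateCCtxSize() is an assumption valid for one-shot,
 * single-threaded compression at a known level; dst/src below are hypothetical.
 *
 *     size_t const wkspSize = ZSTD_estimateCCtxSize(3);   // level 3 chosen as an example
 *     void* const wksp = malloc(wkspSize);                // malloc() memory is suitably aligned
 *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
 *     if (cctx != NULL) {
 *         size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 3);
 *         // check cSize with ZSTD_isError(); free wksp, never the cctx itself
 *     }
 */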
/**
 * Clears and frees all of the dictionaries in the CCtx.
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;
}

static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
    return bufferSize + cdictSize;
}

static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");
    {
        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
        ZSTD_freeCCtxContent(cctx);
        if (!cctxInWorkspace) {
            ZSTD_free(cctx, cctx->customMem);
        }
    }
    return 0;
}


static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;
    return 0;
#endif
}


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support sizeof on NULL */
    /* cctx may be in the workspace */
    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
           + ZSTD_cwksp_sizeof(&cctx->workspace)
           + ZSTD_sizeof_localDict(cctx->localDict)
           + ZSTD_sizeof_mtctx(cctx);
}

size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);   /* same object */
}
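/* Example (sketch) : the effective memory footprint can be queried at any time,
 * e.g. to observe growth after a dictionary is loaded :
 *
 *     size_t const used = ZSTD_sizeof_CCtx(cctx);   // workspace + local dict + mtctx
 */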
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }

static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params cctxParams;
    memset(&cctxParams, 0, sizeof(cctxParams));
    cctxParams.cParams = cParams;
    cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;  /* should not matter, as all cParams are presumed properly defined */
    assert(!ZSTD_checkCParams(cParams));
    cctxParams.fParams.contentSizeFlag = 1;
    return cctxParams;
}

static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params* params;
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
    params = (ZSTD_CCtx_params*)ZSTD_calloc(
            sizeof(ZSTD_CCtx_params), customMem);
    if (!params) { return NULL; }
    params->customMem = customMem;
    params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
    params->fParams.contentSizeFlag = 1;
    return params;
}

ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}

size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params == NULL) { return 0; }
    ZSTD_free(params, params->customMem);
    return 0;
}

size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}

size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}

size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    memset(cctxParams, 0, sizeof(*cctxParams));
    assert(!ZSTD_checkCParams(params.cParams));
    cctxParams->cParams = params.cParams;
    cctxParams->fParams = params.fParams;
    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
    return 0;
}

/* ZSTD_assignParamsToCCtxParams() :
 * params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
        const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    ZSTD_CCtx_params ret = *cctxParams;
    assert(!ZSTD_checkCParams(params.cParams));
    ret.cParams = params.cParams;
    ret.fParams = params.fParams;
    ret.compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
    return ret;
}

ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;
    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_overlapLog:
        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow :
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceCopy;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;
    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
        bounds.lowerBound = ZSTD_lcm_auto;
        bounds.upperBound = ZSTD_lcm_uncompressed;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    case ZSTD_c_srcSizeHint:
        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
        return bounds;

    default:
        {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
            return boundError;
        }
    }
}

/* ZSTD_cParam_clampBounds:
 * Clamps the value into the bounded range.
 */
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return bounds.error;
    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
    if (*value > bounds.upperBound) *value = bounds.upperBound;
    return 0;
}
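/* Example (sketch) : a caller can pre-validate a candidate value against the
 * advertised range before attempting to set it :
 *
 *     ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
 *     if (!ZSTD_isError(b.error)) {
 *         int wlog = 28;                         // candidate value, example only
 *         if (wlog < b.lowerBound) wlog = b.lowerBound;
 *         if (wlog > b.upperBound) wlog = b.upperBound;
 *     }
 */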
#define BOUNDCHECK(cParam, val) {                   \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
                    parameter_outOfBound);          \
}


static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    default:
        return 0;
    }
}

size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong);
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
        break;

    default: RETURN_ERROR(parameter_unsupported);
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
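/* Usage sketch (illustrative) : parameters accumulate on the context and apply
 * to subsequent frames; each call returns a code to be tested with ZSTD_isError() :
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t err;
 *     err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *     (void)err;   // real code checks each return value
 */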
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format :
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel : {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        if (value) {  /* 0 : does not change current level */
            CCtxParams->compressionLevel = value;
        }
        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
        return 0;  /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = (U32)value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = (U32)value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = (U32)value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = (U32)value;
        return (size_t)value;

    case ZSTD_c_minMatch :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength :
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag :
        /* A 32-bit content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow :
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict : {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode : {
        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }

    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        CCtxParams->nbWorkers = value;
        return CCtxParams->nbWorkers;
#endif

    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        /* Adjust to the minimum non-default value. */
        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
            value = ZSTDMT_JOBSIZE_MIN;
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        assert(value >= 0);
        CCtxParams->jobSize = value;
        return CCtxParams->jobSize;
#endif

    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
        CCtxParams->overlapLog = value;
        return CCtxParams->overlapLog;
#endif

    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
        CCtxParams->rsyncable = value;
        return CCtxParams->rsyncable;
#endif

    case ZSTD_c_enableLongDistanceMatching :
        CCtxParams->ldmParams.enableLdm = (value!=0);
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog :
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog :
        RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
                        parameter_outOfBound);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    case ZSTD_c_srcSizeHint :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
        CCtxParams->srcSizeHint = value;
        return CCtxParams->srcSizeHint;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}

size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
{
    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format :
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag :
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow :
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        assert(CCtxParams->nbWorkers == 0);
#endif
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        assert(CCtxParams->jobSize <= INT_MAX);
        *value = (int)CCtxParams->jobSize;
        break;
#endif
    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->overlapLog;
        break;
#endif
    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->rsyncable;
        break;
#endif
    case ZSTD_c_enableLongDistanceMatching :
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize :
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint :
        *value = (int)CCtxParams->srcSizeHint;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}
/** ZSTD_CCtx_setParametersUsingCCtxParams() :
 *  just applies `params` into `cctx`;
 *  no compression is performed, parameters are merely stored.
 *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
 *  This is possible even if a compression is ongoing :
 *  in that case, new parameters will be applied on the fly, starting with the next compression job.
 */
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->cdict, stage_wrong);

    cctx->requestedParams = *params;
    return 0;
}

ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    return 0;
}

/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
            &cctx->requestedParams, 0, dl->dictSize);
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    dl->cdict = ZSTD_createCDict_advanced(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            cParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation);
    cctx->cdict = dl->cdict;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "no malloc for static CCtx");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;
    } else {
        void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation);
        memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}

ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
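/* Usage sketch (illustrative) : the dictionary entry points are mutually
 * exclusive; each one clears any previously registered dictionary, so the
 * most recent call wins. dictBuf/prefixBuf below are hypothetical buffers.
 *
 *     ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictLen);             // copies dictBuf; caller may free it
 *     // or :
 *     ZSTD_CCtx_loadDictionary_byReference(cctx, dictBuf, dictLen); // dictBuf must outlive usage
 *     // or :
 *     ZSTD_CCtx_refPrefix(cctx, prefixBuf, prefixLen);              // raw-content prefix, single frame
 */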
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}

size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}

size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    ZSTD_clearAllDicts(cctx);
    cctx->prefixDict.dict = prefix;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dictContentType = dictContentType;
    return 0;
}

/*! ZSTD_CCtx_reset() :
 *  Also discards any registered dictionary. */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        cctx->streamStage = zcss_init;
        cctx->pledgedSrcSizePlusOne = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
        ZSTD_clearAllDicts(cctx);
        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
    }
    return 0;
}


/** ZSTD_checkCParams() :
 *  checks that CParam values remain within the authorized range.
 * @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}

/** ZSTD_clampCParams() :
 *  clamps CParam values into the valid range.
 * @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
    return hashLog - btScale;
}
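/* e.g. (sketch) : ZSTD_cycleLog(17, ZSTD_btlazy2) == 16 :
 * binary-tree strategies (>= ZSTD_btlazy2) store two entries per indexed
 * position, so the effective cycle is one log smaller than the table log. */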
/** ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 *  note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize)
{
    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
        srcSize = minSrcSize;  /* presumed small when there is a dictionary */
    else if (srcSize == 0)
        srcSize = ZSTD_CONTENTSIZE_UNKNOWN;  /* 0 == unknown : presumed large */

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) ) {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cycleLog > cPar.windowLog)
            cPar.chainLog -= (cycleLog - cPar.windowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */

    return cPar;
}

ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}

ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
    ZSTD_compressionParameters cParams;
    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
        srcSizeHint = CCtxParams->srcSizeHint;
    }
    cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
    if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
    assert(!ZSTD_checkCParams(cParams));
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
}
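/* Worked example (sketch) : for a 10 KB input with no dictionary,
 * tSize = 10240, which is below maxWindowResize, and
 * srcLog = ZSTD_highbit32(10239)+1 = 13+1 = 14 ;
 * so a windowLog of, say, 22 shrinks to 14,
 * and hashLog is then capped at windowLog+1 = 15. */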
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const U32 forCCtx)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
     * surrounded by redzones in ASAN. */
    size_t const tableSpace = chainSize * sizeof(U32)
                            + hSize * sizeof(U32)
                            + h3Size * sizeof(U32);
    size_t const optPotentialSpace =
        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace;
}

size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        U32    const divider = (cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                                + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
                                + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
        size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
        size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);

        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
        size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));

        size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
                                   matchStateSize + ldmSpace + ldmSeqSpace;
        size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));

        DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace);
        DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
        return cctxSpace + neededSpace;
    }
}

size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}

static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCCtxSize_usingCParams(cParams);
}

size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
        size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
        size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
                                   + ZSTD_cwksp_alloc_size(outBuffSize);

        return CCtxSize + streamingSize;
    }
}

size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}

static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCStreamSize_usingCParams(cParams);
}

size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}

/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_getFrameProgression(cctx->mtctx);
    }
#endif
    {   ZSTD_frameProgression fp;
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        fp.currentJobID = 0;
        fp.nbActiveWorkers = 0;
        return fp;
    }
}
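/* Polling sketch (illustrative), e.g. for a progress bar while a long
 * streaming compression runs with nbWorkers >= 1 :
 *
 *     ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
 *     // invariants : fp.ingested >= fp.consumed ; fp.produced >= fp.flushed
 */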
/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_toFlushNow(cctx->mtctx);
    }
#endif
    (void)cctx;
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}

static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}

static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i)
        bs->rep[i] = repStartValue[i];
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}

/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;
}

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyway (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/**
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      ZSTD_cwksp* ws,
                const ZSTD_compressionParameters* cParams,
                const ZSTD_compResetPolicy_e crp,
                const ZSTD_indexResetPolicy_e forceResetIndex,
                const ZSTD_resetTarget_e forWho)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;

    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
    if (forceResetIndex == ZSTDirp_reset) {
        memset(&ms->window, 0, sizeof(ms->window));
        ms->window.dictLimit = 1;    /* start from 1, so that 1st position is valid */
        ms->window.lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
        ms->window.nextSrc = ms->window.base + 1;   /* see issue #1241 */
        ZSTD_cwksp_mark_tables_dirty(ws);
    }

    ms->hashLog3 = hashLog3;

    ZSTD_invalidateMatchState(ms);

    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */

    ZSTD_cwksp_clear_tables(ws);

    DEBUGLOG(5, "reserving table space");
    /* table Space */
    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
    if (crp!=ZSTDcrp_leaveDirty) {
        /* reset tables only */
        ZSTD_cwksp_clean_tables(ws);
    }

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    }

    ms->cParams = *cParams;

    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    return 0;
}

/* ZSTD_indexTooCloseToMax() :
 * minor optimization : prefer memset() rather than reduceIndex()
 * which is measurably slow in some circumstances (reported for Visual Studio).
 * Works when re-using a context for a lot of smallish inputs :
 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
 * memset() will be triggered before reduceIndex().
 */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
/*! ZSTD_resetCCtx_internal() :
 *  note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params params,
                                      U64 const pledgedSrcSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    ZSTD_cwksp* const ws = &zc->workspace;
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
                (U32)pledgedSrcSize, params.cParams.windowLog);
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));

    zc->isFirstBlock = 1;

    if (params.ldmParams.enableLdm) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
        assert(params.ldmParams.hashRateLog < 32);
        zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
    }

    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                                + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
                                + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
        size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);

        ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue;

        if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
            needsIndexReset = ZSTDirp_reset;
        }

        ZSTD_cwksp_bump_oversized_duration(ws, 0);

        /* Check if workspace is large enough, alloc a new one if needed */
        {   size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
            size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
            size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
            size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
            size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
            size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));

            size_t const neededSpace =
                cctxSpace +
                entropySpace +
                blockStateSpace +
                ldmSpace +
                ldmSeqSpace +
                matchStateSize +
                tokenSpace +
                bufferSpace;
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);

            DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
                        neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (workspaceTooSmall || workspaceWasteful) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);

                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                needsIndexReset = ZSTDirp_reset;

                ZSTD_cwksp_free(ws, zc->customMem);
                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem));

                DEBUGLOG(5, "reserving object space");
                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
        }   }

        ZSTD_cwksp_clear(ws);

        /* init params */
        zc->appliedParams = params;
        zc->blockState.matchState.cParams = params.cParams;
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        XXH64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
        zc->seqStore.maxNbLit = blockSize;
        /* buffers */
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
        zc->outBuffSize = buffOutSize;
        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

        /* ldm bucketOffsets table */
        if (params.ldmParams.enableLdm) {
            /* TODO: avoid memset? */
            size_t const ldmBucketSize =
                  ((size_t)1) << (params.ldmParams.hashLog -
                                  params.ldmParams.bucketSizeLog);
            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
            memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
        }

        /* sequences storage */
        ZSTD_referenceExternalSequences(zc, NULL, 0);
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

        FORWARD_IF_ERROR(ZSTD_reset_matchState(
            &zc->blockState.matchState,
            ws,
            &params.cParams,
            crp,
            needsIndexReset,
            ZSTD_resetTarget_CCtx));

        /* ldm hash table */
        if (params.ldmParams.enableLdm) {
            /* TODO: avoid memset? */
            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
            memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
            zc->maxNbLdmSequences = maxNbLdmSeq;

            memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
            ZSTD_window_clear(&zc->ldmState.window);
        }

        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));

        return 0;
    }
}

/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int i;
    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};

static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    return ( pledgedSrcSize <= cutoff
          || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
          || params->attachDictPref == ZSTD_dictForceAttach )
        && params->attachDictPref != ZSTD_dictForceCopy
        && !params->forceWindow; /* dictMatchState isn't correctly
                                  * handled in _enforceMaxDist */
}

static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                        const ZSTD_CDict* cdict,
                        ZSTD_CCtx_params params,
                        U64 pledgedSrcSize,
                        ZSTD_buffered_policy_e zbuff)
{
    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
        params.cParams.windowLog = windowLog;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                 ZSTDcrp_makeClean, zbuff));
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
    }

    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;

    /* copy block state */
    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}

static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    DEBUGLOG(4, "copying dictionary into context");

    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables.
         */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                 ZSTDcrp_leaveDirty, zbuff));
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);

    /* copy tables */
    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;

        memcpy(cctx->blockState.matchState.hashTable,
               cdict->matchState.hashTable,
               hSize * sizeof(U32));
        memcpy(cctx->blockState.matchState.chainTable,
               cdict->matchState.chainTable,
               chainSize * sizeof(U32));
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd = srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;

    /* copy block state */
    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}

/* We have a choice between copying the dictionary context into the working
 * context, or referencing the dictionary context from the working context
 * in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            const ZSTD_CCtx_params* params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{

    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
                (unsigned)pledgedSrcSize);

    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
        return ZSTD_resetCCtx_byAttachingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    } else {
        return ZSTD_resetCCtx_byCopyingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    }
}
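/* Worked example of the attach-vs-copy decision above (illustrative) :
 * with strategy ZSTD_lazy, attachDictSizeCutoffs[] gives a 32 KB cutoff.
 * - pledgedSrcSize = 8 KB  -> attach : the input is small, so referencing the
 *   cdict tables in-place is cheaper than copying them.
 * - pledgedSrcSize = 1 MB  -> copy : the copy cost is amortized over the input.
 * - ZSTD_dictForceAttach / ZSTD_dictForceCopy (attachDictPref) override the
 *   size heuristic in either direction.
 */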
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong);

    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
                                ZSTDcrp_leaveDirty, zbuff);
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        memcpy(dstCCtx->blockState.matchState.hashTable,
               srcCCtx->blockState.matchState.hashTable,
               hSize * sizeof(U32));
        memcpy(dstCCtx->blockState.matchState.chainTable,
               srcCCtx->blockState.matchState.chainTable,
               chainSize * sizeof(U32));
        memcpy(dstCCtx->blockState.matchState.hashTable3,
               srcCCtx->blockState.matchState.hashTable3,
               h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd = srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;

    /* copy block state */
    memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}

/*! ZSTD_copyCCtx() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  pledgedSrcSize==0 means "unknown".
 *  @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);

    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
                                  fParams, pledgedSrcSize,
                                  zbuff);
}


#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));   /* can be cast to int */

#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty.
     *
     * This function however is intended to operate on those dirty tables and
     * re-clean them. So when this function is used correctly, we can unpoison
     * the memory it operated on. This introduces a blind spot though, since
     * if we now try to operate on __actually__ poisoned memory, we will not
     * detect that. */
    __msan_unpoison(table, size * sizeof(U32));
#endif

    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}

static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}

static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
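/* Worked example of the reduction above (illustrative) :
 * with reducerValue = 0x10000 :
 *   index 0x18000 -> 0x08000   (still reachable, rescaled)
 *   index 0x0C000 -> 0         (older than the reducer : squashed to zero)
 * For btlazy2, ZSTD_DUBT_UNSORTED_MARK first gets reducerValue added back,
 * so the mark survives the subtraction unchanged.
 */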
/*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    if (params->cParams.strategy != ZSTD_fast) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        if (params->cParams.strategy == ZSTD_btlazy2)
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }

    if (ms->hashLog3) {
        U32 const h3Size = (U32)1 << ms->hashLog3;
        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
    }
}


/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */

static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall);
    MEM_writeLE24(dst, cBlockHeader24);
    memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}

void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    assert(nbSeq <= seqStorePtr->maxNbSeq);
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].matchLength;
        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
    }
    if (seqStorePtr->longLengthID==1)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthID==2)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}

static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_lcm_huffman:
        return 0;
    case ZSTD_lcm_uncompressed:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        /* fall-through */
    case ZSTD_lcm_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}
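/* Note on ZSTD_lcm_auto above (illustrative reading, not asserted by this file) :
 * strategy==ZSTD_fast combined with a non-zero targetLength is the parameter
 * shape produced by the negative "fast" compression levels, so in auto mode
 * literals compression is skipped exactly for those levels, while every
 * regular level keeps Huffman literals enabled.
 */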
/* ZSTD_compressSequences_internal():
 * actually compresses both literals and sequences */
MEM_STATIC size_t
ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned count[MaxSeq+1];
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    BYTE* seqHead;
    BYTE* lastNCount = NULL;

    DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_disableLiteralsCompression(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2);
        FORWARD_IF_ERROR(cSize);
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall);
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
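    /* Worked example of the nbSeq header just written (illustrative) :
     *   nbSeq = 100 -> 1 byte  : 0x64
     *   nbSeq = 300 -> 2 bytes : 0x81 0x2C   (((0x81-0x80)<<8) | 0x2C == 300)
     *   nbSeq >= LONGNBSEQ (0x7F00) -> 3 bytes : 0xFF then LE16(nbSeq - LONGNBSEQ)
     */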
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;
    assert(op <= oend);

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->fse.litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                count, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->fse.litlengthCTable,
                sizeof(prevEntropy->fse.litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (LLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->fse.offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                count, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->fse.offcodeCTable,
                sizeof(prevEntropy->fse.offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (Offtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                count, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->fse.matchlengthCTable,
                sizeof(prevEntropy->fse.matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (MLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }

    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
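    /* Worked example of the seqHead byte (illustrative) :
     * with LLtype==set_compressed (2), Offtype==set_basic (0), MLtype==set_repeat (3) :
     * seqHead = (2<<6) + (0<<4) + (3<<2) = 0x8C ; the low 2 bits stay reserved (0).
     */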
    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize);
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastNCount && (op - lastNCount) < 4) {
            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(op - lastNCount == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}

MEM_STATIC size_t
ZSTD_compressSequences(seqStore_t* seqStorePtr,
                 const ZSTD_entropyCTables_t* prevEntropy,
                       ZSTD_entropyCTables_t* nextEntropy,
                 const ZSTD_CCtx_params* cctxParams,
                       void* dst, size_t dstCapacity,
                       size_t srcSize,
                       void* entropyWorkspace, size_t entropyWkspSize,
                       int bmi2)
{
    size_t const cSize = ZSTD_compressSequences_internal(
                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                            dst, dstCapacity,
                            entropyWorkspace, entropyWkspSize, bmi2);
    if (cSize == 0) return 0;
    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
     * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block.
     */
    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
        return 0;  /* block not compressed */
    FORWARD_IF_ERROR(cSize);

    /* Check compressibility */
    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
        if (cSize >= maxCSize) return 0;  /* block not compressed */
    }

    return cSize;
}

/* ZSTD_selectBlockCompressor() :
 * Not static, but internal use only (used by long distance matcher)
 * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
    static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
        { ZSTD_compressBlock_fast  /* default for 0 */,
          ZSTD_compressBlock_fast,
          ZSTD_compressBlock_doubleFast,
          ZSTD_compressBlock_greedy,
          ZSTD_compressBlock_lazy,
          ZSTD_compressBlock_lazy2,
          ZSTD_compressBlock_btlazy2,
          ZSTD_compressBlock_btopt,
          ZSTD_compressBlock_btultra,
          ZSTD_compressBlock_btultra2 },
        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
          ZSTD_compressBlock_fast_extDict,
          ZSTD_compressBlock_doubleFast_extDict,
          ZSTD_compressBlock_greedy_extDict,
          ZSTD_compressBlock_lazy_extDict,
          ZSTD_compressBlock_lazy2_extDict,
          ZSTD_compressBlock_btlazy2_extDict,
          ZSTD_compressBlock_btopt_extDict,
          ZSTD_compressBlock_btultra_extDict,
          ZSTD_compressBlock_btultra_extDict },
        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
          ZSTD_compressBlock_fast_dictMatchState,
          ZSTD_compressBlock_doubleFast_dictMatchState,
          ZSTD_compressBlock_greedy_dictMatchState,
          ZSTD_compressBlock_lazy_dictMatchState,
          ZSTD_compressBlock_lazy2_dictMatchState,
          ZSTD_compressBlock_btlazy2_dictMatchState,
          ZSTD_compressBlock_btopt_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState }
    };
    ZSTD_blockCompressor selectedCompressor;
    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
    assert(selectedCompressor != NULL);
    return selectedCompressor;
}
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    memcpy(seqStorePtr->lit, anchor, lastLLSize);
    seqStorePtr->lit += lastLLSize;
}

void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->longLengthID = 0;
}

typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;

static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 current = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (current > ms->nextToUpdate + 384)
            ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        {   int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            assert(!zc->appliedParams.ldmParams.enableLdm);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm) {
            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                                        &zc->appliedParams.ldmParams,
                                                        src, srcSize));
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
                                                                                    dictMode);
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}

static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqs = seqStore->sequencesStart;
    size_t seqsSize = seqStore->sequences - seqs;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i; size_t position; int repIdx;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    for (i = 0, position = 0; i < seqsSize; ++i) {
        outSeqs[i].offset = seqs[i].offset;
        outSeqs[i].litLength = seqs[i].litLength;
        outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;

        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthID == 1) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthID == 2) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (outSeqs[i].offset <= ZSTD_REP_NUM) {
            outSeqs[i].rep = outSeqs[i].offset;
            repIdx = (unsigned int)i - outSeqs[i].offset;

            if (outSeqs[i].litLength == 0) {
                if (outSeqs[i].offset < 3) {
                    --repIdx;
                } else {
                    repIdx = (unsigned int)i - 1;
                }
                ++outSeqs[i].rep;
            }
            assert(repIdx >= -3);
            outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
            if (outSeqs[i].rep == 4) {
                --outSeqs[i].offset;
            }
        } else {
            outSeqs[i].offset -= ZSTD_REP_NUM;
        }

        position += outSeqs[i].litLength;
        outSeqs[i].matchPos = (unsigned int)position;
        position += outSeqs[i].matchLength;
    }
    zc->seqCollector.seqIndex += seqsSize;
}

size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                         size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation);

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    ZSTD_free(dst, ZSTD_defaultCMem);
    return zc->seqCollector.seqIndex;
}

/* Returns true if the given block is a RLE block */
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
    size_t i;
    if (length < 2) return 1;
    for (i = 1; i < length; ++i) {
        if (ip[0] != ip[i]) return 0;
    }
    return 1;
}
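/* Usage sketch for ZSTD_getSequences() (illustrative only ; error handling is
 * elided, the helper name is hypothetical, and the sequence-count bound is a
 * caller-side assumption based on MINMATCH==3) :
 */
#if 0
#include <stdlib.h>   /* malloc, free */

static void exampleInspectSequences(const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const maxSeqs = srcSize / 3 + 1;   /* at most one sequence per 3 bytes */
    ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(maxSeqs * sizeof(ZSTD_Sequence));
    if (cctx && seqs) {
        size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, maxSeqs, src, srcSize);
        size_t n;
        for (n = 0; n < nbSeqs; n++) {
            /* seqs[n].offset / litLength / matchLength / matchPos describe
             * the LZ sequences the parser found in src */
        }
    }
    free(seqs);
    ZSTD_freeCCtx(cctx);
}
#endif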
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize, U32 frame)
{
    /* This is the upper bound for the length of an rle block.
     * This isn't the actual upper bound. Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss);
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_compressSequences(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (frame &&
        /* We don't want to emit our first block as an RLE even if it qualifies, because
         * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        cSize = 1;
        op[0] = ip[0];
    }

out:
    if (!ZSTD_isError(cSize) && cSize > 1) {
        /* confirm repcodes and entropy tables when emitting a compressed block */
        ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
        zc->blockState.prevCBlock = zc->blockState.nextCBlock;
        zc->blockState.nextCBlock = tmp;
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}


static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
        U32 const maxDist = (U32)1 << params->cParams.windowLog;
        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}

/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  Frame is expected to be already started (header already produced).
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize = ZSTD_compressBlock_internal(cctx,
                                op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                ip, blockSize, 1 /* frame */);
            FORWARD_IF_ERROR(cSize);
            if (cSize == 0) {  /* block is not compressible */
                cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize);
            } else {
                const U32 cBlockHeader = cSize == 1 ?
                    lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                    lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                MEM_writeLE24(op, cBlockHeader);
                cSize += ZSTD_blockHeaderSize;
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}


static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);

    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:  assert(0); /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:  assert(0); /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}

/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
{
    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall);
    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
        MEM_writeLE24(dst, cBlockHeader24);
        return ZSTD_blockHeaderSize;
    }
}

size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong);
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
                    parameter_unsupported);
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.size = nbSeq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.pos = 0;
    return 0;
}
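/* Worked example of ZSTD_writeFrameHeader() output (illustrative) :
 * pledgedSrcSize = 1000, contentSizeFlag = 1, no checksum, no dictID,
 * windowSize >= 1000 so singleSegment = 1, fcsCode = 1 (256 <= 1000 < 65536+256) :
 *   magic : 28 B5 2F FD     (ZSTD_MAGICNUMBER, little-endian)
 *   FHD   : 0x60            ((fcsCode<<6) | (singleSegment<<5))
 *   fcs   : E8 02           (LE16(1000 - 256) == LE16(744))
 * No windowLog byte is emitted : single-segment frames use the frame
 * content size as the window size.
 */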
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize);
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize);
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}

size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}


size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
    assert(!ZSTD_checkCParams(cParams));
    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}

size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    {   size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
        RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
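/* Usage sketch for the raw block API above (illustrative only ; the helper
 * name and the fixed level are assumptions). Raw blocks carry no frame header,
 * no checksum and no size information, so the caller must track block
 * boundaries and sizes itself :
 */
#if 0
static size_t exampleCompressOneBlock(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize)
{
    FORWARD_IF_ERROR(ZSTD_compressBegin(cctx, 3 /* compression level */));
    assert(srcSize <= ZSTD_getBlockSize(cctx));
    /* a return of 0 means "not compressible" : the caller must then store
     * the block uncompressed, and remember that fact for decompression */
    return ZSTD_compressBlock(cctx, dst, dstCapacity, src, srcSize);
}
#endif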
/*! ZSTD_loadDictionaryContent() :
 *  @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    ZSTD_window_update(&ms->window, src, srcSize);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    if (srcSize <= HASH_READ_SIZE) return 0;

    while (iend - ip > HASH_READ_SIZE) {
        size_t const remaining = (size_t)(iend - ip);
        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
        const BYTE* const ichunk = ip + chunk;

        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);

        switch(params->cParams.strategy)
        {
        case ZSTD_fast:
            ZSTD_fillHashTable(ms, ichunk, dtlm);
            break;
        case ZSTD_dfast:
            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
            break;

        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
            break;

        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
            break;

        default:
            assert(0);  /* not possible : not a valid strategy id */
        }

        ip = ichunk;
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}


/* Dictionaries that assign zero probability to symbols that show up cause problems
 * when FSE encoding. Refuse dictionaries that assign zero probability to symbols
 * that we may encounter during compression.
 * NOTE: This behavior is not standard and could be improved in the future. */
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
    U32 s;
    RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted);
    for (s = 0; s <= maxSymbolValue; ++s) {
        RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted);
    }
    return 0;
}


/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 * @return : dictID, or an error code
 *  assumptions : magic number already checked
 *                dictSize supposed >= 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    size_t dictID;

    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize >= 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictPtr += 4;   /* skip magic number */
    dictID = params->fParams.noDictIDFlag ?
                0 : MEM_readLE32(dictPtr);
    dictPtr += 4;

    {   unsigned maxSymbolValue = 255;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted);
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
        /* Every match length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
        /* Every literal length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += litlengthHeaderSize;
    }

    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable */
        FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
        /* All repCodes must be <= dictContentSize and
         * != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted);
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted);
        }   }

        bs->entropy.huf.repeatMode = HUF_repeat_valid;
        bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, ws, params, dictPtr, dictContentSize, dtlm));
        return dictID;
    }
}

/** ZSTD_compress_insertDictionary() :
*   @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                               ZSTD_cwksp* ws,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
        return 0;
    }

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(
                ms, ws, params, dict, dictSize, dtlm);
        }
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(
        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}
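/* Layout of a full zstd dictionary, as parsed by ZSTD_loadZstdDictionary()
 * above (summary of doc/zstd_compression_format.md#dictionary-format) :
 *   4 bytes  : magic    (ZSTD_MAGIC_DICTIONARY, little-endian)
 *   4 bytes  : dictID
 *   variable : Huffman literals table, then FSE tables for offsets,
 *              match lengths and literal lengths (in that parsing order)
 *   12 bytes : 3 initial repcodes, 4 bytes each
 *   rest     : raw content, loaded into the match state
 */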
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)

/*! ZSTD_compressBegin_internal() :
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
                                     ZSTDcrp_makeClean, zbuff) );
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize,
                        dictContentType, dtlm, cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->workspace, params, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID);
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
    }
    return 0;
}

size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) );
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}

/*! ZSTD_compressBegin_advanced() :
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params const cctxParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}

size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
    ZSTD_CCtx_params const cctxParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
}
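/* Usage sketch for the begin/continue/end entry points above and below
 * (illustrative only ; the helper name is hypothetical, and buffer sizing,
 * error cleanup and context reuse are the caller's responsibility) :
 */
#if 0
static size_t exampleCompressTwoChunks(void* dst, size_t dstCapacity,
                                       const void* chunk1, size_t size1,
                                       const void* chunk2, size_t size2)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    BYTE* op = (BYTE*)dst;
    size_t cSize;
    FORWARD_IF_ERROR(ZSTD_compressBegin(cctx, 3));
    /* each call emits fully terminated blocks into dst */
    cSize = ZSTD_compressContinue(cctx, op, dstCapacity, chunk1, size1);
    FORWARD_IF_ERROR(cSize); op += cSize; dstCapacity -= cSize;
    /* last call : also writes the epilogue (last block + optional checksum) */
    cSize = ZSTD_compressEnd(cctx, op, dstCapacity, chunk2, size2);
    FORWARD_IF_ERROR(cSize); op += cSize;
    ZSTD_freeCCtx(cctx);
    return (size_t)(op - (BYTE*)dst);
}
#endif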
/*! ZSTD_writeEpilogue() :
 *  Ends a frame.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize);
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);   /* MEM_writeLE32 writes 4 bytes, though the block header only occupies 3 */
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}

size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize);
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult);
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    return cSize + endResult;
}


static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const void* dict, size_t dictSize,
                                      ZSTD_parameters params)
{
    ZSTD_CCtx_params const cctxParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    DEBUGLOG(4, "ZSTD_compress_internal");
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctxParams);
}

size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams));
    return ZSTD_compress_internal(cctx,
                                  dst, dstCapacity,
                                  src, srcSize,
                                  dict, dictSize,
                                  params);
}
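
/* Example (illustrative sketch) : driving ZSTD_compress_advanced() with
 * parameters derived from a compression level, then overriding one frame
 * parameter. All buffer names are hypothetical.
 *
 *     size_t exampleCompressChecksummed(ZSTD_CCtx* cctx,
 *                                       void* dst, size_t dstCapacity,
 *                                       const void* src, size_t srcSize)
 *     {
 *         ZSTD_parameters params = ZSTD_getParams(5, srcSize, 0);
 *         params.fParams.checksumFlag = 1;   // request the 4-byte frame checksum
 *         return ZSTD_compress_advanced(cctx, dst, dstCapacity,
 *                                       src, srcSize, NULL, 0, params);
 *     }
 */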
/* Internal */
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict, size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) );
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize) /* avoid 0, which means "unknown" */, dict ? dictSize : 0);
    ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    assert(params.fParams.contentSizeFlag == 1);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
}

size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}

size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
    ZSTD_CCtx ctxBody;
    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtxContent(&ctxBody);   /* can't free ctxBody itself, as it's on stack; free only heap content */
    return result;
}
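
/* Example (illustrative sketch) : minimal one-shot compression.
 * Sizing `dst` with ZSTD_compressBound() guarantees the call cannot fail
 * with dstSize_tooSmall. Buffer names are hypothetical.
 *
 *     size_t const dstCapacity = ZSTD_compressBound(srcSize);
 *     // ... provide dst with at least dstCapacity bytes ...
 *     {   size_t const cSize = ZSTD_compress(dst, dstCapacity,
 *                                            src, srcSize, ZSTD_CLEVEL_DEFAULT);
 *         if (ZSTD_isError(cSize)) { ... }   // handle error
 *     }
 */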
/* =====   Dictionary API   ===== */

/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}

size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}

size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace */
    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
           + ZSTD_cwksp_sizeof(&cdict->workspace);
}

static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_compressionParameters cParams)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(cParams));
    cdict->matchState.cParams = cParams;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        cdict->dictContent = dictBuffer;
    } else {
        void* internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation);
        cdict->dictContent = internalBuffer;
        memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &cParams,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict));
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   ZSTD_CCtx_params params;
        memset(&params, 0, sizeof(params));
        params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        params.cParams = cParams;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID);
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}

ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
    DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_malloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) return NULL;

        ZSTD_cwksp_init(&ws, workspace, workspaceSize);

        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = 0; /* signals advanced API usage */

        if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                        dictBuffer, dictSize,
                                        dictLoadMethod, dictContentType,
                                        cParams) )) {
            ZSTD_freeCDict(cdict);
            return NULL;
        }

        return cdict;
    }
}

ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
    ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                                  cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}

ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
    return ZSTD_createCDict_advanced(dict, dictSize,
                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
                                     cParams, ZSTD_defaultCMem);
}

size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = cdict->customMem;
        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
        ZSTD_cwksp_free(&cdict->workspace, cMem);
        if (!cdictInWorkspace) {
            ZSTD_free(cdict, cMem);
        }
        return 0;
    }
}
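
/* Example (illustrative sketch) : typical CDict lifecycle, amortizing the
 * cost of loading one dictionary across many frames. Names are hypothetical;
 * ZSTD_compress_usingCDict() is defined further below in this file.
 *
 *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictBufSize, 3);
 *     if (cdict != NULL) {
 *         size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
 *                                                       src, srcSize, cdict);
 *         // ... reuse cdict for as many frames as desired ...
 *         ZSTD_freeCDict(cdict);
 *     }
 */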
/*! ZSTD_initStaticCDict() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must be 8-byte aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevant cParams.
 * @return : pointer to ZSTD_CDict constructed within workspace, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {
        ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              cParams) ))
        return NULL;

    return cdict;
}
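
/* Example (illustrative sketch) : constructing a CDict inside caller-owned
 * memory, with no dynamic allocation. `workspaceBuf` is a hypothetical
 * 8-byte-aligned buffer; its required size comes from ZSTD_estimateCDictSize().
 *
 *     size_t const wkspSize = ZSTD_estimateCDictSize(dictSize, 3);
 *     // ... provide workspaceBuf with at least wkspSize bytes, 8-byte aligned ...
 *     {   const ZSTD_CDict* const cdict = ZSTD_initStaticCDict(
 *                 workspaceBuf, wkspSize,
 *                 dictBuf, dictSize,
 *                 ZSTD_dlm_byCopy, ZSTD_dct_auto,
 *                 ZSTD_getCParams(3, 0, dictSize));
 *         // cdict lives inside workspaceBuf : nothing to free,
 *         // but workspaceBuf must outlive every use of cdict
 *     }
 */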
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
{
    assert(cdict != NULL);
    return cdict->matchState.cParams;
}

/* ZSTD_compressBegin_usingCDict_advanced() :
 * cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict_advanced(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
    {   ZSTD_CCtx_params params = cctx->requestedParams;
        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
                        || cdict->compressionLevel == 0 )
                      && (params.attachDictPref != ZSTD_dictForceLoad) ?
                ZSTD_getCParamsFromCDict(cdict)
              : ZSTD_getCParams(cdict->compressionLevel,
                                pledgedSrcSize,
                                cdict->dictContentSize);
        /* Increase window log to fit the entire dictionary and source if the
         * source size is known. Limit the increase to 19, which is the
         * window log for compression level 1 with the largest source size.
         */
        if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
            U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
            U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
            params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
        }
        params.fParams = fParams;
        return ZSTD_compressBegin_internal(cctx,
                                           NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                           cdict,
                                           &params, pledgedSrcSize,
                                           ZSTDb_not_buffered);
    }
}

/* ZSTD_compressBegin_usingCDict() :
 * pledgedSrcSize=0 means "unknown"
 * if pledgedSrcSize>0, it will enable contentSizeFlag */
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
}

size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                         void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize));   /* will check if cdict != NULL */
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

/*! ZSTD_compress_usingCDict() :
 *  Compression using a digested Dictionary.
 *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
 *  Note that compression parameters are decided at CDict creation time
 *  while frame parameters are hardcoded */
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize,
                          const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}



/* ******************************************************************
 *  Streaming
 ********************************************************************/

ZSTD_CStream* ZSTD_createCStream(void)
{
    DEBUGLOG(3, "ZSTD_createCStream");
    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
}

ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
{
    return ZSTD_initStaticCCtx(workspace, workspaceSize);
}

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{   /* CStream and CCtx are now same object */
    return ZSTD_createCCtx_advanced(customMem);
}

size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    return ZSTD_freeCCtx(zcs);   /* same object */
}



/*======   Initialization   ======*/

size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }

size_t ZSTD_CStreamOutSize(void)
{
    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bit checksum */ ;
}
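
/* Example (illustrative sketch) : recommended buffer sizing for the streaming
 * API. With these sizes, one full block can always be consumed and flushed per
 * call, so a streaming loop never stalls on buffer granularity.
 *
 *     size_t const inSize  = ZSTD_CStreamInSize();    // == ZSTD_BLOCKSIZE_MAX
 *     size_t const outSize = ZSTD_CStreamOutSize();   // worst case for one compressed block
 *     // ... allocate inBuf[inSize] and outBuf[outSize] once, reuse for every call ...
 */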
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
                    const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
                    const ZSTD_CDict* const cdict,
                    ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_resetCStream_internal");
    /* Finalize the compression parameters */
    params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */

    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                                                  dict, dictSize, dictContentType, ZSTD_dtlm_fast,
                                                  cdict,
                                                  &params, pledgedSrcSize,
                                                  ZSTDb_buffered) );

    cctx->inToCompress = 0;
    cctx->inBuffPos = 0;
    cctx->inBuffTarget = cctx->blockSize
                       + (cctx->blockSize == pledgedSrcSize);   /* for small input: avoid automatic flush on reaching end of block, since it would require adding a 3-byte null block to end the frame */
    cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
    cctx->streamStage = zcss_load;
    cctx->frameEnded = 0;
    return 0;   /* ready to go */
}

/* ZSTD_resetCStream() :
 * pledgedSrcSize == 0 means "unknown" */
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    return 0;
}

/*! ZSTD_initCStream_internal() :
 *  Note : for lib/compress only. Used by zstdmt_compress.c.
 *  Assumption 1 : params are valid
 *  Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params,
                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_internal");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    zcs->requestedParams = *params;
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if (dict) {
        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    } else {
        /* Dictionary is cleared if !cdict */
        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
    }
    return 0;
}

/* ZSTD_initCStream_usingCDict_advanced() :
 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                            const ZSTD_CDict* cdict,
                                            ZSTD_frameParameters fParams,
                                            unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    zcs->requestedParams.fParams = fParams;
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
    return 0;
}

/* note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
    return 0;
}
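
/* Example (illustrative sketch) : the modern initialization pattern that the
 * legacy ZSTD_initCStream*() wrappers in this section reduce to. New code can
 * call the parameter API directly :
 *
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, 3);
 *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_checksumFlag, 1);
 *     ZSTD_CCtx_refCDict(zcs, cdict);                    // or ZSTD_CCtx_loadDictionary()
 *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);  // optional, when known
 */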
/* ZSTD_initCStream_advanced() :
 * pledgedSrcSize must be exact.
 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pss)
{
    /* for compatibility with older programs relying on this behavior.
     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
     * This line will be removed in the future.
     */
    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params);
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    return 0;
}

size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    return 0;
}
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    return 0;
}

size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
    return 0;
}

/*======   Compression   ======*/

static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
{
    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
    if (hintInSize==0) hintInSize = cctx->blockSize;
    return hintInSize;
}

static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
{
    size_t const length = MIN(dstCapacity, srcSize);
    if (length) memcpy(dst, src, length);
    return length;
}

/** ZSTD_compressStream_generic():
 *  internal function for all *compressStream*() variants
 * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                          ZSTD_outBuffer* output,
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective const flushMode)
{
    const char* const istart = (const char*)input->src;
    const char* const iend = istart + input->size;
    const char* ip = istart + input->pos;
    char* const ostart = (char*)output->dst;
    char* const oend = ostart + output->size;
    char* op = ostart + output->pos;
    U32 someMoreWork = 1;

    /* check expectations */
    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
    assert(zcs->inBuff != NULL);
    assert(zcs->inBuffSize > 0);
    assert(zcs->outBuff != NULL);
    assert(zcs->outBuffSize > 0);
    assert(output->pos <= output->size);
    assert(input->pos <= input->size);

    while (someMoreWork) {
        switch(zcs->streamStage)
        {
        case zcss_init:
            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");

        case zcss_load:
            if ( (flushMode == ZSTD_e_end)
              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
              && (zcs->inBuffPos == 0) ) {
                /* shortcut to compression pass directly into output buffer */
                size_t const cSize = ZSTD_compressEnd(zcs,
                                                op, oend-op, ip, iend-ip);
                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
                FORWARD_IF_ERROR(cSize);
                ip = iend;
                op += cSize;
                zcs->frameEnded = 1;
                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                someMoreWork = 0; break;
            }
            /* complete loading into inBuffer */
            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(
                                        zcs->inBuff + zcs->inBuffPos, toLoad,
                                        ip, iend-ip);
                zcs->inBuffPos += loaded;
                ip += loaded;
                if ( (flushMode == ZSTD_e_continue)
                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
                    /* not enough input to fill full block : stop here */
                    someMoreWork = 0; break;
                }
                if ( (flushMode == ZSTD_e_flush)
                  && (zcs->inBuffPos == zcs->inToCompress) ) {
                    /* empty */
                    someMoreWork = 0; break;
                }
            }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
            {   void* cDst;
                size_t cSize;
                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                size_t oSize = oend-op;
                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                if (oSize >= ZSTD_compressBound(iSize))
                    cDst = op;   /* compress into output buffer, to skip flush stage */
                else
                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
                cSize = lastBlock ?
                        ZSTD_compressEnd(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize) :
                        ZSTD_compressContinue(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize);
                FORWARD_IF_ERROR(cSize);
                zcs->frameEnded = lastBlock;
                /* prepare next block */
                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                if (zcs->inBuffTarget > zcs->inBuffSize)
                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
                         (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
                if (!lastBlock)
                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
                zcs->inToCompress = zcs->inBuffPos;
                if (cDst == op) {  /* no need to flush */
                    op += cSize;
                    if (zcs->frameEnded) {
                        DEBUGLOG(5, "Frame completed directly in outBuffer");
                        someMoreWork = 0;
                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    }
                    break;
                }
                zcs->outBuffContentSize = cSize;
                zcs->outBuffFlushedSize = 0;
                zcs->streamStage = zcss_flush;   /* pass-through to flush stage */
            }
            /* fall-through */
        case zcss_flush:
            DEBUGLOG(5, "flush stage");
            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
                op += flushed;
                zcs->outBuffFlushedSize += flushed;
                if (toFlush!=flushed) {
                    /* flush not fully completed, presumably because dst is too small */
                    assert(op==oend);
                    someMoreWork = 0;
                    break;
                }
                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
                if (zcs->frameEnded) {
                    DEBUGLOG(5, "Frame completed on flush");
                    someMoreWork = 0;
                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    break;
                }
                zcs->streamStage = zcss_load;
                break;
            }

        default: /* impossible */
            assert(0);
        }
    }

    input->pos = ip - istart;
    output->pos = op - ostart;
    if (zcs->frameEnded) return 0;
    return ZSTD_nextInputSizeHint(zcs);
}

static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers >= 1) {
        assert(cctx->mtctx != NULL);
        return ZSTDMT_nextInputSizeHint(cctx->mtctx);
    }
#endif
    return ZSTD_nextInputSizeHint(cctx);
}

size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) );
    return ZSTD_nextInputSizeHint_MTorST(zcs);
}
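
/* Example (illustrative sketch) : a classic streaming loop over
 * ZSTD_compressStream(). readFromSource() / writeToSink() are hypothetical
 * caller-side I/O; inBuf/outBuf are sized as suggested by
 * ZSTD_CStreamInSize() / ZSTD_CStreamOutSize().
 *
 *     ZSTD_inBuffer input = { inBuf, 0, 0 };
 *     size_t readSize;
 *     while ( (readSize = readFromSource(inBuf, inSize)) > 0 ) {
 *         input.size = readSize; input.pos = 0;
 *         while (input.pos < input.size) {
 *             ZSTD_outBuffer output = { outBuf, outSize, 0 };
 *             size_t const hint = ZSTD_compressStream(zcs, &output, &input);
 *             if (ZSTD_isError(hint)) return hint;
 *             writeToSink(outBuf, output.pos);
 *         }
 *     }
 *     // finish with ZSTD_endStream(), looping until it returns 0
 */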
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions */
    RETURN_ERROR_IF(output->pos > output->size, GENERIC);
    RETURN_ERROR_IF(input->pos  > input->size, GENERIC);
    assert(cctx!=NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        ZSTD_CCtx_params params = cctx->requestedParams;
        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */
        memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
        assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
        if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1;  /* auto-fix pledgedSrcSize */
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);

#ifdef ZSTD_MULTITHREAD
        if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
            params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
        }
        if (params.nbWorkers > 0) {
            /* mt context creation */
            if (cctx->mtctx == NULL) {
                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
                            params.nbWorkers);
                cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
                RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
            }
            /* mt compression */
            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
            FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
                        cctx->mtctx,
                        prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
            cctx->streamStage = zcss_load;
            cctx->appliedParams.nbWorkers = params.nbWorkers;
        } else
#endif
        {   FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                            cctx->cdict,
                            params, cctx->pledgedSrcSizePlusOne-1) );
            assert(cctx->streamStage == zcss_load);
            assert(cctx->appliedParams.nbWorkers == 0);
    }   }
    /* end of transparent initialization stage */

    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
        size_t flushMin;
        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
        if (cctx->cParamsChanged) {
            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
            cctx->cParamsChanged = 0;
        }
        do {
            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
            if ( ZSTD_isError(flushMin)
              || (endOp == ZSTD_e_end && flushMin == 0) ) {  /* compression completed */
                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
            }
            FORWARD_IF_ERROR(flushMin);
        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
        /* Either we don't require maximum forward progress, we've finished the
         * flush, or we are out of output space.
         */
        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
        return flushMin;
    }
#endif
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}

size_t ZSTD_compressStream2_simpleArgs (
                            ZSTD_CCtx* cctx,
                            void* dst, size_t dstCapacity, size_t* dstPos,
                      const void* src, size_t srcSize, size_t* srcPos,
                            ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return cErr;
}

size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                const void* src, size_t srcSize)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    {   size_t oPos = 0;
        size_t iPos = 0;
        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
                                        dst, dstCapacity, &oPos,
                                        src, srcSize, &iPos,
                                        ZSTD_e_end);
        FORWARD_IF_ERROR(result);
        if (result != 0) {  /* compression not completed, due to lack of output space */
            assert(oPos == dstCapacity);
            RETURN_ERROR(dstSize_tooSmall);
        }
        assert(iPos == srcSize);   /* all input is expected to be consumed */
        return oPos;
    }
}
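
/* Example (illustrative sketch) : one-shot compression through the parameter
 * API, which is what ZSTD_compress2() is designed for. Names are hypothetical.
 *
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   // requires ZSTD_MULTITHREAD
 *     {   size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
 *         if (ZSTD_isError(cSize)) { ... }   // e.g. dstSize_tooSmall
 *     }
 */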
/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
 * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
}


size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
    FORWARD_IF_ERROR( remainingToFlush );
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
    /* single thread mode : attempt to calculate remaining to flush more precisely */
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
        return toFlush;
    }
}


/*-=====   Pre-defined compression levels   =====-*/

#define ZSTD_MAX_CLEVEL     22
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }

static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - for any srcSize > 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
},
};

/*! ZSTD_getCParams() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
    size_t const addedSize = srcSizeHint ? 0 : 500;
    U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN;  /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
    int row = compressionLevel;
    DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
    if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
    if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
        if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel);   /* acceleration factor */
        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);   /* refine parameters based on srcSize & dictSize */
    }
}

/*! ZSTD_getParams() :
 *  same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
    memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
    return params;
}
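
/* Example (illustrative sketch) : inspecting the parameters a level maps to,
 * e.g. to tweak a single field before calling an _advanced() entry point.
 * The windowLog override below is purely hypothetical.
 *
 *     ZSTD_compressionParameters cParams = ZSTD_getCParams(3, srcSize, 0);
 *     cParams.windowLog = 20;
 *     // feed into ZSTD_createCDict_advanced(), ZSTD_estimateCDictSize_advanced(), etc.
 */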