/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/
#include <limits.h>         /* INT_MAX */
#include <string.h>         /* memset */
#include "cpu.h"
#include "mem.h"
#include "hist.h"           /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_compress_internal.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"


/*-*************************************
*  Helper functions
***************************************/
size_t ZSTD_compressBound(size_t srcSize) {
    return ZSTD_COMPRESSBOUND(srcSize);
}
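/* Usage sketch (illustrative only, not part of the library) :
 * ZSTD_compressBound() returns the worst-case compressed size for an input
 * of `srcSize` bytes, so a destination buffer of that capacity can never be
 * too small. `src` and `srcSize` are assumed to exist on the caller side :
 *
 *     size_t const dstCapacity = ZSTD_compressBound(srcSize);
 *     void*  const dst = malloc(dstCapacity);
 *     size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 3);
 *     // check ZSTD_isError(cSize) before using the result
 */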
/*-*************************************
*  Context memory management
***************************************/
struct ZSTD_CDict_s {
    void* dictBuffer;
    const void* dictContent;
    size_t dictContentSize;
    void* workspace;
    size_t workspaceSize;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;
    U32 dictID;
};  /* typedef'd to ZSTD_CDict within "zstd.h" */

ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}

static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    assert(cctx != NULL);
    memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
        assert(!ZSTD_isError(err));
        (void)err;
    }
}

ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_STATIC_ASSERT(zcss_init==0);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
        if (!cctx) return NULL;
        ZSTD_initCCtx(cctx, customMem);
        return cctx;
    }
}

ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    memset(workspace, 0, workspaceSize);   /* may be a bit generous, could memset be smaller ? */
    cctx->staticSize = workspaceSize;
    cctx->workSpace = (void*)(cctx+1);
    cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
    assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0);   /* ensure correct alignment */
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
    cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
    {   void* const ptr = cctx->blockState.nextCBlock + 1;
        cctx->entropyWorkspace = (U32*)ptr;
    }
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
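/* Usage sketch (illustrative only, not part of the library) :
 * a static CCtx lives entirely inside a caller-provided buffer, which must
 * be 8-byte aligned and large enough for both the context and its working
 * space. Sizing it via ZSTD_estimateCCtxSize() (defined further below) is
 * the intended pattern :
 *
 *     size_t const wkspSize = ZSTD_estimateCCtxSize(3);
 *     void*  const wksp = malloc(wkspSize);        // malloc() is suitably aligned on mainstream platforms
 *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
 *     // cctx == NULL means the buffer was too small or misaligned
 */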
/**
 * Clears and frees all of the dictionaries in the CCtx.
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;
}

static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
    return bufferSize + cdictSize;
}

static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");
    ZSTD_freeCCtxContent(cctx);
    ZSTD_free(cctx, cctx->customMem);
    return 0;
}


static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;
    return 0;
#endif
}


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support sizeof on NULL */
    return sizeof(*cctx) + cctx->workSpaceSize
           + ZSTD_sizeof_localDict(cctx->localDict)
           + ZSTD_sizeof_mtctx(cctx);
}

size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);  /* same object */
}

/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }

static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params cctxParams;
    memset(&cctxParams, 0, sizeof(cctxParams));
    cctxParams.cParams = cParams;
    cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;  /* should not matter, as all cParams are presumed properly defined */
    assert(!ZSTD_checkCParams(cParams));
    cctxParams.fParams.contentSizeFlag = 1;
    return cctxParams;
}

static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params* params;
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
    params = (ZSTD_CCtx_params*)ZSTD_calloc(
            sizeof(ZSTD_CCtx_params), customMem);
    if (!params) { return NULL; }
    params->customMem = customMem;
    params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
    params->fParams.contentSizeFlag = 1;
    return params;
}

ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}

size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params == NULL) { return 0; }
    ZSTD_free(params, params->customMem);
    return 0;
}

size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}

size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}

size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->cParams = params.cParams;
    cctxParams->fParams = params.fParams;
    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
    assert(!ZSTD_checkCParams(params.cParams));
    return 0;
}
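/* Usage sketch (illustrative only, not part of the library) :
 * a ZSTD_CCtx_params object collects parameters off-line, then applies them
 * to a context in one step (see ZSTD_CCtx_setParametersUsingCCtxParams()
 * further below). `cctx` is assumed created earlier :
 *
 *     ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
 *     ZSTD_CCtxParams_init(params, 19);
 *     ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
 *     ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
 *     ZSTD_freeCCtxParams(params);
 */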
/* ZSTD_assignParamsToCCtxParams() :
 * params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
        ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
{
    ZSTD_CCtx_params ret = cctxParams;
    ret.cParams = params.cParams;
    ret.fParams = params.fParams;
    ret.compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
    assert(!ZSTD_checkCParams(params.cParams));
    return ret;
}

ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;

    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_overlapLog:
        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow :
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceCopy;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
        bounds.lowerBound = ZSTD_lcm_auto;
        bounds.upperBound = ZSTD_lcm_uncompressed;
        return bounds;

    default:
        {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
            return boundError;
        }
    }
}

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}
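/* Usage sketch (illustrative only, not part of the library) :
 * bounds can be queried at run time, which is handy for validating a
 * user-supplied setting before applying it. `userWindowLog` and `cctx`
 * are assumed caller-side variables :
 *
 *     ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
 *     if (!ZSTD_isError(b.error) && userWindowLog >= b.lowerBound
 *                                && userWindowLog <= b.upperBound) {
 *         ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, userWindowLog);
 *     }
 */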
/* ZSTD_cParam_clampBounds:
 * Clamps the value into the bounded range.
 */
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return bounds.error;
    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
    if (*value > bounds.upperBound) *value = bounds.upperBound;
    return 0;
}

#define BOUNDCHECK(cParam, val) {                             \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),    \
                    parameter_outOfBound);                    \
}


static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    default:
        return 0;
    }
}
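/* Usage sketch (illustrative only, not part of the library) :
 * only the parameters in the first group above may change once streaming has
 * started; any other parameter triggers a stage_wrong error, as implemented
 * in ZSTD_CCtx_setParameter() below :
 *
 *     ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue);   // stream in progress
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1); // allowed : recorded for later application
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27);       // fails : stage_wrong
 */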
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong);
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
        break;

    default: RETURN_ERROR(parameter_unsupported);
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format :
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel : {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        if (value) {  /* 0 : does not change current level */
            CCtxParams->compressionLevel = value;
        }
        if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
        return 0;  /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = value;
        return value;

    case ZSTD_c_minMatch :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength :
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag :
        /* A 32-bit content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow :
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict : {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode : {
        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }

    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        CCtxParams->nbWorkers = value;
        return CCtxParams->nbWorkers;
#endif

    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        /* Adjust to the minimum non-default value. */
        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
            value = ZSTDMT_JOBSIZE_MIN;
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
        assert(value >= 0);
        CCtxParams->jobSize = value;
        return CCtxParams->jobSize;
#endif

    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
        CCtxParams->overlapLog = value;
        return CCtxParams->overlapLog;
#endif

    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
        CCtxParams->rsyncable = value;
        return CCtxParams->rsyncable;
#endif

    case ZSTD_c_enableLongDistanceMatching :
        CCtxParams->ldmParams.enableLdm = (value!=0);
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog :
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog :
        RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
                        parameter_outOfBound);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}
size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
{
    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}

size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format :
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag :
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow :
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        assert(CCtxParams->nbWorkers == 0);
#endif
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        assert(CCtxParams->jobSize <= INT_MAX);
        *value = (int)CCtxParams->jobSize;
        break;
#endif
    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->overlapLog;
        break;
#endif
    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->rsyncable;
        break;
#endif
    case ZSTD_c_enableLongDistanceMatching :
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}
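/* Usage sketch (illustrative only, not part of the library) :
 * parameters read back through the getter reflect what was stored, including
 * the "0 means keep current level" behaviour of ZSTD_c_compressionLevel :
 *
 *     int level;
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 0);   // no change
 *     ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &level);
 *     assert(level == 7);
 */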
/** ZSTD_CCtx_setParametersUsingCCtxParams() :
 *  copies `params` into `cctx`:
 *  no compression action is performed, parameters are merely stored.
 *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
 *  This is possible even if a compression is ongoing.
 *  In which case, new parameters will be applied on the fly, starting with next compression job.
 */
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->cdict, stage_wrong);

    cctx->requestedParams = *params;
    return 0;
}

ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    return 0;
}
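/* Usage sketch (illustrative only, not part of the library) :
 * pledging the source size up front lets the next frame embed the content
 * size in its header and lets the compressor size its tables accordingly.
 * The pledge applies to the next frame, and the actual input must match it :
 *
 *     ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);
 *     ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
 */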
/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 *       than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
            &cctx->requestedParams, 0, dl->dictSize);
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    dl->cdict = ZSTD_createCDict_advanced(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            cParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation);
    cctx->cdict = dl->cdict;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "no malloc for static CCtx");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;
    } else {
        void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation);
        memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}

ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
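/* Usage sketch (illustrative only, not part of the library) :
 * ZSTD_CCtx_loadDictionary() copies the dictionary into the context, so the
 * caller's buffer may be released immediately; the _byReference variant only
 * stores a pointer, so the buffer must outlive every compression using it :
 *
 *     ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictLen);             // dictBuf reusable afterwards
 *     ZSTD_CCtx_loadDictionary_byReference(cctx, dictBuf, dictLen); // dictBuf must stay valid
 */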
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}

size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}

size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    ZSTD_clearAllDicts(cctx);
    cctx->prefixDict.dict = prefix;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dictContentType = dictContentType;
    return 0;
}
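/* Usage sketch (illustrative only, not part of the library) :
 * a prefix is referenced, never copied, and (per zstd.h) applies only to the
 * next frame; decompression must supply the same prefix :
 *
 *     ZSTD_CCtx_refPrefix(cctx, prevChunk, prevChunkSize);
 *     ZSTD_compress2(cctx, dst, dstCapacity, chunk, chunkSize);
 *     // the following frame starts with no prefix unless one is set again
 */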
/*! ZSTD_CCtx_reset() :
 *  Also dumps dictionary */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        cctx->streamStage = zcss_init;
        cctx->pledgedSrcSizePlusOne = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
        ZSTD_clearAllDicts(cctx);
        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
    }
    return 0;
}
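/* Usage sketch (illustrative only, not part of the library) :
 * ZSTD_reset_session_only aborts the current frame but keeps parameters and
 * dictionary; ZSTD_reset_parameters additionally drops them (and requires no
 * active session); ZSTD_reset_session_and_parameters does both :
 *
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);            // keep settings
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);  // back to defaults
 */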
/** ZSTD_checkCParams() :
 *  verify that CParam values remain within authorized range.
 * @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength, cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}

/** ZSTD_clampCParams() :
 *  clamp CParam values into the valid range.
 * @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength, cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy, cParams.strategy, ZSTD_strategy);
    return cParams;
}

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
    return hashLog - btScale;
}

/** ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 *  note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize)
{
    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
        srcSize = minSrcSize;  /* presumed small when there is a dictionary */
    else if (srcSize == 0)
        srcSize = ZSTD_CONTENTSIZE_UNKNOWN;  /* 0 == unknown : presumed large */

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) )  {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cycleLog > cPar.windowLog)
            cPar.chainLog -= (cycleLog - cPar.windowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */

    return cPar;
}

ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
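/* Worked example (illustrative only, not part of the library) :
 * parameters picked for a large input shrink when the input is known small.
 * With a 10240-byte source and no dictionary, tSize = 10240, so
 * srcLog = ZSTD_highbit32(10239) + 1 = 14, and any larger windowLog is
 * reduced to 14 :
 *
 *     ZSTD_compressionParameters cp = ZSTD_getCParams(19, 0, 0);
 *     cp = ZSTD_adjustCParams(cp, 10240, 0);   // cp.windowLog is now 14
 */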
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
    if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
    assert(!ZSTD_checkCParams(cParams));
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
}

static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const U32 forCCtx)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
    size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
                                   + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace;
}

size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        U32    const divider = (cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
        size_t const entropySpace = HUF_WORKSPACE_SIZE;
        size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);

        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
        size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);

        size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
                                   matchStateSize + ldmSpace + ldmSeqSpace;

        DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
        DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
        return sizeof(ZSTD_CCtx) + neededSpace;
    }
}

size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}

static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCCtxSize_usingCParams(cParams);
}

size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}

size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
        size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
        size_t const streamingSize = inBuffSize + outBuffSize;

        return CCtxSize + streamingSize;
    }
}

size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}

static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCStreamSize_usingCParams(cParams);
}

size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
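/* Note (illustrative only, not part of the library) :
 * the loops above scan every level up to the one requested because memory
 * usage is not monotonic across levels : a lower level targeting the same
 * srcSize can select larger tables. Consequently :
 *
 *     size_t const budget = ZSTD_estimateCCtxSize(19);
 *     // budget is large enough for compression at any level from 1 to 19
 */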
/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_getFrameProgression(cctx->mtctx);
    }
#endif
    {   ZSTD_frameProgression fp;
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        fp.currentJobID = 0;
        fp.nbActiveWorkers = 0;
        return fp;
}   }

/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_toFlushNow(cctx->mtctx);
    }
#endif
    (void)cctx;
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
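/* Usage sketch (illustrative only, not part of the library) :
 * progression can be polled while a long compression runs in non-blocking
 * (multithreaded) mode :
 *
 *     ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
 *     printf("ingested %llu, produced %llu\n",
 *            (unsigned long long)fp.ingested, (unsigned long long)fp.produced);
 */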
static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
                                  ZSTD_compressionParameters cParams2)
{
    return (cParams1.hashLog  == cParams2.hashLog)
         & (cParams1.chainLog == cParams2.chainLog)
         & (cParams1.strategy == cParams2.strategy)   /* opt parser space */
         & ((cParams1.minMatch==3) == (cParams2.minMatch==3));  /* hashlog3 space */
}

static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}

/** The parameters are equivalent if ldm is not enabled in both sets or
 *  all the parameters are equivalent. */
static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
                                    ldmParams_t ldmParams2)
{
    return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
           (ldmParams1.enableLdm == ldmParams2.enableLdm &&
            ldmParams1.hashLog == ldmParams2.hashLog &&
            ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
            ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
            ldmParams1.hashRateLog == ldmParams2.hashRateLog);
}

typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;

/* ZSTD_sufficientBuff() :
 * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
 * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
                               size_t maxNbLit1,
                               ZSTD_buffered_policy_e buffPol2,
                               ZSTD_compressionParameters cParams2,
                               U64 pledgedSrcSize)
{
    size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
    size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
    size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
    size_t const maxNbLit2 = blockSize2;
    size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
    DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
                (U32)neededBufferSize2, (U32)bufferSize1);
    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
                (U32)maxNbSeq2, (U32)maxNbSeq1);
    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
                (U32)maxNbLit2, (U32)maxNbLit1);
    return (maxNbLit2 <= maxNbLit1)
         & (maxNbSeq2 <= maxNbSeq1)
         & (neededBufferSize2 <= bufferSize1);
}

/** Equivalence for resetCCtx purposes */
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
                                 ZSTD_CCtx_params params2,
                                 size_t buffSize1,
                                 size_t maxNbSeq1, size_t maxNbLit1,
                                 ZSTD_buffered_policy_e buffPol2,
                                 U64 pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
    if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
        DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
        return 0;
    }
    if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
        DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
        return 0;
    }
    if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
                             params2.cParams, pledgedSrcSize)) {
        DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
        return 0;
    }
    return 1;
}

static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i)
        bs->rep[i] = repStartValue[i];
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}

/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->nextToUpdate3 = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;
}
/*! ZSTD_continueCCtx() :
 *  reuse CCtx without reset (note : requires no dictionary) */
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");

    cctx->blockSize = blockSize;   /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
    cctx->appliedParams = params;
    cctx->blockState.matchState.cParams = params.cParams;
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    cctx->consumedSrcSize = 0;
    cctx->producedCSize = 0;
    if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
        cctx->appliedParams.fParams.contentSizeFlag = 0;
    DEBUGLOG(4, "pledged content size : %u ; flag : %u",
        (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
    cctx->stage = ZSTDcs_init;
    cctx->dictID = 0;
    if (params.ldmParams.enableLdm)
        ZSTD_window_clear(&cctx->ldmState.window);
    ZSTD_referenceExternalSequences(cctx, NULL, 0);
    ZSTD_invalidateMatchState(&cctx->blockState.matchState);
    ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
    XXH64_reset(&cctx->xxhState, 0);
    return 0;
}

typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;

static void*
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      void* ptr,
                const ZSTD_compressionParameters* cParams,
                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);

    assert(((size_t)ptr & 3) == 0);

    ms->hashLog3 = hashLog3;
    memset(&ms->window, 0, sizeof(ms->window));
    ms->window.dictLimit = 1;    /* start from 1, so that 1st position is valid */
    ms->window.lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
    ms->window.nextSrc = ms->window.base + 1;   /* see issue #1241 */
    ZSTD_invalidateMatchState(ms);

    /* opt parser space */
    if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ptr;
        ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
        ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
        ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
        ptr = ms->opt.offCodeFreq + (MaxOff+1);
        ms->opt.matchTable = (ZSTD_match_t*)ptr;
        ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
        ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
        ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
    }

    /* table Space */
    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
    assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
    if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace);   /* reset tables only */
    ms->hashTable = (U32*)(ptr);
    ms->chainTable = ms->hashTable + hSize;
    ms->hashTable3 = ms->chainTable + chainSize;
    ptr = ms->hashTable3 + h3Size;

    ms->cParams = *cParams;

    assert(((size_t)ptr & 3) == 0);
    return ptr;
}

#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                                 * during at least this number of times,
                                                 * context's memory usage is considered wasteful,
                                                 * because it's sized to handle a worst case scenario which rarely happens.
                                                 * In which case, resize it down to free some memory */
/*! ZSTD_resetCCtx_internal() :
    note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params params,
                                      U64 pledgedSrcSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
                (U32)pledgedSrcSize, params.cParams.windowLog);
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));

    if (crp == ZSTDcrp_continue) {
        if (ZSTD_equivalentParams(zc->appliedParams, params,
                                  zc->inBuffSize,
                                  zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
                                  zbuff, pledgedSrcSize)) {
            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
                        zc->appliedParams.cParams.windowLog, zc->blockSize);
            zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
                return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
    }   }
    DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");

    if (params.ldmParams.enableLdm) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
        assert(params.ldmParams.hashRateLog < 32);
        zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
    }

    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
        size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
        void* ptr;   /* used to partition workSpace */

        /* Check if workSpace is large enough, alloc a new one if needed */
        {   size_t const entropySpace = HUF_WORKSPACE_SIZE;
            size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
            size_t const bufferSpace = buffInSize + buffOutSize;
            size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
            size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);

            size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
                                       ldmSeqSpace + matchStateSize + tokenSpace +
                                       bufferSpace;

            int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
            int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
            int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
            zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;

            DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
                        neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (workSpaceTooSmall || workSpaceWasteful) {
                DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
                            zc->workSpaceSize >> 10,
                            neededSpace >> 10);

                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                zc->workSpaceSize = 0;
                ZSTD_free(zc->workSpace, zc->customMem);
                zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
                RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation);
                zc->workSpaceSize = neededSpace;
                zc->workSpaceOversizedDuration = 0;

                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(((size_t)zc->workSpace & 3) == 0);   /* ensure correct alignment */
                assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
                zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
                ptr = zc->blockState.nextCBlock + 1;
                zc->entropyWorkspace = (U32*)ptr;
        }   }

        /* init params */
        zc->appliedParams = params;
        zc->blockState.matchState.cParams = params.cParams;
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        XXH64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;

        /* ldm hash table */
        /* initialize bucketOffsets table later for pointer alignment */
        if (params.ldmParams.enableLdm) {
            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
            memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
            assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
            zc->ldmState.hashTable = (ldmEntry_t*)ptr;
            ptr = zc->ldmState.hashTable + ldmHSize;
            zc->ldmSequences = (rawSeq*)ptr;
            ptr = zc->ldmSequences + maxNbLdmSeq;
            zc->maxNbLdmSequences = maxNbLdmSeq;

            memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
        }
        assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */

        ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);

        /* sequences storage */
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.sequencesStart = (seqDef*)ptr;
        ptr = zc->seqStore.sequencesStart + maxNbSeq;
        zc->seqStore.llCode = (BYTE*) ptr;
        zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
        zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
        zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.maxNbLit = blockSize;
        ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;

        /* ldm bucketOffsets table */
        if (params.ldmParams.enableLdm) {
            size_t const ldmBucketSize =
                  ((size_t)1) << (params.ldmParams.hashLog -
                                  params.ldmParams.bucketSizeLog);
            memset(ptr, 0, ldmBucketSize);
            zc->ldmState.bucketOffsets = (BYTE*)ptr;
            ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
            ZSTD_window_clear(&zc->ldmState.window);
        }
        ZSTD_referenceExternalSequences(zc, NULL, 0);

        /* buffers */
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ptr;
        zc->outBuffSize = buffOutSize;
        zc->outBuff = zc->inBuff + buffInSize;

        return 0;
    }
}

/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int i;
    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};

static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 ZSTD_CCtx_params params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    return ( pledgedSrcSize <= cutoff
          || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
          || params.attachDictPref == ZSTD_dictForceAttach )
        && params.attachDictPref != ZSTD_dictForceCopy
        && !params.forceWindow; /* dictMatchState isn't correctly
                                 * handled in _enforceMaxDist */
}
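/* Worked example (illustrative only, not part of the library) :
 * with a cdict built for ZSTD_fast (8 KB cutoff above), a 4 KB pledged
 * source attaches the dictionary in place, a 100 KB pledged source copies
 * its tables, and an unknown size always attaches. ZSTD_c_forceAttachDict
 * overrides the heuristic in either direction :
 *
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceCopy);
 */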
*/ 1603 params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); 1604 params.cParams.windowLog = windowLog; 1605 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 1606 ZSTDcrp_continue, zbuff); 1607 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); 1608 } 1609 1610 { 1611 const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc 1612 - cdict->matchState.window.base); 1613 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; 1614 if (cdictLen == 0) { 1615 /* don't even attach dictionaries with no contents */ 1616 DEBUGLOG(4, "skipping attaching empty dictionary"); 1617 } else { 1618 DEBUGLOG(4, "attaching dictionary into context"); 1619 cctx->blockState.matchState.dictMatchState = &cdict->matchState; 1620 1621 /* prep working match state so dict matches never have negative indices 1622 * when they are translated to the working context's index space. */ 1623 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { 1624 cctx->blockState.matchState.window.nextSrc = 1625 cctx->blockState.matchState.window.base + cdictEnd; 1626 ZSTD_window_clear(&cctx->blockState.matchState.window); 1627 } 1628 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; 1629 } 1630 } 1631 1632 cctx->dictID = cdict->dictID; 1633 1634 /* copy block state */ 1635 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 1636 1637 return 0; 1638 } 1639
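/* note (illustrative example) : plugging numbers into the attach-vs-copy
 * decision, using the attachDictSizeCutoffs[] table above. With strategy
 * ZSTD_dfast the cutoff is 16 KB, so a pledgedSrcSize of 10 000 bytes attaches
 * the CDict (its tables are read in place through dictMatchState : cheap
 * setup, slightly slower searches), while a pledgedSrcSize of 100 000 bytes
 * copies the tables into the working context instead (higher one-time cost,
 * faster search loop). ZSTD_dictForceAttach / ZSTD_dictForceCopy override the
 * size heuristic, and an unknown pledgedSrcSize favors attaching. */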
1640 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, 1641 const ZSTD_CDict* cdict, 1642 ZSTD_CCtx_params params, 1643 U64 pledgedSrcSize, 1644 ZSTD_buffered_policy_e zbuff) 1645 { 1646 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; 1647 1648 DEBUGLOG(4, "copying dictionary into context"); 1649 1650 { unsigned const windowLog = params.cParams.windowLog; 1651 assert(windowLog != 0); 1652 /* Copy only compression parameters related to tables. */ 1653 params.cParams = *cdict_cParams; 1654 params.cParams.windowLog = windowLog; 1655 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 1656 ZSTDcrp_noMemset, zbuff); 1657 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); 1658 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); 1659 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); 1660 } 1661 1662 /* copy tables */ 1663 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); 1664 size_t const hSize = (size_t)1 << cdict_cParams->hashLog; 1665 size_t const tableSpace = (chainSize + hSize) * sizeof(U32); 1666 assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1667 assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); 1668 assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1669 assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); 1670 memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ 1671 } 1672 1673 /* Zero the hashTable3, since the cdict never fills it */ 1674 { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; 1675 assert(cdict->matchState.hashLog3 == 0); 1676 memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); 1677 } 1678 1679 /* copy dictionary offsets */ 1680 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; 1681 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; 1682 dstMatchState->window = srcMatchState->window; 1683 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 1684 dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; 1685 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 1686 } 1687 1688 cctx->dictID = cdict->dictID; 1689 1690 /* copy block state */ 1691 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 1692 1693 return 0; 1694 } 1695 1696 /* We have a choice between copying the dictionary context into the working 1697 * context, or referencing the dictionary context from the working context 1698 * in-place. We decide here which strategy to use. */ 1699 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, 1700 const ZSTD_CDict* cdict, 1701 ZSTD_CCtx_params params, 1702 U64 pledgedSrcSize, 1703 ZSTD_buffered_policy_e zbuff) 1704 { 1705 1706 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", 1707 (unsigned)pledgedSrcSize); 1708 1709 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { 1710 return ZSTD_resetCCtx_byAttachingCDict( 1711 cctx, cdict, params, pledgedSrcSize, zbuff); 1712 } else { 1713 return ZSTD_resetCCtx_byCopyingCDict( 1714 cctx, cdict, params, pledgedSrcSize, zbuff); 1715 } 1716 } 1717 1718 /*! ZSTD_copyCCtx_internal() : 1719 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 1720 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 1721 * The "context", in this case, refers to the hash and chain tables, 1722 * entropy tables, and dictionary references. 1723 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. 1724 * @return : 0, or an error code */ 1725 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, 1726 const ZSTD_CCtx* srcCCtx, 1727 ZSTD_frameParameters fParams, 1728 U64 pledgedSrcSize, 1729 ZSTD_buffered_policy_e zbuff) 1730 { 1731 DEBUGLOG(5, "ZSTD_copyCCtx_internal"); 1732 RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); 1733 1734 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); 1735 { ZSTD_CCtx_params params = dstCCtx->requestedParams; 1736 /* Copy only compression parameters related to tables.
*/ 1737 params.cParams = srcCCtx->appliedParams.cParams; 1738 params.fParams = fParams; 1739 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, 1740 ZSTDcrp_noMemset, zbuff); 1741 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); 1742 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); 1743 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); 1744 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); 1745 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); 1746 } 1747 1748 /* copy tables */ 1749 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); 1750 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; 1751 size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; 1752 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); 1753 assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1754 assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); 1755 memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ 1756 } 1757 1758 /* copy dictionary offsets */ 1759 { 1760 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; 1761 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; 1762 dstMatchState->window = srcMatchState->window; 1763 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 1764 dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; 1765 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 1766 } 1767 dstCCtx->dictID = srcCCtx->dictID; 1768 1769 /* copy block state */ 1770 memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); 1771 1772 return 0; 1773 } 1774 1775 /*! ZSTD_copyCCtx() : 1776 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 1777 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 1778 * pledgedSrcSize==0 means "unknown". 1779 * @return : 0, or an error code */ 1780 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) 1781 { 1782 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 1783 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); 1784 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); 1785 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; 1786 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); 1787 1788 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, 1789 fParams, pledgedSrcSize, 1790 zbuff); 1791 } 1792 1793 1794 #define ZSTD_ROWSIZE 16 1795 /*! ZSTD_reduceTable() : 1796 * reduce table indexes by `reducerValue`, or squash to zero. 1797 * PreserveMark preserves "unsorted mark" for btlazy2 strategy. 1798 * It must be set to a clear 0/1 value, to remove branch during inlining. 
1799 * Presume table size is a multiple of ZSTD_ROWSIZE 1800 * to help auto-vectorization */ 1801 FORCE_INLINE_TEMPLATE void 1802 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) 1803 { 1804 int const nbRows = (int)size / ZSTD_ROWSIZE; 1805 int cellNb = 0; 1806 int rowNb; 1807 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ 1808 assert(size < (1U<<31)); /* can be cast to int */ 1809 for (rowNb=0 ; rowNb < nbRows ; rowNb++) { 1810 int column; 1811 for (column=0; column<ZSTD_ROWSIZE; column++) { 1812 if (preserveMark) { 1813 U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0; 1814 table[cellNb] += adder; 1815 } 1816 if (table[cellNb] < reducerValue) table[cellNb] = 0; 1817 else table[cellNb] -= reducerValue; 1818 cellNb++; 1819 } } 1820 } 1821 1822 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) 1823 { 1824 ZSTD_reduceTable_internal(table, size, reducerValue, 0); 1825 } 1826 1827 static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) 1828 { 1829 ZSTD_reduceTable_internal(table, size, reducerValue, 1); 1830 } 1831 1832 /*! ZSTD_reduceIndex() : 1833 * rescale all indexes to avoid future overflow (indexes are U32) */ 1834 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) 1835 { 1836 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 1837 { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; 1838 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); 1839 } 1840 1841 if (zc->appliedParams.cParams.strategy != ZSTD_fast) { 1842 U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; 1843 if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) 1844 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); 1845 else 1846 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); 1847 } 1848 1849 if (ms->hashLog3) { 1850 U32 const h3Size = (U32)1 << ms->hashLog3; 1851 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); 1852 } 1853 } 1854 1855 1856 /*-******************************************************* 1857 * Block entropic compression 1858 *********************************************************/ 1859 1860 /* See doc/zstd_compression_format.md for detailed format description */ 1861 1862 static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) 1863 { 1864 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); 1865 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, 1866 dstSize_tooSmall); 1867 MEM_writeLE24(dst, cBlockHeader24); 1868 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); 1869 return ZSTD_blockHeaderSize + srcSize; 1870 } 1871 1872 static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 1873 { 1874 BYTE* const ostart = (BYTE* const)dst; 1875 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 1876 1877 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); 1878 1879 switch(flSize) 1880 { 1881 case 1: /* 2 - 1 - 5 */ 1882 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); 1883 break; 1884 case 2: /* 2 - 2 - 12 */ 1885 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); 1886 break; 1887 case 3: /* 2 - 2 - 20 */ 1888 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); 1889 break; 1890 default: /* not necessary : flSize is {1,2,3} */ 1891 assert(0); 1892 } 1893 1894 memcpy(ostart + flSize, src, srcSize); 1895 return srcSize + flSize; 1896 } 1897
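/* note (worked example) : for the raw-literals header built just above,
 * assuming set_basic == 0 (symbolEncodingType_e) :
 *   srcSize = 1000 => flSize = 1 + (1000>31) + (1000>4095) = 2 ;
 *   header = set_basic + (1<<2) + (1000<<4) = 0x3E84, written LE as bytes 84 3E ;
 *   a decoder reads back bits [0-1] = block type (basic/raw), bits [2-3] =
 *   size format (1), and bits [4-15] = regenerated size (1000). */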
1898 static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 1899 { 1900 BYTE* const ostart = (BYTE* const)dst; 1901 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 1902 1903 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ 1904 1905 switch(flSize) 1906 { 1907 case 1: /* 2 - 1 - 5 */ 1908 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); 1909 break; 1910 case 2: /* 2 - 2 - 12 */ 1911 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); 1912 break; 1913 case 3: /* 2 - 2 - 20 */ 1914 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); 1915 break; 1916 default: /* not necessary : flSize is {1,2,3} */ 1917 assert(0); 1918 } 1919 1920 ostart[flSize] = *(const BYTE*)src; 1921 return flSize+1; 1922 } 1923 1924 1925 /* ZSTD_minGain() : 1926 * minimum compression required 1927 * to generate a compressed block or a compressed literals section. 1928 * note : use same formula for both situations */ 1929 static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) 1930 { 1931 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; 1932 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); 1933 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 1934 return (srcSize >> minlog) + 2; 1935 } 1936
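/* note (worked example) : for strategies below ZSTD_btultra, minlog == 6, so a
 * 100 000 byte input gives minGain = (100000 >> 6) + 2 = 1564 bytes :
 * compressed literals are kept only when cLitSize < srcSize - minGain, and
 * ZSTD_compressSequences() below emits the whole block uncompressed when
 * cSize >= srcSize - minGain. At ZSTD_btultra2 (strategy 9), minlog == 8, and
 * the required gain shrinks to (srcSize >> 8) + 2, accepting smaller savings. */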
1937 static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 1938 ZSTD_hufCTables_t* nextHuf, 1939 ZSTD_strategy strategy, int disableLiteralCompression, 1940 void* dst, size_t dstCapacity, 1941 const void* src, size_t srcSize, 1942 void* workspace, size_t wkspSize, 1943 const int bmi2) 1944 { 1945 size_t const minGain = ZSTD_minGain(srcSize, strategy); 1946 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); 1947 BYTE* const ostart = (BYTE*)dst; 1948 U32 singleStream = srcSize < 256; 1949 symbolEncodingType_e hType = set_compressed; 1950 size_t cLitSize; 1951 1952 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", 1953 disableLiteralCompression); 1954 1955 /* Prepare nextEntropy assuming reusing the existing table */ 1956 memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 1957 1958 if (disableLiteralCompression) 1959 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1960 1961 /* small ? don't even attempt compression (speed opt) */ 1962 # define COMPRESS_LITERALS_SIZE_MIN 63 1963 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; 1964 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1965 } 1966 1967 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); 1968 { HUF_repeat repeat = prevHuf->repeatMode; 1969 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; 1970 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; 1971 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, 1972 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) 1973 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, 1974 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); 1975 if (repeat != HUF_repeat_none) { 1976 /* reused the existing table */ 1977 hType = set_repeat; 1978 } 1979 } 1980 1981 if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { 1982 memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 1983 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1984 } 1985 if (cLitSize==1) { 1986 memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 1987 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); 1988 } 1989 1990 if (hType == set_compressed) { 1991 /* using a newly constructed table */ 1992 nextHuf->repeatMode = HUF_repeat_check; 1993 } 1994 1995 /* Build header */ 1996 switch(lhSize) 1997 { 1998 case 3: /* 2 - 2 - 10 - 10 */ 1999 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); 2000 MEM_writeLE24(ostart, lhc); 2001 break; 2002 } 2003 case 4: /* 2 - 2 - 14 - 14 */ 2004 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); 2005 MEM_writeLE32(ostart, lhc); 2006 break; 2007 } 2008 case 5: /* 2 - 2 - 18 - 18 */ 2009 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); 2010 MEM_writeLE32(ostart, lhc); 2011 ostart[4] = (BYTE)(cLitSize >> 10); 2012 break; 2013 } 2014 default: /* not possible : lhSize is {3,4,5} */ 2015 assert(0); 2016 } 2017 return lhSize+cLitSize; 2018 } 2019 2020 2021 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 2022 { 2023 const seqDef* const sequences = seqStorePtr->sequencesStart; 2024 BYTE* const llCodeTable = seqStorePtr->llCode; 2025 BYTE* const ofCodeTable = seqStorePtr->ofCode; 2026 BYTE* const mlCodeTable = seqStorePtr->mlCode; 2027 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2028 U32 u; 2029 assert(nbSeq <= seqStorePtr->maxNbSeq); 2030 for (u=0; u<nbSeq; u++) { 2031 U32 const llv = sequences[u].litLength; 2032 U32 const mlv = sequences[u].matchLength; 2033 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); 2034 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); 2035 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); 2036 } 2037 if (seqStorePtr->longLengthID==1) 2038 llCodeTable[seqStorePtr->longLengthPos] = MaxLL; 2039 if (seqStorePtr->longLengthID==2) 2040 mlCodeTable[seqStorePtr->longLengthPos] = MaxML; 2041 } 2042 2043 2044 /** 2045 * -log2(x / 256) lookup table for x in [0, 256).
2046 * If x == 0: Return 0 2047 * Else: Return floor(-log2(x / 256) * 256) 2048 */ 2049 static unsigned const kInverseProbabilityLog256[256] = { 2050 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, 2051 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, 2052 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, 2053 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, 2054 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, 2055 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, 2056 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, 2057 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, 2058 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, 2059 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, 2060 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, 2061 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, 2062 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, 2063 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, 2064 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, 2065 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, 2066 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, 2067 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, 2068 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, 2069 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, 2070 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, 2071 5, 4, 2, 1, 2072 }; 2073 2074 2075 /** 2076 * Returns the cost in bits of encoding the distribution described by count 2077 * using the entropy bound. 2078 */ 2079 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) 2080 { 2081 unsigned cost = 0; 2082 unsigned s; 2083 for (s = 0; s <= max; ++s) { 2084 unsigned norm = (unsigned)((256 * count[s]) / total); 2085 if (count[s] != 0 && norm == 0) 2086 norm = 1; 2087 assert(count[s] < total); 2088 cost += count[s] * kInverseProbabilityLog256[norm]; 2089 } 2090 return cost >> 8; 2091 } 2092 2093 2094 /** 2095 * Returns the cost in bits of encoding the distribution in count using the 2096 * table described by norm. The max symbol supported by norm is assumed >= max. 2097 * norm must be valid for every symbol with non-zero probability in count. 2098 */ 2099 static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, 2100 unsigned const* count, unsigned const max) 2101 { 2102 unsigned const shift = 8 - accuracyLog; 2103 size_t cost = 0; 2104 unsigned s; 2105 assert(accuracyLog <= 8); 2106 for (s = 0; s <= max; ++s) { 2107 unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; 2108 unsigned const norm256 = normAcc << shift; 2109 assert(norm256 > 0); 2110 assert(norm256 < 256); 2111 cost += count[s] * kInverseProbabilityLog256[norm256]; 2112 } 2113 return cost >> 8; 2114 } 2115 2116 2117 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { 2118 void const* ptr = ctable; 2119 U16 const* u16ptr = (U16 const*)ptr; 2120 U32 const maxSymbolValue = MEM_read16(u16ptr + 1); 2121 return maxSymbolValue; 2122 } 2123 2124
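/* note (worked example) : in ZSTD_entropyCost() above, a symbol with
 * count[s] = 50 out of total = 200 normalizes to norm = (256*50)/200 = 64 ;
 * kInverseProbabilityLog256[64] == 512, i.e. -log2(64/256) = 2.0 bits in 8.8
 * fixed point, so the symbol adds 50 * 512 = 25600 to the accumulator, which
 * the final >> 8 turns into 100 bits : the exact Shannon cost of fifty
 * probability-1/4 events. */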
2125 /** 2126 * Returns the cost in bits of encoding the distribution in count using ctable. 2127 * Returns an error if ctable cannot represent all the symbols in count. 2128 */ 2129 static size_t ZSTD_fseBitCost( 2130 FSE_CTable const* ctable, 2131 unsigned const* count, 2132 unsigned const max) 2133 { 2134 unsigned const kAccuracyLog = 8; 2135 size_t cost = 0; 2136 unsigned s; 2137 FSE_CState_t cstate; 2138 FSE_initCState(&cstate, ctable); 2139 RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, 2140 "Repeat FSE_CTable has maxSymbolValue %u < %u", 2141 ZSTD_getFSEMaxSymbolValue(ctable), max); 2142 for (s = 0; s <= max; ++s) { 2143 unsigned const tableLog = cstate.stateLog; 2144 unsigned const badCost = (tableLog + 1) << kAccuracyLog; 2145 unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); 2146 if (count[s] == 0) 2147 continue; 2148 RETURN_ERROR_IF(bitCost >= badCost, GENERIC, 2149 "Repeat FSE_CTable has Prob[%u] == 0", s); 2150 cost += count[s] * bitCost; 2151 } 2152 return cost >> kAccuracyLog; 2153 } 2154 2155 /** 2156 * Returns the cost in bytes of encoding the normalized count header. 2157 * Returns an error if any of the helper functions return an error. 2158 */ 2159 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, 2160 size_t const nbSeq, unsigned const FSELog) 2161 { 2162 BYTE wksp[FSE_NCOUNTBOUND]; 2163 S16 norm[MaxSeq + 1]; 2164 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); 2165 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); 2166 return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); 2167 } 2168 2169 2170 typedef enum { 2171 ZSTD_defaultDisallowed = 0, 2172 ZSTD_defaultAllowed = 1 2173 } ZSTD_defaultPolicy_e; 2174 2175 MEM_STATIC symbolEncodingType_e 2176 ZSTD_selectEncodingType( 2177 FSE_repeat* repeatMode, unsigned const* count, unsigned const max, 2178 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, 2179 FSE_CTable const* prevCTable, 2180 short const* defaultNorm, U32 defaultNormLog, 2181 ZSTD_defaultPolicy_e const isDefaultAllowed, 2182 ZSTD_strategy const strategy) 2183 { 2184 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); 2185 if (mostFrequent == nbSeq) { 2186 *repeatMode = FSE_repeat_none; 2187 if (isDefaultAllowed && nbSeq <= 2) { 2188 /* Prefer set_basic over set_rle when there are 2 or fewer symbols, 2189 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. 2190 * If basic encoding isn't possible, always choose RLE. 2191 */ 2192 DEBUGLOG(5, "Selected set_basic"); 2193 return set_basic; 2194 } 2195 DEBUGLOG(5, "Selected set_rle"); 2196 return set_rle; 2197 } 2198 if (strategy < ZSTD_lazy) { 2199 if (isDefaultAllowed) { 2200 size_t const staticFse_nbSeq_max = 1000; 2201 size_t const mult = 10 - strategy; 2202 size_t const baseLog = 3; 2203 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ 2204 assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ 2205 assert(mult <= 9 && mult >= 7); 2206 if ( (*repeatMode == FSE_repeat_valid) 2207 && (nbSeq < staticFse_nbSeq_max) ) { 2208 DEBUGLOG(5, "Selected set_repeat"); 2209 return set_repeat; 2210 } 2211 if ( (nbSeq < dynamicFse_nbSeq_min) 2212 || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { 2213 DEBUGLOG(5, "Selected set_basic"); 2214 /* The format allows default tables to be repeated, but it isn't useful. 2215 * When using simple heuristics to select encoding type, we don't want 2216 * to confuse these tables with dictionaries. When running more careful 2217 * analysis, we don't need to waste time checking both repeating tables 2218 * and default tables. 2219 */ 2220 *repeatMode = FSE_repeat_none; 2221 return set_basic; 2222 } 2223 } 2224 } else { 2225 size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); 2226 size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); 2227 size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); 2228 size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); 2229 2230 if (isDefaultAllowed) { 2231 assert(!ZSTD_isError(basicCost)); 2232 assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); 2233 } 2234 assert(!ZSTD_isError(NCountCost)); 2235 assert(compressedCost < ERROR(maxCode)); 2236 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", 2237 (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); 2238 if (basicCost <= repeatCost && basicCost <= compressedCost) { 2239 DEBUGLOG(5, "Selected set_basic"); 2240 assert(isDefaultAllowed); 2241 *repeatMode = FSE_repeat_none; 2242 return set_basic; 2243 } 2244 if (repeatCost <= compressedCost) { 2245 DEBUGLOG(5, "Selected set_repeat"); 2246 assert(!ZSTD_isError(repeatCost)); 2247 return set_repeat; 2248 } 2249 assert(compressedCost < basicCost && compressedCost < repeatCost); 2250 } 2251 DEBUGLOG(5, "Selected set_compressed"); 2252 *repeatMode = FSE_repeat_check; 2253 return set_compressed; 2254 } 2255
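/* note (worked example) : for the fast-strategy shortcut above, take literal
 * lengths (defaultNormLog = 6) at strategy ZSTD_fast, so mult = 10 - 1 = 9 :
 * dynamicFse_nbSeq_min = ((1<<6) * 9) >> 3 = 72, hence a block with fewer
 * than 72 sequences keeps the predefined table (set_basic) rather than paying
 * for a table description ; for offsets (defaultNormLog = 5) the threshold is
 * ((1<<5) * 9) >> 3 = 36. Strategies >= ZSTD_lazy skip the shortcut and
 * compare estimated bit costs directly, as in the else-branch above. */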
2256 MEM_STATIC size_t 2257 ZSTD_buildCTable(void* dst, size_t dstCapacity, 2258 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 2259 unsigned* count, U32 max, 2260 const BYTE* codeTable, size_t nbSeq, 2261 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, 2262 const FSE_CTable* prevCTable, size_t prevCTableSize, 2263 void* workspace, size_t workspaceSize) 2264 { 2265 BYTE* op = (BYTE*)dst; 2266 const BYTE* const oend = op + dstCapacity; 2267 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); 2268 2269 switch (type) { 2270 case set_rle: 2271 FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); 2272 RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); 2273 *op = codeTable[0]; 2274 return 1; 2275 case set_repeat: 2276 memcpy(nextCTable, prevCTable, prevCTableSize); 2277 return 0; 2278 case set_basic: 2279 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ 2280 return 0; 2281 case set_compressed: { 2282 S16 norm[MaxSeq + 1]; 2283 size_t nbSeq_1 = nbSeq; 2284 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); 2285 if (count[codeTable[nbSeq-1]] > 1) { 2286 count[codeTable[nbSeq-1]]--; 2287 nbSeq_1--; 2288 } 2289 assert(nbSeq_1 > 1); 2290 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); 2291 { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ 2292 FORWARD_IF_ERROR(NCountSize); 2293 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); 2294 return NCountSize; 2295 } 2296 } 2297 default: assert(0); RETURN_ERROR(GENERIC); 2298 } 2299 } 2300 2301 FORCE_INLINE_TEMPLATE size_t 2302 ZSTD_encodeSequences_body( 2303 void* dst, size_t dstCapacity, 2304 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 2305 FSE_CTable const*
CTable_OffsetBits, BYTE const* ofCodeTable, 2306 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 2307 seqDef const* sequences, size_t nbSeq, int longOffsets) 2308 { 2309 BIT_CStream_t blockStream; 2310 FSE_CState_t stateMatchLength; 2311 FSE_CState_t stateOffsetBits; 2312 FSE_CState_t stateLitLength; 2313 2314 RETURN_ERROR_IF( 2315 ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), 2316 dstSize_tooSmall, "not enough space remaining"); 2317 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", 2318 (int)(blockStream.endPtr - blockStream.startPtr), 2319 (unsigned)dstCapacity); 2320 2321 /* first symbols */ 2322 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); 2323 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); 2324 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); 2325 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); 2326 if (MEM_32bits()) BIT_flushBits(&blockStream); 2327 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); 2328 if (MEM_32bits()) BIT_flushBits(&blockStream); 2329 if (longOffsets) { 2330 U32 const ofBits = ofCodeTable[nbSeq-1]; 2331 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); 2332 if (extraBits) { 2333 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); 2334 BIT_flushBits(&blockStream); 2335 } 2336 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, 2337 ofBits - extraBits); 2338 } else { 2339 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); 2340 } 2341 BIT_flushBits(&blockStream); 2342 2343 { size_t n; 2344 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ 2345 BYTE const llCode = llCodeTable[n]; 2346 BYTE const ofCode = ofCodeTable[n]; 2347 BYTE const mlCode = mlCodeTable[n]; 2348 U32 const llBits = LL_bits[llCode]; 2349 U32 const ofBits = ofCode; 2350 U32 const mlBits = ML_bits[mlCode]; 2351 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", 2352 (unsigned)sequences[n].litLength, 2353 (unsigned)sequences[n].matchLength + MINMATCH, 2354 (unsigned)sequences[n].offset); 2355 /* 32b*/ /* 64b*/ 2356 /* (7)*/ /* (7)*/ 2357 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ 2358 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ 2359 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 2360 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ 2361 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) 2362 BIT_flushBits(&blockStream); /* (7)*/ 2363 BIT_addBits(&blockStream, sequences[n].litLength, llBits); 2364 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 2365 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); 2366 if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); 2367 if (longOffsets) { 2368 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); 2369 if (extraBits) { 2370 BIT_addBits(&blockStream, sequences[n].offset, extraBits); 2371 BIT_flushBits(&blockStream); /* (7)*/ 2372 } 2373 BIT_addBits(&blockStream, sequences[n].offset >> extraBits, 2374 ofBits - extraBits); /* 31 */ 2375 } else { 2376 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ 2377 } 2378 BIT_flushBits(&blockStream); /* (7)*/ 2379 DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); 2380 } } 
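/* note (illustrative) : FSE decoding consumes the bitstream backward, which is
 * why the loop above walks sequences from last to first (the unsigned wrap of
 * `n` terminates it) and why the states are flushed below in ML, Off, LL
 * order : reading in reverse, a decoder re-initializes LL first, then Off,
 * then ML. The numeric margin comments in the loop (7, 15, 24, ...) track the
 * worst-case number of bits pending in the accumulator between flushes. */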
2381 2382 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); 2383 FSE_flushCState(&blockStream, &stateMatchLength); 2384 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); 2385 FSE_flushCState(&blockStream, &stateOffsetBits); 2386 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); 2387 FSE_flushCState(&blockStream, &stateLitLength); 2388 2389 { size_t const streamSize = BIT_closeCStream(&blockStream); 2390 RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); 2391 return streamSize; 2392 } 2393 } 2394 2395 static size_t 2396 ZSTD_encodeSequences_default( 2397 void* dst, size_t dstCapacity, 2398 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 2399 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 2400 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 2401 seqDef const* sequences, size_t nbSeq, int longOffsets) 2402 { 2403 return ZSTD_encodeSequences_body(dst, dstCapacity, 2404 CTable_MatchLength, mlCodeTable, 2405 CTable_OffsetBits, ofCodeTable, 2406 CTable_LitLength, llCodeTable, 2407 sequences, nbSeq, longOffsets); 2408 } 2409 2410 2411 #if DYNAMIC_BMI2 2412 2413 static TARGET_ATTRIBUTE("bmi2") size_t 2414 ZSTD_encodeSequences_bmi2( 2415 void* dst, size_t dstCapacity, 2416 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 2417 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 2418 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 2419 seqDef const* sequences, size_t nbSeq, int longOffsets) 2420 { 2421 return ZSTD_encodeSequences_body(dst, dstCapacity, 2422 CTable_MatchLength, mlCodeTable, 2423 CTable_OffsetBits, ofCodeTable, 2424 CTable_LitLength, llCodeTable, 2425 sequences, nbSeq, longOffsets); 2426 } 2427 2428 #endif 2429 2430 static size_t ZSTD_encodeSequences( 2431 void* dst, size_t dstCapacity, 2432 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 2433 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 2434 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 2435 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) 2436 { 2437 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); 2438 #if DYNAMIC_BMI2 2439 if (bmi2) { 2440 return ZSTD_encodeSequences_bmi2(dst, dstCapacity, 2441 CTable_MatchLength, mlCodeTable, 2442 CTable_OffsetBits, ofCodeTable, 2443 CTable_LitLength, llCodeTable, 2444 sequences, nbSeq, longOffsets); 2445 } 2446 #endif 2447 (void)bmi2; 2448 return ZSTD_encodeSequences_default(dst, dstCapacity, 2449 CTable_MatchLength, mlCodeTable, 2450 CTable_OffsetBits, ofCodeTable, 2451 CTable_LitLength, llCodeTable, 2452 sequences, nbSeq, longOffsets); 2453 } 2454 2455 static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) 2456 { 2457 switch (cctxParams->literalCompressionMode) { 2458 case ZSTD_lcm_huffman: 2459 return 0; 2460 case ZSTD_lcm_uncompressed: 2461 return 1; 2462 default: 2463 assert(0 /* impossible: pre-validated */); 2464 /* fall-through */ 2465 case ZSTD_lcm_auto: 2466 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); 2467 } 2468 } 2469 2470 /* ZSTD_compressSequences_internal(): 2471 * actually compresses both literals and sequences */ 2472 MEM_STATIC size_t 2473 ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, 2474 const ZSTD_entropyCTables_t* prevEntropy, 2475 ZSTD_entropyCTables_t* nextEntropy, 2476 
const ZSTD_CCtx_params* cctxParams, 2477 void* dst, size_t dstCapacity, 2478 void* workspace, size_t wkspSize, 2479 const int bmi2) 2480 { 2481 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 2482 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 2483 unsigned count[MaxSeq+1]; 2484 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; 2485 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; 2486 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; 2487 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 2488 const seqDef* const sequences = seqStorePtr->sequencesStart; 2489 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 2490 const BYTE* const llCodeTable = seqStorePtr->llCode; 2491 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 2492 BYTE* const ostart = (BYTE*)dst; 2493 BYTE* const oend = ostart + dstCapacity; 2494 BYTE* op = ostart; 2495 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 2496 BYTE* seqHead; 2497 BYTE* lastNCount = NULL; 2498 2499 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 2500 DEBUGLOG(5, "ZSTD_compressSequences_internal"); 2501 2502 /* Compress literals */ 2503 { const BYTE* const literals = seqStorePtr->litStart; 2504 size_t const litSize = seqStorePtr->lit - literals; 2505 size_t const cSize = ZSTD_compressLiterals( 2506 &prevEntropy->huf, &nextEntropy->huf, 2507 cctxParams->cParams.strategy, 2508 ZSTD_disableLiteralsCompression(cctxParams), 2509 op, dstCapacity, 2510 literals, litSize, 2511 workspace, wkspSize, 2512 bmi2); 2513 FORWARD_IF_ERROR(cSize); 2514 assert(cSize <= dstCapacity); 2515 op += cSize; 2516 } 2517 2518 /* Sequences Header */ 2519 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, 2520 dstSize_tooSmall); 2521 if (nbSeq < 0x7F) 2522 *op++ = (BYTE)nbSeq; 2523 else if (nbSeq < LONGNBSEQ) 2524 op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 2525 else 2526 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; 2527 if (nbSeq==0) { 2528 /* Copy the old tables over as if we repeated them */ 2529 memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); 2530 return op - ostart; 2531 } 2532 2533 /* seqHead : flags for FSE encoding type */ 2534 seqHead = op++; 2535 2536 /* convert length/distances into codes */ 2537 ZSTD_seqToCodes(seqStorePtr); 2538 /* build CTable for Literal Lengths */ 2539 { unsigned max = MaxLL; 2540 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ 2541 DEBUGLOG(5, "Building LL table"); 2542 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; 2543 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, 2544 count, max, mostFrequent, nbSeq, 2545 LLFSELog, prevEntropy->fse.litlengthCTable, 2546 LL_defaultNorm, LL_defaultNormLog, 2547 ZSTD_defaultAllowed, strategy); 2548 assert(set_basic < set_compressed && set_rle < set_compressed); 2549 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2550 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, 2551 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, 2552 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), 2553 workspace, wkspSize); 2554 FORWARD_IF_ERROR(countSize); 2555 if (LLtype == 
set_compressed) 2556 lastNCount = op; 2557 op += countSize; 2558 } } 2559 /* build CTable for Offsets */ 2560 { unsigned max = MaxOff; 2561 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ 2562 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ 2563 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 2564 DEBUGLOG(5, "Building OF table"); 2565 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; 2566 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, 2567 count, max, mostFrequent, nbSeq, 2568 OffFSELog, prevEntropy->fse.offcodeCTable, 2569 OF_defaultNorm, OF_defaultNormLog, 2570 defaultPolicy, strategy); 2571 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2572 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, 2573 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 2574 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), 2575 workspace, wkspSize); 2576 FORWARD_IF_ERROR(countSize); 2577 if (Offtype == set_compressed) 2578 lastNCount = op; 2579 op += countSize; 2580 } } 2581 /* build CTable for MatchLengths */ 2582 { unsigned max = MaxML; 2583 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ 2584 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); 2585 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; 2586 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, 2587 count, max, mostFrequent, nbSeq, 2588 MLFSELog, prevEntropy->fse.matchlengthCTable, 2589 ML_defaultNorm, ML_defaultNormLog, 2590 ZSTD_defaultAllowed, strategy); 2591 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2592 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, 2593 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, 2594 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), 2595 workspace, wkspSize); 2596 FORWARD_IF_ERROR(countSize); 2597 if (MLtype == set_compressed) 2598 lastNCount = op; 2599 op += countSize; 2600 } } 2601 2602 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 2603 2604 { size_t const bitstreamSize = ZSTD_encodeSequences( 2605 op, oend - op, 2606 CTable_MatchLength, mlCodeTable, 2607 CTable_OffsetBits, ofCodeTable, 2608 CTable_LitLength, llCodeTable, 2609 sequences, nbSeq, 2610 longOffsets, bmi2); 2611 FORWARD_IF_ERROR(bitstreamSize); 2612 op += bitstreamSize; 2613 /* zstd versions <= 1.3.4 mistakenly report corruption when 2614 * FSE_readNCount() receives a buffer < 4 bytes. 2615 * Fixed by https://github.com/facebook/zstd/pull/1146. 2616 * This can happen when the last set_compressed table present is 2 2617 * bytes and the bitstream is only one byte. 2618 * In this exceedingly rare case, we will simply emit an uncompressed 2619 * block, since it isn't worth optimizing. 
2620 */ 2621 if (lastNCount && (op - lastNCount) < 4) { 2622 /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ 2623 assert(op - lastNCount == 3); 2624 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " 2625 "emitting an uncompressed block."); 2626 return 0; 2627 } 2628 } 2629 2630 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); 2631 return op - ostart; 2632 } 2633 2634 MEM_STATIC size_t 2635 ZSTD_compressSequences(seqStore_t* seqStorePtr, 2636 const ZSTD_entropyCTables_t* prevEntropy, 2637 ZSTD_entropyCTables_t* nextEntropy, 2638 const ZSTD_CCtx_params* cctxParams, 2639 void* dst, size_t dstCapacity, 2640 size_t srcSize, 2641 void* workspace, size_t wkspSize, 2642 int bmi2) 2643 { 2644 size_t const cSize = ZSTD_compressSequences_internal( 2645 seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2646 dst, dstCapacity, 2647 workspace, wkspSize, bmi2); 2648 if (cSize == 0) return 0; 2649 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 2650 * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block. 2651 */ 2652 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) 2653 return 0; /* block not compressed */ 2654 FORWARD_IF_ERROR(cSize); 2655 2656 /* Check compressibility */ 2657 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); 2658 if (cSize >= maxCSize) return 0; /* block not compressed */ 2659 } 2660 2661 return cSize; 2662 } 2663 2664 /* ZSTD_selectBlockCompressor() : 2665 * Not static, but internal use only (used by long distance matcher) 2666 * assumption : strat is a valid strategy */ 2667 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) 2668 { 2669 static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { 2670 { ZSTD_compressBlock_fast /* default for 0 */, 2671 ZSTD_compressBlock_fast, 2672 ZSTD_compressBlock_doubleFast, 2673 ZSTD_compressBlock_greedy, 2674 ZSTD_compressBlock_lazy, 2675 ZSTD_compressBlock_lazy2, 2676 ZSTD_compressBlock_btlazy2, 2677 ZSTD_compressBlock_btopt, 2678 ZSTD_compressBlock_btultra, 2679 ZSTD_compressBlock_btultra2 }, 2680 { ZSTD_compressBlock_fast_extDict /* default for 0 */, 2681 ZSTD_compressBlock_fast_extDict, 2682 ZSTD_compressBlock_doubleFast_extDict, 2683 ZSTD_compressBlock_greedy_extDict, 2684 ZSTD_compressBlock_lazy_extDict, 2685 ZSTD_compressBlock_lazy2_extDict, 2686 ZSTD_compressBlock_btlazy2_extDict, 2687 ZSTD_compressBlock_btopt_extDict, 2688 ZSTD_compressBlock_btultra_extDict, 2689 ZSTD_compressBlock_btultra_extDict }, 2690 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, 2691 ZSTD_compressBlock_fast_dictMatchState, 2692 ZSTD_compressBlock_doubleFast_dictMatchState, 2693 ZSTD_compressBlock_greedy_dictMatchState, 2694 ZSTD_compressBlock_lazy_dictMatchState, 2695 ZSTD_compressBlock_lazy2_dictMatchState, 2696 ZSTD_compressBlock_btlazy2_dictMatchState, 2697 ZSTD_compressBlock_btopt_dictMatchState, 2698 ZSTD_compressBlock_btultra_dictMatchState, 2699 ZSTD_compressBlock_btultra_dictMatchState } 2700 }; 2701 ZSTD_blockCompressor selectedCompressor; 2702 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); 2703 2704 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 2705 selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; 2706 assert(selectedCompressor != NULL); 2707 return selectedCompressor; 2708 } 2709 2710 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, 2711 const
BYTE* anchor, size_t lastLLSize) 2712 { 2713 memcpy(seqStorePtr->lit, anchor, lastLLSize); 2714 seqStorePtr->lit += lastLLSize; 2715 } 2716 2717 void ZSTD_resetSeqStore(seqStore_t* ssPtr) 2718 { 2719 ssPtr->lit = ssPtr->litStart; 2720 ssPtr->sequences = ssPtr->sequencesStart; 2721 ssPtr->longLengthID = 0; 2722 } 2723 2724 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, 2725 void* dst, size_t dstCapacity, 2726 const void* src, size_t srcSize) 2727 { 2728 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 2729 size_t cSize; 2730 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 2731 (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); 2732 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 2733 2734 /* Assert that we have correctly flushed the ctx params into the ms's copy */ 2735 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); 2736 2737 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 2738 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); 2739 cSize = 0; 2740 goto out; /* don't even attempt compression below a certain srcSize */ 2741 } 2742 ZSTD_resetSeqStore(&(zc->seqStore)); 2743 /* required for optimal parser to read stats from dictionary */ 2744 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; 2745 /* tell the optimal parser how we expect to compress literals */ 2746 ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; 2747 2748 /* a gap between an attached dict and the current window is not safe, 2749 * they must remain adjacent, 2750 * and when that stops being the case, the dict must be unset */ 2751 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); 2752 2753 /* limited update after a very long match */ 2754 { const BYTE* const base = ms->window.base; 2755 const BYTE* const istart = (const BYTE*)src; 2756 const U32 current = (U32)(istart-base); 2757 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ 2758 if (current > ms->nextToUpdate + 384) 2759 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); 2760 } 2761 2762 /* select and store sequences */ 2763 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); 2764 size_t lastLLSize; 2765 { int i; 2766 for (i = 0; i < ZSTD_REP_NUM; ++i) 2767 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; 2768 } 2769 if (zc->externSeqStore.pos < zc->externSeqStore.size) { 2770 assert(!zc->appliedParams.ldmParams.enableLdm); 2771 /* Updates ldmSeqStore.pos */ 2772 lastLLSize = 2773 ZSTD_ldm_blockCompress(&zc->externSeqStore, 2774 ms, &zc->seqStore, 2775 zc->blockState.nextCBlock->rep, 2776 src, srcSize); 2777 assert(zc->externSeqStore.pos <= zc->externSeqStore.size); 2778 } else if (zc->appliedParams.ldmParams.enableLdm) { 2779 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; 2780 2781 ldmSeqStore.seq = zc->ldmSequences; 2782 ldmSeqStore.capacity = zc->maxNbLdmSequences; 2783 /* Updates ldmSeqStore.size */ 2784 FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, 2785 &zc->appliedParams.ldmParams, 2786 src, srcSize)); 2787 /* Updates ldmSeqStore.pos */ 2788 lastLLSize = 2789 ZSTD_ldm_blockCompress(&ldmSeqStore, 2790 ms, &zc->seqStore, 2791 zc->blockState.nextCBlock->rep, 2792 src, srcSize); 2793 assert(ldmSeqStore.pos == ldmSeqStore.size); 2794 } else { /* not long range mode */ 2795 ZSTD_blockCompressor const blockCompressor = 
ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); 2796 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); 2797 } 2798 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; 2799 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); 2800 } } 2801 2802 /* encode sequences and literals */ 2803 cSize = ZSTD_compressSequences(&zc->seqStore, 2804 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 2805 &zc->appliedParams, 2806 dst, dstCapacity, 2807 srcSize, 2808 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 2809 zc->bmi2); 2810 2811 out: 2812 if (!ZSTD_isError(cSize) && cSize != 0) { 2813 /* confirm repcodes and entropy tables when emitting a compressed block */ 2814 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; 2815 zc->blockState.prevCBlock = zc->blockState.nextCBlock; 2816 zc->blockState.nextCBlock = tmp; 2817 } 2818 /* We check that dictionaries have offset codes available for the first 2819 * block. After the first block, the offcode table might not have large 2820 * enough codes to represent the offsets in the data. 2821 */ 2822 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 2823 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 2824 2825 return cSize; 2826 } 2827 2828 2829 /*! ZSTD_compress_frameChunk() : 2830 * Compress a chunk of data into one or multiple blocks. 2831 * All blocks will be terminated, all input will be consumed. 2832 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 2833 * Frame is supposed already started (header already produced) 2834 * @return : compressed size, or an error code 2835 */ 2836 static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, 2837 void* dst, size_t dstCapacity, 2838 const void* src, size_t srcSize, 2839 U32 lastFrameChunk) 2840 { 2841 size_t blockSize = cctx->blockSize; 2842 size_t remaining = srcSize; 2843 const BYTE* ip = (const BYTE*)src; 2844 BYTE* const ostart = (BYTE*)dst; 2845 BYTE* op = ostart; 2846 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; 2847 assert(cctx->appliedParams.cParams.windowLog <= 31); 2848 2849 DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); 2850 if (cctx->appliedParams.fParams.checksumFlag && srcSize) 2851 XXH64_update(&cctx->xxhState, src, srcSize); 2852 2853 while (remaining) { 2854 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 2855 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); 2856 2857 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, 2858 dstSize_tooSmall, 2859 "not enough space to store compressed block"); 2860 if (remaining < blockSize) blockSize = remaining; 2861 2862 if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { 2863 U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); 2864 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); 2865 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); 2866 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 2867 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 2868 ZSTD_reduceIndex(cctx, correction); 2869 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; 2870 else ms->nextToUpdate -= correction; 2871 ms->loadedDictEnd = 0; 2872 ms->dictMatchState = NULL; 2873 } 2874 
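/* note (illustrative) : match-state indices are U32 offsets from window.base,
 * so after roughly 4 GB of contiguous input they would overflow. The
 * correction block above rebases them : the amount is chosen so that every
 * index keeps its position within the (1 << cycleLog) cycle of the chain
 * table, and ZSTD_reduceIndex() (see ZSTD_reduceTable_internal() earlier)
 * subtracts the same amount from every hash/chain/hash3 entry, squashing
 * entries that would go negative to zero. */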
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); 2875 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; 2876 2877 { size_t cSize = ZSTD_compressBlock_internal(cctx, 2878 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, 2879 ip, blockSize); 2880 FORWARD_IF_ERROR(cSize); 2881 2882 if (cSize == 0) { /* block is not compressible */ 2883 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 2884 FORWARD_IF_ERROR(cSize); 2885 } else { 2886 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 2887 MEM_writeLE24(op, cBlockHeader24); 2888 cSize += ZSTD_blockHeaderSize; 2889 } 2890 2891 ip += blockSize; 2892 assert(remaining >= blockSize); 2893 remaining -= blockSize; 2894 op += cSize; 2895 assert(dstCapacity >= cSize); 2896 dstCapacity -= cSize; 2897 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", 2898 (unsigned)cSize); 2899 } } 2900 2901 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; 2902 return op-ostart; 2903 } 2904 2905 2906 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, 2907 ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) 2908 { BYTE* const op = (BYTE*)dst; 2909 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ 2910 U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ 2911 U32 const checksumFlag = params.fParams.checksumFlag>0; 2912 U32 const windowSize = (U32)1 << params.cParams.windowLog; 2913 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); 2914 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); 2915 U32 const fcsCode = params.fParams.contentSizeFlag ? 
2916 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ 2917 BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); 2918 size_t pos=0; 2919 2920 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); 2921 RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); 2922 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", 2923 !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); 2924 2925 if (params.format == ZSTD_f_zstd1) { 2926 MEM_writeLE32(dst, ZSTD_MAGICNUMBER); 2927 pos = 4; 2928 } 2929 op[pos++] = frameHeaderDescriptionByte; 2930 if (!singleSegment) op[pos++] = windowLogByte; 2931 switch(dictIDSizeCode) 2932 { 2933 default: assert(0); /* impossible */ 2934 case 0 : break; 2935 case 1 : op[pos] = (BYTE)(dictID); pos++; break; 2936 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; 2937 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; 2938 } 2939 switch(fcsCode) 2940 { 2941 default: assert(0); /* impossible */ 2942 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; 2943 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; 2944 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; 2945 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; 2946 } 2947 return pos; 2948 } 2949 2950 /* ZSTD_writeLastEmptyBlock() : 2951 * output an empty Block with end-of-frame mark to complete a frame 2952 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 2953 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) 2954 */ 2955 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) 2956 { 2957 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); 2958 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ 2959 MEM_writeLE24(dst, cBlockHeader24); 2960 return ZSTD_blockHeaderSize; 2961 } 2962 } 2963 2964 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 2965 { 2966 RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong); 2967 RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, 2968 parameter_unsupported); 2969 cctx->externSeqStore.seq = seq; 2970 cctx->externSeqStore.size = nbSeq; 2971 cctx->externSeqStore.capacity = nbSeq; 2972 cctx->externSeqStore.pos = 0; 2973 return 0; 2974 } 2975 2976 2977 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, 2978 void* dst, size_t dstCapacity, 2979 const void* src, size_t srcSize, 2980 U32 frame, U32 lastFrameChunk) 2981 { 2982 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 2983 size_t fhSize = 0; 2984 2985 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", 2986 cctx->stage, (unsigned)srcSize); 2987 RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, 2988 "missing init (ZSTD_compressBegin)"); 2989 2990 if (frame && (cctx->stage==ZSTDcs_init)) { 2991 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 2992 cctx->pledgedSrcSizePlusOne-1, cctx->dictID); 2993 FORWARD_IF_ERROR(fhSize); 2994 dstCapacity -= fhSize; 2995 dst = (char*)dst + fhSize; 2996 cctx->stage = ZSTDcs_ongoing; 2997 } 2998 2999 if (!srcSize) return fhSize; /* do not generate an empty block if no input */ 3000 3001 if (!ZSTD_window_update(&ms->window, src, srcSize)) { 3002 
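/* note (illustrative) : ZSTD_window_update() returns 0 when `src` does not
 * directly continue the previous segment ; the old content then becomes the
 * extDict segment, and the line below restarts table updates at
 * window.dictLimit, the first index of the new segment, so the matcher never
 * indexes across the gap between the two buffers. */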
ms->nextToUpdate = ms->window.dictLimit; 3003 } 3004 if (cctx->appliedParams.ldmParams.enableLdm) { 3005 ZSTD_window_update(&cctx->ldmState.window, src, srcSize); 3006 } 3007 3008 if (!frame) { 3009 /* overflow check and correction for block mode */ 3010 if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) { 3011 U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); 3012 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); 3013 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); 3014 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 3015 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 3016 ZSTD_reduceIndex(cctx, correction); 3017 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; 3018 else ms->nextToUpdate -= correction; 3019 ms->loadedDictEnd = 0; 3020 ms->dictMatchState = NULL; 3021 } 3022 } 3023 3024 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); 3025 { size_t const cSize = frame ? 3026 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : 3027 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); 3028 FORWARD_IF_ERROR(cSize); 3029 cctx->consumedSrcSize += srcSize; 3030 cctx->producedCSize += (cSize + fhSize); 3031 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); 3032 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 3033 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); 3034 RETURN_ERROR_IF( 3035 cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, 3036 srcSize_wrong, 3037 "error : pledgedSrcSize = %u, while realSrcSize >= %u", 3038 (unsigned)cctx->pledgedSrcSizePlusOne-1, 3039 (unsigned)cctx->consumedSrcSize); 3040 } 3041 return cSize + fhSize; 3042 } 3043 } 3044 3045 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, 3046 void* dst, size_t dstCapacity, 3047 const void* src, size_t srcSize) 3048 { 3049 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); 3050 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); 3051 } 3052 3053 3054 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 3055 { 3056 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; 3057 assert(!ZSTD_checkCParams(cParams)); 3058 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); 3059 } 3060 3061 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 3062 { 3063 size_t const blockSizeMax = ZSTD_getBlockSize(cctx); 3064 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); 3065 3066 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); 3067 } 3068 3069 /*! ZSTD_loadDictionaryContent() : 3070 * @return : 0, or an error code 3071 */ 3072 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, 3073 ZSTD_CCtx_params const* params, 3074 const void* src, size_t srcSize, 3075 ZSTD_dictTableLoadMethod_e dtlm) 3076 { 3077 const BYTE* const ip = (const BYTE*) src; 3078 const BYTE* const iend = ip + srcSize; 3079 3080 ZSTD_window_update(&ms->window, src, srcSize); 3081 ms->loadedDictEnd = params->forceWindow ? 
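/* with forceWindow, loadedDictEnd stays 0, so dictionary content
 * is not allowed to extend matches beyond the sliding window */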
0 : (U32)(iend - ms->window.base); 3082 3083 /* Assert that the ms params match the params we're being given */ 3084 ZSTD_assertEqualCParams(params->cParams, ms->cParams); 3085 3086 if (srcSize <= HASH_READ_SIZE) return 0; 3087 3088 switch(params->cParams.strategy) 3089 { 3090 case ZSTD_fast: 3091 ZSTD_fillHashTable(ms, iend, dtlm); 3092 break; 3093 case ZSTD_dfast: 3094 ZSTD_fillDoubleHashTable(ms, iend, dtlm); 3095 break; 3096 3097 case ZSTD_greedy: 3098 case ZSTD_lazy: 3099 case ZSTD_lazy2: 3100 if (srcSize >= HASH_READ_SIZE) 3101 ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); 3102 break; 3103 3104 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ 3105 case ZSTD_btopt: 3106 case ZSTD_btultra: 3107 case ZSTD_btultra2: 3108 if (srcSize >= HASH_READ_SIZE) 3109 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); 3110 break; 3111 3112 default: 3113 assert(0); /* not possible : not a valid strategy id */ 3114 } 3115 3116 ms->nextToUpdate = (U32)(iend - ms->window.base); 3117 return 0; 3118 } 3119 3120 3121 /* Dictionaries that assign zero probability to symbols that show up cause problems 3122 during FSE encoding. Refuse dictionaries that assign zero probability to symbols 3123 that we may encounter during compression. 3124 NOTE: This behavior is not standard and could be improved in the future. */ 3125 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { 3126 U32 s; 3127 RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); 3128 for (s = 0; s <= maxSymbolValue; ++s) { 3129 RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); 3130 } 3131 return 0; 3132 } 3133 3134 3135 /* Dictionary format : 3136 * See : 3137 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format 3138 */ 3139 /*! ZSTD_loadZstdDictionary() : 3140 * @return : dictID, or an error code 3141 * assumptions : magic number already checked, 3142 * dictSize > 8 3143 */ 3144 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, 3145 ZSTD_matchState_t* ms, 3146 ZSTD_CCtx_params const* params, 3147 const void* dict, size_t dictSize, 3148 ZSTD_dictTableLoadMethod_e dtlm, 3149 void* workspace) 3150 { 3151 const BYTE* dictPtr = (const BYTE*)dict; 3152 const BYTE* const dictEnd = dictPtr + dictSize; 3153 short offcodeNCount[MaxOff+1]; 3154 unsigned offcodeMaxValue = MaxOff; 3155 size_t dictID; 3156 3157 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 3158 assert(dictSize > 8); 3159 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); 3160 3161 dictPtr += 4; /* skip magic number */ 3162 dictID = params->fParams.noDictIDFlag ?
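/* note : even when the dictID is not written into frames (noDictIDFlag),
 * the 4-byte dictID field is still present in the dictionary itself,
 * hence the unconditional dictPtr += 4 just below */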
0 : MEM_readLE32(dictPtr); 3163 dictPtr += 4; 3164 3165 { unsigned maxSymbolValue = 255; 3166 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); 3167 RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); 3168 RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); 3169 dictPtr += hufHeaderSize; 3170 } 3171 3172 { unsigned offcodeLog; 3173 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); 3174 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); 3175 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); 3176 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ 3177 /* fill all offset symbols to avoid garbage at end of table */ 3178 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3179 bs->entropy.fse.offcodeCTable, 3180 offcodeNCount, MaxOff, offcodeLog, 3181 workspace, HUF_WORKSPACE_SIZE)), 3182 dictionary_corrupted); 3183 dictPtr += offcodeHeaderSize; 3184 } 3185 3186 { short matchlengthNCount[MaxML+1]; 3187 unsigned matchlengthMaxValue = MaxML, matchlengthLog; 3188 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); 3189 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); 3190 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); 3191 /* Every match length code must have non-zero probability */ 3192 FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); 3193 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3194 bs->entropy.fse.matchlengthCTable, 3195 matchlengthNCount, matchlengthMaxValue, matchlengthLog, 3196 workspace, HUF_WORKSPACE_SIZE)), 3197 dictionary_corrupted); 3198 dictPtr += matchlengthHeaderSize; 3199 } 3200 3201 { short litlengthNCount[MaxLL+1]; 3202 unsigned litlengthMaxValue = MaxLL, litlengthLog; 3203 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); 3204 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); 3205 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); 3206 /* Every literal length code must have non-zero probability */ 3207 FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); 3208 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 3209 bs->entropy.fse.litlengthCTable, 3210 litlengthNCount, litlengthMaxValue, litlengthLog, 3211 workspace, HUF_WORKSPACE_SIZE)), 3212 dictionary_corrupted); 3213 dictPtr += litlengthHeaderSize; 3214 } 3215 3216 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); 3217 bs->rep[0] = MEM_readLE32(dictPtr+0); 3218 bs->rep[1] = MEM_readLE32(dictPtr+4); 3219 bs->rep[2] = MEM_readLE32(dictPtr+8); 3220 dictPtr += 12; 3221 3222 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 3223 U32 offcodeMax = MaxOff; 3224 if (dictContentSize <= ((U32)-1) - 128 KB) { 3225 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ 3226 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ 3227 } 3228 /* All offset values <= dictContentSize + 128 KB must be representable */ 3229 FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); 3230 /* All repCodes must be <= dictContentSize and 
!= 0*/ 3231 { U32 u; 3232 for (u=0; u<3; u++) { 3233 RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); 3234 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); 3235 } } 3236 3237 bs->entropy.huf.repeatMode = HUF_repeat_valid; 3238 bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; 3239 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; 3240 bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; 3241 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); 3242 return dictID; 3243 } 3244 } 3245 3246 /** ZSTD_compress_insertDictionary() : 3247 * @return : dictID, or an error code */ 3248 static size_t 3249 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, 3250 ZSTD_matchState_t* ms, 3251 const ZSTD_CCtx_params* params, 3252 const void* dict, size_t dictSize, 3253 ZSTD_dictContentType_e dictContentType, 3254 ZSTD_dictTableLoadMethod_e dtlm, 3255 void* workspace) 3256 { 3257 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); 3258 if ((dict==NULL) || (dictSize<=8)) return 0; 3259 3260 ZSTD_reset_compressedBlockState(bs); 3261 3262 /* dict restricted modes */ 3263 if (dictContentType == ZSTD_dct_rawContent) 3264 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); 3265 3266 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { 3267 if (dictContentType == ZSTD_dct_auto) { 3268 DEBUGLOG(4, "raw content dictionary detected"); 3269 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); 3270 } 3271 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); 3272 assert(0); /* impossible */ 3273 } 3274 3275 /* dict as full zstd dictionary */ 3276 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); 3277 } 3278 3279 /*! 
ZSTD_compressBegin_internal() : 3280 * @return : 0, or an error code */ 3281 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, 3282 const void* dict, size_t dictSize, 3283 ZSTD_dictContentType_e dictContentType, 3284 ZSTD_dictTableLoadMethod_e dtlm, 3285 const ZSTD_CDict* cdict, 3286 ZSTD_CCtx_params params, U64 pledgedSrcSize, 3287 ZSTD_buffered_policy_e zbuff) 3288 { 3289 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); 3290 /* params are supposed to be fully validated at this point */ 3291 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 3292 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 3293 3294 if (cdict && cdict->dictContentSize>0) { 3295 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); 3296 } 3297 3298 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 3299 ZSTDcrp_continue, zbuff) ); 3300 { 3301 size_t const dictID = ZSTD_compress_insertDictionary( 3302 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 3303 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); 3304 FORWARD_IF_ERROR(dictID); 3305 assert(dictID <= (size_t)(U32)-1); 3306 cctx->dictID = (U32)dictID; 3307 } 3308 return 0; 3309 } 3310 3311 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, 3312 const void* dict, size_t dictSize, 3313 ZSTD_dictContentType_e dictContentType, 3314 ZSTD_dictTableLoadMethod_e dtlm, 3315 const ZSTD_CDict* cdict, 3316 ZSTD_CCtx_params params, 3317 unsigned long long pledgedSrcSize) 3318 { 3319 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); 3320 /* compression parameters verification and optimization */ 3321 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); 3322 return ZSTD_compressBegin_internal(cctx, 3323 dict, dictSize, dictContentType, dtlm, 3324 cdict, 3325 params, pledgedSrcSize, 3326 ZSTDb_not_buffered); 3327 } 3328 3329 /*! ZSTD_compressBegin_advanced() : 3330 * @return : 0, or an error code */ 3331 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, 3332 const void* dict, size_t dictSize, 3333 ZSTD_parameters params, unsigned long long pledgedSrcSize) 3334 { 3335 ZSTD_CCtx_params const cctxParams = 3336 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 3337 return ZSTD_compressBegin_advanced_internal(cctx, 3338 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, 3339 NULL /*cdict*/, 3340 cctxParams, pledgedSrcSize); 3341 } 3342 3343 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 3344 { 3345 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); 3346 ZSTD_CCtx_params const cctxParams = 3347 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 3348 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); 3349 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, 3350 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); 3351 } 3352 3353 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) 3354 { 3355 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); 3356 } 3357 3358 3359 /*! ZSTD_writeEpilogue() : 3360 * Ends a frame.
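 * Writes a last empty block if one was not already written, plus the 32-bit checksum when checksumFlag is set.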
3361 * @return : nb of bytes written into dst (or an error code) */ 3362 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) 3363 { 3364 BYTE* const ostart = (BYTE*)dst; 3365 BYTE* op = ostart; 3366 size_t fhSize = 0; 3367 3368 DEBUGLOG(4, "ZSTD_writeEpilogue"); 3369 RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); 3370 3371 /* special case : empty frame */ 3372 if (cctx->stage == ZSTDcs_init) { 3373 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); 3374 FORWARD_IF_ERROR(fhSize); 3375 dstCapacity -= fhSize; 3376 op += fhSize; 3377 cctx->stage = ZSTDcs_ongoing; 3378 } 3379 3380 if (cctx->stage != ZSTDcs_ending) { 3381 /* write one last empty block, make it the "last" block */ 3382 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; 3383 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); 3384 MEM_writeLE32(op, cBlockHeader24); 3385 op += ZSTD_blockHeaderSize; 3386 dstCapacity -= ZSTD_blockHeaderSize; 3387 } 3388 3389 if (cctx->appliedParams.fParams.checksumFlag) { 3390 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); 3391 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); 3392 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); 3393 MEM_writeLE32(op, checksum); 3394 op += 4; 3395 } 3396 3397 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ 3398 return op-ostart; 3399 } 3400 3401 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, 3402 void* dst, size_t dstCapacity, 3403 const void* src, size_t srcSize) 3404 { 3405 size_t endResult; 3406 size_t const cSize = ZSTD_compressContinue_internal(cctx, 3407 dst, dstCapacity, src, srcSize, 3408 1 /* frame mode */, 1 /* last chunk */); 3409 FORWARD_IF_ERROR(cSize); 3410 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); 3411 FORWARD_IF_ERROR(endResult); 3412 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); 3413 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 3414 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); 3415 DEBUGLOG(4, "end of frame : controlling src size"); 3416 RETURN_ERROR_IF( 3417 cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, 3418 srcSize_wrong, 3419 "error : pledgedSrcSize = %u, while realSrcSize = %u", 3420 (unsigned)cctx->pledgedSrcSizePlusOne-1, 3421 (unsigned)cctx->consumedSrcSize); 3422 } 3423 return cSize + endResult; 3424 } 3425 3426 3427 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, 3428 void* dst, size_t dstCapacity, 3429 const void* src, size_t srcSize, 3430 const void* dict,size_t dictSize, 3431 ZSTD_parameters params) 3432 { 3433 ZSTD_CCtx_params const cctxParams = 3434 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 3435 DEBUGLOG(4, "ZSTD_compress_internal"); 3436 return ZSTD_compress_advanced_internal(cctx, 3437 dst, dstCapacity, 3438 src, srcSize, 3439 dict, dictSize, 3440 cctxParams); 3441 } 3442 3443 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, 3444 void* dst, size_t dstCapacity, 3445 const void* src, size_t srcSize, 3446 const void* dict,size_t dictSize, 3447 ZSTD_parameters params) 3448 { 3449 DEBUGLOG(4, "ZSTD_compress_advanced"); 3450 FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); 3451 return ZSTD_compress_internal(cctx, 3452 dst, dstCapacity, 3453 src, srcSize, 3454 dict, dictSize, 3455 params); 3456 } 3457 3458 /* Internal */ 3459 size_t ZSTD_compress_advanced_internal( 3460 ZSTD_CCtx* cctx, 3461 void* dst, size_t 
dstCapacity, 3462 const void* src, size_t srcSize, 3463 const void* dict,size_t dictSize, 3464 ZSTD_CCtx_params params) 3465 { 3466 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); 3467 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, 3468 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, 3469 params, srcSize, ZSTDb_not_buffered) ); 3470 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 3471 } 3472 3473 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, 3474 void* dst, size_t dstCapacity, 3475 const void* src, size_t srcSize, 3476 const void* dict, size_t dictSize, 3477 int compressionLevel) 3478 { 3479 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); 3480 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 3481 assert(params.fParams.contentSizeFlag == 1); 3482 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); 3483 } 3484 3485 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, 3486 void* dst, size_t dstCapacity, 3487 const void* src, size_t srcSize, 3488 int compressionLevel) 3489 { 3490 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); 3491 assert(cctx != NULL); 3492 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); 3493 } 3494 3495 size_t ZSTD_compress(void* dst, size_t dstCapacity, 3496 const void* src, size_t srcSize, 3497 int compressionLevel) 3498 { 3499 size_t result; 3500 ZSTD_CCtx ctxBody; 3501 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); 3502 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); 3503 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ 3504 return result; 3505 } 3506 3507 3508 /* ===== Dictionary API ===== */ 3509 3510 /*! ZSTD_estimateCDictSize_advanced() : 3511 * Estimate amount of memory that will be needed to create a dictionary with following arguments */ 3512 size_t ZSTD_estimateCDictSize_advanced( 3513 size_t dictSize, ZSTD_compressionParameters cParams, 3514 ZSTD_dictLoadMethod_e dictLoadMethod) 3515 { 3516 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); 3517 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) 3518 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 3519 } 3520 3521 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) 3522 { 3523 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 3524 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); 3525 } 3526 3527 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) 3528 { 3529 if (cdict==NULL) return 0; /* support sizeof on NULL */ 3530 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); 3531 return cdict->workspaceSize + (cdict->dictBuffer ? 
cdict->dictContentSize : 0) + sizeof(*cdict); 3532 } 3533 3534 static size_t ZSTD_initCDict_internal( 3535 ZSTD_CDict* cdict, 3536 const void* dictBuffer, size_t dictSize, 3537 ZSTD_dictLoadMethod_e dictLoadMethod, 3538 ZSTD_dictContentType_e dictContentType, 3539 ZSTD_compressionParameters cParams) 3540 { 3541 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); 3542 assert(!ZSTD_checkCParams(cParams)); 3543 cdict->matchState.cParams = cParams; 3544 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { 3545 cdict->dictBuffer = NULL; 3546 cdict->dictContent = dictBuffer; 3547 } else { 3548 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); 3549 cdict->dictBuffer = internalBuffer; 3550 cdict->dictContent = internalBuffer; 3551 RETURN_ERROR_IF(!internalBuffer, memory_allocation); 3552 memcpy(internalBuffer, dictBuffer, dictSize); 3553 } 3554 cdict->dictContentSize = dictSize; 3555 3556 /* Reset the state to no dictionary */ 3557 ZSTD_reset_compressedBlockState(&cdict->cBlockState); 3558 { void* const end = ZSTD_reset_matchState( 3559 &cdict->matchState, 3560 (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, 3561 &cParams, ZSTDcrp_continue, /* forCCtx */ 0); 3562 assert(end == (char*)cdict->workspace + cdict->workspaceSize); 3563 (void)end; 3564 } 3565 /* (Maybe) load the dictionary 3566 * Skips loading the dictionary if it is <= 8 bytes. 3567 */ 3568 { ZSTD_CCtx_params params; 3569 memset(&params, 0, sizeof(params)); 3570 params.compressionLevel = ZSTD_CLEVEL_DEFAULT; 3571 params.fParams.contentSizeFlag = 1; 3572 params.cParams = cParams; 3573 { size_t const dictID = ZSTD_compress_insertDictionary( 3574 &cdict->cBlockState, &cdict->matchState, &params, 3575 cdict->dictContent, cdict->dictContentSize, 3576 dictContentType, ZSTD_dtlm_full, cdict->workspace); 3577 FORWARD_IF_ERROR(dictID); 3578 assert(dictID <= (size_t)(U32)-1); 3579 cdict->dictID = (U32)dictID; 3580 } 3581 } 3582 3583 return 0; 3584 } 3585 3586 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, 3587 ZSTD_dictLoadMethod_e dictLoadMethod, 3588 ZSTD_dictContentType_e dictContentType, 3589 ZSTD_compressionParameters cParams, ZSTD_customMem customMem) 3590 { 3591 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); 3592 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 3593 3594 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); 3595 size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); 3596 void* const workspace = ZSTD_malloc(workspaceSize, customMem); 3597 3598 if (!cdict || !workspace) { 3599 ZSTD_free(cdict, customMem); 3600 ZSTD_free(workspace, customMem); 3601 return NULL; 3602 } 3603 cdict->customMem = customMem; 3604 cdict->workspace = workspace; 3605 cdict->workspaceSize = workspaceSize; 3606 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 3607 dictBuffer, dictSize, 3608 dictLoadMethod, dictContentType, 3609 cParams) )) { 3610 ZSTD_freeCDict(cdict); 3611 return NULL; 3612 } 3613 3614 return cdict; 3615 } 3616 } 3617 3618 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) 3619 { 3620 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 3621 return ZSTD_createCDict_advanced(dict, dictSize, 3622 ZSTD_dlm_byCopy, ZSTD_dct_auto, 3623 cParams, ZSTD_defaultCMem); 3624 } 3625 3626 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t
dictSize, int compressionLevel) 3627 { 3628 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 3629 return ZSTD_createCDict_advanced(dict, dictSize, 3630 ZSTD_dlm_byRef, ZSTD_dct_auto, 3631 cParams, ZSTD_defaultCMem); 3632 } 3633 3634 size_t ZSTD_freeCDict(ZSTD_CDict* cdict) 3635 { 3636 if (cdict==NULL) return 0; /* support free on NULL */ 3637 { ZSTD_customMem const cMem = cdict->customMem; 3638 ZSTD_free(cdict->workspace, cMem); 3639 ZSTD_free(cdict->dictBuffer, cMem); 3640 ZSTD_free(cdict, cMem); 3641 return 0; 3642 } 3643 } 3644 3645 /*! ZSTD_initStaticCDict() : 3646 * Generate a digested dictionary in provided memory area. 3647 * workspace: The memory area to emplace the dictionary into. 3648 * Provided pointer must be 8-bytes aligned. 3649 * It must outlive dictionary usage. 3650 * workspaceSize: Use ZSTD_estimateCDictSize() 3651 * to determine how large workspace must be. 3652 * cParams : use ZSTD_getCParams() to transform a compression level 3653 * into its relevant cParams. 3654 * @return : pointer to ZSTD_CDict, or NULL if error (size too small) 3655 * Note : there is no corresponding "free" function. 3656 * Since workspace was allocated externally, it must be freed externally. 3657 */ 3658 const ZSTD_CDict* ZSTD_initStaticCDict( 3659 void* workspace, size_t workspaceSize, 3660 const void* dict, size_t dictSize, 3661 ZSTD_dictLoadMethod_e dictLoadMethod, 3662 ZSTD_dictContentType_e dictContentType, 3663 ZSTD_compressionParameters cParams) 3664 { 3665 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); 3666 size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) 3667 + HUF_WORKSPACE_SIZE + matchStateSize; 3668 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; 3669 void* ptr; 3670 if ((size_t)workspace & 7) return NULL; /* 8-aligned */ 3671 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", 3672 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); 3673 if (workspaceSize < neededSize) return NULL; 3674 3675 if (dictLoadMethod == ZSTD_dlm_byCopy) { 3676 memcpy(cdict+1, dict, dictSize); 3677 dict = cdict+1; 3678 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; 3679 } else { 3680 ptr = cdict+1; 3681 } 3682 cdict->workspace = ptr; 3683 cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; 3684 3685 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 3686 dict, dictSize, 3687 ZSTD_dlm_byRef, dictContentType, 3688 cParams) )) 3689 return NULL; 3690 3691 return cdict; 3692 } 3693 3694 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) 3695 { 3696 assert(cdict != NULL); 3697 return cdict->matchState.cParams; 3698 } 3699 3700 /* ZSTD_compressBegin_usingCDict_advanced() : 3701 * cdict must be != NULL */ 3702 size_t ZSTD_compressBegin_usingCDict_advanced( 3703 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, 3704 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) 3705 { 3706 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); 3707 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); 3708 { ZSTD_CCtx_params params = cctx->requestedParams; 3709 params.cParams = ZSTD_getCParamsFromCDict(cdict); 3710 /* Increase window log to fit the entire dictionary and source if the 3711 * source size is known. Limit the increase to 19, which is the 3712 * window log for compression level 1 with the largest source size.
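 * For illustration (hypothetical numbers) : with pledgedSrcSize = 100 KB,
 * limitedSrcSize = 100 KB and limitedSrcLog = 17, so a cdict-derived
 * windowLog smaller than 17 would be raised to 17.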
3713 */ 3714 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { 3715 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); 3716 U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; 3717 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); 3718 } 3719 params.fParams = fParams; 3720 return ZSTD_compressBegin_internal(cctx, 3721 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, 3722 cdict, 3723 params, pledgedSrcSize, 3724 ZSTDb_not_buffered); 3725 } 3726 } 3727 3728 /* ZSTD_compressBegin_usingCDict() : 3729 * invokes ZSTD_compressBegin_usingCDict_advanced() with default frame parameters : 3730 * content size unknown, no checksum, dictID enabled */ 3731 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 3732 { 3733 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 3734 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); 3735 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); 3736 } 3737 3738 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 3739 void* dst, size_t dstCapacity, 3740 const void* src, size_t srcSize, 3741 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 3742 { 3743 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ 3744 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 3745 } 3746 3747 /*! ZSTD_compress_usingCDict() : 3748 * Compression using a digested Dictionary. 3749 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 3750 * Note that compression parameters are decided at CDict creation time 3751 * while frame parameters are hardcoded */ 3752 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, 3753 void* dst, size_t dstCapacity, 3754 const void* src, size_t srcSize, 3755 const ZSTD_CDict* cdict) 3756 { 3757 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 3758 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); 3759 } 3760 3761 3762 3763 /* ****************************************************************** 3764 * Streaming 3765 ********************************************************************/ 3766 3767 ZSTD_CStream* ZSTD_createCStream(void) 3768 { 3769 DEBUGLOG(3, "ZSTD_createCStream"); 3770 return ZSTD_createCStream_advanced(ZSTD_defaultCMem); 3771 } 3772 3773 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) 3774 { 3775 return ZSTD_initStaticCCtx(workspace, workspaceSize); 3776 } 3777 3778 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) 3779 { /* CStream and CCtx are now same object */ 3780 return ZSTD_createCCtx_advanced(customMem); 3781 } 3782 3783 size_t ZSTD_freeCStream(ZSTD_CStream* zcs) 3784 { 3785 return ZSTD_freeCCtx(zcs); /* same object */ 3786 } 3787 3788 3789 3790 /*====== Initialization ======*/ 3791 3792 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } 3793 3794 size_t ZSTD_CStreamOutSize(void) 3795 { 3796 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; 3797 } 3798 3799 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, 3800 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, 3801 const ZSTD_CDict* const cdict, 3802 ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) 3803 { 3804 DEBUGLOG(4, "ZSTD_resetCStream_internal"); 3805 /* Finalize the compression parameters */ 3806 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize); 3807 /* params are supposed to be fully validated at this point */ 3808 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 3809 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 3810 3811 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, 3812 dict, dictSize, dictContentType, ZSTD_dtlm_fast, 3813 cdict, 3814 params, pledgedSrcSize, 3815 ZSTDb_buffered) ); 3816 3817 cctx->inToCompress = 0; 3818 cctx->inBuffPos = 0; 3819 cctx->inBuffTarget = cctx->blockSize 3820 + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require adding a 3-byte null block to end the frame */ 3821 cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; 3822 cctx->streamStage = zcss_load; 3823 cctx->frameEnded = 0; 3824 return 0; /* ready to go */ 3825 } 3826 3827 /* ZSTD_resetCStream(): 3828 * pledgedSrcSize == 0 means "unknown" */ 3829 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) 3830 { 3831 /* temporary : 0 interpreted as "unknown" during transition period. 3832 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 3833 * 0 will be interpreted as "empty" in the future. 3834 */ 3835 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; 3836 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); 3837 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3838 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); 3839 return 0; 3840 } 3841 3842 /*! ZSTD_initCStream_internal() : 3843 * Note : for lib/compress only. Used by zstdmt_compress.c.
3844 * Assumption 1 : params are valid 3845 * Assumption 2 : either dict, or cdict, is defined, not both */ 3846 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 3847 const void* dict, size_t dictSize, const ZSTD_CDict* cdict, 3848 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) 3849 { 3850 DEBUGLOG(4, "ZSTD_initCStream_internal"); 3851 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3852 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); 3853 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 3854 zcs->requestedParams = params; 3855 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 3856 if (dict) { 3857 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); 3858 } else { 3859 /* Dictionary is cleared if !cdict */ 3860 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); 3861 } 3862 return 0; 3863 } 3864 3865 /* ZSTD_initCStream_usingCDict_advanced() : 3866 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ 3867 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, 3868 const ZSTD_CDict* cdict, 3869 ZSTD_frameParameters fParams, 3870 unsigned long long pledgedSrcSize) 3871 { 3872 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); 3873 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3874 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); 3875 zcs->requestedParams.fParams = fParams; 3876 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); 3877 return 0; 3878 } 3879 3880 /* note : cdict must outlive compression session */ 3881 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) 3882 { 3883 DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); 3884 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3885 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); 3886 return 0; 3887 } 3888 3889 3890 /* ZSTD_initCStream_advanced() : 3891 * pledgedSrcSize must be exact. 3892 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 3893 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ 3894 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, 3895 const void* dict, size_t dictSize, 3896 ZSTD_parameters params, unsigned long long pss) 3897 { 3898 /* for compatibility with older programs relying on this behavior. 3899 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. 3900 * This line will be removed in the future. 3901 */ 3902 U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ?
ZSTD_CONTENTSIZE_UNKNOWN : pss; 3903 DEBUGLOG(4, "ZSTD_initCStream_advanced"); 3904 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3905 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); 3906 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); 3907 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); 3908 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); 3909 return 0; 3910 } 3911 3912 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) 3913 { 3914 DEBUGLOG(4, "ZSTD_initCStream_usingDict"); 3915 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3916 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); 3917 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); 3918 return 0; 3919 } 3920 3921 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) 3922 { 3923 /* temporary : 0 interpreted as "unknown" during transition period. 3924 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 3925 * 0 will be interpreted as "empty" in the future. 3926 */ 3927 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; 3928 DEBUGLOG(4, "ZSTD_initCStream_srcSize"); 3929 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3930 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); 3931 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); 3932 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); 3933 return 0; 3934 } 3935 3936 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) 3937 { 3938 DEBUGLOG(4, "ZSTD_initCStream"); 3939 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); 3940 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); 3941 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); 3942 return 0; 3943 } 3944 3945 /*====== Compression ======*/ 3946 3947 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) 3948 { 3949 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; 3950 if (hintInSize==0) hintInSize = cctx->blockSize; 3951 return hintInSize; 3952 } 3953 3954 static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, 3955 const void* src, size_t srcSize) 3956 { 3957 size_t const length = MIN(dstCapacity, srcSize); 3958 if (length) memcpy(dst, src, length); 3959 return length; 3960 } 3961 3962 /** ZSTD_compressStream_generic(): 3963 * internal function for all *compressStream*() variants 3964 * (static : only called from ZSTD_compressStream2()) 3965 * @return : hint size for next input */ 3966 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, 3967 ZSTD_outBuffer* output, 3968 ZSTD_inBuffer* input, 3969 ZSTD_EndDirective const flushMode) 3970 { 3971 const char* const istart = (const char*)input->src; 3972 const char* const iend = istart + input->size; 3973 const char* ip = istart + input->pos; 3974 char* const ostart = (char*)output->dst; 3975 char* const oend = ostart + output->size; 3976 char* op = ostart + output->pos; 3977 U32 someMoreWork = 1; 3978 3979 /* check expectations */ 3980 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); 3981 assert(zcs->inBuff != NULL); 3982 assert(zcs->inBuffSize > 0); 3983 assert(zcs->outBuff != NULL); 3984 assert(zcs->outBuffSize > 0); 3985 assert(output->pos <= output->size); 3986
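/* (positions were already range-checked by ZSTD_compressStream2() ;
 *  the asserts here only re-verify invariants in debug builds) */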
assert(input->pos <= input->size); 3987 3988 while (someMoreWork) { 3989 switch(zcs->streamStage) 3990 { 3991 case zcss_init: 3992 RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); 3993 3994 case zcss_load: 3995 if ( (flushMode == ZSTD_e_end) 3996 && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ 3997 && (zcs->inBuffPos == 0) ) { 3998 /* shortcut to compression pass directly into output buffer */ 3999 size_t const cSize = ZSTD_compressEnd(zcs, 4000 op, oend-op, ip, iend-ip); 4001 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); 4002 FORWARD_IF_ERROR(cSize); 4003 ip = iend; 4004 op += cSize; 4005 zcs->frameEnded = 1; 4006 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4007 someMoreWork = 0; break; 4008 } 4009 /* complete loading into inBuffer */ 4010 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; 4011 size_t const loaded = ZSTD_limitCopy( 4012 zcs->inBuff + zcs->inBuffPos, toLoad, 4013 ip, iend-ip); 4014 zcs->inBuffPos += loaded; 4015 ip += loaded; 4016 if ( (flushMode == ZSTD_e_continue) 4017 && (zcs->inBuffPos < zcs->inBuffTarget) ) { 4018 /* not enough input to fill full block : stop here */ 4019 someMoreWork = 0; break; 4020 } 4021 if ( (flushMode == ZSTD_e_flush) 4022 && (zcs->inBuffPos == zcs->inToCompress) ) { 4023 /* empty */ 4024 someMoreWork = 0; break; 4025 } 4026 } 4027 /* compress current block (note : this stage cannot be stopped in the middle) */ 4028 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); 4029 { void* cDst; 4030 size_t cSize; 4031 size_t const iSize = zcs->inBuffPos - zcs->inToCompress; 4032 size_t oSize = oend-op; 4033 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); 4034 if (oSize >= ZSTD_compressBound(iSize)) 4035 cDst = op; /* compress into output buffer, to skip flush stage */ 4036 else 4037 cDst = zcs->outBuff, oSize = zcs->outBuffSize; 4038 cSize = lastBlock ? 
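/* final chunk : ZSTD_compressEnd() also writes the frame epilogue
 * (last empty block + optional checksum) */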
4039 ZSTD_compressEnd(zcs, cDst, oSize, 4040 zcs->inBuff + zcs->inToCompress, iSize) : 4041 ZSTD_compressContinue(zcs, cDst, oSize, 4042 zcs->inBuff + zcs->inToCompress, iSize); 4043 FORWARD_IF_ERROR(cSize); 4044 zcs->frameEnded = lastBlock; 4045 /* prepare next block */ 4046 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 4047 if (zcs->inBuffTarget > zcs->inBuffSize) 4048 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 4049 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 4050 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); 4051 if (!lastBlock) 4052 assert(zcs->inBuffTarget <= zcs->inBuffSize); 4053 zcs->inToCompress = zcs->inBuffPos; 4054 if (cDst == op) { /* no need to flush */ 4055 op += cSize; 4056 if (zcs->frameEnded) { 4057 DEBUGLOG(5, "Frame completed directly in outBuffer"); 4058 someMoreWork = 0; 4059 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4060 } 4061 break; 4062 } 4063 zcs->outBuffContentSize = cSize; 4064 zcs->outBuffFlushedSize = 0; 4065 zcs->streamStage = zcss_flush; /* pass-through to flush stage */ 4066 } 4067 /* fall-through */ 4068 case zcss_flush: 4069 DEBUGLOG(5, "flush stage"); 4070 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; 4071 size_t const flushed = ZSTD_limitCopy(op, oend-op, 4072 zcs->outBuff + zcs->outBuffFlushedSize, toFlush); 4073 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", 4074 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); 4075 op += flushed; 4076 zcs->outBuffFlushedSize += flushed; 4077 if (toFlush!=flushed) { 4078 /* flush not fully completed, presumably because dst is too small */ 4079 assert(op==oend); 4080 someMoreWork = 0; 4081 break; 4082 } 4083 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; 4084 if (zcs->frameEnded) { 4085 DEBUGLOG(5, "Frame completed on flush"); 4086 someMoreWork = 0; 4087 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 4088 break; 4089 } 4090 zcs->streamStage = zcss_load; 4091 break; 4092 } 4093 4094 default: /* impossible */ 4095 assert(0); 4096 } 4097 } 4098 4099 input->pos = ip - istart; 4100 output->pos = op - ostart; 4101 if (zcs->frameEnded) return 0; 4102 return ZSTD_nextInputSizeHint(zcs); 4103 } 4104 4105 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) 4106 { 4107 #ifdef ZSTD_MULTITHREAD 4108 if (cctx->appliedParams.nbWorkers >= 1) { 4109 assert(cctx->mtctx != NULL); 4110 return ZSTDMT_nextInputSizeHint(cctx->mtctx); 4111 } 4112 #endif 4113 return ZSTD_nextInputSizeHint(cctx); 4114 4115 } 4116 4117 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 4118 { 4119 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); 4120 return ZSTD_nextInputSizeHint_MTorST(zcs); 4121 } 4122 4123 4124 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, 4125 ZSTD_outBuffer* output, 4126 ZSTD_inBuffer* input, 4127 ZSTD_EndDirective endOp) 4128 { 4129 DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); 4130 /* check conditions */ 4131 RETURN_ERROR_IF(output->pos > output->size, GENERIC); 4132 RETURN_ERROR_IF(input->pos > input->size, GENERIC); 4133 assert(cctx!=NULL); 4134 4135 /* transparent initialization stage */ 4136 if (cctx->streamStage == zcss_init) { 4137 ZSTD_CCtx_params params = cctx->requestedParams; 4138 ZSTD_prefixDict const prefixDict = cctx->prefixDict; 4139 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. 
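 * A dictionary previously loaded through ZSTD_CCtx_loadDictionary()
 * is digested into a CDict at this point, on first use.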
*/ 4140 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ 4141 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ 4142 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); 4143 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ 4144 params.cParams = ZSTD_getCParamsFromCCtxParams( 4145 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); 4146 4147 4148 #ifdef ZSTD_MULTITHREAD 4149 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { 4150 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ 4151 } 4152 if (params.nbWorkers > 0) { 4153 /* mt context creation */ 4154 if (cctx->mtctx == NULL) { 4155 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", 4156 params.nbWorkers); 4157 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); 4158 RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); 4159 } 4160 /* mt compression */ 4161 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); 4162 FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( 4163 cctx->mtctx, 4164 prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, 4165 cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); 4166 cctx->streamStage = zcss_load; 4167 cctx->appliedParams.nbWorkers = params.nbWorkers; 4168 } else 4169 #endif 4170 { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, 4171 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, 4172 cctx->cdict, 4173 params, cctx->pledgedSrcSizePlusOne-1) ); 4174 assert(cctx->streamStage == zcss_load); 4175 assert(cctx->appliedParams.nbWorkers == 0); 4176 } } 4177 /* end of transparent initialization stage */ 4178 4179 /* compression stage */ 4180 #ifdef ZSTD_MULTITHREAD 4181 if (cctx->appliedParams.nbWorkers > 0) { 4182 int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); 4183 size_t flushMin; 4184 assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); 4185 if (cctx->cParamsChanged) { 4186 ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); 4187 cctx->cParamsChanged = 0; 4188 } 4189 do { 4190 flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); 4191 if ( ZSTD_isError(flushMin) 4192 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ 4193 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); 4194 } 4195 FORWARD_IF_ERROR(flushMin); 4196 } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); 4197 DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); 4198 /* Either we don't require maximum forward progress, we've finished the 4199 * flush, or we are out of output space. 
4200 */ 4201 assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); 4202 return flushMin; 4203 } 4204 #endif 4205 FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); 4206 DEBUGLOG(5, "completed ZSTD_compressStream2"); 4207 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ 4208 } 4209 4210 size_t ZSTD_compressStream2_simpleArgs ( 4211 ZSTD_CCtx* cctx, 4212 void* dst, size_t dstCapacity, size_t* dstPos, 4213 const void* src, size_t srcSize, size_t* srcPos, 4214 ZSTD_EndDirective endOp) 4215 { 4216 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; 4217 ZSTD_inBuffer input = { src, srcSize, *srcPos }; 4218 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ 4219 size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); 4220 *dstPos = output.pos; 4221 *srcPos = input.pos; 4222 return cErr; 4223 } 4224 4225 size_t ZSTD_compress2(ZSTD_CCtx* cctx, 4226 void* dst, size_t dstCapacity, 4227 const void* src, size_t srcSize) 4228 { 4229 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); 4230 { size_t oPos = 0; 4231 size_t iPos = 0; 4232 size_t const result = ZSTD_compressStream2_simpleArgs(cctx, 4233 dst, dstCapacity, &oPos, 4234 src, srcSize, &iPos, 4235 ZSTD_e_end); 4236 FORWARD_IF_ERROR(result); 4237 if (result != 0) { /* compression not completed, due to lack of output space */ 4238 assert(oPos == dstCapacity); 4239 RETURN_ERROR(dstSize_tooSmall); 4240 } 4241 assert(iPos == srcSize); /* all input is expected to be consumed */ 4242 return oPos; 4243 } 4244 } 4245 4246 /*====== Finalize ======*/ 4247 4248 /*! ZSTD_flushStream() : 4249 * @return : amount of data remaining to flush */ 4250 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 4251 { 4252 ZSTD_inBuffer input = { NULL, 0, 0 }; 4253 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); 4254 } 4255 4256 4257 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 4258 { 4259 ZSTD_inBuffer input = { NULL, 0, 0 }; 4260 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); 4261 FORWARD_IF_ERROR( remainingToFlush ); 4262 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ 4263 /* single thread mode : attempt to calculate remaining to flush more precisely */ 4264 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; 4265 size_t const checksumSize = zcs->frameEnded ?
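/* frame already completed : its epilogue, including any checksum, was already emitted */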
0 : zcs->appliedParams.fParams.checksumFlag * 4; 4266 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; 4267 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); 4268 return toFlush; 4269 } 4270 } 4271 4272 4273 /*-===== Pre-defined compression levels =====-*/ 4274 4275 #define ZSTD_MAX_CLEVEL 22 4276 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } 4277 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } 4278 4279 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { 4280 { /* "default" - for any srcSize > 256 KB */ 4281 /* W, C, H, S, L, TL, strat */ 4282 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ 4283 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ 4284 { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ 4285 { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */ 4286 { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */ 4287 { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ 4288 { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ 4289 { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ 4290 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ 4291 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ 4292 { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ 4293 { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ 4294 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ 4295 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ 4296 { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ 4297 { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ 4298 { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ 4299 { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ 4300 { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ 4301 { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ 4302 { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ 4303 { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ 4304 { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ 4305 }, 4306 { /* for srcSize <= 256 KB */ 4307 /* W, C, H, S, L, T, strat */ 4308 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ 4309 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ 4310 { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */ 4311 { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */ 4312 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ 4313 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ 4314 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ 4315 { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ 4316 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ 4317 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ 4318 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ 4319 { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ 4320 { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ 4321 { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ 4322 { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ 4323 { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ 4324 { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ 4325 { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ 4326 { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ 4327 { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ 4328 { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ 4329 { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ 4330 { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ 4331 }, 4332 { /* for srcSize <= 128 KB */ 4333 /* W, C, H, S, L, T, strat */ 4334 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* 
base for negative levels */ 4335 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ 4336 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ 4337 { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */ 4338 { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */ 4339 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ 4340 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ 4341 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ 4342 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ 4343 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ 4344 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ 4345 { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ 4346 { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ 4347 { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ 4348 { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ 4349 { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ 4350 { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ 4351 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ 4352 { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ 4353 { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ 4354 { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ 4355 { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ 4356 { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ 4357 }, 4358 { /* for srcSize <= 16 KB */ 4359 /* W, C, H, S, L, T, strat */ 4360 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ 4361 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ 4362 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ 4363 { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */ 4364 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ 4365 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ 4366 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ 4367 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ 4368 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ 4369 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ 4370 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ 4371 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ 4372 { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ 4373 { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ 4374 { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ 4375 { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ 4376 { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ 4377 { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ 4378 { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ 4379 { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ 4380 { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ 4381 { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ 4382 { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ 4383 }, 4384 }; 4385 4386 /*! ZSTD_getCParams() : 4387 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 4388 * Size values are optional, provide 0 if not known or unused */ 4389 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) 4390 { 4391 size_t const addedSize = srcSizeHint ? 0 : 500; 4392 U64 const rSize = srcSizeHint+dictSize ? 
srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ 4393 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); 4394 int row = compressionLevel; 4395 DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); 4396 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ 4397 if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ 4398 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; 4399 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; 4400 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ 4401 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ 4402 } 4403 } 4404 4405 /*! ZSTD_getParams() : 4406 * same idea as ZSTD_getCParams() 4407 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 4408 * Fields of `ZSTD_frameParameters` are set to default values */ 4409 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { 4410 ZSTD_parameters params; 4411 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); 4412 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); 4413 memset(&params, 0, sizeof(params)); 4414 params.cParams = cParams; 4415 params.fParams.contentSizeFlag = 1; 4416 return params; 4417 } 4418
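
/* Example (illustrative sketch, not part of the library) :
 * a minimal one-shot compression helper built on the public entry points
 * defined above (ZSTD_compressBound, ZSTD_compress, ZSTD_isError).
 * The helper name compress_once is hypothetical.
 *
 *     #include <stdlib.h>
 *     #include "zstd.h"
 *
 *     static size_t compress_once(const void* src, size_t srcSize,
 *                                 int level, void** dstOut)
 *     {
 *         size_t const dstCapacity = ZSTD_compressBound(srcSize);  // worst-case output size
 *         void* const dst = malloc(dstCapacity);
 *         size_t cSize;
 *         if (dst == NULL) return 0;
 *         cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, level);
 *         if (ZSTD_isError(cSize)) { free(dst); return 0; }
 *         *dstOut = dst;   // caller owns (and frees) the buffer
 *         return cSize;    // compressed size, 0 on failure
 *     }
 */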