/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/mem.h"
#include "hist.h"                 /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"

/* ***************************************************************
*  Tuning parameters
*****************************************************************/
/*!
 * COMPRESS_HEAPMODE :
 * Select how the default compression function ZSTD_compress() allocates its context,
 * on stack (0, default), or on heap (1).
 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
 */

/*!
 * ZSTD_HASHLOG3_MAX :
 * Maximum size of the hash table dedicated to finding 3-byte matches,
 * in log format, i.e. 17 => 1 << 17 == 128Ki positions.
 * This structure is only used in zstd_opt.
 * Since allocation is centralized for all strategies, it has to be known here.
 * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
 * so that zstd_opt.c doesn't need to know about this constant.
 */
#ifndef ZSTD_HASHLOG3_MAX
#  define ZSTD_HASHLOG3_MAX 17
#endif

/*-*************************************
*  Helper functions
***************************************/
/* ZSTD_compressBound()
 * Note that the result from this function is only valid for
 * the "normal" full-block strategy.
 * When there are many small blocks due to frequent flushes in streaming mode,
 * the overhead of block headers can make the compressed data larger than the
 * return value of ZSTD_compressBound().
 */
size_t ZSTD_compressBound(size_t srcSize) {
    return ZSTD_COMPRESSBOUND(srcSize);
}


/*-*************************************
*  Context memory management
***************************************/
struct ZSTD_CDict_s {
    const void* dictContent;
    size_t dictContentSize;
    ZSTD_dictContentType_e dictContentType;  /* The dictContentType the CDict was created with */
    U32* entropyWorkspace;   /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
    ZSTD_cwksp workspace;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;
    U32 dictID;
    int compressionLevel;    /* 0 indicates that advanced API was used to select CDict params */
    ZSTD_paramSwitch_e useRowMatchFinder;  /* Indicates whether the CDict was created with params that would use
                                            * row-based matchfinder. Unless the cdict is reloaded, we will use
                                            * the same greedy/lazy matchfinder at compression time.
                                            */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */
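/* Illustrative sketch (not part of the library): the typical way callers pair
 * ZSTD_compressBound() with a one-shot compression, sizing the destination
 * buffer so a single full-block compression cannot fail for lack of space.
 * Hypothetical helper, guarded out so the translation unit is unchanged. */
#if 0
static size_t example_compress_once(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
{
    /* dstCapacity >= ZSTD_compressBound(srcSize) guarantees success
     * for the full-block strategy described above */
    assert(dstCapacity >= ZSTD_compressBound(srcSize));
    return ZSTD_compress(dst, dstCapacity, src, srcSize, ZSTD_CLEVEL_DEFAULT);
}
#endif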
ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}

static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    assert(cctx != NULL);
    ZSTD_memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuSupportsBmi2();
    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
        assert(!ZSTD_isError(err));
        (void)err;
    }
}

ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_STATIC_ASSERT(zcss_init==0);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
        if (!cctx) return NULL;
        ZSTD_initCCtx(cctx, customMem);
        return cctx;
    }
}

ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
{
    ZSTD_cwksp ws;
    ZSTD_CCtx* cctx;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;               /* must be 8-aligned */
    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);

    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
    if (cctx == NULL) return NULL;

    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
    ZSTD_cwksp_move(&cctx->workspace, &ws);
    cctx->staticSize = workspaceSize;

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}

/*
 * Clears and frees all of the dictionaries in the CCtx.
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;
}
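/* Illustrative sketch (not part of the library): initializing a CCtx inside a
 * caller-provided buffer with ZSTD_initStaticCCtx(). The 1 MB budget below is
 * a hypothetical placeholder; a real caller would size the workspace with
 * ZSTD_estimateCCtxSize(). Guarded out so the translation unit is unchanged. */
#if 0
static ZSTD_CCtx* example_static_cctx(void)
{
    /* the workspace must be 8-byte aligned; static storage generally is,
     * but that is an assumption of this sketch, not a language guarantee */
    static char workspace[1 << 20];   /* hypothetical 1 MB budget */
    ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(workspace, sizeof(workspace));
    /* NULL is returned if the workspace is too small or misaligned */
    return cctx;
}
#endif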
static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
    return bufferSize + cdictSize;
}

static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_clearAllDicts(cctx);
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");
    {   int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
        ZSTD_freeCCtxContent(cctx);
        if (!cctxInWorkspace) {
            ZSTD_customFree(cctx, cctx->customMem);
        }
    }
    return 0;
}


static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
    (void)cctx;
    return 0;
}


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support sizeof on NULL */
    /* cctx may be in the workspace */
    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
         + ZSTD_cwksp_sizeof(&cctx->workspace)
         + ZSTD_sizeof_localDict(cctx->localDict)
         + ZSTD_sizeof_mtctx(cctx);
}

size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);   /* same object */
}

/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }

/* Returns true if the strategy supports using a row-based matchfinder */
static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
}

/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row-based matchfinder
 * for this compression.
 */
static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
    assert(mode != ZSTD_ps_auto);
    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
}

/* Returns row matchfinder usage given an initial mode and cParams */
static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
                                                         const ZSTD_compressionParameters* const cParams) {
#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
    int const kHasSIMD128 = 1;
#else
    int const kHasSIMD128 = 0;
#endif
    if (mode != ZSTD_ps_auto) return mode;  /* if explicitly requested, honor it: even without SIMD, we still use the row matchfinder */
    mode = ZSTD_ps_disable;
    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
    if (kHasSIMD128) {
        if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
    } else {
        if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
    }
    return mode;
}
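/* Illustrative sketch (not part of the library): how ZSTD_ps_auto resolves for
 * the row-based matchfinder under the rules above. The parameter values are
 * worked examples, not additional configuration. Guarded out so the
 * translation unit is unchanged. */
#if 0
static void example_row_matchfinder_resolution(void)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams(ZSTD_CLEVEL_DEFAULT, 0, 0);
    ZSTD_paramSwitch_e resolved;
    cParams.strategy  = ZSTD_lazy2;   /* row-capable strategies: ZSTD_greedy..ZSTD_lazy2 */
    cParams.windowLog = 15;
    /* On SSE2/NEON builds, auto resolves to enable (windowLog > 14);
     * without SIMD128 the threshold is windowLog > 17, so auto stays disabled. */
    resolved = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
    (void)resolved;
}
#endif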
/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
                                                        const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
}

/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
                                   const ZSTD_paramSwitch_e useRowMatchFinder,
                                   const U32 forDDSDict) {
    assert(useRowMatchFinder != ZSTD_ps_auto);
    /* We should always allocate a chaintable when allocating a matchstate for a DDS dictionary.
     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
     */
    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
}

/* Returns ZSTD_ps_enable if compression parameters are such that we should
 * enable long distance matching (wlog >= 27, strategy >= btopt).
 * Returns ZSTD_ps_disable otherwise.
 */
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
                                                const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}

static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params cctxParams;
    /* should not matter, as all cParams are presumed properly defined */
    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
    cctxParams.cParams = cParams;

    /* Adjust advanced params according to cParams */
    cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
    if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
        assert(cctxParams.ldmParams.hashRateLog < 32);
    }
    cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
    assert(!ZSTD_checkCParams(cParams));
    return cctxParams;
}

static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params* params;
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
            sizeof(ZSTD_CCtx_params), customMem);
    if (!params) { return NULL; }
    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
    params->customMem = customMem;
    return params;
}

ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}

size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params == NULL) { return 0; }
    ZSTD_customFree(params, params->customMem);
    return 0;
}

size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}

size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}

#define ZSTD_NO_CLEVEL 0
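/* Illustrative sketch (not part of the library): the usual lifecycle of a
 * ZSTD_CCtx_params object built from the functions above, applied to a context
 * via ZSTD_CCtx_setParametersUsingCCtxParams() (defined later in this file).
 * Guarded out so the translation unit is unchanged. */
#if 0
static size_t example_cctx_params_lifecycle(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
    size_t err;
    if (params == NULL) return ERROR(memory_allocation);
    err = ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 19);
    if (!ZSTD_isError(err))
        err = ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
    if (!ZSTD_isError(err))
        err = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
    ZSTD_freeCCtxParams(params);   /* params are copied into cctx; safe to free */
    return err;
}
#endif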
/*
 * Initializes the cctxParams from params and compressionLevel.
 * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
 */
static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
{
    assert(!ZSTD_checkCParams(params->cParams));
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->cParams = params->cParams;
    cctxParams->fParams = params->fParams;
    /* Should not matter, as all cParams are presumed properly defined.
     * But, set it for tracing anyway.
     */
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
                cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}

size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
    return 0;
}
372 */ 373 cctxParams->compressionLevel = ZSTD_NO_CLEVEL; 374 } 375 376 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) 377 { 378 ZSTD_bounds bounds = { 0, 0, 0 }; 379 380 switch(param) 381 { 382 case ZSTD_c_compressionLevel: 383 bounds.lowerBound = ZSTD_minCLevel(); 384 bounds.upperBound = ZSTD_maxCLevel(); 385 return bounds; 386 387 case ZSTD_c_windowLog: 388 bounds.lowerBound = ZSTD_WINDOWLOG_MIN; 389 bounds.upperBound = ZSTD_WINDOWLOG_MAX; 390 return bounds; 391 392 case ZSTD_c_hashLog: 393 bounds.lowerBound = ZSTD_HASHLOG_MIN; 394 bounds.upperBound = ZSTD_HASHLOG_MAX; 395 return bounds; 396 397 case ZSTD_c_chainLog: 398 bounds.lowerBound = ZSTD_CHAINLOG_MIN; 399 bounds.upperBound = ZSTD_CHAINLOG_MAX; 400 return bounds; 401 402 case ZSTD_c_searchLog: 403 bounds.lowerBound = ZSTD_SEARCHLOG_MIN; 404 bounds.upperBound = ZSTD_SEARCHLOG_MAX; 405 return bounds; 406 407 case ZSTD_c_minMatch: 408 bounds.lowerBound = ZSTD_MINMATCH_MIN; 409 bounds.upperBound = ZSTD_MINMATCH_MAX; 410 return bounds; 411 412 case ZSTD_c_targetLength: 413 bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; 414 bounds.upperBound = ZSTD_TARGETLENGTH_MAX; 415 return bounds; 416 417 case ZSTD_c_strategy: 418 bounds.lowerBound = ZSTD_STRATEGY_MIN; 419 bounds.upperBound = ZSTD_STRATEGY_MAX; 420 return bounds; 421 422 case ZSTD_c_contentSizeFlag: 423 bounds.lowerBound = 0; 424 bounds.upperBound = 1; 425 return bounds; 426 427 case ZSTD_c_checksumFlag: 428 bounds.lowerBound = 0; 429 bounds.upperBound = 1; 430 return bounds; 431 432 case ZSTD_c_dictIDFlag: 433 bounds.lowerBound = 0; 434 bounds.upperBound = 1; 435 return bounds; 436 437 case ZSTD_c_nbWorkers: 438 bounds.lowerBound = 0; 439 bounds.upperBound = 0; 440 return bounds; 441 442 case ZSTD_c_jobSize: 443 bounds.lowerBound = 0; 444 bounds.upperBound = 0; 445 return bounds; 446 447 case ZSTD_c_overlapLog: 448 bounds.lowerBound = 0; 449 bounds.upperBound = 0; 450 return bounds; 451 452 case ZSTD_c_enableDedicatedDictSearch: 453 bounds.lowerBound = 0; 454 bounds.upperBound = 1; 455 return bounds; 456 457 case ZSTD_c_enableLongDistanceMatching: 458 bounds.lowerBound = 0; 459 bounds.upperBound = 1; 460 return bounds; 461 462 case ZSTD_c_ldmHashLog: 463 bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; 464 bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; 465 return bounds; 466 467 case ZSTD_c_ldmMinMatch: 468 bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; 469 bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; 470 return bounds; 471 472 case ZSTD_c_ldmBucketSizeLog: 473 bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; 474 bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; 475 return bounds; 476 477 case ZSTD_c_ldmHashRateLog: 478 bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; 479 bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; 480 return bounds; 481 482 /* experimental parameters */ 483 case ZSTD_c_rsyncable: 484 bounds.lowerBound = 0; 485 bounds.upperBound = 1; 486 return bounds; 487 488 case ZSTD_c_forceMaxWindow : 489 bounds.lowerBound = 0; 490 bounds.upperBound = 1; 491 return bounds; 492 493 case ZSTD_c_format: 494 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); 495 bounds.lowerBound = ZSTD_f_zstd1; 496 bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? 
    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    case ZSTD_c_srcSizeHint:
        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
        return bounds;

    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
        bounds.lowerBound = (int)ZSTD_bm_buffered;
        bounds.upperBound = (int)ZSTD_bm_stable;
        return bounds;

    case ZSTD_c_blockDelimiters:
        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
        return bounds;

    case ZSTD_c_validateSequences:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_useBlockSplitter:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_useRowMatchFinder:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_deterministicRefPrefix:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    default:
        bounds.error = ERROR(parameter_unsupported);
        return bounds;
    }
}
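/* Illustrative sketch (not part of the library): querying a parameter's valid
 * range before setting it, mirroring what the BOUNDCHECK macro below does
 * internally. Guarded out so the translation unit is unchanged. */
#if 0
static int example_clamp_window_log(int requestedWindowLog)
{
    ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
    if (ZSTD_isError(b.error)) return requestedWindowLog;  /* unknown parameter */
    if (requestedWindowLog < b.lowerBound) return b.lowerBound;
    if (requestedWindowLog > b.upperBound) return b.upperBound;
    return requestedWindowLog;
}
#endif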
/* ZSTD_cParam_clampBounds:
 * Clamps the value into the bounded range.
 */
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return bounds.error;
    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
    if (*value > bounds.upperBound) *value = bounds.upperBound;
    return 0;
}

#define BOUNDCHECK(cParam, val) {                              \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),     \
                    parameter_outOfBound, "Param out of bounds"); \
}


static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
    default:
        return 0;
    }
}

size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
        break;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
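/* Illustrative sketch (not part of the library): configuring a context through
 * the advanced-API entry point defined above; the chosen values are arbitrary.
 * Parameters persist until a ZSTD_reset_parameters reset. Guarded out so the
 * translation unit is unchanged. */
#if 0
static size_t example_configure_cctx(ZSTD_CCtx* cctx)
{
    size_t err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 12);
    if (!ZSTD_isError(err)) err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 23);
    if (!ZSTD_isError(err)) err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
    return err;   /* error code, or last value actually set */
}
#endif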
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format :
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel : {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        if (value == 0)
            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
        else
            CCtxParams->compressionLevel = value;
        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
        return 0;   /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = (U32)value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = (U32)value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = (U32)value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = (U32)value;
        return (size_t)value;

    case ZSTD_c_minMatch :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength :
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag :
        /* A 32-bit content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow :
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict : {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode : {
        const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }
    case ZSTD_c_nbWorkers :
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_jobSize :
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_overlapLog :
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_rsyncable :
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_enableDedicatedDictSearch :
        CCtxParams->enableDedicatedDictSearch = (value!=0);
        return CCtxParams->enableDedicatedDictSearch;

    case ZSTD_c_enableLongDistanceMatching :
        CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog :
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    case ZSTD_c_srcSizeHint :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
        CCtxParams->srcSizeHint = value;
        return CCtxParams->srcSizeHint;

    case ZSTD_c_stableInBuffer:
        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->inBufferMode;

    case ZSTD_c_stableOutBuffer:
        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->outBufferMode;

    case ZSTD_c_blockDelimiters:
        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
        return CCtxParams->blockDelimiters;

    case ZSTD_c_validateSequences:
        BOUNDCHECK(ZSTD_c_validateSequences, value);
        CCtxParams->validateSequences = value;
        return CCtxParams->validateSequences;

    case ZSTD_c_useBlockSplitter:
        BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
        CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useBlockSplitter;

    case ZSTD_c_useRowMatchFinder:
        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
        CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useRowMatchFinder;

    case ZSTD_c_deterministicRefPrefix:
        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
        CCtxParams->deterministicRefPrefix = !!value;
        return CCtxParams->deterministicRefPrefix;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}

size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
{
    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format :
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag :
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow :
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers :
        assert(CCtxParams->nbWorkers == 0);
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_overlapLog :
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_rsyncable :
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_enableDedicatedDictSearch :
        *value = CCtxParams->enableDedicatedDictSearch;
        break;
    case ZSTD_c_enableLongDistanceMatching :
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize :
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint :
        *value = (int)CCtxParams->srcSizeHint;
        break;
    case ZSTD_c_stableInBuffer :
        *value = (int)CCtxParams->inBufferMode;
        break;
    case ZSTD_c_stableOutBuffer :
        *value = (int)CCtxParams->outBufferMode;
        break;
    case ZSTD_c_blockDelimiters :
        *value = (int)CCtxParams->blockDelimiters;
        break;
    case ZSTD_c_validateSequences :
        *value = (int)CCtxParams->validateSequences;
        break;
    case ZSTD_c_useBlockSplitter :
        *value = (int)CCtxParams->useBlockSplitter;
        break;
    case ZSTD_c_useRowMatchFinder :
        *value = (int)CCtxParams->useRowMatchFinder;
        break;
    case ZSTD_c_deterministicRefPrefix:
        *value = (int)CCtxParams->deterministicRefPrefix;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}
/* ZSTD_CCtx_setParametersUsingCCtxParams() :
 *  just applies `params` into `cctx`;
 *  no action is performed, parameters are merely stored.
 *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
 *    This is possible even if a compression is ongoing;
 *    in that case, new parameters will be applied on the fly, starting with the next compression job.
 */
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "The context is in the wrong stage!");
    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
                    "Can't override parameters with cdict attached (some must "
                    "be inherited from the cdict).");

    cctx->requestedParams = *params;
    return 0;
}

size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't set pledgedSrcSize when not in init stage.");
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    return 0;
}

static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
        int const compressionLevel,
        size_t const dictSize);
static int ZSTD_dedicatedDictSearch_isSupported(
        const ZSTD_compressionParameters* cParams);
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams);
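/* Illustrative sketch (not part of the library): announcing the source size of
 * the next frame so the frame header can record it, using the function defined
 * above. Guarded out so the translation unit is unchanged. */
#if 0
static size_t example_pledge_size(ZSTD_CCtx* cctx, size_t srcSize)
{
    /* Must be called while the cctx is still in init stage; the pledged size
     * is written into the next frame header and checked at end of frame. */
    return ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
}
#endif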
/*
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    dl->cdict = ZSTD_createCDict_advanced2(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            &cctx->requestedParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
    cctx->cdict = dl->cdict;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't load a dictionary when ctx is not in init stage.");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);   /* in case one already exists */
    if (dict == NULL || dictSize == 0)   /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;
    } else {
        void* dictBuffer;
        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                        "no malloc for static CCtx");
        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
        ZSTD_memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}

size_t ZSTD_CCtx_loadDictionary_byReference(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}


size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a dict when ctx not in init stage.");
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}

size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a pool when ctx not in init stage.");
    cctx->pool = pool;
    return 0;
}

size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}

size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a prefix when ctx not in init stage.");
    ZSTD_clearAllDicts(cctx);
    if (prefix != NULL && prefixSize > 0) {
        cctx->prefixDict.dict = prefix;
        cctx->prefixDict.dictSize = prefixSize;
        cctx->prefixDict.dictContentType = dictContentType;
    }
    return 0;
}
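/* Illustrative sketch (not part of the library): the three ways of attaching
 * dictionary content defined above, shown side by side. Each call clears any
 * previously attached dictionary, so only the last one takes effect. Guarded
 * out so the translation unit is unchanged. */
#if 0
static size_t example_attach_dictionary(ZSTD_CCtx* cctx,
                                        const void* dict, size_t dictSize)
{
    /* 1) copy the dictionary into the context (dict may be freed afterwards) */
    size_t err = ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);
    if (ZSTD_isError(err)) return err;
    /* 2) or reference it without copying (dict must outlive all compressions) */
    err = ZSTD_CCtx_loadDictionary_byReference(cctx, dict, dictSize);
    if (ZSTD_isError(err)) return err;
    /* 3) or use it as a raw-content prefix, valid for the next frame only */
    return ZSTD_CCtx_refPrefix(cctx, dict, dictSize);
}
#endif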
/*! ZSTD_CCtx_reset() :
 *  Also dumps the dictionary. */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        cctx->streamStage = zcss_init;
        cctx->pledgedSrcSizePlusOne = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                        "Can't reset parameters only when not in init stage.");
        ZSTD_clearAllDicts(cctx);
        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
    }
    return 0;
}


/* ZSTD_checkCParams() :
 * Ensure CParam values remain within the authorized range.
 * @return : 0, or an error code if one value is beyond the authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}

/* ZSTD_clampCParams() :
 *  make CParam values within valid range.
 *  @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}

/* ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
    return hashLog - btScale;
}
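/* Worked example for ZSTD_cycleLog() (editorial note, not upstream text):
 * binary-tree strategies (>= ZSTD_btlazy2) use the chain table as a 2-ary
 * tree, so only half of its positions form an index cycle. E.g. hashLog=20
 * gives cycleLog=20 for ZSTD_lazy2 (btScale=0) but cycleLog=19 for
 * ZSTD_btlazy2 (btScale=1). */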
/* ZSTD_dictAndWindowLog() :
 * Returns an adjusted window log that is large enough to fit the source and the dictionary.
 * The zstd format says that the entire dictionary is valid if one byte of the dictionary
 * is within the window. So the hashLog and chainLog should be large enough to reference both
 * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
 * the hashLog and windowLog.
 * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
 */
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
{
    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
    /* No dictionary ==> No change */
    if (dictSize == 0) {
        return windowLog;
    }
    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN);   /* Handled in ZSTD_adjustCParams_internal() */
    {   U64 const windowSize = 1ULL << windowLog;
        U64 const dictAndWindowSize = dictSize + windowSize;
        /* If the window size is already large enough to fit both the source and the dictionary
         * then just use the window size. Otherwise adjust so that it fits the dictionary and
         * the window.
         */
        if (windowSize >= dictSize + srcSize) {
            return windowLog;   /* Window size large enough already */
        } else if (dictAndWindowSize >= maxWindowSize) {
            return ZSTD_WINDOWLOG_MAX;   /* Larger than max window log */
        } else {
            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
        }
    }
}
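/* Worked example for ZSTD_dictAndWindowLog() (editorial note, not upstream
 * text): with windowLog=20 (1 MiB window), dictSize=512 KiB and srcSize=4 MiB,
 * the window cannot cover dict+src (1 MiB < 4.5 MiB), and dictAndWindowSize is
 * 1.5 MiB, well below the max window. The function therefore returns
 * ZSTD_highbit32(0x180000 - 1) + 1 = 21, the smallest log covering dictionary
 * plus window. */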
/* ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
 *  note : `srcSize==0` means 0!
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize,
                            ZSTD_cParamMode_e mode)
{
    const U64 minSrcSize = 513;   /* (1<<9) + 1 */
    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
        /* If we don't know the source size, don't make any
         * assumptions about it. We will already have selected
         * smaller parameters if a dictionary is in use.
         */
        break;
    case ZSTD_cpm_createCDict:
        /* Assume a small source size when creating a dictionary
         * with an unknown source size.
         */
        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            srcSize = minSrcSize;
        break;
    case ZSTD_cpm_attachDict:
        /* Dictionary has its own dedicated parameters which have
         * already been selected. We are selecting parameters
         * for only the source.
         */
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) ) {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
        if (cycleLog > dictAndWindowLog)
            cPar.chainLog -= (cycleLog - dictAndWindowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* minimum wlog required for valid frame header */

    return cPar;
}

ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
}

static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

static void ZSTD_overrideCParams(
              ZSTD_compressionParameters* cParams,
        const ZSTD_compressionParameters* overrides)
{
    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
}

ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    ZSTD_compressionParameters cParams;
    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
        srcSizeHint = CCtxParams->srcSizeHint;
    }
    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
    assert(!ZSTD_checkCParams(cParams));
    /* srcSizeHint == 0 means 0 */
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
}
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const ZSTD_paramSwitch_e useRowMatchFinder,
                       const U32 enableDedicatedDictSearch,
                       const U32 forCCtx)
{
    /* chain table size should be 0 for fast or row-hash strategies */
    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
                                ? ((size_t)1 << cParams->chainLog)
                                : 0;
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
     * surrounded by redzones in ASAN. */
    size_t const tableSpace = chainSize * sizeof(U32)
                            + hSize * sizeof(U32)
                            + h3Size * sizeof(U32);
    size_t const optPotentialSpace =
        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
                                            : 0;
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    size_t const slackSpace = ZSTD_cwksp_slack_space_required();

    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
    assert(useRowMatchFinder != ZSTD_ps_auto);

    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}

static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        const ZSTD_compressionParameters* cParams,
        const ldmParams_t* ldmParams,
        const int isStatic,
        const ZSTD_paramSwitch_e useRowMatchFinder,
        const size_t buffInSize,
        const size_t buffOutSize,
        const U64 pledgedSrcSize)
{
    size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
    size_t const maxNbSeq = blockSize / divider;
    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);

    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
    size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;


    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
                             + ZSTD_cwksp_alloc_size(buffOutSize);
    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;

    size_t const neededSpace =
        cctxSpace +
        entropySpace +
        blockStateSpace +
        ldmSpace +
        ldmSeqSpace +
        matchStateSize +
        tokenSpace +
        bufferSpace;

    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
    return neededSpace;
}

size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
                                                                               &cParams);

    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    /* estimateCCtxSize is for one-shot compression. So no buffers should
     * be needed. However, we still allocate two 0-sized buffers, which can
     * take space under ASAN. */
    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
}

size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
        /* Pick the bigger of not using and using the row-based matchfinder for greedy and lazy strategies */
        size_t noRowCCtxSize;
        size_t rowCCtxSize;
        initialParams.useRowMatchFinder = ZSTD_ps_disable;
        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
        initialParams.useRowMatchFinder = ZSTD_ps_enable;
        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
        return MAX(noRowCCtxSize, rowCCtxSize);
    } else {
        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
    }
}

static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
    int tier = 0;
    size_t largestSize = 0;
    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
    for (; tier < 4; ++tier) {
        /* Choose the set of cParams for a given level across all srcSizes that gives the largest cctxSize */
        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
    }
    return largestSize;
}

size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        /* Ensure monotonically increasing memory usage as compression level increases */
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
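/* Illustrative sketch (not part of the library): pairing ZSTD_estimateCCtxSize()
 * with ZSTD_initStaticCCtx() so all context memory comes from one caller-owned
 * allocation. The malloc stands in for any suitably aligned arena. Guarded out
 * so the translation unit is unchanged. */
#if 0
#include <stdlib.h>   /* malloc, for this sketch only */
static ZSTD_CCtx* example_cctx_from_estimate(int level)
{
    size_t const need = ZSTD_estimateCCtxSize(level);
    void* const workspace = malloc(need);   /* malloc returns 8-byte-aligned memory */
    if (workspace == NULL) return NULL;
    return ZSTD_initStaticCCtx(workspace, need);   /* NULL if too small or misaligned */
}
#endif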
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
                ? ((size_t)1 << cParams.windowLog) + blockSize
                : 0;
        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);

        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
            ZSTD_CONTENTSIZE_UNKNOWN);
    }
}

size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
        /* Pick the bigger of not using and using the row-based matchfinder for greedy and lazy strategies */
        size_t noRowCCtxSize;
        size_t rowCCtxSize;
        initialParams.useRowMatchFinder = ZSTD_ps_disable;
        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
        initialParams.useRowMatchFinder = ZSTD_ps_enable;
        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
        return MAX(noRowCCtxSize, rowCCtxSize);
    } else {
        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
    }
}

static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
    return ZSTD_estimateCStreamSize_usingCParams(cParams);
}

size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}

/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
    {   ZSTD_frameProgression fp;
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        fp.currentJobID = 0;
        fp.nbActiveWorkers = 0;
        return fp;
}   }
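/* Illustrative sketch (not part of the library): polling compression progress
 * while a streaming frame is in flight, using the function defined above.
 * Guarded out so the translation unit is unchanged. */
#if 0
static void example_report_progress(const ZSTD_CCtx* cctx)
{
    ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
    /* ingested >= consumed : the difference is still buffered, not yet compressed */
    DEBUGLOG(2, "ingested=%u consumed=%u produced=%u",
             (unsigned)fp.ingested, (unsigned)fp.consumed, (unsigned)fp.produced);
}
#endif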
/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
    (void)cctx;
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}

static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i)
        bs->rep[i] = repStartValue[i];
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}

/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;
}

/*
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/*
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;


static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      ZSTD_cwksp* ws,
                const ZSTD_compressionParameters* cParams,
                const ZSTD_paramSwitch_e useRowMatchFinder,
                const ZSTD_compResetPolicy_e crp,
                const ZSTD_indexResetPolicy_e forceResetIndex,
                const ZSTD_resetTarget_e forWho)
{
    /* disable chain table allocation for fast or row-based strategies */
    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
                                ? ((size_t)1 << cParams->chainLog)
                                : 0;
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
    assert(useRowMatchFinder != ZSTD_ps_auto);
    if (forceResetIndex == ZSTDirp_reset) {
        ZSTD_window_init(&ms->window);
        ZSTD_cwksp_mark_tables_dirty(ws);
    }

    ms->hashLog3 = hashLog3;

    ZSTD_invalidateMatchState(ms);

    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */

    ZSTD_cwksp_clear_tables(ws);

    DEBUGLOG(5, "reserving table space");
    /* table Space */
    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
    if (crp!=ZSTDcrp_leaveDirty) {
        /* reset tables only */
        ZSTD_cwksp_clean_tables(ws);
    }

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    }

    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
        {   /* Row match finder needs an additional table of hashes ("tags") */
            size_t const tagTableSize = hSize*sizeof(U16);
            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
        }
        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
            U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
            assert(cParams->hashLog >= rowLog);
            ms->rowHashLog = cParams->hashLog - rowLog;
        }
    }

    ms->cParams = *cParams;

    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");
    return 0;
}

/* ZSTD_indexTooCloseToMax() :
 * minor optimization : prefer memset() rather than reduceIndex()
 * which is measurably slow in some circumstances (reported for Visual Studio).
 * Works when re-using a context for a lot of smallish inputs :
 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
 * memset() will be triggered before reduceIndex().
 */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
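/* Worked example (illustrative): call the index ceiling C = ZSTD_CURRENT_MAX.
 * Once a re-used window has indexed more than C - ZSTD_INDEXOVERFLOW_MARGIN
 * bytes (i.e. is within 16 MB of the ceiling), the check above fires and the
 * next reset memset()s the tables instead of calling ZSTD_reduceIndex().
 * If every input is below 16 MB, the margin guarantees the cheap reset
 * always triggers before the indexes could actually overflow.
 */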
/* ZSTD_dictTooBig():
 * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
 * one go generically. So we ensure that in that case we reset the tables to zero,
 * so that we can load as much of the dictionary as possible.
 */
static int ZSTD_dictTooBig(size_t const loadedDictSize)
{
    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
}

/*! ZSTD_resetCCtx_internal() :
 * @param loadedDictSize The size of the dictionary to be loaded
 * into the context, if any. If no dictionary is used, or the
 * dictionary is being attached / copied, then pass 0.
 * note : `params` are assumed fully validated at this stage.
 */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params const* params,
                                      U64 const pledgedSrcSize,
                                      size_t const loadedDictSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    ZSTD_cwksp* const ws = &zc->workspace;
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));

    zc->isFirstBlock = 1;

    /* Set applied params early so we can modify them for LDM,
     * and point params at the applied params.
     */
    zc->appliedParams = *params;
    params = &zc->appliedParams;

    assert(params->useRowMatchFinder != ZSTD_ps_auto);
    assert(params->useBlockSplitter != ZSTD_ps_auto);
    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
        assert(params->ldmParams.hashRateLog < 32);
    }

    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                ? windowSize + blockSize
                : 0;
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);

        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
        ZSTD_indexResetPolicy_e needsIndexReset =
            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
        size_t const neededSpace =
            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                buffInSize, buffOutSize, pledgedSrcSize);
        int resizeWorkspace;

        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");

        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);

        {   /* Check if workspace is large enough, alloc a new one if needed */
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (resizeWorkspace) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);

                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                needsIndexReset = ZSTDirp_reset;

                ZSTD_cwksp_free(ws, zc->customMem);
                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");

                DEBUGLOG(5, "reserving object space");
                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
        }   }

        ZSTD_cwksp_clear(ws);

        /* init params */
        zc->blockState.matchState.cParams = params->cParams;
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        xxh64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;
        zc->dictContentSize = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
        zc->seqStore.maxNbLit = blockSize;

        /* buffers */
        zc->bufferedPolicy = zbuff;
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
        zc->outBuffSize = buffOutSize;
        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

        /* ldm bucketOffsets table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const numBuckets =
                  ((size_t)1) << (params->ldmParams.hashLog -
                                  params->ldmParams.bucketSizeLog);
            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
        }

        /* sequences storage */
        ZSTD_referenceExternalSequences(zc, NULL, 0);
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

        FORWARD_IF_ERROR(ZSTD_reset_matchState(
            &zc->blockState.matchState,
            ws,
            &params->cParams,
            params->useRowMatchFinder,
            crp,
            needsIndexReset,
            ZSTD_resetTarget_CCtx), "");

        /* ldm hash table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
            zc->maxNbLdmSequences = maxNbLdmSeq;

            ZSTD_window_init(&zc->ldmState.window);
            zc->ldmState.loadedDictEnd = 0;
        }

        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));

        zc->initialized = 1;

        return 0;
    }
}

/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int i;
    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
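/* Illustrative sketch (not part of the library): the attach-vs-copy choice
 * made by the code below is automatic, but it can be forced through the
 * advanced API. ZSTD_c_forceAttachDict and the ZSTD_dictForce* values are
 * experimental zstd parameters; availability depends on the build, and
 * error checks are elided.
 *
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceCopy);
 *     ZSTD_CCtx_refCDict(cctx, cdict);
 *     // the next compression copies the CDict tables instead of referencing them
 */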
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};

static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
    return dedicatedDictSearch
        || ( ( pledgedSrcSize <= cutoff
            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
            || params->attachDictPref == ZSTD_dictForceAttach )
          && params->attachDictPref != ZSTD_dictForceCopy
          && !params->forceWindow ); /* dictMatchState isn't correctly
                                      * handled in _enforceMaxDist */
}

static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                                const ZSTD_CDict* cdict,
                                ZSTD_CCtx_params params,
                                U64 pledgedSrcSize,
                                ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);
    {
        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        /* pledgedSrcSize == 0 means 0! */

        if (cdict->matchState.dedicatedDictSearch) {
            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
        }

        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_makeClean, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
    }

    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}

static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    assert(!cdict->matchState.dedicatedDictSearch);
    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);

    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
    assert(params.useRowMatchFinder != ZSTD_ps_auto);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
                                    ? ((size_t)1 << cdict_cParams->chainLog)
                                    : 0;
        size_t const hSize = (size_t)1 << cdict_cParams->hashLog;

        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                    cdict->matchState.hashTable,
                    hSize * sizeof(U32));
        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                        cdict->matchState.chainTable,
                        chainSize * sizeof(U32));
        }
        /* copy tag table */
        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
            size_t const tagTableSize = hSize*sizeof(U16);
            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
                        cdict->matchState.tagTable,
                        tagTableSize);
        }
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}

/* We have a choice between copying the dictionary context into the working
 * context, or referencing the dictionary context from the working context
 * in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            const ZSTD_CCtx_params* params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{

    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
                (unsigned)pledgedSrcSize);

    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
        return ZSTD_resetCCtx_byAttachingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    } else {
        return ZSTD_resetCCtx_byCopyingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    }
}

/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                    "Can't copy a ctx that's not in init stage.");
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
        params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
        params.ldmParams = srcCCtx->appliedParams.ldmParams;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
                                /* loadedDictSize */ 0,
                                ZSTDcrp_leaveDirty, zbuff);
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
                                                         srcCCtx->appliedParams.useRowMatchFinder,
                                                         0 /* forDDSDict */)
                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
                                    : 0;
        size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
                    srcCCtx->blockState.matchState.hashTable,
                    hSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
                    srcCCtx->blockState.matchState.chainTable,
                    chainSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
                    srcCCtx->blockState.matchState.hashTable3,
                    h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;
    dstCCtx->dictContentSize = srcCCtx->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}
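/* Illustrative sketch (not part of the library): duplicating a prepared
 * context with ZSTD_copyCCtx() below, so several identically-parameterized
 * compressions can start from the same initialized state. All functions
 * used are public (advanced) zstd API; error checks are elided.
 *
 *     ZSTD_CCtx* const base  = ZSTD_createCCtx();
 *     ZSTD_CCtx* const clone = ZSTD_createCCtx();
 *     ZSTD_compressBegin(base, 5);      // puts base in the ZSTDcs_init stage
 *     ZSTD_copyCCtx(clone, base, ZSTD_CONTENTSIZE_UNKNOWN);
 */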
/*! ZSTD_copyCCtx() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  pledgedSrcSize==0 means "unknown".
 * @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);

    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
                                  fParams, pledgedSrcSize,
                                  zbuff);
}


#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));   /* can be cast to int */


    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            U32 newVal;
            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
                /* This write is pointless, but is required(?) for the compiler
                 * to auto-vectorize the loop. */
                newVal = ZSTD_DUBT_UNSORTED_MARK;
            } else if (table[cellNb] < reducerThreshold) {
                newVal = 0;
            } else {
                newVal = table[cellNb] - reducerValue;
            }
            table[cellNb] = newVal;
            cellNb++;
    }   }
}

static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}

static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
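/* Worked example (illustrative values) for the reduction above: with
 * reducerValue = 0x10000 and ZSTD_WINDOW_START_INDEX assumed to be 2,
 * reducerThreshold = 0x10002. Then:
 *   - entry 0x18000 (a live position)      -> 0x18000 - 0x10000 = 0x8000
 *   - entry 0x00005 (below the threshold)  -> squashed to 0
 *   - entry ZSTD_DUBT_UNSORTED_MARK        -> preserved (btlazy2 variant only)
 * Matches remain consistent because the window pointers are rebased by the
 * same reducerValue.
 */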
/*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        if (params->cParams.strategy == ZSTD_btlazy2)
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }

    if (ms->hashLog3) {
        U32 const h3Size = (U32)1 << ms->hashLog3;
        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
    }
}


/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */

void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    assert(nbSeq <= seqStorePtr->maxNbSeq);
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].mlBase;
        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
    }
    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}

/* ZSTD_useTargetCBlockSize():
 * Returns whether the target compressed block size param is being used.
 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
    return (cctxParams->targetCBlockSize != 0);
}

/* ZSTD_blockSplitterEnabled():
 * Returns whether the block splitting param is being used.
 * If used, compression will do best effort to split a block in order to improve compression ratio.
 * At the time this function is called, the parameter must be finalized.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
    assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
    return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
}
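/* Illustrative sketch (not part of the library): both predicates above are
 * driven by advanced parameters. The names below are experimental zstd API
 * (ZSTD_c_targetCBlockSize / ZSTD_c_useBlockSplitter); whether they are
 * exposed depends on the build, and the values are arbitrary.
 *
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1300);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable);
 */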
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
 * and size of the sequences statistics
 */
typedef struct {
    U32 LLtype;
    U32 Offtype;
    U32 MLtype;
    size_t size;
    size_t lastCountSize;    /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_symbolEncodingTypeStats_t;

/* ZSTD_buildSequencesStatistics():
 * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
 * Modifies `nextEntropy` to have the appropriate values as a side effect.
 * nbSeq must be greater than 0.
 *
 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
                              BYTE* dst, const BYTE* const dstEnd,
                              ZSTD_strategy strategy, unsigned* countWorkspace,
                              void* entropyWorkspace, size_t entropyWkspSize) {
    BYTE* const ostart = dst;
    const BYTE* const oend = dstEnd;
    BYTE* op = ostart;
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    ZSTD_symbolEncodingTypeStats_t stats;

    stats.lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    assert(op <= oend);
    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
                countWorkspace, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->litlengthCTable,
                sizeof(prevEntropy->litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.LLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
                countWorkspace, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->offcodeCTable,
                sizeof(prevEntropy->offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.Offtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
                countWorkspace, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->matchlengthCTable,
                sizeof(prevEntropy->matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.MLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    stats.size = (size_t)(op-ostart);
    return stats;
}
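/* For reference (zstd format): each of the three code streams handled above
 * is tagged in the block's sequences section with one of four modes:
 *   set_basic      - use the format's predefined FSE distribution
 *   set_rle        - all codes identical; a single symbol byte is stored
 *   set_compressed - a fresh FSE table is serialized ahead of the bitstream
 *   set_repeat     - reuse the table from the previous block
 * ZSTD_selectEncodingType() picks, per stream, whichever mode costs least.
 */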
/* ZSTD_entropyCompressSeqStore_internal():
 * compresses both literals and sequences
 * Returns compressed size of block, or a zstd error.
 */
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned* count = (unsigned*)entropyWorkspace;
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t lastCountSize;

    entropyWorkspace = count + (MaxSeq + 1);
    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
        /* Base suspicion of uncompressibility on ratio of literals to sequences */
        unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2, suspectUncompressible);
        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }
    {
        ZSTD_symbolEncodingTypeStats_t stats;
        BYTE* seqHead = op++;
        /* build stats for sequences */
        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                             &prevEntropy->fse, &nextEntropy->fse,
                                              op, oend,
                                              strategy, count,
                                              entropyWorkspace, entropyWkspSize);
        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
"ZSTD_buildSequencesStatistics failed!"); 2564 *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); 2565 lastCountSize = stats.lastCountSize; 2566 op += stats.size; 2567 } 2568 2569 { size_t const bitstreamSize = ZSTD_encodeSequences( 2570 op, (size_t)(oend - op), 2571 CTable_MatchLength, mlCodeTable, 2572 CTable_OffsetBits, ofCodeTable, 2573 CTable_LitLength, llCodeTable, 2574 sequences, nbSeq, 2575 longOffsets, bmi2); 2576 FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); 2577 op += bitstreamSize; 2578 assert(op <= oend); 2579 /* zstd versions <= 1.3.4 mistakenly report corruption when 2580 * FSE_readNCount() receives a buffer < 4 bytes. 2581 * Fixed by https://github.com/facebook/zstd/pull/1146. 2582 * This can happen when the last set_compressed table present is 2 2583 * bytes and the bitstream is only one byte. 2584 * In this exceedingly rare case, we will simply emit an uncompressed 2585 * block, since it isn't worth optimizing. 2586 */ 2587 if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { 2588 /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ 2589 assert(lastCountSize + bitstreamSize == 3); 2590 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " 2591 "emitting an uncompressed block."); 2592 return 0; 2593 } 2594 } 2595 2596 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); 2597 return (size_t)(op - ostart); 2598 } 2599 2600 MEM_STATIC size_t 2601 ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, 2602 const ZSTD_entropyCTables_t* prevEntropy, 2603 ZSTD_entropyCTables_t* nextEntropy, 2604 const ZSTD_CCtx_params* cctxParams, 2605 void* dst, size_t dstCapacity, 2606 size_t srcSize, 2607 void* entropyWorkspace, size_t entropyWkspSize, 2608 int bmi2) 2609 { 2610 size_t const cSize = ZSTD_entropyCompressSeqStore_internal( 2611 seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2612 dst, dstCapacity, 2613 entropyWorkspace, entropyWkspSize, bmi2); 2614 if (cSize == 0) return 0; 2615 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 2616 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                       const ZSTD_entropyCTables_t* prevEntropy,
                             ZSTD_entropyCTables_t* nextEntropy,
                       const ZSTD_CCtx_params* cctxParams,
                             void* dst, size_t dstCapacity,
                             size_t srcSize,
                             void* entropyWorkspace, size_t entropyWkspSize,
                             int bmi2)
{
    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                            dst, dstCapacity,
                            entropyWorkspace, entropyWkspSize, bmi2);
    if (cSize == 0) return 0;
    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
     * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block.
     */
    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
        return 0;  /* block not compressed */
    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");

    /* Check compressibility */
    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
        if (cSize >= maxCSize) return 0;  /* block not compressed */
    }
    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
    return cSize;
}

/* ZSTD_selectBlockCompressor() :
 * Not static, but internal use only (used by long distance matcher)
 * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
{
    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
        { ZSTD_compressBlock_fast  /* default for 0 */,
          ZSTD_compressBlock_fast,
          ZSTD_compressBlock_doubleFast,
          ZSTD_compressBlock_greedy,
          ZSTD_compressBlock_lazy,
          ZSTD_compressBlock_lazy2,
          ZSTD_compressBlock_btlazy2,
          ZSTD_compressBlock_btopt,
          ZSTD_compressBlock_btultra,
          ZSTD_compressBlock_btultra2 },
        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
          ZSTD_compressBlock_fast_extDict,
          ZSTD_compressBlock_doubleFast_extDict,
          ZSTD_compressBlock_greedy_extDict,
          ZSTD_compressBlock_lazy_extDict,
          ZSTD_compressBlock_lazy2_extDict,
          ZSTD_compressBlock_btlazy2_extDict,
          ZSTD_compressBlock_btopt_extDict,
          ZSTD_compressBlock_btultra_extDict,
          ZSTD_compressBlock_btultra_extDict },
        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
          ZSTD_compressBlock_fast_dictMatchState,
          ZSTD_compressBlock_doubleFast_dictMatchState,
          ZSTD_compressBlock_greedy_dictMatchState,
          ZSTD_compressBlock_lazy_dictMatchState,
          ZSTD_compressBlock_lazy2_dictMatchState,
          ZSTD_compressBlock_btlazy2_dictMatchState,
          ZSTD_compressBlock_btopt_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState },
        { NULL  /* default for 0 */,
          NULL,
          NULL,
          ZSTD_compressBlock_greedy_dedicatedDictSearch,
          ZSTD_compressBlock_lazy_dedicatedDictSearch,
          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
          NULL,
          NULL,
          NULL,
          NULL }
    };
    ZSTD_blockCompressor selectedCompressor;
    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
            { ZSTD_compressBlock_greedy_row,
              ZSTD_compressBlock_lazy_row,
              ZSTD_compressBlock_lazy2_row },
            { ZSTD_compressBlock_greedy_extDict_row,
              ZSTD_compressBlock_lazy_extDict_row,
              ZSTD_compressBlock_lazy2_extDict_row },
            { ZSTD_compressBlock_greedy_dictMatchState_row,
              ZSTD_compressBlock_lazy_dictMatchState_row,
              ZSTD_compressBlock_lazy2_dictMatchState_row },
            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
              ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
              ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
        };
        DEBUGLOG(4, "Selecting a row-based matchfinder");
        assert(useRowMatchFinder != ZSTD_ps_auto);
        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
    } else {
        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
    }
    assert(selectedCompressor != NULL);
    return selectedCompressor;
}

static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
    seqStorePtr->lit += lastLLSize;
}

void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->longLengthType = ZSTD_llt_none;
}

typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;

static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
        } else {
            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        }
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 curr = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (curr > ms->nextToUpdate + 384)
            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        {   int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                                        &zc->appliedParams.ldmParams,
                                                        src, srcSize), "");
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
                                                                                    zc->appliedParams.useRowMatchFinder,
                                                                                    dictMode);
            ms->ldmSeqStore = NULL;
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}

static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    size_t literalsRead = 0;
    size_t lastLLSize;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i;
    repcodes_t updatedRepcodes;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (i = 0; i < seqStoreSeqSize; ++i) {
        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
        outSeqs[i].rep = 0;

        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            outSeqs[i].rep = seqStoreSeqs[i].offBase;
            if (outSeqs[i].litLength != 0) {
                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
            } else {
                if (outSeqs[i].rep == 3) {
                    rawOffset = updatedRepcodes.rep[0] - 1;
                } else {
                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                }
            }
        }
        outSeqs[i].offset = rawOffset;
        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
         * so we provide seqStoreSeqs[i].offset - 1 */
        ZSTD_updateRep(updatedRepcodes.rep,
                       seqStoreSeqs[i].offBase - 1,
                       seqStoreSeqs[i].litLength == 0);
        literalsRead += outSeqs[i].litLength;
    }
    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(seqStoreLiteralsSize >= literalsRead);
    lastLLSize = seqStoreLiteralsSize - literalsRead;
    outSeqs[i].litLength = (U32)lastLLSize;
    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    seqStoreSeqSize++;
    zc->seqCollector.seqIndex += seqStoreSeqSize;
}

size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                              size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    ZSTD_customFree(dst, ZSTD_defaultCMem);
    return zc->seqCollector.seqIndex;
}

size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
    size_t in = 0;
    size_t out = 0;
    for (; in < seqsSize; ++in) {
        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
            if (in != seqsSize - 1) {
                sequences[in+1].litLength += sequences[in].litLength;
            }
        } else {
            sequences[out] = sequences[in];
            ++out;
        }
    }
    return out;
}

/* Unrolled loop to read four size_ts of input at a time. Returns 1 if the input is RLE, 0 if not. */
static int ZSTD_isRLE(const BYTE* src, size_t length) {
    const BYTE* ip = src;
    const BYTE value = ip[0];
    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
    const size_t unrollSize = sizeof(size_t) * 4;
    const size_t unrollMask = unrollSize - 1;
    const size_t prefixLength = length & unrollMask;
    size_t i;
    size_t u;
    if (length == 1) return 1;
    /* Check if prefix is RLE first before using unrolled loop */
    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
        return 0;
    }
    for (i = prefixLength; i != length; i += unrollSize) {
        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
            if (MEM_readST(ip + i + u) != valueST) {
                return 0;
            }
        }
    }
    return 1;
}

/* Returns true if the given block may be RLE.
 * This is just a heuristic based on the compressibility.
 * It may return both false positives and false negatives.
 */
static int ZSTD_maybeRLE(seqStore_t const* seqStore)
{
    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);

    return nbSeqs < 4 && nbLits < 10;
}

static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
    bs->prevCBlock = bs->nextCBlock;
    bs->nextCBlock = tmp;
}
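/* Worked example for writeBlockHeader() below (zstd format: 3-byte
 * little-endian block header = lastBlock bit | blockType << 1 | size << 3):
 * an RLE block covering 100 bytes that is also the last block encodes
 * 1 + (bt_rle << 1) + (100 << 3) = 1 + 2 + 800 = 803 = 0x000323,
 * i.e. the bytes 23 03 00 on the wire.
 */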
2951 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : 2952 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 2953 MEM_writeLE24(op, cBlockHeader); 2954 DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); 2955 } 2956 2957 /* ZSTD_buildBlockEntropyStats_literals() : 2958 * Builds entropy for the literals. 2959 * Stores literals block type (raw, rle, compressed, repeat) and 2960 * huffman description table to hufMetadata. 2961 * Requires ENTROPY_WORKSPACE_SIZE workspace 2962 * @return : size of huffman description table or error code */ 2963 static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, 2964 const ZSTD_hufCTables_t* prevHuf, 2965 ZSTD_hufCTables_t* nextHuf, 2966 ZSTD_hufCTablesMetadata_t* hufMetadata, 2967 const int literalsCompressionIsDisabled, 2968 void* workspace, size_t wkspSize) 2969 { 2970 BYTE* const wkspStart = (BYTE*)workspace; 2971 BYTE* const wkspEnd = wkspStart + wkspSize; 2972 BYTE* const countWkspStart = wkspStart; 2973 unsigned* const countWksp = (unsigned*)workspace; 2974 const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); 2975 BYTE* const nodeWksp = countWkspStart + countWkspSize; 2976 const size_t nodeWkspSize = wkspEnd-nodeWksp; 2977 unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; 2978 unsigned huffLog = HUF_TABLELOG_DEFAULT; 2979 HUF_repeat repeat = prevHuf->repeatMode; 2980 DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); 2981 2982 /* Prepare nextEntropy assuming reusing the existing table */ 2983 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 2984 2985 if (literalsCompressionIsDisabled) { 2986 DEBUGLOG(5, "set_basic - disabled"); 2987 hufMetadata->hType = set_basic; 2988 return 0; 2989 } 2990 2991 /* small ? don't even attempt compression (speed opt) */ 2992 #ifndef COMPRESS_LITERALS_SIZE_MIN 2993 #define COMPRESS_LITERALS_SIZE_MIN 63 2994 #endif 2995 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
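/* Threshold rationale (illustrative reading of the expression below): when
 * the previous Huffman table is still valid it can be reused at zero
 * table-description cost, so even a very small literal payload may gain;
 * a fresh table only amortizes its header above COMPRESS_LITERALS_SIZE_MIN. */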
6 : COMPRESS_LITERALS_SIZE_MIN; 2996 if (srcSize <= minLitSize) { 2997 DEBUGLOG(5, "set_basic - too small"); 2998 hufMetadata->hType = set_basic; 2999 return 0; 3000 } 3001 } 3002 3003 /* Scan input and build symbol stats */ 3004 { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); 3005 FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); 3006 if (largest == srcSize) { 3007 DEBUGLOG(5, "set_rle"); 3008 hufMetadata->hType = set_rle; 3009 return 0; 3010 } 3011 if (largest <= (srcSize >> 7)+4) { 3012 DEBUGLOG(5, "set_basic - no gain"); 3013 hufMetadata->hType = set_basic; 3014 return 0; 3015 } 3016 } 3017 3018 /* Validate the previous Huffman table */ 3019 if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { 3020 repeat = HUF_repeat_none; 3021 } 3022 3023 /* Build Huffman Tree */ 3024 ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); 3025 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); 3026 { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, 3027 maxSymbolValue, huffLog, 3028 nodeWksp, nodeWkspSize); 3029 FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); 3030 huffLog = (U32)maxBits; 3031 { /* Build and write the CTable */ 3032 size_t const newCSize = HUF_estimateCompressedSize( 3033 (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); 3034 size_t const hSize = HUF_writeCTable_wksp( 3035 hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), 3036 (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, 3037 nodeWksp, nodeWkspSize); 3038 /* Check against repeating the previous CTable */ 3039 if (repeat != HUF_repeat_none) { 3040 size_t const oldCSize = HUF_estimateCompressedSize( 3041 (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); 3042 if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { 3043 DEBUGLOG(5, "set_repeat - smaller"); 3044 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 3045 hufMetadata->hType = set_repeat; 3046 return 0; 3047 } 3048 } 3049 if (newCSize + hSize >= srcSize) { 3050 DEBUGLOG(5, "set_basic - no gains"); 3051 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 3052 hufMetadata->hType = set_basic; 3053 return 0; 3054 } 3055 DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); 3056 hufMetadata->hType = set_compressed; 3057 nextHuf->repeatMode = HUF_repeat_check; 3058 return hSize; 3059 } 3060 } 3061 } 3062 3063 3064 /* ZSTD_buildDummySequencesStatistics(): 3065 * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, 3066 * and updates nextEntropy to the appropriate repeatMode. 3067 */ 3068 static ZSTD_symbolEncodingTypeStats_t 3069 ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { 3070 ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; 3071 nextEntropy->litlength_repeatMode = FSE_repeat_none; 3072 nextEntropy->offcode_repeatMode = FSE_repeat_none; 3073 nextEntropy->matchlength_repeatMode = FSE_repeat_none; 3074 return stats; 3075 } 3076 3077 /* ZSTD_buildBlockEntropyStats_sequences() : 3078 * Builds entropy for the sequences. 3079 * Stores symbol compression modes and fse table to fseMetadata. 3080 * Requires ENTROPY_WORKSPACE_SIZE wksp. 
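* The workspace is split in two, as carved out by the function body
* (sketch for illustration only):
*   unsigned countWorkspace[MaxSeq + 1];   histogram scratch, taken first
*   remaining wkspSize bytes;              FSE table-building scratch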
3081 * @return : size of fse tables or error code */ 3082 static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, 3083 const ZSTD_fseCTables_t* prevEntropy, 3084 ZSTD_fseCTables_t* nextEntropy, 3085 const ZSTD_CCtx_params* cctxParams, 3086 ZSTD_fseCTablesMetadata_t* fseMetadata, 3087 void* workspace, size_t wkspSize) 3088 { 3089 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 3090 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 3091 BYTE* const ostart = fseMetadata->fseTablesBuffer; 3092 BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); 3093 BYTE* op = ostart; 3094 unsigned* countWorkspace = (unsigned*)workspace; 3095 unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); 3096 size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); 3097 ZSTD_symbolEncodingTypeStats_t stats; 3098 3099 DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); 3100 stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, 3101 prevEntropy, nextEntropy, op, oend, 3102 strategy, countWorkspace, 3103 entropyWorkspace, entropyWorkspaceSize) 3104 : ZSTD_buildDummySequencesStatistics(nextEntropy); 3105 FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); 3106 fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; 3107 fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; 3108 fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; 3109 fseMetadata->lastCountSize = stats.lastCountSize; 3110 return stats.size; 3111 } 3112 3113 3114 /* ZSTD_buildBlockEntropyStats() : 3115 * Builds entropy for the block. 3116 * Requires workspace size ENTROPY_WORKSPACE_SIZE 3117 * 3118 * @return : 0 on success or error code 3119 */ 3120 size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, 3121 const ZSTD_entropyCTables_t* prevEntropy, 3122 ZSTD_entropyCTables_t* nextEntropy, 3123 const ZSTD_CCtx_params* cctxParams, 3124 ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3125 void* workspace, size_t wkspSize) 3126 { 3127 size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; 3128 entropyMetadata->hufMetadata.hufDesSize = 3129 ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, 3130 &prevEntropy->huf, &nextEntropy->huf, 3131 &entropyMetadata->hufMetadata, 3132 ZSTD_literalsCompressionIsDisabled(cctxParams), 3133 workspace, wkspSize); 3134 FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); 3135 entropyMetadata->fseMetadata.fseTablesSize = 3136 ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, 3137 &prevEntropy->fse, &nextEntropy->fse, 3138 cctxParams, 3139 &entropyMetadata->fseMetadata, 3140 workspace, wkspSize); 3141 FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); 3142 return 0; 3143 } 3144 3145 /* Returns the size estimate for the literals section (header + content) of a block */ 3146 static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, 3147 const ZSTD_hufCTables_t* huf, 3148 const ZSTD_hufCTablesMetadata_t* hufMetadata, 3149 void* workspace, size_t wkspSize, 3150 int writeEntropy) 3151 { 3152 unsigned* const countWksp = (unsigned*)workspace; 3153 unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; 3154 size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); 3155 U32 singleStream = litSize < 256; 3156 3157 if (hufMetadata->hType == set_basic) return litSize; 3158 else if 
(hufMetadata->hType == set_rle) return 1; 3159 else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { 3160 size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); 3161 if (ZSTD_isError(largest)) return litSize; 3162 { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); 3163 if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; 3164 if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ 3165 return cLitSizeEstimate + literalSectionHeaderSize; 3166 } } 3167 assert(0); /* impossible */ 3168 return 0; 3169 } 3170 3171 /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ 3172 static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, 3173 const BYTE* codeTable, size_t nbSeq, unsigned maxCode, 3174 const FSE_CTable* fseCTable, 3175 const U8* additionalBits, 3176 short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, 3177 void* workspace, size_t wkspSize) 3178 { 3179 unsigned* const countWksp = (unsigned*)workspace; 3180 const BYTE* ctp = codeTable; 3181 const BYTE* const ctStart = ctp; 3182 const BYTE* const ctEnd = ctStart + nbSeq; 3183 size_t cSymbolTypeSizeEstimateInBits = 0; 3184 unsigned max = maxCode; 3185 3186 HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ 3187 if (type == set_basic) { 3188 /* We selected this encoding type, so it must be valid. */ 3189 assert(max <= defaultMax); 3190 (void)defaultMax; 3191 cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); 3192 } else if (type == set_rle) { 3193 cSymbolTypeSizeEstimateInBits = 0; 3194 } else if (type == set_compressed || type == set_repeat) { 3195 cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); 3196 } 3197 if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { 3198 return nbSeq * 10; 3199 } 3200 while (ctp < ctEnd) { 3201 if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; 3202 else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ 3203 ctp++; 3204 } 3205 return cSymbolTypeSizeEstimateInBits >> 3; 3206 } 3207 3208 /* Returns the size estimate for the sequences section (header + content) of a block */ 3209 static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, 3210 const BYTE* llCodeTable, 3211 const BYTE* mlCodeTable, 3212 size_t nbSeq, 3213 const ZSTD_fseCTables_t* fseTables, 3214 const ZSTD_fseCTablesMetadata_t* fseMetadata, 3215 void* workspace, size_t wkspSize, 3216 int writeEntropy) 3217 { 3218 size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); 3219 size_t cSeqSizeEstimate = 0; 3220 cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, 3221 fseTables->offcodeCTable, NULL, 3222 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 3223 workspace, wkspSize); 3224 cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, 3225 fseTables->litlengthCTable, LL_bits, 3226 LL_defaultNorm, LL_defaultNormLog, MaxLL, 3227 workspace, wkspSize); 3228 cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, 3229 fseTables->matchlengthCTable, ML_bits, 3230 ML_defaultNorm, 
ML_defaultNormLog, MaxML, 3231 workspace, wkspSize); 3232 if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; 3233 return cSeqSizeEstimate + sequencesSectionHeaderSize; 3234 } 3235 3236 /* Returns the size estimate for a given stream of literals, of, ll, ml */ 3237 static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, 3238 const BYTE* ofCodeTable, 3239 const BYTE* llCodeTable, 3240 const BYTE* mlCodeTable, 3241 size_t nbSeq, 3242 const ZSTD_entropyCTables_t* entropy, 3243 const ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3244 void* workspace, size_t wkspSize, 3245 int writeLitEntropy, int writeSeqEntropy) { 3246 size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, 3247 &entropy->huf, &entropyMetadata->hufMetadata, 3248 workspace, wkspSize, writeLitEntropy); 3249 size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, 3250 nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, 3251 workspace, wkspSize, writeSeqEntropy); 3252 return seqSize + literalsSize + ZSTD_blockHeaderSize; 3253 } 3254 3255 /* Builds entropy statistics and uses them for blocksize estimation. 3256 * 3257 * Returns the estimated compressed size of the seqStore, or a zstd error. 3258 */ 3259 static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { 3260 ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; 3261 DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); 3262 FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, 3263 &zc->blockState.prevCBlock->entropy, 3264 &zc->blockState.nextCBlock->entropy, 3265 &zc->appliedParams, 3266 entropyMetadata, 3267 zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); 3268 return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), 3269 seqStore->ofCode, seqStore->llCode, seqStore->mlCode, 3270 (size_t)(seqStore->sequences - seqStore->sequencesStart), 3271 &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, 3272 (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); 3273 } 3274 3275 /* Returns literals bytes represented in a seqStore */ 3276 static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { 3277 size_t literalsBytes = 0; 3278 size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; 3279 size_t i; 3280 for (i = 0; i < nbSeqs; ++i) { 3281 seqDef seq = seqStore->sequencesStart[i]; 3282 literalsBytes += seq.litLength; 3283 if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { 3284 literalsBytes += 0x10000; 3285 } 3286 } 3287 return literalsBytes; 3288 } 3289 3290 /* Returns match bytes represented in a seqStore */ 3291 static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { 3292 size_t matchBytes = 0; 3293 size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; 3294 size_t i; 3295 for (i = 0; i < nbSeqs; ++i) { 3296 seqDef seq = seqStore->sequencesStart[i]; 3297 matchBytes += seq.mlBase + MINMATCH; 3298 if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { 3299 matchBytes += 0x10000; 3300 } 3301 } 3302 return matchBytes; 3303 } 3304 3305 /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). 3306 * Stores the result in resultSeqStore. 
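*
* Worked example (indices assumed): with 10 sequences, startIdx == 4 and
* endIdx == 7 yield a view over sequences [4, 7): litStart is advanced past
* the literals consumed by sequences [0, 4), and lit is set to end exactly
* after the literals of sequences [4, 7), except for the final chunk, which
* also absorbs the block's trailing literals.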
3307 */ 3308 static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, 3309 const seqStore_t* originalSeqStore, 3310 size_t startIdx, size_t endIdx) { 3311 BYTE* const litEnd = originalSeqStore->lit; 3312 size_t literalsBytes; 3313 size_t literalsBytesPreceding = 0; 3314 3315 *resultSeqStore = *originalSeqStore; 3316 if (startIdx > 0) { 3317 resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; 3318 literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3319 } 3320 3321 /* Move longLengthPos into the correct position if necessary */ 3322 if (originalSeqStore->longLengthType != ZSTD_llt_none) { 3323 if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { 3324 resultSeqStore->longLengthType = ZSTD_llt_none; 3325 } else { 3326 resultSeqStore->longLengthPos -= (U32)startIdx; 3327 } 3328 } 3329 resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; 3330 resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; 3331 literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3332 resultSeqStore->litStart += literalsBytesPreceding; 3333 if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { 3334 /* This accounts for possible last literals if the derived chunk reaches the end of the block */ 3335 resultSeqStore->lit = litEnd; 3336 } else { 3337 resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; 3338 } 3339 resultSeqStore->llCode += startIdx; 3340 resultSeqStore->mlCode += startIdx; 3341 resultSeqStore->ofCode += startIdx; 3342 } 3343 3344 /* 3345 * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. 3346 * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). 3347 */ 3348 static U32 3349 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) 3350 { 3351 U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ 3352 assert(STORED_IS_REPCODE(offCode)); 3353 if (adjustedOffCode == ZSTD_REP_NUM) { 3354 /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ 3355 assert(rep[0] > 0); 3356 return rep[0] - 1; 3357 } 3358 return rep[adjustedOffCode]; 3359 } 3360 3361 /* 3362 * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise 3363 * due to emission of RLE/raw blocks that disturb the offset history, 3364 * and replaces any repcodes within the seqStore that may be invalid. 3365 * 3366 * dRepcodes are updated as would be on the decompression side. 3367 * cRepcodes are updated exactly in accordance with the seqStore. 
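* Worked example of the resolution rules (history values assumed): with
* rep == {100, 200, 300}, ZSTD_resolveRepcodeToRawOffset() returns
*   repcode 1, litLength >  0  ->  rep[0] == 100
*   repcode 1, litLength == 0  ->  rep[1] == 200
*   repcode 3, litLength == 0  ->  rep[0] - 1 == 99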
3368 * 3369 * Note : this function assumes seq->offBase respects the following numbering scheme : 3370 * 0 : invalid 3371 * 1-3 : repcode 1-3 3372 * 4+ : real_offset+3 3373 */ 3374 static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, 3375 seqStore_t* const seqStore, U32 const nbSeq) { 3376 U32 idx = 0; 3377 for (; idx < nbSeq; ++idx) { 3378 seqDef* const seq = seqStore->sequencesStart + idx; 3379 U32 const ll0 = (seq->litLength == 0); 3380 U32 const offCode = OFFBASE_TO_STORED(seq->offBase); 3381 assert(seq->offBase > 0); 3382 if (STORED_IS_REPCODE(offCode)) { 3383 U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); 3384 U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); 3385 /* Adjust simulated decompression repcode history if we come across a mismatch. Replace 3386 * the repcode with the offset it actually references, determined by the compression 3387 * repcode history. 3388 */ 3389 if (dRawOffset != cRawOffset) { 3390 seq->offBase = cRawOffset + ZSTD_REP_NUM; 3391 } 3392 } 3393 /* Compression repcode history is always updated with values directly from the unmodified seqStore. 3394 * Decompression repcode history may use the modified seq->offBase value taken from compression repcode history. 3395 */ 3396 ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); 3397 ZSTD_updateRep(cRepcodes->rep, offCode, ll0); 3398 } 3399 } 3400 3401 /* ZSTD_compressSeqStore_singleBlock(): 3402 * Compresses a seqStore into a block with a block header, written into the buffer dst. 3403 * 3404 * Returns the total size of that block (including header) or a ZSTD error code. 3405 */ 3406 static size_t 3407 ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, 3408 repcodes_t* const dRep, repcodes_t* const cRep, 3409 void* dst, size_t dstCapacity, 3410 const void* src, size_t srcSize, 3411 U32 lastBlock, U32 isPartition) 3412 { 3413 const U32 rleMaxLength = 25; 3414 BYTE* op = (BYTE*)dst; 3415 const BYTE* ip = (const BYTE*)src; 3416 size_t cSize; 3417 size_t cSeqsSize; 3418 3419 /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ 3420 repcodes_t const dRepOriginal = *dRep; 3421 DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); 3422 if (isPartition) 3423 ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); 3424 3425 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit"); 3426 cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, 3427 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 3428 &zc->appliedParams, 3429 op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, 3430 srcSize, 3431 zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 3432 zc->bmi2); 3433 FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); 3434 3435 if (!zc->isFirstBlock && 3436 cSeqsSize < rleMaxLength && 3437 ZSTD_isRLE((BYTE const*)src, srcSize)) { 3438 /* We don't want to emit our first block as an RLE even if it qualifies because 3439 * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
3440 * This is only an issue for zstd <= v1.4.3 3441 */ 3442 cSeqsSize = 1; 3443 } 3444 3445 if (zc->seqCollector.collectSequences) { 3446 ZSTD_copyBlockSequences(zc); 3447 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3448 return 0; 3449 } 3450 3451 if (cSeqsSize == 0) { 3452 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); 3453 FORWARD_IF_ERROR(cSize, "Nocompress block failed"); 3454 DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); 3455 *dRep = dRepOriginal; /* reset simulated decompression repcode history */ 3456 } else if (cSeqsSize == 1) { 3457 cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); 3458 FORWARD_IF_ERROR(cSize, "RLE compress block failed"); 3459 DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); 3460 *dRep = dRepOriginal; /* reset simulated decompression repcode history */ 3461 } else { 3462 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3463 writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); 3464 cSize = ZSTD_blockHeaderSize + cSeqsSize; 3465 DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); 3466 } 3467 3468 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 3469 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 3470 3471 return cSize; 3472 } 3473 3474 /* Struct to keep track of where we are in our recursive calls. */ 3475 typedef struct { 3476 U32* splitLocations; /* Array of split indices */ 3477 size_t idx; /* The current index within splitLocations being worked on */ 3478 } seqStoreSplits; 3479 3480 #define MIN_SEQUENCES_BLOCK_SPLITTING 300 3481 3482 /* Helper function to perform the recursive search for block splits. 3483 * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. 3484 * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then 3485 * we do not recurse. 3486 * 3487 * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. 3488 * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). 3489 * In practice, recursion depth usually doesn't go beyond 4. 3490 * 3491 * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize 3492 * maximum of 128 KB, this value is actually impossible to reach. 
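*
* Illustrative trace (sequence counts assumed): starting from nbSeq == 1200
* with estimates always favoring a split, the recursion proceeds
* 1200 -> 600 -> 300 -> 150; ranges of 150 sequences fall below
* MIN_SEQUENCES_BLOCK_SPLITTING and become the final partitions.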
3493 */ 3494 static void 3495 ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, 3496 ZSTD_CCtx* zc, const seqStore_t* origSeqStore) 3497 { 3498 seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; 3499 seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; 3500 seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; 3501 size_t estimatedOriginalSize; 3502 size_t estimatedFirstHalfSize; 3503 size_t estimatedSecondHalfSize; 3504 size_t midIdx = (startIdx + endIdx)/2; 3505 3506 if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { 3507 DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); 3508 return; 3509 } 3510 DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); 3511 ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); 3512 ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); 3513 ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); 3514 estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); 3515 estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); 3516 estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); 3517 DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", 3518 estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); 3519 if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { 3520 return; 3521 } 3522 if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { 3523 ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); 3524 splits->splitLocations[splits->idx] = (U32)midIdx; 3525 splits->idx++; 3526 ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); 3527 } 3528 } 3529 3530 /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. 3531 * 3532 * Returns the number of splits made (which equals the size of the partition table - 1). 3533 */ 3534 static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { 3535 seqStoreSplits splits = {partitions, 0}; 3536 if (nbSeq <= 4) { 3537 DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); 3538 /* Refuse to try and split anything with less than 4 sequences */ 3539 return 0; 3540 } 3541 ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); 3542 splits.splitLocations[splits.idx] = nbSeq; 3543 DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); 3544 return splits.idx; 3545 } 3546 3547 /* ZSTD_compressBlock_splitBlock(): 3548 * Attempts to split a given block into multiple blocks to improve compression ratio. 3549 * 3550 * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
3551 */ 3552 static size_t 3553 ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, 3554 const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) 3555 { 3556 size_t cSize = 0; 3557 const BYTE* ip = (const BYTE*)src; 3558 BYTE* op = (BYTE*)dst; 3559 size_t i = 0; 3560 size_t srcBytesTotal = 0; 3561 U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ 3562 seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; 3563 seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; 3564 size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); 3565 3566 /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history 3567 * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two 3568 * separate repcode histories that simulate repcode history on compression and decompression side, 3569 * and use the histories to determine whether we must replace a particular repcode with its raw offset. 3570 * 3571 * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed 3572 * or RLE. This allows us to retrieve the offset value that an invalid repcode references within 3573 * a nocompress/RLE block. 3574 * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use 3575 * the replacement offset value rather than the original repcode to update the repcode history. 3576 * dRep also will be the final repcode history sent to the next block. 3577 * 3578 * See ZSTD_seqStore_resolveOffCodes() for more details. 3579 */ 3580 repcodes_t dRep; 3581 repcodes_t cRep; 3582 ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 3583 ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 3584 ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); 3585 3586 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 3587 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, 3588 (unsigned)zc->blockState.matchState.nextToUpdate); 3589 3590 if (numSplits == 0) { 3591 size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, 3592 &dRep, &cRep, 3593 op, dstCapacity, 3594 ip, blockSize, 3595 lastBlock, 0 /* isPartition */); 3596 FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); 3597 DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); 3598 assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); 3599 return cSizeSingleBlock; 3600 } 3601 3602 ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); 3603 for (i = 0; i <= numSplits; ++i) { 3604 size_t srcBytes; 3605 size_t cSizeChunk; 3606 U32 const lastPartition = (i == numSplits); 3607 U32 lastBlockEntireSrc = 0; 3608 3609 srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); 3610 srcBytesTotal += srcBytes; 3611 if (lastPartition) { 3612 /* This is the final partition, need to account for possible last literals */ 3613 srcBytes += blockSize - srcBytesTotal; 3614 lastBlockEntireSrc = lastBlock; 3615 } else { 3616 ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); 3617 } 3618 3619 cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore, 3620 &dRep, &cRep, 3621 op, dstCapacity, 3622 ip, srcBytes, 3623 lastBlockEntireSrc, 1 /* isPartition */); 3624 DEBUGLOG(5, 
"Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); 3625 FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); 3626 3627 ip += srcBytes; 3628 op += cSizeChunk; 3629 dstCapacity -= cSizeChunk; 3630 cSize += cSizeChunk; 3631 *currSeqStore = *nextSeqStore; 3632 assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); 3633 } 3634 /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes 3635 * for the next block. 3636 */ 3637 ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); 3638 return cSize; 3639 } 3640 3641 static size_t 3642 ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, 3643 void* dst, size_t dstCapacity, 3644 const void* src, size_t srcSize, U32 lastBlock) 3645 { 3646 const BYTE* ip = (const BYTE*)src; 3647 BYTE* op = (BYTE*)dst; 3648 U32 nbSeq; 3649 size_t cSize; 3650 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); 3651 assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); 3652 3653 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 3654 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 3655 if (bss == ZSTDbss_noCompress) { 3656 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 3657 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 3658 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); 3659 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 3660 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); 3661 return cSize; 3662 } 3663 nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); 3664 } 3665 3666 cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); 3667 FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); 3668 return cSize; 3669 } 3670 3671 static size_t 3672 ZSTD_compressBlock_internal(ZSTD_CCtx* zc, 3673 void* dst, size_t dstCapacity, 3674 const void* src, size_t srcSize, U32 frame) 3675 { 3676 /* This the upper bound for the length of an rle block. 3677 * This isn't the actual upper bound. Finding the real threshold 3678 * needs further investigation. 3679 */ 3680 const U32 rleMaxLength = 25; 3681 size_t cSize; 3682 const BYTE* ip = (const BYTE*)src; 3683 BYTE* op = (BYTE*)dst; 3684 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 3685 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, 3686 (unsigned)zc->blockState.matchState.nextToUpdate); 3687 3688 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 3689 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 3690 if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } 3691 } 3692 3693 if (zc->seqCollector.collectSequences) { 3694 ZSTD_copyBlockSequences(zc); 3695 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3696 return 0; 3697 } 3698 3699 /* encode sequences and literals */ 3700 cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, 3701 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 3702 &zc->appliedParams, 3703 dst, dstCapacity, 3704 srcSize, 3705 zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 3706 zc->bmi2); 3707 3708 if (frame && 3709 /* We don't want to emit our first block as a RLE even if it qualifies because 3710 * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
3711 * This is only an issue for zstd <= v1.4.3 3712 */ 3713 !zc->isFirstBlock && 3714 cSize < rleMaxLength && 3715 ZSTD_isRLE(ip, srcSize)) 3716 { 3717 cSize = 1; 3718 op[0] = ip[0]; 3719 } 3720 3721 out: 3722 if (!ZSTD_isError(cSize) && cSize > 1) { 3723 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3724 } 3725 /* We check that dictionaries have offset codes available for the first 3726 * block. After the first block, the offcode table might not have large 3727 * enough codes to represent the offsets in the data. 3728 */ 3729 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 3730 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 3731 3732 return cSize; 3733 } 3734 3735 static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, 3736 void* dst, size_t dstCapacity, 3737 const void* src, size_t srcSize, 3738 const size_t bss, U32 lastBlock) 3739 { 3740 DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); 3741 if (bss == ZSTDbss_compress) { 3742 if (/* We don't want to emit our first block as an RLE even if it qualifies because 3743 * doing so will cause the decoder (cli only) to throw a "should consume all input" error. 3744 * This is only an issue for zstd <= v1.4.3 3745 */ 3746 !zc->isFirstBlock && 3747 ZSTD_maybeRLE(&zc->seqStore) && 3748 ZSTD_isRLE((BYTE const*)src, srcSize)) 3749 { 3750 return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); 3751 } 3752 /* Attempt superblock compression. 3753 * 3754 * Note that the compressed size of ZSTD_compressSuperBlock() is not bounded by the 3755 * standard ZSTD_compressBound(). This is a problem, because even if we have 3756 * space now, taking an extra byte now could cause us to run out of space later 3757 * and violate ZSTD_compressBound(). 3758 * 3759 * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. 3760 * 3761 * In order to respect ZSTD_compressBound() we must attempt to emit a raw 3762 * uncompressed block in these cases: 3763 * * cSize == 0: Return code for an uncompressed block. 3764 * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). 3765 * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of 3766 * output space. 3767 * * cSize >= blockBound(srcSize): We have expanded the block too much so 3768 * emit an uncompressed block. 3769 */ 3770 { 3771 size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); 3772 if (cSize != ERROR(dstSize_tooSmall)) { 3773 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); 3774 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); 3775 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { 3776 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 3777 return cSize; 3778 } 3779 } 3780 } 3781 } 3782 3783 DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); 3784 /* Superblock compression failed, attempt to emit a single no compress block. 3785 * The decoder will be able to stream this block since it is uncompressed.
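*
* Concretely (sizes assumed for illustration): for srcSize == 1000, a
* superblock result is only kept while it stays below
* (1000 - ZSTD_minGain(1000, strategy)) + ZSTD_blockHeaderSize; anything
* larger, or a dstSize_tooSmall result, falls through to this raw block,
* preserving the ZSTD_compressBound() guarantee.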
3786 */ 3787 return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); 3788 } 3789 3790 static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, 3791 void* dst, size_t dstCapacity, 3792 const void* src, size_t srcSize, 3793 U32 lastBlock) 3794 { 3795 size_t cSize = 0; 3796 const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 3797 DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", 3798 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); 3799 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 3800 3801 cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); 3802 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); 3803 3804 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 3805 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 3806 3807 return cSize; 3808 } 3809 3810 static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, 3811 ZSTD_cwksp* ws, 3812 ZSTD_CCtx_params const* params, 3813 void const* ip, 3814 void const* iend) 3815 { 3816 U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); 3817 U32 const maxDist = (U32)1 << params->cParams.windowLog; 3818 if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { 3819 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); 3820 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); 3821 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 3822 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 3823 ZSTD_cwksp_mark_tables_dirty(ws); 3824 ZSTD_reduceIndex(ms, params, correction); 3825 ZSTD_cwksp_mark_tables_clean(ws); 3826 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; 3827 else ms->nextToUpdate -= correction; 3828 /* invalidate dictionaries on overflow correction */ 3829 ms->loadedDictEnd = 0; 3830 ms->dictMatchState = NULL; 3831 } 3832 } 3833 3834 /*! ZSTD_compress_frameChunk() : 3835 * Compress a chunk of data into one or multiple blocks. 3836 * All blocks will be terminated, all input will be consumed. 3837 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
3838 * The frame is assumed to be already started (header already produced) 3839 * @return : compressed size, or an error code 3840 */ 3841 static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, 3842 void* dst, size_t dstCapacity, 3843 const void* src, size_t srcSize, 3844 U32 lastFrameChunk) 3845 { 3846 size_t blockSize = cctx->blockSize; 3847 size_t remaining = srcSize; 3848 const BYTE* ip = (const BYTE*)src; 3849 BYTE* const ostart = (BYTE*)dst; 3850 BYTE* op = ostart; 3851 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; 3852 3853 assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); 3854 3855 DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); 3856 if (cctx->appliedParams.fParams.checksumFlag && srcSize) 3857 xxh64_update(&cctx->xxhState, src, srcSize); 3858 3859 while (remaining) { 3860 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 3861 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); 3862 3863 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, 3864 dstSize_tooSmall, 3865 "not enough space to store compressed block"); 3866 if (remaining < blockSize) blockSize = remaining; 3867 3868 ZSTD_overflowCorrectIfNeeded( 3869 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); 3870 ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); 3871 ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); 3872 3873 /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ 3874 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; 3875 3876 { size_t cSize; 3877 if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { 3878 cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); 3879 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); 3880 assert(cSize > 0); 3881 assert(cSize <= blockSize + ZSTD_blockHeaderSize); 3882 } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { 3883 cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); 3884 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); 3885 assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); 3886 } else { 3887 cSize = ZSTD_compressBlock_internal(cctx, 3888 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, 3889 ip, blockSize, 1 /* frame */); 3890 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); 3891 3892 if (cSize == 0) { /* block is not compressible */ 3893 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 3894 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 3895 } else { 3896 U32 const cBlockHeader = cSize == 1 ?
3897 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : 3898 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 3899 MEM_writeLE24(op, cBlockHeader); 3900 cSize += ZSTD_blockHeaderSize; 3901 } 3902 } 3903 3904 3905 ip += blockSize; 3906 assert(remaining >= blockSize); 3907 remaining -= blockSize; 3908 op += cSize; 3909 assert(dstCapacity >= cSize); 3910 dstCapacity -= cSize; 3911 cctx->isFirstBlock = 0; 3912 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", 3913 (unsigned)cSize); 3914 } } 3915 3916 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; 3917 return (size_t)(op-ostart); 3918 } 3919 3920 3921 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, 3922 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) 3923 { BYTE* const op = (BYTE*)dst; 3924 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ 3925 U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ 3926 U32 const checksumFlag = params->fParams.checksumFlag>0; 3927 U32 const windowSize = (U32)1 << params->cParams.windowLog; 3928 U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); 3929 BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); 3930 U32 const fcsCode = params->fParams.contentSizeFlag ? 3931 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ 3932 BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); 3933 size_t pos=0; 3934 3935 assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); 3936 RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, 3937 "dst buf is too small to fit worst-case frame header size."); 3938 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", 3939 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); 3940 if (params->format == ZSTD_f_zstd1) { 3941 MEM_writeLE32(dst, ZSTD_MAGICNUMBER); 3942 pos = 4; 3943 } 3944 op[pos++] = frameHeaderDescriptionByte; 3945 if (!singleSegment) op[pos++] = windowLogByte; 3946 switch(dictIDSizeCode) 3947 { 3948 default: 3949 assert(0); /* impossible */ 3950 ZSTD_FALLTHROUGH; 3951 case 0 : break; 3952 case 1 : op[pos] = (BYTE)(dictID); pos++; break; 3953 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; 3954 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; 3955 } 3956 switch(fcsCode) 3957 { 3958 default: 3959 assert(0); /* impossible */ 3960 ZSTD_FALLTHROUGH; 3961 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; 3962 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; 3963 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; 3964 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; 3965 } 3966 return pos; 3967 } 3968 3969 /* ZSTD_writeSkippableFrame_advanced() : 3970 * Writes out a skippable frame with the specified magic number variant (16 are supported), 3971 * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. 3972 * 3973 * Returns the total number of bytes written, or a ZSTD error code. 
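*
* Frame layout written (worked example, values assumed): for magicVariant == 5
* and srcSize == 10, the output is LE32(ZSTD_MAGIC_SKIPPABLE_START + 5), then
* LE32(10), then the 10 payload bytes; the function returns
* 10 + ZSTD_SKIPPABLEHEADERSIZE == 18.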
3974 */ 3975 size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, 3976 const void* src, size_t srcSize, unsigned magicVariant) { 3977 BYTE* op = (BYTE*)dst; 3978 RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, 3979 dstSize_tooSmall, "Not enough room for skippable frame"); 3980 RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); 3981 RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); 3982 3983 MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); 3984 MEM_writeLE32(op+4, (U32)srcSize); 3985 ZSTD_memcpy(op+8, src, srcSize); 3986 return srcSize + ZSTD_SKIPPABLEHEADERSIZE; 3987 } 3988 3989 /* ZSTD_writeLastEmptyBlock() : 3990 * output an empty Block with end-of-frame mark to complete a frame 3991 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 3992 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) 3993 */ 3994 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) 3995 { 3996 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, 3997 "dst buf is too small to write frame trailer empty block."); 3998 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ 3999 MEM_writeLE24(dst, cBlockHeader24); 4000 return ZSTD_blockHeaderSize; 4001 } 4002 } 4003 4004 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 4005 { 4006 RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, 4007 "wrong cctx stage"); 4008 RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, 4009 parameter_unsupported, 4010 "incompatible with ldm"); 4011 cctx->externSeqStore.seq = seq; 4012 cctx->externSeqStore.size = nbSeq; 4013 cctx->externSeqStore.capacity = nbSeq; 4014 cctx->externSeqStore.pos = 0; 4015 cctx->externSeqStore.posInSequence = 0; 4016 return 0; 4017 } 4018 4019 4020 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, 4021 void* dst, size_t dstCapacity, 4022 const void* src, size_t srcSize, 4023 U32 frame, U32 lastFrameChunk) 4024 { 4025 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 4026 size_t fhSize = 0; 4027 4028 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", 4029 cctx->stage, (unsigned)srcSize); 4030 RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, 4031 "missing init (ZSTD_compressBegin)"); 4032 4033 if (frame && (cctx->stage==ZSTDcs_init)) { 4034 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 4035 cctx->pledgedSrcSizePlusOne-1, cctx->dictID); 4036 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); 4037 assert(fhSize <= dstCapacity); 4038 dstCapacity -= fhSize; 4039 dst = (char*)dst + fhSize; 4040 cctx->stage = ZSTDcs_ongoing; 4041 } 4042 4043 if (!srcSize) return fhSize; /* do not generate an empty block if no input */ 4044 4045 if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) { 4046 ms->forceNonContiguous = 0; 4047 ms->nextToUpdate = ms->window.dictLimit; 4048 } 4049 if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { 4050 ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0); 4051 } 4052 4053 if (!frame) { 4054 /* overflow check and correction for block mode */ 4055 ZSTD_overflowCorrectIfNeeded( 4056 ms, &cctx->workspace, &cctx->appliedParams, 4057 src, (BYTE const*)src + srcSize); 4058 } 4059 4060 
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); 4061 { size_t const cSize = frame ? 4062 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : 4063 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); 4064 FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); 4065 cctx->consumedSrcSize += srcSize; 4066 cctx->producedCSize += (cSize + fhSize); 4067 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); 4068 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 4069 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); 4070 RETURN_ERROR_IF( 4071 cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, 4072 srcSize_wrong, 4073 "error : pledgedSrcSize = %u, while realSrcSize >= %u", 4074 (unsigned)cctx->pledgedSrcSizePlusOne-1, 4075 (unsigned)cctx->consumedSrcSize); 4076 } 4077 return cSize + fhSize; 4078 } 4079 } 4080 4081 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, 4082 void* dst, size_t dstCapacity, 4083 const void* src, size_t srcSize) 4084 { 4085 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); 4086 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); 4087 } 4088 4089 4090 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 4091 { 4092 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; 4093 assert(!ZSTD_checkCParams(cParams)); 4094 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); 4095 } 4096 4097 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 4098 { 4099 DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); 4100 { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); 4101 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } 4102 4103 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); 4104 } 4105 4106 /*! ZSTD_loadDictionaryContent() : 4107 * @return : 0, or an error code 4108 */ 4109 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, 4110 ldmState_t* ls, 4111 ZSTD_cwksp* ws, 4112 ZSTD_CCtx_params const* params, 4113 const void* src, size_t srcSize, 4114 ZSTD_dictTableLoadMethod_e dtlm) 4115 { 4116 const BYTE* ip = (const BYTE*) src; 4117 const BYTE* const iend = ip + srcSize; 4118 int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; 4119 4120 /* Assert that we the ms params match the params we're being given */ 4121 ZSTD_assertEqualCParams(params->cParams, ms->cParams); 4122 4123 if (srcSize > ZSTD_CHUNKSIZE_MAX) { 4124 /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. 4125 * Dictionaries right at the edge will immediately trigger overflow 4126 * correction, but I don't want to insert extra constraints here. 4127 */ 4128 U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; 4129 /* We must have cleared our windows when our source is this large. */ 4130 assert(ZSTD_window_isEmpty(ms->window)); 4131 if (loadLdmDict) 4132 assert(ZSTD_window_isEmpty(ls->window)); 4133 /* If the dictionary is too large, only load the suffix of the dictionary. 
*/ 4134 if (srcSize > maxDictSize) { 4135 ip = iend - maxDictSize; 4136 src = ip; 4137 srcSize = maxDictSize; 4138 } 4139 } 4140 4141 DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); 4142 ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); 4143 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); 4144 ms->forceNonContiguous = params->deterministicRefPrefix; 4145 4146 if (loadLdmDict) { 4147 ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); 4148 ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); 4149 } 4150 4151 if (srcSize <= HASH_READ_SIZE) return 0; 4152 4153 ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); 4154 4155 if (loadLdmDict) 4156 ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); 4157 4158 switch(params->cParams.strategy) 4159 { 4160 case ZSTD_fast: 4161 ZSTD_fillHashTable(ms, iend, dtlm); 4162 break; 4163 case ZSTD_dfast: 4164 ZSTD_fillDoubleHashTable(ms, iend, dtlm); 4165 break; 4166 4167 case ZSTD_greedy: 4168 case ZSTD_lazy: 4169 case ZSTD_lazy2: 4170 assert(srcSize >= HASH_READ_SIZE); 4171 if (ms->dedicatedDictSearch) { 4172 assert(ms->chainTable != NULL); 4173 ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); 4174 } else { 4175 assert(params->useRowMatchFinder != ZSTD_ps_auto); 4176 if (params->useRowMatchFinder == ZSTD_ps_enable) { 4177 size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); 4178 ZSTD_memset(ms->tagTable, 0, tagTableSize); 4179 ZSTD_row_update(ms, iend-HASH_READ_SIZE); 4180 DEBUGLOG(4, "Using row-based hash table for lazy dict"); 4181 } else { 4182 ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); 4183 DEBUGLOG(4, "Using chain-based hash table for lazy dict"); 4184 } 4185 } 4186 break; 4187 4188 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ 4189 case ZSTD_btopt: 4190 case ZSTD_btultra: 4191 case ZSTD_btultra2: 4192 assert(srcSize >= HASH_READ_SIZE); 4193 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); 4194 break; 4195 4196 default: 4197 assert(0); /* not possible : not a valid strategy id */ 4198 } 4199 4200 ms->nextToUpdate = (U32)(iend - ms->window.base); 4201 return 0; 4202 } 4203 4204 4205 /* Dictionaries that assign zero probability to symbols that show up cause problems 4206 * during FSE encoding. Mark dictionaries with zero-probability symbols as FSE_repeat_check, 4207 * since only dictionaries with 100% valid symbols can be assumed valid.
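*
* Example (counts assumed): a dictionary whose litlength table carries
*   normalizedCounter[] = { 10, 0, 5, ... }
* can never FSE-encode litlength code 1, so its table is tagged
* FSE_repeat_check and re-validated against each block's actual symbols.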
4208 */ 4209 static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) 4210 { 4211 U32 s; 4212 if (dictMaxSymbolValue < maxSymbolValue) { 4213 return FSE_repeat_check; 4214 } 4215 for (s = 0; s <= maxSymbolValue; ++s) { 4216 if (normalizedCounter[s] == 0) { 4217 return FSE_repeat_check; 4218 } 4219 } 4220 return FSE_repeat_valid; 4221 } 4222 4223 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, 4224 const void* const dict, size_t dictSize) 4225 { 4226 short offcodeNCount[MaxOff+1]; 4227 unsigned offcodeMaxValue = MaxOff; 4228 const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ 4229 const BYTE* const dictEnd = dictPtr + dictSize; 4230 dictPtr += 8; 4231 bs->entropy.huf.repeatMode = HUF_repeat_check; 4232 4233 { unsigned maxSymbolValue = 255; 4234 unsigned hasZeroWeights = 1; 4235 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, 4236 dictEnd-dictPtr, &hasZeroWeights); 4237 4238 /* We only set the loaded table as valid if it contains all non-zero 4239 * weights. Otherwise, we set it to check */ 4240 if (!hasZeroWeights) 4241 bs->entropy.huf.repeatMode = HUF_repeat_valid; 4242 4243 RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); 4244 RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); 4245 dictPtr += hufHeaderSize; 4246 } 4247 4248 { unsigned offcodeLog; 4249 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); 4250 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); 4251 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); 4252 /* fill all offset symbols to avoid garbage at end of table */ 4253 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 4254 bs->entropy.fse.offcodeCTable, 4255 offcodeNCount, MaxOff, offcodeLog, 4256 workspace, HUF_WORKSPACE_SIZE)), 4257 dictionary_corrupted, ""); 4258 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ 4259 dictPtr += offcodeHeaderSize; 4260 } 4261 4262 { short matchlengthNCount[MaxML+1]; 4263 unsigned matchlengthMaxValue = MaxML, matchlengthLog; 4264 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); 4265 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); 4266 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); 4267 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 4268 bs->entropy.fse.matchlengthCTable, 4269 matchlengthNCount, matchlengthMaxValue, matchlengthLog, 4270 workspace, HUF_WORKSPACE_SIZE)), 4271 dictionary_corrupted, ""); 4272 bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); 4273 dictPtr += matchlengthHeaderSize; 4274 } 4275 4276 { short litlengthNCount[MaxLL+1]; 4277 unsigned litlengthMaxValue = MaxLL, litlengthLog; 4278 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); 4279 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); 4280 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); 4281 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( 4282 bs->entropy.fse.litlengthCTable, 4283 litlengthNCount, litlengthMaxValue, litlengthLog, 4284 workspace, HUF_WORKSPACE_SIZE)), 4285 
dictionary_corrupted, ""); 4286 bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); 4287 dictPtr += litlengthHeaderSize; 4288 } 4289 4290 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); 4291 bs->rep[0] = MEM_readLE32(dictPtr+0); 4292 bs->rep[1] = MEM_readLE32(dictPtr+4); 4293 bs->rep[2] = MEM_readLE32(dictPtr+8); 4294 dictPtr += 12; 4295 4296 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 4297 U32 offcodeMax = MaxOff; 4298 if (dictContentSize <= ((U32)-1) - 128 KB) { 4299 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ 4300 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ 4301 } 4302 /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ 4303 bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); 4304 4305 /* All repCodes must be <= dictContentSize and != 0 */ 4306 { U32 u; 4307 for (u=0; u<3; u++) { 4308 RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); 4309 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); 4310 } } } 4311 4312 return dictPtr - (const BYTE*)dict; 4313 } 4314 4315 /* Dictionary format : 4316 * See : 4317 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format 4318 */ 4319 /*! ZSTD_loadZstdDictionary() : 4320 * @return : dictID, or an error code 4321 * assumptions : magic number supposed already checked 4322 * dictSize supposed >= 8 4323 */ 4324 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, 4325 ZSTD_matchState_t* ms, 4326 ZSTD_cwksp* ws, 4327 ZSTD_CCtx_params const* params, 4328 const void* dict, size_t dictSize, 4329 ZSTD_dictTableLoadMethod_e dtlm, 4330 void* workspace) 4331 { 4332 const BYTE* dictPtr = (const BYTE*)dict; 4333 const BYTE* const dictEnd = dictPtr + dictSize; 4334 size_t dictID; 4335 size_t eSize; 4336 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 4337 assert(dictSize >= 8); 4338 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); 4339 4340 dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); 4341 eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); 4342 FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); 4343 dictPtr += eSize; 4344 4345 { 4346 size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 4347 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( 4348 ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); 4349 } 4350 return dictID; 4351 } 4352 4353 /* ZSTD_compress_insertDictionary() : 4354 * @return : dictID, or an error code */ 4355 static size_t 4356 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, 4357 ZSTD_matchState_t* ms, 4358 ldmState_t* ls, 4359 ZSTD_cwksp* ws, 4360 const ZSTD_CCtx_params* params, 4361 const void* dict, size_t dictSize, 4362 ZSTD_dictContentType_e dictContentType, 4363 ZSTD_dictTableLoadMethod_e dtlm, 4364 void* workspace) 4365 { 4366 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); 4367 if ((dict==NULL) || (dictSize<8)) { 4368 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); 4369 return 0; 4370 } 4371 4372 ZSTD_reset_compressedBlockState(bs); 4373 4374 /* dict restricted modes */ 4375 if (dictContentType == ZSTD_dct_rawContent) 4376 return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); 4377 4378 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { 4379 if (dictContentType == ZSTD_dct_auto) { 4380 DEBUGLOG(4, "raw content dictionary detected"); 4381 return ZSTD_loadDictionaryContent( 4382 ms, ls, ws, params, dict, dictSize, dtlm); 4383 } 4384 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); 4385 assert(0); /* impossible */ 4386 } 4387 4388 /* dict as full zstd dictionary */ 4389 return ZSTD_loadZstdDictionary( 4390 bs, ms, ws, params, dict, dictSize, dtlm, workspace); 4391 } 4392 4393 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) 4394 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) 4395 4396 /*! ZSTD_compressBegin_internal() : 4397 * @return : 0, or an error code */ 4398 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, 4399 const void* dict, size_t dictSize, 4400 ZSTD_dictContentType_e dictContentType, 4401 ZSTD_dictTableLoadMethod_e dtlm, 4402 const ZSTD_CDict* cdict, 4403 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, 4404 ZSTD_buffered_policy_e zbuff) 4405 { 4406 size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; 4407 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); 4408 /* params are supposed to be fully validated at this point */ 4409 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); 4410 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 4411 if ( (cdict) 4412 && (cdict->dictContentSize > 0) 4413 && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF 4414 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER 4415 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN 4416 || cdict->compressionLevel == 0) 4417 && (params->attachDictPref != ZSTD_dictForceLoad) ) { 4418 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); 4419 } 4420 4421 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 4422 dictContentSize, 4423 ZSTDcrp_makeClean, zbuff) , ""); 4424 { size_t const dictID = cdict ? 
ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                        cctx->entropyWorkspace)
                  : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
        cctx->dictContentSize = dictContentSize;
    }
    return 0;
}

size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}

/*! ZSTD_compressBegin_advanced() :
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}

size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;
    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
}
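
/* A minimal usage sketch of the block-level begin/continue/end API above
 * (illustrative only; `dst`, `src` and error handling are assumed to be
 * provided by the caller, and prior input fed to ZSTD_compressContinue()
 * must remain accessible and unmodified) :
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t const initErr = ZSTD_compressBegin(cctx, 3);   // level 3, no dict
 *     // optionally feed intermediate chunks with ZSTD_compressContinue(),
 *     // then close the frame :
 *     size_t const cSize = ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 *     ZSTD_freeCCtx(cctx);
 *
 * ZSTD_compressEnd() writes the last block plus the frame epilogue produced
 * by ZSTD_writeEpilogue() below. */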
/*! ZSTD_writeEpilogue() :
 *  Ends a frame.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}

void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
    (void)cctx;
    (void)extraCSize;
}

size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    ZSTD_CCtx_trace(cctx, endResult);
    return cSize + endResult;
}
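
/* Worked example of the epilogue produced above, following the logic of
 * ZSTD_writeEpilogue() as written : the closing empty raw block encodes
 * lastBlock=1, blockType=bt_raw, blockSize=0, i.e. the 3-byte little-endian
 * header 0x01 0x00 0x00. If checksumFlag is set, the low 32 bits of the
 * XXH64 digest follow. So an epilogue costs 3 bytes, or 7 with checksum. */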
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}

/* Internal */
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict, size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) , "");
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
}

size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}

size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(cctx);
    return result;
}
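
/* One-shot usage sketch for ZSTD_compress() above (illustrative; assumes
 * `src`/`srcSize` exist and that the caller checks ZSTD_isError()) :
 *
 *     size_t const bound = ZSTD_compressBound(srcSize);
 *     void* const dst = malloc(bound);
 *     size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 3);
 *     // on success, dst holds a complete zstd frame of cSize bytes
 */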
/* =====   Dictionary API   ===== */

/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
          * in case we are using DDS with row-hash. */
         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}

size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}

size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace */
    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
         + ZSTD_cwksp_sizeof(&cdict->workspace);
}

static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        cdict->dictContent = dictBuffer;
    } else {
        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        cdict->dictContent = internalBuffer;
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &params.cParams,
        params.useRowMatchFinder,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict), "");
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}
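
/* Note on dictLoadMethod, following the logic above : with ZSTD_dlm_byRef the
 * CDict merely points at the caller's buffer, which must then outlive the
 * CDict and stay unmodified; with ZSTD_dlm_byCopy the content is copied into
 * the CDict's own workspace. A hedged sketch (caller-owned `dictBuf`) :
 *
 *     // byRef : cheap, but dictBuf must remain valid for the CDict's lifetime
 *     ZSTD_CDict* const cdR = ZSTD_createCDict_byReference(dictBuf, dictBufSize, 3);
 *     // byCopy : self-contained; dictBuf may be freed immediately afterwards
 *     ZSTD_CDict* const cdC = ZSTD_createCDict(dictBuf, dictBufSize, 3);
 */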
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod,
        ZSTD_compressionParameters cParams,
        ZSTD_paramSwitch_e useRowMatchFinder,
        U32 enableDedicatedDictSearch,
        ZSTD_customMem customMem)
{
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) {
            ZSTD_customFree(workspace, customMem);
            return NULL;
        }

        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);

        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = ZSTD_NO_CLEVEL;  /* signals advanced API usage */
        cdict->useRowMatchFinder = useRowMatchFinder;
        return cdict;
    }
}

ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
    ZSTD_CCtxParams_init(&cctxParams, 0);
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
    return ZSTD_createCDict_advanced2(
        dictBuffer, dictSize,
        dictLoadMethod, dictContentType,
        &cctxParams, customMem);
}

ZSTD_CDict* ZSTD_createCDict_advanced2(
        const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod,
        ZSTD_dictContentType_e dictContentType,
        const ZSTD_CCtx_params* originalCctxParams,
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams = *originalCctxParams;
    ZSTD_compressionParameters cParams;
    ZSTD_CDict* cdict;

    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;

    if (cctxParams.enableDedicatedDictSearch) {
        cParams = ZSTD_dedicatedDictSearch_getCParams(
            cctxParams.compressionLevel, dictSize);
        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
    } else {
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
        /* Fall back to non-DDSS params */
        cctxParams.enableDedicatedDictSearch = 0;
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
    cctxParams.cParams = cParams;
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);

    cdict = ZSTD_createCDict_advanced_internal(dictSize,
                        dictLoadMethod, cctxParams.cParams,
                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                        customMem);
    /* guard against allocation failure before initializing the CDict */
    if (!cdict) return NULL;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              cctxParams) )) {
        ZSTD_freeCDict(cdict);
        return NULL;
    }

    return cdict;
}
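
/* Sketch of the advanced creation path above (illustrative; the parameter
 * values are arbitrary examples, and ZSTD_c_enableDedicatedDictSearch is an
 * experimental parameter) :
 *
 *     ZSTD_CCtx_params* const ccp = ZSTD_createCCtxParams();
 *     ZSTD_CCtxParams_setParameter(ccp, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtxParams_setParameter(ccp, ZSTD_c_enableDedicatedDictSearch, 1);
 *     ZSTD_CDict* const cd = ZSTD_createCDict_advanced2(dictBuf, dictBufSize,
 *                                ZSTD_dlm_byCopy, ZSTD_dct_auto,
 *                                ccp, ZSTD_defaultCMem);
 *     ZSTD_freeCCtxParams(ccp);
 */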
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                        ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                                        cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}

ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                        ZSTD_dlm_byRef, ZSTD_dct_auto,
                                                        cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}

size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = cdict->customMem;
        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
        ZSTD_cwksp_free(&cdict->workspace, cMem);
        if (!cdictInWorkspace) {
            ZSTD_customFree(cdict, cMem);
        }
        return 0;
    }
}
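
/* For allocation-free setups, the static variant below emplaces the CDict in
 * caller-provided memory. A hedged sketch (sizes and level are illustrative;
 * the workspace must be 8-byte aligned) :
 *
 *     size_t const wkspSize = ZSTD_estimateCDictSize(dictBufSize, 3);
 *     void* const wksp = malloc(wkspSize);
 *     const ZSTD_CDict* const cd = ZSTD_initStaticCDict(wksp, wkspSize,
 *                                      dictBuf, dictBufSize,
 *                                      ZSTD_dlm_byCopy, ZSTD_dct_auto,
 *                                      ZSTD_getCParams(3, 0, dictBufSize));
 *     // no ZSTD_freeCDict() here : the caller owns and frees wksp itself
 */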
/*! ZSTD_initStaticCDict() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             The provided pointer must be 8-byte aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevant cParams.
 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                            void* workspace, size_t workspaceSize,
                      const void* dict, size_t dictSize,
                            ZSTD_dictLoadMethod_e dictLoadMethod,
                            ZSTD_dictContentType_e dictContentType,
                            ZSTD_compressionParameters cParams)
{
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {   ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    ZSTD_CCtxParams_init(&params, 0);
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
    cdict->useRowMatchFinder = useRowMatchFinder;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              params) ))
        return NULL;

    return cdict;
}

ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
{
    assert(cdict != NULL);
    return cdict->matchState.cParams;
}

/*! ZSTD_getDictID_fromCDict() :
 *  Provides the dictID of the dictionary loaded into `cdict`.
 *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;
    return cdict->dictID;
}

/* ZSTD_compressBegin_usingCDict_internal() :
 * Implementation of various ZSTD_compressBegin_usingCDict* functions.
 */
static size_t ZSTD_compressBegin_usingCDict_internal(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
    /* Initialize the cctxParams from the cdict */
    {   ZSTD_parameters params;
        params.fParams = fParams;
        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
                        || cdict->compressionLevel == 0 ) ?
                ZSTD_getCParamsFromCDict(cdict)
              : ZSTD_getCParams(cdict->compressionLevel,
                                pledgedSrcSize,
                                cdict->dictContentSize);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
    }
    /* Increase window log to fit the entire dictionary and source if the
     * source size is known. Limit the increase to 19, which is the
     * window log for compression level 1 with the largest source size.
     */
    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
        U32 const limitedSrcLog = limitedSrcSize > 1 ?
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; 4966 cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); 4967 } 4968 return ZSTD_compressBegin_internal(cctx, 4969 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, 4970 cdict, 4971 &cctxParams, pledgedSrcSize, 4972 ZSTDb_not_buffered); 4973 } 4974 4975 4976 /* ZSTD_compressBegin_usingCDict_advanced() : 4977 * This function is DEPRECATED. 4978 * cdict must be != NULL */ 4979 size_t ZSTD_compressBegin_usingCDict_advanced( 4980 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, 4981 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) 4982 { 4983 return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); 4984 } 4985 4986 /* ZSTD_compressBegin_usingCDict() : 4987 * cdict must be != NULL */ 4988 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 4989 { 4990 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 4991 return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); 4992 } 4993 4994 /*! ZSTD_compress_usingCDict_internal(): 4995 * Implementation of various ZSTD_compress_usingCDict* functions. 4996 */ 4997 static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, 4998 void* dst, size_t dstCapacity, 4999 const void* src, size_t srcSize, 5000 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 5001 { 5002 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ 5003 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 5004 } 5005 5006 /*! ZSTD_compress_usingCDict_advanced(): 5007 * This function is DEPRECATED. 5008 */ 5009 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 5010 void* dst, size_t dstCapacity, 5011 const void* src, size_t srcSize, 5012 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 5013 { 5014 return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); 5015 } 5016 5017 /*! ZSTD_compress_usingCDict() : 5018 * Compression using a digested Dictionary. 5019 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
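 * Example (illustrative; assumes `cd` was created with ZSTD_createCDict()
 * and that the caller checks ZSTD_isError() on the result) :
 *     size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
 *                                                   src, srcSize, cd);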
5020 * Note that compression parameters are decided at CDict creation time 5021 * while frame parameters are hardcoded */ 5022 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, 5023 void* dst, size_t dstCapacity, 5024 const void* src, size_t srcSize, 5025 const ZSTD_CDict* cdict) 5026 { 5027 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 5028 return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); 5029 } 5030 5031 5032 5033 /* ****************************************************************** 5034 * Streaming 5035 ********************************************************************/ 5036 5037 ZSTD_CStream* ZSTD_createCStream(void) 5038 { 5039 DEBUGLOG(3, "ZSTD_createCStream"); 5040 return ZSTD_createCStream_advanced(ZSTD_defaultCMem); 5041 } 5042 5043 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) 5044 { 5045 return ZSTD_initStaticCCtx(workspace, workspaceSize); 5046 } 5047 5048 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) 5049 { /* CStream and CCtx are now same object */ 5050 return ZSTD_createCCtx_advanced(customMem); 5051 } 5052 5053 size_t ZSTD_freeCStream(ZSTD_CStream* zcs) 5054 { 5055 return ZSTD_freeCCtx(zcs); /* same object */ 5056 } 5057 5058 5059 5060 /*====== Initialization ======*/ 5061 5062 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } 5063 5064 size_t ZSTD_CStreamOutSize(void) 5065 { 5066 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; 5067 } 5068 5069 static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) 5070 { 5071 if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) 5072 return ZSTD_cpm_attachDict; 5073 else 5074 return ZSTD_cpm_noAttachDict; 5075 } 5076 5077 /* ZSTD_resetCStream(): 5078 * pledgedSrcSize == 0 means "unknown" */ 5079 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) 5080 { 5081 /* temporary : 0 interpreted as "unknown" during transition period. 5082 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 5083 * 0 will be interpreted as "empty" in the future. 5084 */ 5085 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; 5086 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); 5087 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 5088 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 5089 return 0; 5090 } 5091 5092 /*! ZSTD_initCStream_internal() : 5093 * Note : for lib/compress only. Used by zstdmt_compress.c. 
5094 * Assumption 1 : params are valid 5095 * Assumption 2 : either dict, or cdict, is defined, not both */ 5096 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 5097 const void* dict, size_t dictSize, const ZSTD_CDict* cdict, 5098 const ZSTD_CCtx_params* params, 5099 unsigned long long pledgedSrcSize) 5100 { 5101 DEBUGLOG(4, "ZSTD_initCStream_internal"); 5102 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 5103 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 5104 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); 5105 zcs->requestedParams = *params; 5106 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 5107 if (dict) { 5108 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); 5109 } else { 5110 /* Dictionary is cleared if !cdict */ 5111 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 5112 } 5113 return 0; 5114 } 5115 5116 /* ZSTD_initCStream_usingCDict_advanced() : 5117 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ 5118 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, 5119 const ZSTD_CDict* cdict, 5120 ZSTD_frameParameters fParams, 5121 unsigned long long pledgedSrcSize) 5122 { 5123 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); 5124 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 5125 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); 5126 zcs->requestedParams.fParams = fParams; 5127 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 5128 return 0; 5129 } 5130 5131 /* note : cdict must outlive compression session */ 5132 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) 5133 { 5134 DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); 5135 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); 5136 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); 5137 return 0; 5138 } 5139 5140 5141 /* ZSTD_initCStream_advanced() : 5142 * pledgedSrcSize must be exact. 5143 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 5144 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ 5145 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, 5146 const void* dict, size_t dictSize, 5147 ZSTD_parameters params, unsigned long long pss) 5148 { 5149 /* for compatibility with older programs relying on this behavior. 5150 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. 5151 * This line will be removed in the future. 5152 */ 5153 U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}

size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}

size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}

size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    return 0;
}

/*======   Compression   ======*/

static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
{
    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
    if (hintInSize==0) hintInSize = cctx->blockSize;
    return hintInSize;
}

/* ZSTD_compressStream_generic():
 *  internal function for all *compressStream*() variants
 * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                          ZSTD_outBuffer* output,
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective const flushMode)
{
    const char* const istart = (const char*)input->src;
    const char* const iend = input->size != 0 ? istart + input->size : istart;
    const char* ip = input->pos != 0 ? istart + input->pos : istart;
    char* const ostart = (char*)output->dst;
    char* const oend = output->size != 0 ? ostart + output->size : ostart;
    char* op = output->pos != 0 ?
ostart + output->pos : ostart; 5220 U32 someMoreWork = 1; 5221 5222 /* check expectations */ 5223 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); 5224 if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { 5225 assert(zcs->inBuff != NULL); 5226 assert(zcs->inBuffSize > 0); 5227 } 5228 if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { 5229 assert(zcs->outBuff != NULL); 5230 assert(zcs->outBuffSize > 0); 5231 } 5232 assert(output->pos <= output->size); 5233 assert(input->pos <= input->size); 5234 assert((U32)flushMode <= (U32)ZSTD_e_end); 5235 5236 while (someMoreWork) { 5237 switch(zcs->streamStage) 5238 { 5239 case zcss_init: 5240 RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); 5241 5242 case zcss_load: 5243 if ( (flushMode == ZSTD_e_end) 5244 && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ 5245 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ 5246 && (zcs->inBuffPos == 0) ) { 5247 /* shortcut to compression pass directly into output buffer */ 5248 size_t const cSize = ZSTD_compressEnd(zcs, 5249 op, oend-op, ip, iend-ip); 5250 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); 5251 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); 5252 ip = iend; 5253 op += cSize; 5254 zcs->frameEnded = 1; 5255 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 5256 someMoreWork = 0; break; 5257 } 5258 /* complete loading into inBuffer in buffered mode */ 5259 if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { 5260 size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; 5261 size_t const loaded = ZSTD_limitCopy( 5262 zcs->inBuff + zcs->inBuffPos, toLoad, 5263 ip, iend-ip); 5264 zcs->inBuffPos += loaded; 5265 if (loaded != 0) 5266 ip += loaded; 5267 if ( (flushMode == ZSTD_e_continue) 5268 && (zcs->inBuffPos < zcs->inBuffTarget) ) { 5269 /* not enough input to fill full block : stop here */ 5270 someMoreWork = 0; break; 5271 } 5272 if ( (flushMode == ZSTD_e_flush) 5273 && (zcs->inBuffPos == zcs->inToCompress) ) { 5274 /* empty */ 5275 someMoreWork = 0; break; 5276 } 5277 } 5278 /* compress current block (note : this stage cannot be stopped in the middle) */ 5279 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); 5280 { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); 5281 void* cDst; 5282 size_t cSize; 5283 size_t oSize = oend-op; 5284 size_t const iSize = inputBuffered 5285 ? zcs->inBuffPos - zcs->inToCompress 5286 : MIN((size_t)(iend - ip), zcs->blockSize); 5287 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) 5288 cDst = op; /* compress into output buffer, to skip flush stage */ 5289 else 5290 cDst = zcs->outBuff, oSize = zcs->outBuffSize; 5291 if (inputBuffered) { 5292 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); 5293 cSize = lastBlock ? 5294 ZSTD_compressEnd(zcs, cDst, oSize, 5295 zcs->inBuff + zcs->inToCompress, iSize) : 5296 ZSTD_compressContinue(zcs, cDst, oSize, 5297 zcs->inBuff + zcs->inToCompress, iSize); 5298 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 5299 zcs->frameEnded = lastBlock; 5300 /* prepare next block */ 5301 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 5302 if (zcs->inBuffTarget > zcs->inBuffSize) 5303 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 5304 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 5305 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); 5306 if (!lastBlock) 5307 assert(zcs->inBuffTarget <= zcs->inBuffSize); 5308 zcs->inToCompress = zcs->inBuffPos; 5309 } else { 5310 unsigned const lastBlock = (ip + iSize == iend); 5311 assert(flushMode == ZSTD_e_end /* Already validated */); 5312 cSize = lastBlock ? 5313 ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : 5314 ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); 5315 /* Consume the input prior to error checking to mirror buffered mode. */ 5316 if (iSize > 0) 5317 ip += iSize; 5318 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 5319 zcs->frameEnded = lastBlock; 5320 if (lastBlock) 5321 assert(ip == iend); 5322 } 5323 if (cDst == op) { /* no need to flush */ 5324 op += cSize; 5325 if (zcs->frameEnded) { 5326 DEBUGLOG(5, "Frame completed directly in outBuffer"); 5327 someMoreWork = 0; 5328 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 5329 } 5330 break; 5331 } 5332 zcs->outBuffContentSize = cSize; 5333 zcs->outBuffFlushedSize = 0; 5334 zcs->streamStage = zcss_flush; /* pass-through to flush stage */ 5335 } 5336 ZSTD_FALLTHROUGH; 5337 case zcss_flush: 5338 DEBUGLOG(5, "flush stage"); 5339 assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); 5340 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; 5341 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), 5342 zcs->outBuff + zcs->outBuffFlushedSize, toFlush); 5343 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", 5344 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); 5345 if (flushed) 5346 op += flushed; 5347 zcs->outBuffFlushedSize += flushed; 5348 if (toFlush!=flushed) { 5349 /* flush not fully completed, presumably because dst is too small */ 5350 assert(op==oend); 5351 someMoreWork = 0; 5352 break; 5353 } 5354 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; 5355 if (zcs->frameEnded) { 5356 DEBUGLOG(5, "Frame completed on flush"); 5357 someMoreWork = 0; 5358 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 5359 break; 5360 } 5361 zcs->streamStage = zcss_load; 5362 break; 5363 } 5364 5365 default: /* impossible */ 5366 assert(0); 5367 } 5368 } 5369 5370 input->pos = ip - istart; 5371 output->pos = op - ostart; 5372 if (zcs->frameEnded) return 0; 5373 return ZSTD_nextInputSizeHint(zcs); 5374 } 5375 5376 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) 5377 { 5378 return ZSTD_nextInputSizeHint(cctx); 5379 5380 } 5381 5382 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 5383 { 5384 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); 5385 return ZSTD_nextInputSizeHint_MTorST(zcs); 5386 } 5387 5388 /* After a compression call set the expected input/output buffer. 5389 * This is validated at the start of the next compression call. 
5390 */ 5391 static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) 5392 { 5393 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 5394 cctx->expectedInBuffer = *input; 5395 } 5396 if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { 5397 cctx->expectedOutBufferSize = output->size - output->pos; 5398 } 5399 } 5400 5401 /* Validate that the input/output buffers match the expectations set by 5402 * ZSTD_setBufferExpectations. 5403 */ 5404 static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, 5405 ZSTD_outBuffer const* output, 5406 ZSTD_inBuffer const* input, 5407 ZSTD_EndDirective endOp) 5408 { 5409 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 5410 ZSTD_inBuffer const expect = cctx->expectedInBuffer; 5411 if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) 5412 RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); 5413 if (endOp != ZSTD_e_end) 5414 RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); 5415 } 5416 if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { 5417 size_t const outBufferSize = output->size - output->pos; 5418 if (cctx->expectedOutBufferSize != outBufferSize) 5419 RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); 5420 } 5421 return 0; 5422 } 5423 5424 static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, 5425 ZSTD_EndDirective endOp, 5426 size_t inSize) { 5427 ZSTD_CCtx_params params = cctx->requestedParams; 5428 ZSTD_prefixDict const prefixDict = cctx->prefixDict; 5429 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ 5430 ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ 5431 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ 5432 if (cctx->cdict && !cctx->localDict.cdict) { 5433 /* Let the cdict's compression level take priority over the requested params. 5434 * But do not take the cdict's compression level if the "cdict" is actually a localDict 5435 * generated from ZSTD_initLocalDict(). 5436 */ 5437 params.compressionLevel = cctx->cdict->compressionLevel; 5438 } 5439 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); 5440 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ 5441 { 5442 size_t const dictSize = prefixDict.dict 5443 ? prefixDict.dictSize 5444 : (cctx->cdict ? 
cctx->cdict->dictContentSize : 0);
        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &params, cctx->pledgedSrcSizePlusOne-1,
                dictSize, mode);
    }

    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);

    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
                cctx->cdict,
                &params, pledgedSrcSize,
                ZSTDb_buffered) , "");
        assert(cctx->appliedParams.nbWorkers == 0);
        cctx->inToCompress = 0;
        cctx->inBuffPos = 0;
        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
            /* for small input: avoid automatic flush on reaching end of block, since
             * it would require to add a 3-bytes null block to end frame
             */
            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
        } else {
            cctx->inBuffTarget = 0;
        }
        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
        cctx->streamStage = zcss_load;
        cctx->frameEnded = 0;
    }
    return 0;
}

size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions */
    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
    assert(cctx != NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
        ZSTD_setBufferExpectations(cctx, output, input);  /* Set initial buffer expectations now that we've initialized */
    }
    /* end of transparent initialization stage */

    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
    /* compression stage */
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    ZSTD_setBufferExpectations(cctx, output, input);
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize;  /* remaining to flush */
}

size_t ZSTD_compressStream2_simpleArgs (
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity, size_t* dstPos,
        const void* src, size_t srcSize, size_t* srcPos,
        ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return cErr;
}

size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                const void*
src, size_t srcSize) 5525 { 5526 ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; 5527 ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; 5528 DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); 5529 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); 5530 /* Enable stable input/output buffers. */ 5531 cctx->requestedParams.inBufferMode = ZSTD_bm_stable; 5532 cctx->requestedParams.outBufferMode = ZSTD_bm_stable; 5533 { size_t oPos = 0; 5534 size_t iPos = 0; 5535 size_t const result = ZSTD_compressStream2_simpleArgs(cctx, 5536 dst, dstCapacity, &oPos, 5537 src, srcSize, &iPos, 5538 ZSTD_e_end); 5539 /* Reset to the original values. */ 5540 cctx->requestedParams.inBufferMode = originalInBufferMode; 5541 cctx->requestedParams.outBufferMode = originalOutBufferMode; 5542 FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); 5543 if (result != 0) { /* compression not completed, due to lack of output space */ 5544 assert(oPos == dstCapacity); 5545 RETURN_ERROR(dstSize_tooSmall, ""); 5546 } 5547 assert(iPos == srcSize); /* all input is expected consumed */ 5548 return oPos; 5549 } 5550 } 5551 5552 typedef struct { 5553 U32 idx; /* Index in array of ZSTD_Sequence */ 5554 U32 posInSequence; /* Position within sequence at idx */ 5555 size_t posInSrc; /* Number of bytes given by sequences provided so far */ 5556 } ZSTD_sequencePosition; 5557 5558 /* ZSTD_validateSequence() : 5559 * @offCode : is presumed to follow format required by ZSTD_storeSeq() 5560 * @returns a ZSTD error code if sequence is not valid 5561 */ 5562 static size_t 5563 ZSTD_validateSequence(U32 offCode, U32 matchLength, 5564 size_t posInSrc, U32 windowLog, size_t dictSize) 5565 { 5566 U32 const windowSize = 1 << windowLog; 5567 /* posInSrc represents the amount of data the decoder would decode up to this point. 5568 * As long as the amount of data decoded is less than or equal to window size, offsets may be 5569 * larger than the total length of output decoded in order to reference the dict, even larger than 5570 * window size. After output surpasses windowSize, we're limited to windowSize offsets again. 5571 */ 5572 size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; 5573 RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); 5574 RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); 5575 return 0; 5576 } 5577 5578 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ 5579 static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) 5580 { 5581 U32 offCode = STORE_OFFSET(rawOffset); 5582 5583 if (!ll0 && rawOffset == rep[0]) { 5584 offCode = STORE_REPCODE_1; 5585 } else if (rawOffset == rep[1]) { 5586 offCode = STORE_REPCODE(2 - ll0); 5587 } else if (rawOffset == rep[2]) { 5588 offCode = STORE_REPCODE(3 - ll0); 5589 } else if (ll0 && rawOffset == rep[0] - 1) { 5590 offCode = STORE_REPCODE_3; 5591 } 5592 return offCode; 5593 } 5594 5595 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of 5596 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
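 *
 * Illustrative input (hypothetical values) : with explicit delimiters, each
 * block's sequences end with a (offset=0, matchLength=0) entry whose
 * litLength covers the block's trailing literals. For a 19-byte block :
 *
 *     ZSTD_Sequence seqs[] = {
 *         { 8, 3, 11, 0 },   // offset=8, litLength=3, matchLength=11
 *         { 0, 5,  0, 0 },   // delimiter : 5 trailing literals, end of block
 *     };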
5597 */ 5598 static size_t 5599 ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, 5600 ZSTD_sequencePosition* seqPos, 5601 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 5602 const void* src, size_t blockSize) 5603 { 5604 U32 idx = seqPos->idx; 5605 BYTE const* ip = (BYTE const*)(src); 5606 const BYTE* const iend = ip + blockSize; 5607 repcodes_t updatedRepcodes; 5608 U32 dictSize; 5609 5610 if (cctx->cdict) { 5611 dictSize = (U32)cctx->cdict->dictContentSize; 5612 } else if (cctx->prefixDict.dict) { 5613 dictSize = (U32)cctx->prefixDict.dictSize; 5614 } else { 5615 dictSize = 0; 5616 } 5617 ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); 5618 for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { 5619 U32 const litLength = inSeqs[idx].litLength; 5620 U32 const ll0 = (litLength == 0); 5621 U32 const matchLength = inSeqs[idx].matchLength; 5622 U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); 5623 ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); 5624 5625 DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 5626 if (cctx->appliedParams.validateSequences) { 5627 seqPos->posInSrc += litLength + matchLength; 5628 FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, 5629 cctx->appliedParams.cParams.windowLog, dictSize), 5630 "Sequence validation failed"); 5631 } 5632 RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, 5633 "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); 5634 ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); 5635 ip += matchLength + litLength; 5636 } 5637 ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); 5638 5639 if (inSeqs[idx].litLength) { 5640 DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); 5641 ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); 5642 ip += inSeqs[idx].litLength; 5643 seqPos->posInSrc += inSeqs[idx].litLength; 5644 } 5645 RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); 5646 seqPos->idx = idx+1; 5647 return 0; 5648 } 5649 5650 /* Returns the number of bytes to move the current read position back by. Only non-zero 5651 * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something 5652 * went wrong. 5653 * 5654 * This function will attempt to scan through blockSize bytes represented by the sequences 5655 * in inSeqs, storing any (partial) sequences. 5656 * 5657 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to 5658 * avoid splitting a match, or to avoid splitting a match such that it would produce a match 5659 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
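 *
 * Worked example (hypothetical numbers) : a match longer than blockSize may
 * be split at the block boundary. If the part carried into the next block
 * would fall below minMatch (say 3 bytes remain with minMatch == 4),
 * endPosInSequence moves back by 1 byte so the carried-over match keeps
 * minMatch length, and that 1 byte is returned as bytesAdjustment. A match
 * that does not exceed blockSize is never split : we back up to the end of
 * its literals instead and emit the consumed prefix as last literals.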
/* Returns the number of bytes to move the current read position back by. Only non-zero
 * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
 * went wrong.
 *
 * This function will attempt to scan through blockSize bytes represented by the sequences
 * in inSeqs, storing any (partial) sequences.
 *
 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
 * avoid splitting a match, or to avoid splitting a match such that it would produce a match
 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
 */
static size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                         const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                         const void* src, size_t blockSize)
{
    U32 idx = seqPos->idx;
    U32 startPosInSequence = seqPos->posInSequence;
    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
    size_t dictSize;
    BYTE const* ip = (BYTE const*)(src);
    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
    repcodes_t updatedRepcodes;
    U32 bytesAdjustment = 0;
    U32 finalMatchSplit = 0;

    if (cctx->cdict) {
        dictSize = cctx->cdict->dictContentSize;
    } else if (cctx->prefixDict.dict) {
        dictSize = cctx->prefixDict.dictSize;
    } else {
        dictSize = 0;
    }
    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
        const ZSTD_Sequence currSeq = inSeqs[idx];
        U32 litLength = currSeq.litLength;
        U32 matchLength = currSeq.matchLength;
        U32 const rawOffset = currSeq.offset;
        U32 offCode;

        /* Modify the sequence depending on where endPosInSequence lies */
        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
            if (startPosInSequence >= litLength) {
                startPosInSequence -= litLength;
                litLength = 0;
                matchLength -= startPosInSequence;
            } else {
                litLength -= startPosInSequence;
            }
            /* Move to the next sequence */
            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
            startPosInSequence = 0;
            idx++;
        } else {
            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
               does not reach the end of the match. So, we have to split the sequence */
            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
            if (endPosInSequence > litLength) {
                U32 firstHalfMatchLength;
                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
                    /* Only ever split the match if it is larger than the block size */
                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
                        /* Move endPosInSequence backward so that it creates a match of minMatch length */
                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        firstHalfMatchLength -= bytesAdjustment;
                    }
                    matchLength = firstHalfMatchLength;
                    /* Flag that we split the last match - after storing the sequence, exit the loop,
                       but keep the value of endPosInSequence */
                    finalMatchSplit = 1;
                } else {
                    /* Move the position in sequence backwards so that we don't split a match, and break to store
                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
                     * would cause the first half of the match to be too small.
                     */
                    bytesAdjustment = endPosInSequence - currSeq.litLength;
                    endPosInSequence = currSeq.litLength;
                    break;
                }
            } else {
                /* This sequence ends inside the literals, break to store the last literals */
                break;
            }
        }
        /* Check if this offset can be represented with a repcode */
        {   U32 const ll0 = (litLength == 0);
            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
        }

        if (cctx->appliedParams.validateSequences) {
            seqPos->posInSrc += litLength + matchLength;
            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                             "Sequence validation failed");
        }
        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
        ip += matchLength + litLength;
    }
    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
    seqPos->idx = idx;
    seqPos->posInSequence = endPosInSequence;
    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

    iend -= bytesAdjustment;
    if (ip != iend) {
        /* Store any last literals */
        U32 lastLLSize = (U32)(iend - ip);
        assert(ip <= iend);
        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
        seqPos->posInSrc += lastLLSize;
    }

    return bytesAdjustment;
}
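
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * the split arithmetic above with concrete numbers. A sequence of 2 literals
 * plus a 200-byte match straddles a block boundary with endPosInSequence == 201,
 * leaving a 1-byte second half; with a hypothetical minMatch of 4, the split
 * point is moved back 3 bytes so the next block still gets a legal match. */
#if 0
static void ZSTD_splitArithmeticExample(void)
{
    U32 const minMatch = 4;                /* hypothetical cParams.minMatch */
    U32 const litLength = 2, matchLength = 200;
    U32 endPosInSequence = 201;            /* startPosInSequence assumed 0 */
    U32 const firstHalf = endPosInSequence - litLength;                 /* 199 */
    U32 const secondHalf = litLength + matchLength - endPosInSequence;  /*   1 */
    U32 bytesAdjustment = 0;
    if (secondHalf < minMatch) {
        bytesAdjustment = minMatch - secondHalf;                        /*   3 */
        endPosInSequence -= bytesAdjustment;                            /* 198 */
    }
    assert(firstHalf - bytesAdjustment == 196);   /* match length stored in this block */
    assert(litLength + matchLength - endPosInSequence == minMatch);  /* next block's share */
}
#endif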
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                       const void* src, size_t blockSize);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
{
    ZSTD_sequenceCopier sequenceCopier = NULL;
    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
    } else if (mode == ZSTD_sf_noBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
    }
    assert(sequenceCopier != NULL);
    return sequenceCopier;
}
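
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * how a caller routes to one of the two copiers above, through the public
 * (experimental) ZSTD_c_blockDelimiters parameter. ZSTD_sf_noBlockDelimiters
 * selects ZSTD_copySequencesToSeqStoreNoBlockDelim(), which may split
 * sequences across block boundaries as described above. */
#if 0
static size_t ZSTD_selectNoDelimModeExample(ZSTD_CCtx* cctx)
{
    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
                                            ZSTD_sf_noBlockDelimiters),
                     "setting block delimiter mode failed");
    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1),
                     "enabling sequence validation failed");
    return 0;
}
#endif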
/* Compress, block-by-block, all of the sequences given.
 *
 * Returns the cumulative size of all compressed blocks (including their headers),
 * otherwise a ZSTD error.
 */
static size_t
ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                const void* src, size_t srcSize)
{
    size_t cSize = 0;
    U32 lastBlock;
    size_t blockSize;
    size_t compressedSeqsSize;
    size_t remaining = srcSize;
    ZSTD_sequencePosition seqPos = {0, 0, 0};

    BYTE const* ip = (BYTE const*)src;
    BYTE* op = (BYTE*)dst;
    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);

    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
    /* Special case: empty frame */
    if (remaining == 0) {
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
        cSize += ZSTD_blockHeaderSize;
    }

    while (remaining) {
        size_t cBlockSize;
        size_t additionalByteAdjustment;
        lastBlock = remaining <= cctx->blockSize;
        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
        ZSTD_resetSeqStore(&cctx->seqStore);
        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);

        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
        blockSize -= additionalByteAdjustment;

        /* If blocks are too small, emit as a nocompress block */
        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
            cSize += cBlockSize;
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            continue;
        }

        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                &cctx->appliedParams,
                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
                                blockSize,
                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                                cctx->bmi2);
        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);

        if (!cctx->isFirstBlock &&
            ZSTD_maybeRLE(&cctx->seqStore) &&
            ZSTD_isRLE(ip, blockSize)) {
            /* We don't want to emit our first block as a RLE even if it qualifies, because
             * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
             * This is only an issue for zstd <= v1.4.3
             */
            compressedSeqsSize = 1;
        }

        if (compressedSeqsSize == 0) {
            /* ZSTD_noCompressBlock writes the block header as well */
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
        } else if (compressedSeqsSize == 1) {
            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
        } else {
            U32 cBlockHeader;
            /* Error checking and repcodes update */
            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

            /* Write block header into beginning of block */
            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
            MEM_writeLE24(op, cBlockHeader);
            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
        }

        cSize += cBlockSize;
        DEBUGLOG(4, "cSize running total: %zu", cSize);

        if (lastBlock) {
            break;
        } else {
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            cctx->isFirstBlock = 0;
        }
    }

    return cSize;
}
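
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * the 3-byte block header layout written above : bit 0 = lastBlock,
 * bits 1-2 = block type, bits 3-23 = block size, e.g. a last, compressed
 * block whose payload is 1000 bytes. */
#if 0
static void ZSTD_blockHeaderExample(void)
{
    U32 const header = 1 /* lastBlock */ + (((U32)bt_compressed)<<1) + (1000u << 3);
    assert((header & 1) == 1);                          /* last block */
    assert(((header >> 1) & 3) == (U32)bt_compressed);  /* block type */
    assert((header >> 3) == 1000);                      /* compressed size */
}
#endif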
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        xxh64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}
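
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * a minimal end-to-end caller of ZSTD_compressSequences(), assuming the
 * caller already owns a sequence array ending in a block delimiter
 * (see the earlier sketch). All names are hypothetical. */
#if 0
static size_t ZSTD_compressWithSequencesExample(ZSTD_CCtx* cctx,
                                                void* dst, size_t dstCapacity,
                                                const ZSTD_Sequence* seqs, size_t nbSeqs,
                                                const void* src, size_t srcSize)
{
    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
                                            ZSTD_sf_explicitBlockDelimiters),
                     "setting block delimiter mode failed");
    return ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize);
}
#endif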
/*====== Finalize ======*/

/*! ZSTD_flushStream() :
 * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
}


size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
    FORWARD_IF_ERROR(remainingToFlush, "ZSTD_compressStream2 failed");
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
    /* single thread mode : attempt to calculate remaining to flush more precisely */
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
        return toFlush;
    }
}
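
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * draining a stream with ZSTD_endStream(), whose non-zero return value is the
 * (estimated) number of bytes still to flush. The output sink is left abstract. */
#if 0
static size_t ZSTD_drainStreamExample(ZSTD_CStream* zcs, void* outBuf, size_t outBufSize)
{
    size_t remaining;
    do {
        ZSTD_outBuffer output = { outBuf, outBufSize, 0 };
        remaining = ZSTD_endStream(zcs, &output);
        FORWARD_IF_ERROR(remaining, "ZSTD_endStream failed");
        /* output.pos bytes are now ready in outBuf : hand them to the sink here */
    } while (remaining > 0);
    return 0;
}
#endif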
/*-===== Pre-defined compression levels =====-*/
#include "clevels.h"

int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }

static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
    switch (cParams.strategy) {
    case ZSTD_fast:
    case ZSTD_dfast:
        break;
    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
        break;
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    }
    return cParams;
}

static int ZSTD_dedicatedDictSearch_isSupported(
        ZSTD_compressionParameters const* cParams)
{
    return (cParams->strategy >= ZSTD_greedy)
        && (cParams->strategy <= ZSTD_lazy2)
        && (cParams->hashLog > cParams->chainLog)
        && (cParams->chainLog <= 24);
}

/*
 * Reverses the adjustment applied to cparams when enabling dedicated dict
 * search. This is used to recover the params set to be used in the working
 * context. (Otherwise, those tables would also grow.)
 */
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams) {
    switch (cParams->strategy) {
    case ZSTD_fast:
    case ZSTD_dfast:
        break;
    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
        if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
            cParams->hashLog = ZSTD_HASHLOG_MIN;
        }
        break;
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    }
}

static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
    case ZSTD_cpm_createCDict:
        break;
    case ZSTD_cpm_attachDict:
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }
    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
    }
}

/*! ZSTD_getCParams_internal() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Note : srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
 *         Use dictSize == 0 for unknown or unused.
 *  Note : `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
    int row;
    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);

    /* row */
    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
    else row = compressionLevel;

    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
        /* acceleration factor */
        if (compressionLevel < 0) {
            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
            cp.targetLength = (unsigned)(-clampedCompressionLevel);
        }
        /* refine parameters based on srcSize & dictSize */
        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
    }
}

/*! ZSTD_getCParams() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
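
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * the public entry point promotes srcSizeHint 0 to ZSTD_CONTENTSIZE_UNKNOWN,
 * so both calls below walk the same table row. */
#if 0
static void ZSTD_getCParamsExample(void)
{
    ZSTD_compressionParameters const a = ZSTD_getCParams(3, 0, 0);
    ZSTD_compressionParameters const b = ZSTD_getCParams(3, ZSTD_CONTENTSIZE_UNKNOWN, 0);
    assert(a.windowLog == b.windowLog);   /* same row, same adjustment */
    (void)a; (void)b;
}
#endif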
/*! ZSTD_getParams_internal() :
 *  same idea as ZSTD_getCParams_internal()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
    ZSTD_memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
    return params;
}

/*! ZSTD_getParams() :
 *  same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
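
/* Editor's illustrative sketch (not part of the library, never compiled) :
 * ZSTD_getParams() returns the same cParams as ZSTD_getCParams(), plus frame
 * parameters zero-initialized except contentSizeFlag. */
#if 0
static void ZSTD_getParamsExample(unsigned long long srcSize)
{
    ZSTD_parameters const params = ZSTD_getParams(ZSTD_defaultCLevel(), srcSize, 0);
    assert(params.fParams.contentSizeFlag == 1);
    assert(params.fParams.checksumFlag == 0);   /* defaulted to off */
    (void)params;
}
#endif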