1 /* 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /*-************************************* 13 * Tuning parameters 14 ***************************************/ 15 #ifndef ZSTD_CLEVEL_DEFAULT 16 # define ZSTD_CLEVEL_DEFAULT 3 17 #endif 18 19 20 /*-************************************* 21 * Dependencies 22 ***************************************/ 23 #include <string.h> /* memset */ 24 #include "cpu.h" 25 #include "mem.h" 26 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ 27 #include "fse.h" 28 #define HUF_STATIC_LINKING_ONLY 29 #include "huf.h" 30 #include "zstd_compress_internal.h" 31 #include "zstd_fast.h" 32 #include "zstd_double_fast.h" 33 #include "zstd_lazy.h" 34 #include "zstd_opt.h" 35 #include "zstd_ldm.h" 36 37 38 /*-************************************* 39 * Helper functions 40 ***************************************/ 41 size_t ZSTD_compressBound(size_t srcSize) { 42 return ZSTD_COMPRESSBOUND(srcSize); 43 } 44 45 46 /*-************************************* 47 * Context memory management 48 ***************************************/ 49 struct ZSTD_CDict_s { 50 void* dictBuffer; 51 const void* dictContent; 52 size_t dictContentSize; 53 void* workspace; 54 size_t workspaceSize; 55 ZSTD_matchState_t matchState; 56 ZSTD_compressedBlockState_t cBlockState; 57 ZSTD_compressionParameters cParams; 58 ZSTD_customMem customMem; 59 U32 dictID; 60 }; /* typedef'd to ZSTD_CDict within "zstd.h" */ 61 62 ZSTD_CCtx* ZSTD_createCCtx(void) 63 { 64 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); 65 } 66 67 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) 68 { 69 ZSTD_STATIC_ASSERT(zcss_init==0); 70 
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); 71 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 72 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem); 73 if (!cctx) return NULL; 74 cctx->customMem = customMem; 75 cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; 76 cctx->requestedParams.fParams.contentSizeFlag = 1; 77 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); 78 return cctx; 79 } 80 } 81 82 ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) 83 { 84 ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; 85 if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ 86 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ 87 memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ 88 cctx->staticSize = workspaceSize; 89 cctx->workSpace = (void*)(cctx+1); 90 cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); 91 92 /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ 93 if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; 94 assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ 95 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; 96 cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; 97 { 98 void* const ptr = cctx->blockState.nextCBlock + 1; 99 cctx->entropyWorkspace = (U32*)ptr; 100 } 101 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); 102 return cctx; 103 } 104 105 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) 106 { 107 if (cctx==NULL) return 0; /* support free on NULL */ 108 if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ 109 ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; 110 ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; 111 #ifdef ZSTD_MULTITHREAD 112 ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx 
= NULL; 113 #endif 114 ZSTD_free(cctx, cctx->customMem); 115 return 0; /* reserved as a potential error code in the future */ 116 } 117 118 119 static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) 120 { 121 #ifdef ZSTD_MULTITHREAD 122 return ZSTDMT_sizeof_CCtx(cctx->mtctx); 123 #else 124 (void) cctx; 125 return 0; 126 #endif 127 } 128 129 130 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) 131 { 132 if (cctx==NULL) return 0; /* support sizeof on NULL */ 133 return sizeof(*cctx) + cctx->workSpaceSize 134 + ZSTD_sizeof_CDict(cctx->cdictLocal) 135 + ZSTD_sizeof_mtctx(cctx); 136 } 137 138 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) 139 { 140 return ZSTD_sizeof_CCtx(zcs); /* same object */ 141 } 142 143 /* private API call, for dictBuilder only */ 144 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } 145 146 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 147 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) 148 { 149 ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); 150 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; 151 if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; 152 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; 153 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; 154 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; 155 if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength; 156 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; 157 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; 158 return cParams; 159 } 160 161 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( 162 ZSTD_compressionParameters cParams) 163 { 164 ZSTD_CCtx_params 
cctxParams; 165 memset(&cctxParams, 0, sizeof(cctxParams)); 166 cctxParams.cParams = cParams; 167 cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ 168 assert(!ZSTD_checkCParams(cParams)); 169 cctxParams.fParams.contentSizeFlag = 1; 170 return cctxParams; 171 } 172 173 static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( 174 ZSTD_customMem customMem) 175 { 176 ZSTD_CCtx_params* params; 177 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 178 params = (ZSTD_CCtx_params*)ZSTD_calloc( 179 sizeof(ZSTD_CCtx_params), customMem); 180 if (!params) { return NULL; } 181 params->customMem = customMem; 182 params->compressionLevel = ZSTD_CLEVEL_DEFAULT; 183 params->fParams.contentSizeFlag = 1; 184 return params; 185 } 186 187 ZSTD_CCtx_params* ZSTD_createCCtxParams(void) 188 { 189 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); 190 } 191 192 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) 193 { 194 if (params == NULL) { return 0; } 195 ZSTD_free(params, params->customMem); 196 return 0; 197 } 198 199 size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) 200 { 201 return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); 202 } 203 204 size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { 205 if (!cctxParams) { return ERROR(GENERIC); } 206 memset(cctxParams, 0, sizeof(*cctxParams)); 207 cctxParams->compressionLevel = compressionLevel; 208 cctxParams->fParams.contentSizeFlag = 1; 209 return 0; 210 } 211 212 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) 213 { 214 if (!cctxParams) { return ERROR(GENERIC); } 215 CHECK_F( ZSTD_checkCParams(params.cParams) ); 216 memset(cctxParams, 0, sizeof(*cctxParams)); 217 cctxParams->cParams = params.cParams; 218 cctxParams->fParams = params.fParams; 219 cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined 
*/ 220 assert(!ZSTD_checkCParams(params.cParams)); 221 return 0; 222 } 223 224 /* ZSTD_assignParamsToCCtxParams() : 225 * params is presumed valid at this stage */ 226 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( 227 ZSTD_CCtx_params cctxParams, ZSTD_parameters params) 228 { 229 ZSTD_CCtx_params ret = cctxParams; 230 ret.cParams = params.cParams; 231 ret.fParams = params.fParams; 232 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ 233 assert(!ZSTD_checkCParams(params.cParams)); 234 return ret; 235 } 236 237 #define CLAMPCHECK(val,min,max) { \ 238 if (((val)<(min)) | ((val)>(max))) { \ 239 return ERROR(parameter_outOfBound); \ 240 } } 241 242 243 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) 244 { 245 switch(param) 246 { 247 case ZSTD_p_compressionLevel: 248 case ZSTD_p_hashLog: 249 case ZSTD_p_chainLog: 250 case ZSTD_p_searchLog: 251 case ZSTD_p_minMatch: 252 case ZSTD_p_targetLength: 253 case ZSTD_p_compressionStrategy: 254 case ZSTD_p_compressLiterals: 255 return 1; 256 257 case ZSTD_p_format: 258 case ZSTD_p_windowLog: 259 case ZSTD_p_contentSizeFlag: 260 case ZSTD_p_checksumFlag: 261 case ZSTD_p_dictIDFlag: 262 case ZSTD_p_forceMaxWindow : 263 case ZSTD_p_nbWorkers: 264 case ZSTD_p_jobSize: 265 case ZSTD_p_overlapSizeLog: 266 case ZSTD_p_enableLongDistanceMatching: 267 case ZSTD_p_ldmHashLog: 268 case ZSTD_p_ldmMinMatch: 269 case ZSTD_p_ldmBucketSizeLog: 270 case ZSTD_p_ldmHashEveryLog: 271 default: 272 return 0; 273 } 274 } 275 276 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value) 277 { 278 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value); 279 if (cctx->streamStage != zcss_init) { 280 if (ZSTD_isUpdateAuthorized(param)) { 281 cctx->cParamsChanged = 1; 282 } else { 283 return ERROR(stage_wrong); 284 } } 285 286 switch(param) 287 { 288 case ZSTD_p_format : 289 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, 
param, value); 290 291 case ZSTD_p_compressionLevel: 292 if (cctx->cdict) return ERROR(stage_wrong); 293 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 294 295 case ZSTD_p_windowLog: 296 case ZSTD_p_hashLog: 297 case ZSTD_p_chainLog: 298 case ZSTD_p_searchLog: 299 case ZSTD_p_minMatch: 300 case ZSTD_p_targetLength: 301 case ZSTD_p_compressionStrategy: 302 if (cctx->cdict) return ERROR(stage_wrong); 303 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 304 305 case ZSTD_p_compressLiterals: 306 case ZSTD_p_contentSizeFlag: 307 case ZSTD_p_checksumFlag: 308 case ZSTD_p_dictIDFlag: 309 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 310 311 case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize, 312 * even when referencing into Dictionary content. 313 * default : 0 when using a CDict, 1 when using a Prefix */ 314 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 315 316 case ZSTD_p_nbWorkers: 317 if ((value>0) && cctx->staticSize) { 318 return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ 319 } 320 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 321 322 case ZSTD_p_jobSize: 323 case ZSTD_p_overlapSizeLog: 324 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 325 326 case ZSTD_p_enableLongDistanceMatching: 327 case ZSTD_p_ldmHashLog: 328 case ZSTD_p_ldmMinMatch: 329 case ZSTD_p_ldmBucketSizeLog: 330 case ZSTD_p_ldmHashEveryLog: 331 if (cctx->cdict) return ERROR(stage_wrong); 332 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); 333 334 default: return ERROR(parameter_unsupported); 335 } 336 } 337 338 size_t ZSTD_CCtxParam_setParameter( 339 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned value) 340 { 341 DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%u, %u)", (U32)param, value); 342 switch(param) 343 { 344 case ZSTD_p_format : 345 if (value > 
(unsigned)ZSTD_f_zstd1_magicless) 346 return ERROR(parameter_unsupported); 347 CCtxParams->format = (ZSTD_format_e)value; 348 return (size_t)CCtxParams->format; 349 350 case ZSTD_p_compressionLevel : { 351 int cLevel = (int)value; /* cast expected to restore negative sign */ 352 if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); 353 if (cLevel) { /* 0 : does not change current level */ 354 CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */ 355 CCtxParams->compressionLevel = cLevel; 356 } 357 if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; 358 return 0; /* return type (size_t) cannot represent negative values */ 359 } 360 361 case ZSTD_p_windowLog : 362 if (value>0) /* 0 => use default */ 363 CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); 364 CCtxParams->cParams.windowLog = value; 365 return CCtxParams->cParams.windowLog; 366 367 case ZSTD_p_hashLog : 368 if (value>0) /* 0 => use default */ 369 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); 370 CCtxParams->cParams.hashLog = value; 371 return CCtxParams->cParams.hashLog; 372 373 case ZSTD_p_chainLog : 374 if (value>0) /* 0 => use default */ 375 CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); 376 CCtxParams->cParams.chainLog = value; 377 return CCtxParams->cParams.chainLog; 378 379 case ZSTD_p_searchLog : 380 if (value>0) /* 0 => use default */ 381 CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); 382 CCtxParams->cParams.searchLog = value; 383 return value; 384 385 case ZSTD_p_minMatch : 386 if (value>0) /* 0 => use default */ 387 CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); 388 CCtxParams->cParams.searchLength = value; 389 return CCtxParams->cParams.searchLength; 390 391 case ZSTD_p_targetLength : 392 /* all values are valid. 
0 => use default */ 393 CCtxParams->cParams.targetLength = value; 394 return CCtxParams->cParams.targetLength; 395 396 case ZSTD_p_compressionStrategy : 397 if (value>0) /* 0 => use default */ 398 CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); 399 CCtxParams->cParams.strategy = (ZSTD_strategy)value; 400 return (size_t)CCtxParams->cParams.strategy; 401 402 case ZSTD_p_compressLiterals: 403 CCtxParams->disableLiteralCompression = !value; 404 return !CCtxParams->disableLiteralCompression; 405 406 case ZSTD_p_contentSizeFlag : 407 /* Content size written in frame header _when known_ (default:1) */ 408 DEBUGLOG(4, "set content size flag = %u", (value>0)); 409 CCtxParams->fParams.contentSizeFlag = value > 0; 410 return CCtxParams->fParams.contentSizeFlag; 411 412 case ZSTD_p_checksumFlag : 413 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ 414 CCtxParams->fParams.checksumFlag = value > 0; 415 return CCtxParams->fParams.checksumFlag; 416 417 case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ 418 DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); 419 CCtxParams->fParams.noDictIDFlag = !value; 420 return !CCtxParams->fParams.noDictIDFlag; 421 422 case ZSTD_p_forceMaxWindow : 423 CCtxParams->forceWindow = (value > 0); 424 return CCtxParams->forceWindow; 425 426 case ZSTD_p_nbWorkers : 427 #ifndef ZSTD_MULTITHREAD 428 if (value>0) return ERROR(parameter_unsupported); 429 return 0; 430 #else 431 return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); 432 #endif 433 434 case ZSTD_p_jobSize : 435 #ifndef ZSTD_MULTITHREAD 436 return ERROR(parameter_unsupported); 437 #else 438 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); 439 #endif 440 441 case ZSTD_p_overlapSizeLog : 442 #ifndef ZSTD_MULTITHREAD 443 return ERROR(parameter_unsupported); 444 #else 445 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, 
value); 446 #endif 447 448 case ZSTD_p_enableLongDistanceMatching : 449 CCtxParams->ldmParams.enableLdm = (value>0); 450 return CCtxParams->ldmParams.enableLdm; 451 452 case ZSTD_p_ldmHashLog : 453 if (value>0) /* 0 ==> auto */ 454 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); 455 CCtxParams->ldmParams.hashLog = value; 456 return CCtxParams->ldmParams.hashLog; 457 458 case ZSTD_p_ldmMinMatch : 459 if (value>0) /* 0 ==> default */ 460 CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); 461 CCtxParams->ldmParams.minMatchLength = value; 462 return CCtxParams->ldmParams.minMatchLength; 463 464 case ZSTD_p_ldmBucketSizeLog : 465 if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) 466 return ERROR(parameter_outOfBound); 467 CCtxParams->ldmParams.bucketSizeLog = value; 468 return CCtxParams->ldmParams.bucketSizeLog; 469 470 case ZSTD_p_ldmHashEveryLog : 471 if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) 472 return ERROR(parameter_outOfBound); 473 CCtxParams->ldmParams.hashEveryLog = value; 474 return CCtxParams->ldmParams.hashEveryLog; 475 476 default: return ERROR(parameter_unsupported); 477 } 478 } 479 480 /** ZSTD_CCtx_setParametersUsingCCtxParams() : 481 * just applies `params` into `cctx` 482 * no action is performed, parameters are merely stored. 483 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. 484 * This is possible even if a compression is ongoing. 485 * In which case, new parameters will be applied on the fly, starting with next compression job. 
486 */ 487 size_t ZSTD_CCtx_setParametersUsingCCtxParams( 488 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) 489 { 490 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); 491 if (cctx->cdict) return ERROR(stage_wrong); 492 493 cctx->requestedParams = *params; 494 return 0; 495 } 496 497 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) 498 { 499 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); 500 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); 501 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; 502 return 0; 503 } 504 505 size_t ZSTD_CCtx_loadDictionary_advanced( 506 ZSTD_CCtx* cctx, const void* dict, size_t dictSize, 507 ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) 508 { 509 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); 510 if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ 511 DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); 512 ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */ 513 if (dict==NULL || dictSize==0) { /* no dictionary mode */ 514 cctx->cdictLocal = NULL; 515 cctx->cdict = NULL; 516 } else { 517 ZSTD_compressionParameters const cParams = 518 ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); 519 cctx->cdictLocal = ZSTD_createCDict_advanced( 520 dict, dictSize, 521 dictLoadMethod, dictContentType, 522 cParams, cctx->customMem); 523 cctx->cdict = cctx->cdictLocal; 524 if (cctx->cdictLocal == NULL) 525 return ERROR(memory_allocation); 526 } 527 return 0; 528 } 529 530 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( 531 ZSTD_CCtx* cctx, const void* dict, size_t dictSize) 532 { 533 return ZSTD_CCtx_loadDictionary_advanced( 534 cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); 535 } 536 537 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, 
size_t dictSize) 538 { 539 return ZSTD_CCtx_loadDictionary_advanced( 540 cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); 541 } 542 543 544 size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 545 { 546 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); 547 cctx->cdict = cdict; 548 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ 549 return 0; 550 } 551 552 size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) 553 { 554 return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); 555 } 556 557 size_t ZSTD_CCtx_refPrefix_advanced( 558 ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) 559 { 560 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); 561 cctx->cdict = NULL; /* prefix discards any prior cdict */ 562 cctx->prefixDict.dict = prefix; 563 cctx->prefixDict.dictSize = prefixSize; 564 cctx->prefixDict.dictContentType = dictContentType; 565 return 0; 566 } 567 568 static void ZSTD_startNewCompression(ZSTD_CCtx* cctx) 569 { 570 cctx->streamStage = zcss_init; 571 cctx->pledgedSrcSizePlusOne = 0; 572 } 573 574 /*! ZSTD_CCtx_reset() : 575 * Also dumps dictionary */ 576 void ZSTD_CCtx_reset(ZSTD_CCtx* cctx) 577 { 578 ZSTD_startNewCompression(cctx); 579 cctx->cdict = NULL; 580 } 581 582 /** ZSTD_checkCParams() : 583 control CParam values remain within authorized range. 
584 @return : 0, or an error code if one value is beyond authorized range */ 585 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) 586 { 587 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); 588 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); 589 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); 590 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); 591 CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); 592 if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) 593 return ERROR(parameter_unsupported); 594 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) 595 return ERROR(parameter_unsupported); 596 return 0; 597 } 598 599 /** ZSTD_clampCParams() : 600 * make CParam values within valid range. 601 * @return : valid CParams */ 602 static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams) 603 { 604 # define CLAMP(val,min,max) { \ 605 if (val<min) val=min; \ 606 else if (val>max) val=max; \ 607 } 608 CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); 609 CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); 610 CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); 611 CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); 612 CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); 613 if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN; 614 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra; 615 return cParams; 616 } 617 618 /** ZSTD_cycleLog() : 619 * condition for correct operation : hashLog > 1 */ 620 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) 621 { 622 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); 623 return hashLog - btScale; 624 } 625 626 /** ZSTD_adjustCParams_internal() : 627 optimize `cPar` for a given input (`srcSize` and `dictSize`). 
628 mostly downsizing to reduce memory consumption and initialization latency. 629 Both `srcSize` and `dictSize` are optional (use 0 if unknown). 630 Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that condition. */ 631 ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) 632 { 633 static const U64 minSrcSize = 513; /* (1<<9) + 1 */ 634 static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); 635 assert(ZSTD_checkCParams(cPar)==0); 636 637 if (dictSize && (srcSize+1<2) /* srcSize unknown */ ) 638 srcSize = minSrcSize; /* presumed small when there is a dictionary */ 639 else if (srcSize == 0) 640 srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ 641 642 /* resize windowLog if input is small enough, to use less memory */ 643 if ( (srcSize < maxWindowResize) 644 && (dictSize < maxWindowResize) ) { 645 U32 const tSize = (U32)(srcSize + dictSize); 646 static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; 647 U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : 648 ZSTD_highbit32(tSize-1) + 1; 649 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; 650 } 651 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; 652 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); 653 if (cycleLog > cPar.windowLog) 654 cPar.chainLog -= (cycleLog - cPar.windowLog); 655 } 656 657 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) 658 cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ 659 660 return cPar; 661 } 662 663 ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) 664 { 665 cPar = ZSTD_clampCParams(cPar); 666 return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); 667 } 668 669 static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx) 670 { 671 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); 672 size_t const hSize = ((size_t)1) << cParams->hashLog; 673 U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; 674 size_t const h3Size = ((size_t)1) << hashLog3; 675 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); 676 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32) 677 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t)); 678 size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) || 679 (cParams->strategy == ZSTD_btultra))) 680 ? optPotentialSpace 681 : 0; 682 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", 683 (U32)chainSize, (U32)hSize, (U32)h3Size); 684 return tableSpace + optSpace; 685 } 686 687 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) 688 { 689 /* Estimate CCtx size is supported for single-threaded compression only. 
*/ 690 if (params->nbWorkers > 0) { return ERROR(GENERIC); } 691 { ZSTD_compressionParameters const cParams = 692 ZSTD_getCParamsFromCCtxParams(params, 0, 0); 693 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); 694 U32 const divider = (cParams.searchLength==3) ? 3 : 4; 695 size_t const maxNbSeq = blockSize / divider; 696 size_t const tokenSpace = blockSize + 11*maxNbSeq; 697 size_t const entropySpace = HUF_WORKSPACE_SIZE; 698 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); 699 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); 700 701 size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); 702 size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); 703 704 size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + 705 matchStateSize + ldmSpace + ldmSeqSpace; 706 707 DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); 708 DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); 709 return sizeof(ZSTD_CCtx) + neededSpace; 710 } 711 } 712 713 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) 714 { 715 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); 716 return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); 717 } 718 719 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) 720 { 721 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); 722 return ZSTD_estimateCCtxSize_usingCParams(cParams); 723 } 724 725 size_t ZSTD_estimateCCtxSize(int compressionLevel) 726 { 727 int level; 728 size_t memBudget = 0; 729 for (level=1; level<=compressionLevel; level++) { 730 size_t const newMB = ZSTD_estimateCCtxSize_internal(level); 731 if (newMB > memBudget) memBudget = newMB; 732 } 733 return memBudget; 734 } 735 736 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) 737 { 738 if (params->nbWorkers > 0) 
{ return ERROR(GENERIC); } 739 { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); 740 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); 741 size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; 742 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; 743 size_t const streamingSize = inBuffSize + outBuffSize; 744 745 return CCtxSize + streamingSize; 746 } 747 } 748 749 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) 750 { 751 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); 752 return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); 753 } 754 755 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { 756 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); 757 return ZSTD_estimateCStreamSize_usingCParams(cParams); 758 } 759 760 size_t ZSTD_estimateCStreamSize(int compressionLevel) { 761 int level; 762 size_t memBudget = 0; 763 for (level=1; level<=compressionLevel; level++) { 764 size_t const newMB = ZSTD_estimateCStreamSize_internal(level); 765 if (newMB > memBudget) memBudget = newMB; 766 } 767 return memBudget; 768 } 769 770 /* ZSTD_getFrameProgression(): 771 * tells how much data has been consumed (input) and produced (output) for current frame. 772 * able to count progression inside worker threads (non-blocking mode). 773 */ 774 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) 775 { 776 #ifdef ZSTD_MULTITHREAD 777 if (cctx->appliedParams.nbWorkers > 0) { 778 return ZSTDMT_getFrameProgression(cctx->mtctx); 779 } 780 #endif 781 { ZSTD_frameProgression fp; 782 size_t const buffered = (cctx->inBuff == NULL) ? 
0 : 783 cctx->inBuffPos - cctx->inToCompress; 784 if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); 785 assert(buffered <= ZSTD_BLOCKSIZE_MAX); 786 fp.ingested = cctx->consumedSrcSize + buffered; 787 fp.consumed = cctx->consumedSrcSize; 788 fp.produced = cctx->producedCSize; 789 return fp; 790 } } 791 792 793 static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, 794 ZSTD_compressionParameters cParams2) 795 { 796 return (cParams1.hashLog == cParams2.hashLog) 797 & (cParams1.chainLog == cParams2.chainLog) 798 & (cParams1.strategy == cParams2.strategy) /* opt parser space */ 799 & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */ 800 } 801 802 /** The parameters are equivalent if ldm is not enabled in both sets or 803 * all the parameters are equivalent. */ 804 static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, 805 ldmParams_t ldmParams2) 806 { 807 return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || 808 (ldmParams1.enableLdm == ldmParams2.enableLdm && 809 ldmParams1.hashLog == ldmParams2.hashLog && 810 ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && 811 ldmParams1.minMatchLength == ldmParams2.minMatchLength && 812 ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); 813 } 814 815 typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; 816 817 /* ZSTD_sufficientBuff() : 818 * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . 819 * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ 820 static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1, 821 ZSTD_buffered_policy_e buffPol2, 822 ZSTD_compressionParameters cParams2, 823 U64 pledgedSrcSize) 824 { 825 size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); 826 size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); 827 size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? 
windowSize2 + blockSize2 : 0; 828 DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u", 829 (U32)windowSize2, cParams2.windowLog); 830 DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u", 831 (U32)blockSize2, (U32)blockSize1); 832 return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */ 833 & (neededBufferSize2 <= bufferSize1); 834 } 835 836 /** Equivalence for resetCCtx purposes */ 837 static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, 838 ZSTD_CCtx_params params2, 839 size_t buffSize1, size_t blockSize1, 840 ZSTD_buffered_policy_e buffPol2, 841 U64 pledgedSrcSize) 842 { 843 DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); 844 return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && 845 ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && 846 ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize); 847 } 848 849 static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) 850 { 851 int i; 852 for (i = 0; i < ZSTD_REP_NUM; ++i) 853 bs->rep[i] = repStartValue[i]; 854 bs->entropy.hufCTable_repeatMode = HUF_repeat_none; 855 bs->entropy.offcode_repeatMode = FSE_repeat_none; 856 bs->entropy.matchlength_repeatMode = FSE_repeat_none; 857 bs->entropy.litlength_repeatMode = FSE_repeat_none; 858 } 859 860 /*! ZSTD_invalidateMatchState() 861 * Invalidate all the matches in the match finder tables. 862 * Requires nextSrc and base to be set (can be NULL). 863 */ 864 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) 865 { 866 ZSTD_window_clear(&ms->window); 867 868 ms->nextToUpdate = ms->window.dictLimit + 1; 869 ms->loadedDictEnd = 0; 870 ms->opt.litLengthSum = 0; /* force reset of btopt stats */ 871 } 872 873 /*! 
ZSTD_continueCCtx() : 874 * reuse CCtx without reset (note : requires no dictionary) */ 875 static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) 876 { 877 size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); 878 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 879 DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); 880 881 cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ 882 cctx->appliedParams = params; 883 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; 884 cctx->consumedSrcSize = 0; 885 cctx->producedCSize = 0; 886 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) 887 cctx->appliedParams.fParams.contentSizeFlag = 0; 888 DEBUGLOG(4, "pledged content size : %u ; flag : %u", 889 (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); 890 cctx->stage = ZSTDcs_init; 891 cctx->dictID = 0; 892 if (params.ldmParams.enableLdm) 893 ZSTD_window_clear(&cctx->ldmState.window); 894 ZSTD_referenceExternalSequences(cctx, NULL, 0); 895 ZSTD_invalidateMatchState(&cctx->blockState.matchState); 896 ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); 897 XXH64_reset(&cctx->xxhState, 0); 898 return 0; 899 } 900 901 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; 902 903 static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx) 904 { 905 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); 906 size_t const hSize = ((size_t)1) << cParams->hashLog; 907 U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? 
MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; 908 size_t const h3Size = ((size_t)1) << hashLog3; 909 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); 910 911 assert(((size_t)ptr & 3) == 0); 912 913 ms->hashLog3 = hashLog3; 914 memset(&ms->window, 0, sizeof(ms->window)); 915 ZSTD_invalidateMatchState(ms); 916 917 /* opt parser space */ 918 if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) { 919 DEBUGLOG(4, "reserving optimal parser space"); 920 ms->opt.litFreq = (U32*)ptr; 921 ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); 922 ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); 923 ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); 924 ptr = ms->opt.offCodeFreq + (MaxOff+1); 925 ms->opt.matchTable = (ZSTD_match_t*)ptr; 926 ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; 927 ms->opt.priceTable = (ZSTD_optimal_t*)ptr; 928 ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; 929 } 930 931 /* table Space */ 932 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); 933 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ 934 if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ 935 ms->hashTable = (U32*)(ptr); 936 ms->chainTable = ms->hashTable + hSize; 937 ms->hashTable3 = ms->chainTable + chainSize; 938 ptr = ms->hashTable3 + h3Size; 939 940 assert(((size_t)ptr & 3) == 0); 941 return ptr; 942 } 943 944 /*! 
ZSTD_resetCCtx_internal() : 945 note : `params` are assumed fully validated at this stage */ 946 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, 947 ZSTD_CCtx_params params, U64 pledgedSrcSize, 948 ZSTD_compResetPolicy_e const crp, 949 ZSTD_buffered_policy_e const zbuff) 950 { 951 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", 952 (U32)pledgedSrcSize, params.cParams.windowLog); 953 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 954 955 if (crp == ZSTDcrp_continue) { 956 if (ZSTD_equivalentParams(zc->appliedParams, params, 957 zc->inBuffSize, zc->blockSize, 958 zbuff, pledgedSrcSize)) { 959 DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)", 960 zc->appliedParams.cParams.windowLog, (U32)zc->blockSize); 961 return ZSTD_continueCCtx(zc, params, pledgedSrcSize); 962 } } 963 DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); 964 965 if (params.ldmParams.enableLdm) { 966 /* Adjust long distance matching parameters */ 967 params.ldmParams.windowLog = params.cParams.windowLog; 968 ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); 969 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); 970 assert(params.ldmParams.hashEveryLog < 32); 971 zc->ldmState.hashPower = 972 ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); 973 } 974 975 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); 976 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 977 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; 978 size_t const maxNbSeq = blockSize / divider; 979 size_t const tokenSpace = blockSize + 11*maxNbSeq; 980 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; 981 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? 
windowSize + blockSize : 0; 982 size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); 983 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); 984 void* ptr; 985 986 /* Check if workSpace is large enough, alloc a new one if needed */ 987 { size_t const entropySpace = HUF_WORKSPACE_SIZE; 988 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); 989 size_t const bufferSpace = buffInSize + buffOutSize; 990 size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); 991 size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); 992 993 size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + 994 ldmSeqSpace + matchStateSize + tokenSpace + 995 bufferSpace; 996 DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers", 997 (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10)); 998 DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize); 999 1000 if (zc->workSpaceSize < neededSpace) { /* too small : resize */ 1001 DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK", 1002 (unsigned)(zc->workSpaceSize>>10), 1003 (unsigned)(neededSpace>>10)); 1004 /* static cctx : no resize, error out */ 1005 if (zc->staticSize) return ERROR(memory_allocation); 1006 1007 zc->workSpaceSize = 0; 1008 ZSTD_free(zc->workSpace, zc->customMem); 1009 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); 1010 if (zc->workSpace == NULL) return ERROR(memory_allocation); 1011 zc->workSpaceSize = neededSpace; 1012 ptr = zc->workSpace; 1013 1014 /* Statically sized space. 
entropyWorkspace never moves (but prev/next block swap places) */ 1015 assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ 1016 assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); 1017 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; 1018 zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; 1019 ptr = zc->blockState.nextCBlock + 1; 1020 zc->entropyWorkspace = (U32*)ptr; 1021 } } 1022 1023 /* init params */ 1024 zc->appliedParams = params; 1025 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; 1026 zc->consumedSrcSize = 0; 1027 zc->producedCSize = 0; 1028 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) 1029 zc->appliedParams.fParams.contentSizeFlag = 0; 1030 DEBUGLOG(4, "pledged content size : %u ; flag : %u", 1031 (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); 1032 zc->blockSize = blockSize; 1033 1034 XXH64_reset(&zc->xxhState, 0); 1035 zc->stage = ZSTDcs_init; 1036 zc->dictID = 0; 1037 1038 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); 1039 1040 ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; 1041 1042 /* ldm hash table */ 1043 /* initialize bucketOffsets table later for pointer alignment */ 1044 if (params.ldmParams.enableLdm) { 1045 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; 1046 memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); 1047 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ 1048 zc->ldmState.hashTable = (ldmEntry_t*)ptr; 1049 ptr = zc->ldmState.hashTable + ldmHSize; 1050 zc->ldmSequences = (rawSeq*)ptr; 1051 ptr = zc->ldmSequences + maxNbLdmSeq; 1052 zc->maxNbLdmSequences = maxNbLdmSeq; 1053 1054 memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); 1055 } 1056 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ 1057 1058 ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); 1059 1060 /* sequences storage */ 1061 zc->seqStore.sequencesStart = 
(seqDef*)ptr; 1062 ptr = zc->seqStore.sequencesStart + maxNbSeq; 1063 zc->seqStore.llCode = (BYTE*) ptr; 1064 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; 1065 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; 1066 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; 1067 ptr = zc->seqStore.litStart + blockSize; 1068 1069 /* ldm bucketOffsets table */ 1070 if (params.ldmParams.enableLdm) { 1071 size_t const ldmBucketSize = 1072 ((size_t)1) << (params.ldmParams.hashLog - 1073 params.ldmParams.bucketSizeLog); 1074 memset(ptr, 0, ldmBucketSize); 1075 zc->ldmState.bucketOffsets = (BYTE*)ptr; 1076 ptr = zc->ldmState.bucketOffsets + ldmBucketSize; 1077 ZSTD_window_clear(&zc->ldmState.window); 1078 } 1079 ZSTD_referenceExternalSequences(zc, NULL, 0); 1080 1081 /* buffers */ 1082 zc->inBuffSize = buffInSize; 1083 zc->inBuff = (char*)ptr; 1084 zc->outBuffSize = buffOutSize; 1085 zc->outBuff = zc->inBuff + buffInSize; 1086 1087 return 0; 1088 } 1089 } 1090 1091 /* ZSTD_invalidateRepCodes() : 1092 * ensures next compression will not use repcodes from previous block. 1093 * Note : only works with regular variant; 1094 * do not use with extDict variant ! */ 1095 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { 1096 int i; 1097 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0; 1098 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); 1099 } 1100 1101 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, 1102 const ZSTD_CDict* cdict, 1103 unsigned windowLog, 1104 ZSTD_frameParameters fParams, 1105 U64 pledgedSrcSize, 1106 ZSTD_buffered_policy_e zbuff) 1107 { 1108 { ZSTD_CCtx_params params = cctx->requestedParams; 1109 /* Copy only compression parameters related to tables. 
*/ 1110 params.cParams = cdict->cParams; 1111 if (windowLog) params.cParams.windowLog = windowLog; 1112 params.fParams = fParams; 1113 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 1114 ZSTDcrp_noMemset, zbuff); 1115 assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); 1116 assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); 1117 assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); 1118 } 1119 1120 /* copy tables */ 1121 { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); 1122 size_t const hSize = (size_t)1 << cdict->cParams.hashLog; 1123 size_t const tableSpace = (chainSize + hSize) * sizeof(U32); 1124 assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1125 assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); 1126 assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1127 assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); 1128 memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ 1129 } 1130 /* Zero the hashTable3, since the cdict never fills it */ 1131 { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; 1132 assert(cdict->matchState.hashLog3 == 0); 1133 memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); 1134 } 1135 1136 /* copy dictionary offsets */ 1137 { 1138 ZSTD_matchState_t const* srcMatchState = &cdict->matchState; 1139 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; 1140 dstMatchState->window = srcMatchState->window; 1141 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 1142 dstMatchState->nextToUpdate3= 
srcMatchState->nextToUpdate3; 1143 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 1144 } 1145 cctx->dictID = cdict->dictID; 1146 1147 /* copy block state */ 1148 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); 1149 1150 return 0; 1151 } 1152 1153 /*! ZSTD_copyCCtx_internal() : 1154 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 1155 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 1156 * The "context", in this case, refers to the hash and chain tables, 1157 * entropy tables, and dictionary references. 1158 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. 1159 * @return : 0, or an error code */ 1160 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, 1161 const ZSTD_CCtx* srcCCtx, 1162 ZSTD_frameParameters fParams, 1163 U64 pledgedSrcSize, 1164 ZSTD_buffered_policy_e zbuff) 1165 { 1166 DEBUGLOG(5, "ZSTD_copyCCtx_internal"); 1167 if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); 1168 1169 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); 1170 { ZSTD_CCtx_params params = dstCCtx->requestedParams; 1171 /* Copy only compression parameters related to tables. 
*/ 1172 params.cParams = srcCCtx->appliedParams.cParams; 1173 params.fParams = fParams; 1174 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, 1175 ZSTDcrp_noMemset, zbuff); 1176 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); 1177 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); 1178 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); 1179 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); 1180 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); 1181 } 1182 1183 /* copy tables */ 1184 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); 1185 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; 1186 size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; 1187 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); 1188 assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ 1189 assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); 1190 memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ 1191 } 1192 1193 /* copy dictionary offsets */ 1194 { 1195 ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState; 1196 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; 1197 dstMatchState->window = srcMatchState->window; 1198 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 1199 dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; 1200 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; 1201 } 1202 
dstCCtx->dictID = srcCCtx->dictID; 1203 1204 /* copy block state */ 1205 memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); 1206 1207 return 0; 1208 } 1209 1210 /*! ZSTD_copyCCtx() : 1211 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. 1212 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 1213 * pledgedSrcSize==0 means "unknown". 1214 * @return : 0, or an error code */ 1215 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) 1216 { 1217 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 1218 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); 1219 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); 1220 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; 1221 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); 1222 1223 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, 1224 fParams, pledgedSrcSize, 1225 zbuff); 1226 } 1227 1228 1229 #define ZSTD_ROWSIZE 16 1230 /*! ZSTD_reduceTable() : 1231 * reduce table indexes by `reducerValue`, or squash to zero. 1232 * PreserveMark preserves "unsorted mark" for btlazy2 strategy. 1233 * It must be set to a clear 0/1 value, to remove branch during inlining. 
1234 * Presume table size is a multiple of ZSTD_ROWSIZE 1235 * to help auto-vectorization */ 1236 FORCE_INLINE_TEMPLATE void 1237 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) 1238 { 1239 int const nbRows = (int)size / ZSTD_ROWSIZE; 1240 int cellNb = 0; 1241 int rowNb; 1242 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ 1243 assert(size < (1U<<31)); /* can be casted to int */ 1244 for (rowNb=0 ; rowNb < nbRows ; rowNb++) { 1245 int column; 1246 for (column=0; column<ZSTD_ROWSIZE; column++) { 1247 if (preserveMark) { 1248 U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0; 1249 table[cellNb] += adder; 1250 } 1251 if (table[cellNb] < reducerValue) table[cellNb] = 0; 1252 else table[cellNb] -= reducerValue; 1253 cellNb++; 1254 } } 1255 } 1256 1257 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) 1258 { 1259 ZSTD_reduceTable_internal(table, size, reducerValue, 0); 1260 } 1261 1262 static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) 1263 { 1264 ZSTD_reduceTable_internal(table, size, reducerValue, 1); 1265 } 1266 1267 /*! 
ZSTD_reduceIndex() : 1268 * rescale all indexes to avoid future overflow (indexes are U32) */ 1269 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) 1270 { 1271 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 1272 { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; 1273 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); 1274 } 1275 1276 if (zc->appliedParams.cParams.strategy != ZSTD_fast) { 1277 U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; 1278 if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) 1279 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); 1280 else 1281 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); 1282 } 1283 1284 if (ms->hashLog3) { 1285 U32 const h3Size = (U32)1 << ms->hashLog3; 1286 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); 1287 } 1288 } 1289 1290 1291 /*-******************************************************* 1292 * Block entropic compression 1293 *********************************************************/ 1294 1295 /* See doc/zstd_compression_format.md for detailed format description */ 1296 1297 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 1298 { 1299 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); 1300 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); 1301 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); 1302 return ZSTD_blockHeaderSize+srcSize; 1303 } 1304 1305 1306 static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 1307 { 1308 BYTE* const ostart = (BYTE* const)dst; 1309 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 1310 1311 if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); 1312 1313 switch(flSize) 1314 { 1315 case 1: /* 2 - 1 - 5 */ 1316 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); 1317 break; 1318 case 2: /* 2 - 2 - 12 */ 1319 MEM_writeLE16(ostart, 
(U16)((U32)set_basic + (1<<2) + (srcSize<<4))); 1320 break; 1321 case 3: /* 2 - 2 - 20 */ 1322 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); 1323 break; 1324 default: /* not necessary : flSize is {1,2,3} */ 1325 assert(0); 1326 } 1327 1328 memcpy(ostart + flSize, src, srcSize); 1329 return srcSize + flSize; 1330 } 1331 1332 static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 1333 { 1334 BYTE* const ostart = (BYTE* const)dst; 1335 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 1336 1337 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ 1338 1339 switch(flSize) 1340 { 1341 case 1: /* 2 - 1 - 5 */ 1342 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); 1343 break; 1344 case 2: /* 2 - 2 - 12 */ 1345 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); 1346 break; 1347 case 3: /* 2 - 2 - 20 */ 1348 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); 1349 break; 1350 default: /* not necessary : flSize is {1,2,3} */ 1351 assert(0); 1352 } 1353 1354 ostart[flSize] = *(const BYTE*)src; 1355 return flSize+1; 1356 } 1357 1358 1359 static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } 1360 1361 static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, 1362 ZSTD_entropyCTables_t* nextEntropy, 1363 ZSTD_strategy strategy, int disableLiteralCompression, 1364 void* dst, size_t dstCapacity, 1365 const void* src, size_t srcSize, 1366 U32* workspace, const int bmi2) 1367 { 1368 size_t const minGain = ZSTD_minGain(srcSize); 1369 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); 1370 BYTE* const ostart = (BYTE*)dst; 1371 U32 singleStream = srcSize < 256; 1372 symbolEncodingType_e hType = set_compressed; 1373 size_t cLitSize; 1374 1375 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", 1376 disableLiteralCompression); 1377 1378 /* Prepare nextEntropy assuming 
reusing the existing table */ 1379 nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode; 1380 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, 1381 sizeof(prevEntropy->hufCTable)); 1382 1383 if (disableLiteralCompression) 1384 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1385 1386 /* small ? don't even attempt compression (speed opt) */ 1387 # define COMPRESS_LITERALS_SIZE_MIN 63 1388 { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; 1389 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1390 } 1391 1392 if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ 1393 { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode; 1394 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; 1395 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; 1396 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, 1397 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2) 1398 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, 1399 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2); 1400 if (repeat != HUF_repeat_none) { 1401 /* reused the existing table */ 1402 hType = set_repeat; 1403 } 1404 } 1405 1406 if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { 1407 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); 1408 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 1409 } 1410 if (cLitSize==1) { 1411 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); 1412 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); 1413 } 1414 1415 if (hType == set_compressed) { 1416 /* using a newly 
constructed table */ 1417 nextEntropy->hufCTable_repeatMode = HUF_repeat_check; 1418 } 1419 1420 /* Build header */ 1421 switch(lhSize) 1422 { 1423 case 3: /* 2 - 2 - 10 - 10 */ 1424 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); 1425 MEM_writeLE24(ostart, lhc); 1426 break; 1427 } 1428 case 4: /* 2 - 2 - 14 - 14 */ 1429 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); 1430 MEM_writeLE32(ostart, lhc); 1431 break; 1432 } 1433 case 5: /* 2 - 2 - 18 - 18 */ 1434 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); 1435 MEM_writeLE32(ostart, lhc); 1436 ostart[4] = (BYTE)(cLitSize >> 10); 1437 break; 1438 } 1439 default: /* not possible : lhSize is {3,4,5} */ 1440 assert(0); 1441 } 1442 return lhSize+cLitSize; 1443 } 1444 1445 1446 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 1447 { 1448 const seqDef* const sequences = seqStorePtr->sequencesStart; 1449 BYTE* const llCodeTable = seqStorePtr->llCode; 1450 BYTE* const ofCodeTable = seqStorePtr->ofCode; 1451 BYTE* const mlCodeTable = seqStorePtr->mlCode; 1452 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 1453 U32 u; 1454 for (u=0; u<nbSeq; u++) { 1455 U32 const llv = sequences[u].litLength; 1456 U32 const mlv = sequences[u].matchLength; 1457 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); 1458 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); 1459 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); 1460 } 1461 if (seqStorePtr->longLengthID==1) 1462 llCodeTable[seqStorePtr->longLengthPos] = MaxLL; 1463 if (seqStorePtr->longLengthID==2) 1464 mlCodeTable[seqStorePtr->longLengthPos] = MaxML; 1465 } 1466 1467 typedef enum { 1468 ZSTD_defaultDisallowed = 0, 1469 ZSTD_defaultAllowed = 1 1470 } ZSTD_defaultPolicy_e; 1471 1472 MEM_STATIC 1473 symbolEncodingType_e ZSTD_selectEncodingType( 1474 FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, 1475 U32 defaultNormLog, ZSTD_defaultPolicy_e 
const isDefaultAllowed) 1476 { 1477 #define MIN_SEQ_FOR_DYNAMIC_FSE 64 1478 #define MAX_SEQ_FOR_STATIC_FSE 1000 1479 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); 1480 if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { 1481 DEBUGLOG(5, "Selected set_rle"); 1482 /* Prefer set_basic over set_rle when there are 2 or less symbols, 1483 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. 1484 * If basic encoding isn't possible, always choose RLE. 1485 */ 1486 *repeatMode = FSE_repeat_check; 1487 return set_rle; 1488 } 1489 if ( isDefaultAllowed 1490 && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { 1491 DEBUGLOG(5, "Selected set_repeat"); 1492 return set_repeat; 1493 } 1494 if ( isDefaultAllowed 1495 && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) { 1496 DEBUGLOG(5, "Selected set_basic"); 1497 /* The format allows default tables to be repeated, but it isn't useful. 1498 * When using simple heuristics to select encoding type, we don't want 1499 * to confuse these tables with dictionaries. When running more careful 1500 * analysis, we don't need to waste time checking both repeating tables 1501 * and default tables. 
1502 */ 1503 *repeatMode = FSE_repeat_none; 1504 return set_basic; 1505 } 1506 DEBUGLOG(5, "Selected set_compressed"); 1507 *repeatMode = FSE_repeat_check; 1508 return set_compressed; 1509 } 1510 1511 MEM_STATIC 1512 size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, 1513 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 1514 U32* count, U32 max, 1515 BYTE const* codeTable, size_t nbSeq, 1516 S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax, 1517 FSE_CTable const* prevCTable, size_t prevCTableSize, 1518 void* workspace, size_t workspaceSize) 1519 { 1520 BYTE* op = (BYTE*)dst; 1521 BYTE const* const oend = op + dstCapacity; 1522 1523 switch (type) { 1524 case set_rle: 1525 *op = codeTable[0]; 1526 CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); 1527 return 1; 1528 case set_repeat: 1529 memcpy(nextCTable, prevCTable, prevCTableSize); 1530 return 0; 1531 case set_basic: 1532 CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ 1533 return 0; 1534 case set_compressed: { 1535 S16 norm[MaxSeq + 1]; 1536 size_t nbSeq_1 = nbSeq; 1537 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); 1538 if (count[codeTable[nbSeq-1]] > 1) { 1539 count[codeTable[nbSeq-1]]--; 1540 nbSeq_1--; 1541 } 1542 assert(nbSeq_1 > 1); 1543 CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); 1544 { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ 1545 if (FSE_isError(NCountSize)) return NCountSize; 1546 CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); 1547 return NCountSize; 1548 } 1549 } 1550 default: return assert(0), ERROR(GENERIC); 1551 } 1552 } 1553 1554 FORCE_INLINE_TEMPLATE size_t 1555 ZSTD_encodeSequences_body( 1556 void* dst, size_t dstCapacity, 1557 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 1558 FSE_CTable const* 
CTable_OffsetBits, BYTE const* ofCodeTable, 1559 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 1560 seqDef const* sequences, size_t nbSeq, int longOffsets) 1561 { 1562 BIT_CStream_t blockStream; 1563 FSE_CState_t stateMatchLength; 1564 FSE_CState_t stateOffsetBits; 1565 FSE_CState_t stateLitLength; 1566 1567 CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */ 1568 1569 /* first symbols */ 1570 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); 1571 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); 1572 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); 1573 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); 1574 if (MEM_32bits()) BIT_flushBits(&blockStream); 1575 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); 1576 if (MEM_32bits()) BIT_flushBits(&blockStream); 1577 if (longOffsets) { 1578 U32 const ofBits = ofCodeTable[nbSeq-1]; 1579 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); 1580 if (extraBits) { 1581 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); 1582 BIT_flushBits(&blockStream); 1583 } 1584 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, 1585 ofBits - extraBits); 1586 } else { 1587 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); 1588 } 1589 BIT_flushBits(&blockStream); 1590 1591 { size_t n; 1592 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ 1593 BYTE const llCode = llCodeTable[n]; 1594 BYTE const ofCode = ofCodeTable[n]; 1595 BYTE const mlCode = mlCodeTable[n]; 1596 U32 const llBits = LL_bits[llCode]; 1597 U32 const ofBits = ofCode; 1598 U32 const mlBits = ML_bits[mlCode]; 1599 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", 1600 sequences[n].litLength, 1601 sequences[n].matchLength + MINMATCH, 1602 
sequences[n].offset); 1603 /* 32b*/ /* 64b*/ 1604 /* (7)*/ /* (7)*/ 1605 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ 1606 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ 1607 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 1608 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ 1609 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) 1610 BIT_flushBits(&blockStream); /* (7)*/ 1611 BIT_addBits(&blockStream, sequences[n].litLength, llBits); 1612 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 1613 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); 1614 if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); 1615 if (longOffsets) { 1616 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); 1617 if (extraBits) { 1618 BIT_addBits(&blockStream, sequences[n].offset, extraBits); 1619 BIT_flushBits(&blockStream); /* (7)*/ 1620 } 1621 BIT_addBits(&blockStream, sequences[n].offset >> extraBits, 1622 ofBits - extraBits); /* 31 */ 1623 } else { 1624 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ 1625 } 1626 BIT_flushBits(&blockStream); /* (7)*/ 1627 } } 1628 1629 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); 1630 FSE_flushCState(&blockStream, &stateMatchLength); 1631 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); 1632 FSE_flushCState(&blockStream, &stateOffsetBits); 1633 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); 1634 FSE_flushCState(&blockStream, &stateLitLength); 1635 1636 { size_t const streamSize = BIT_closeCStream(&blockStream); 1637 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ 1638 return streamSize; 1639 } 1640 } 1641 1642 static size_t 1643 ZSTD_encodeSequences_default( 1644 void* dst, 
size_t dstCapacity, 1645 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 1646 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 1647 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 1648 seqDef const* sequences, size_t nbSeq, int longOffsets) 1649 { 1650 return ZSTD_encodeSequences_body(dst, dstCapacity, 1651 CTable_MatchLength, mlCodeTable, 1652 CTable_OffsetBits, ofCodeTable, 1653 CTable_LitLength, llCodeTable, 1654 sequences, nbSeq, longOffsets); 1655 } 1656 1657 1658 #if DYNAMIC_BMI2 1659 1660 static TARGET_ATTRIBUTE("bmi2") size_t 1661 ZSTD_encodeSequences_bmi2( 1662 void* dst, size_t dstCapacity, 1663 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 1664 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 1665 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 1666 seqDef const* sequences, size_t nbSeq, int longOffsets) 1667 { 1668 return ZSTD_encodeSequences_body(dst, dstCapacity, 1669 CTable_MatchLength, mlCodeTable, 1670 CTable_OffsetBits, ofCodeTable, 1671 CTable_LitLength, llCodeTable, 1672 sequences, nbSeq, longOffsets); 1673 } 1674 1675 #endif 1676 1677 size_t ZSTD_encodeSequences( 1678 void* dst, size_t dstCapacity, 1679 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 1680 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 1681 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 1682 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) 1683 { 1684 #if DYNAMIC_BMI2 1685 if (bmi2) { 1686 return ZSTD_encodeSequences_bmi2(dst, dstCapacity, 1687 CTable_MatchLength, mlCodeTable, 1688 CTable_OffsetBits, ofCodeTable, 1689 CTable_LitLength, llCodeTable, 1690 sequences, nbSeq, longOffsets); 1691 } 1692 #endif 1693 (void)bmi2; 1694 return ZSTD_encodeSequences_default(dst, dstCapacity, 1695 CTable_MatchLength, mlCodeTable, 1696 CTable_OffsetBits, ofCodeTable, 1697 CTable_LitLength, llCodeTable, 1698 sequences, nbSeq, longOffsets); 1699 } 1700 
1701 MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, 1702 ZSTD_entropyCTables_t const* prevEntropy, 1703 ZSTD_entropyCTables_t* nextEntropy, 1704 ZSTD_CCtx_params const* cctxParams, 1705 void* dst, size_t dstCapacity, U32* workspace, 1706 const int bmi2) 1707 { 1708 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 1709 U32 count[MaxSeq+1]; 1710 FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; 1711 FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; 1712 FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; 1713 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 1714 const seqDef* const sequences = seqStorePtr->sequencesStart; 1715 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 1716 const BYTE* const llCodeTable = seqStorePtr->llCode; 1717 const BYTE* const mlCodeTable = seqStorePtr->mlCode; 1718 BYTE* const ostart = (BYTE*)dst; 1719 BYTE* const oend = ostart + dstCapacity; 1720 BYTE* op = ostart; 1721 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 1722 BYTE* seqHead; 1723 1724 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 1725 1726 /* Compress literals */ 1727 { const BYTE* const literals = seqStorePtr->litStart; 1728 size_t const litSize = seqStorePtr->lit - literals; 1729 size_t const cSize = ZSTD_compressLiterals( 1730 prevEntropy, nextEntropy, 1731 cctxParams->cParams.strategy, cctxParams->disableLiteralCompression, 1732 op, dstCapacity, 1733 literals, litSize, 1734 workspace, bmi2); 1735 if (ZSTD_isError(cSize)) 1736 return cSize; 1737 assert(cSize <= dstCapacity); 1738 op += cSize; 1739 } 1740 1741 /* Sequences Header */ 1742 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall); 1743 if (nbSeq < 0x7F) 1744 *op++ = (BYTE)nbSeq; 1745 else if (nbSeq < LONGNBSEQ) 1746 op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 1747 else 1748 op[0]=0xFF, MEM_writeLE16(op+1, 
(U16)(nbSeq - LONGNBSEQ)), op+=3; 1749 if (nbSeq==0) { 1750 memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable)); 1751 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; 1752 memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable)); 1753 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; 1754 memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable)); 1755 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; 1756 return op - ostart; 1757 } 1758 1759 /* seqHead : flags for FSE encoding type */ 1760 seqHead = op++; 1761 1762 /* convert length/distances into codes */ 1763 ZSTD_seqToCodes(seqStorePtr); 1764 /* build CTable for Literal Lengths */ 1765 { U32 max = MaxLL; 1766 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); 1767 DEBUGLOG(5, "Building LL table"); 1768 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; 1769 LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); 1770 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, 1771 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, 1772 prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), 1773 workspace, HUF_WORKSPACE_SIZE); 1774 if (ZSTD_isError(countSize)) return countSize; 1775 op += countSize; 1776 } } 1777 /* build CTable for Offsets */ 1778 { U32 max = MaxOff; 1779 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); 1780 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ 1781 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 1782 DEBUGLOG(5, "Building OF table"); 1783 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; 1784 Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); 1785 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, 1786 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 1787 prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), 1788 workspace, HUF_WORKSPACE_SIZE); 1789 if (ZSTD_isError(countSize)) return countSize; 1790 op += countSize; 1791 } } 1792 /* build CTable for MatchLengths */ 1793 { U32 max = MaxML; 1794 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); 1795 DEBUGLOG(5, "Building ML table"); 1796 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; 1797 MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); 1798 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, 1799 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, 1800 prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), 1801 workspace, HUF_WORKSPACE_SIZE); 1802 if (ZSTD_isError(countSize)) return countSize; 1803 op += countSize; 1804 } } 1805 1806 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 1807 1808 { size_t const bitstreamSize = ZSTD_encodeSequences( 1809 op, oend - op, 1810 CTable_MatchLength, mlCodeTable, 1811 CTable_OffsetBits, ofCodeTable, 1812 CTable_LitLength, llCodeTable, 1813 sequences, nbSeq, 1814 longOffsets, bmi2); 1815 if (ZSTD_isError(bitstreamSize)) return bitstreamSize; 1816 op += bitstreamSize; 1817 } 1818 1819 return op - ostart; 1820 } 1821 1822 MEM_STATIC size_t 
ZSTD_compressSequences(seqStore_t* seqStorePtr, 1823 ZSTD_entropyCTables_t const* prevEntropy, 1824 ZSTD_entropyCTables_t* nextEntropy, 1825 ZSTD_CCtx_params const* cctxParams, 1826 void* dst, size_t dstCapacity, 1827 size_t srcSize, U32* workspace, int bmi2) 1828 { 1829 size_t const cSize = ZSTD_compressSequences_internal( 1830 seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, 1831 workspace, bmi2); 1832 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 1833 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 1834 */ 1835 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) 1836 return 0; /* block not compressed */ 1837 if (ZSTD_isError(cSize)) return cSize; 1838 1839 /* Check compressibility */ 1840 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ 1841 if (cSize >= maxCSize) return 0; /* block not compressed */ 1842 } 1843 1844 /* We check that dictionaries have offset codes available for the first 1845 * block. After the first block, the offcode table might not have large 1846 * enough codes to represent the offsets in the data. 
1847 */ 1848 if (nextEntropy->offcode_repeatMode == FSE_repeat_valid) 1849 nextEntropy->offcode_repeatMode = FSE_repeat_check; 1850 1851 return cSize; 1852 } 1853 1854 /* ZSTD_selectBlockCompressor() : 1855 * Not static, but internal use only (used by long distance matcher) 1856 * assumption : strat is a valid strategy */ 1857 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) 1858 { 1859 static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { 1860 { ZSTD_compressBlock_fast /* default for 0 */, 1861 ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, 1862 ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, 1863 ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra }, 1864 { ZSTD_compressBlock_fast_extDict /* default for 0 */, 1865 ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, 1866 ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, 1867 ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict } 1868 }; 1869 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); 1870 1871 assert((U32)strat >= (U32)ZSTD_fast); 1872 assert((U32)strat <= (U32)ZSTD_btultra); 1873 return blockCompressor[extDict!=0][(U32)strat]; 1874 } 1875 1876 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, 1877 const BYTE* anchor, size_t lastLLSize) 1878 { 1879 memcpy(seqStorePtr->lit, anchor, lastLLSize); 1880 seqStorePtr->lit += lastLLSize; 1881 } 1882 1883 static void ZSTD_resetSeqStore(seqStore_t* ssPtr) 1884 { 1885 ssPtr->lit = ssPtr->litStart; 1886 ssPtr->sequences = ssPtr->sequencesStart; 1887 ssPtr->longLengthID = 0; 1888 } 1889 1890 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, 1891 void* dst, size_t dstCapacity, 1892 const void* src, size_t srcSize) 1893 { 1894 ZSTD_matchState_t* const ms = &zc->blockState.matchState; 1895 DEBUGLOG(5, 
"ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 1896 (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate); 1897 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 1898 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength); 1899 return 0; /* don't even attempt compression below a certain srcSize */ 1900 } 1901 ZSTD_resetSeqStore(&(zc->seqStore)); 1902 1903 /* limited update after a very long match */ 1904 { const BYTE* const base = ms->window.base; 1905 const BYTE* const istart = (const BYTE*)src; 1906 const U32 current = (U32)(istart-base); 1907 if (current > ms->nextToUpdate + 384) 1908 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); 1909 } 1910 1911 /* select and store sequences */ 1912 { U32 const extDict = ZSTD_window_hasExtDict(ms->window); 1913 size_t lastLLSize; 1914 { int i; 1915 for (i = 0; i < ZSTD_REP_NUM; ++i) 1916 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; 1917 } 1918 if (zc->externSeqStore.pos < zc->externSeqStore.size) { 1919 assert(!zc->appliedParams.ldmParams.enableLdm); 1920 /* Updates ldmSeqStore.pos */ 1921 lastLLSize = 1922 ZSTD_ldm_blockCompress(&zc->externSeqStore, 1923 ms, &zc->seqStore, 1924 zc->blockState.nextCBlock->rep, 1925 &zc->appliedParams.cParams, 1926 src, srcSize, extDict); 1927 assert(zc->externSeqStore.pos <= zc->externSeqStore.size); 1928 } else if (zc->appliedParams.ldmParams.enableLdm) { 1929 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; 1930 1931 ldmSeqStore.seq = zc->ldmSequences; 1932 ldmSeqStore.capacity = zc->maxNbLdmSequences; 1933 /* Updates ldmSeqStore.size */ 1934 CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, 1935 &zc->appliedParams.ldmParams, 1936 src, srcSize)); 1937 /* Updates ldmSeqStore.pos */ 1938 lastLLSize = 1939 ZSTD_ldm_blockCompress(&ldmSeqStore, 1940 ms, &zc->seqStore, 1941 zc->blockState.nextCBlock->rep, 1942 &zc->appliedParams.cParams, 1943 src, 
srcSize, extDict); 1944 assert(ldmSeqStore.pos == ldmSeqStore.size); 1945 } else { /* not long range mode */ 1946 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); 1947 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); 1948 } 1949 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; 1950 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); 1951 } } 1952 1953 /* encode sequences and literals */ 1954 { size_t const cSize = ZSTD_compressSequences(&zc->seqStore, 1955 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, 1956 &zc->appliedParams, 1957 dst, dstCapacity, 1958 srcSize, zc->entropyWorkspace, zc->bmi2); 1959 if (ZSTD_isError(cSize) || cSize == 0) return cSize; 1960 /* confirm repcodes and entropy tables */ 1961 { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; 1962 zc->blockState.prevCBlock = zc->blockState.nextCBlock; 1963 zc->blockState.nextCBlock = tmp; 1964 } 1965 return cSize; 1966 } 1967 } 1968 1969 1970 /*! ZSTD_compress_frameChunk() : 1971 * Compress a chunk of data into one or multiple blocks. 1972 * All blocks will be terminated, all input will be consumed. 1973 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
1974 * Frame is supposed already started (header already produced) 1975 * @return : compressed size, or an error code 1976 */ 1977 static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, 1978 void* dst, size_t dstCapacity, 1979 const void* src, size_t srcSize, 1980 U32 lastFrameChunk) 1981 { 1982 size_t blockSize = cctx->blockSize; 1983 size_t remaining = srcSize; 1984 const BYTE* ip = (const BYTE*)src; 1985 BYTE* const ostart = (BYTE*)dst; 1986 BYTE* op = ostart; 1987 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; 1988 assert(cctx->appliedParams.cParams.windowLog <= 31); 1989 1990 DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (U32)blockSize); 1991 if (cctx->appliedParams.fParams.checksumFlag && srcSize) 1992 XXH64_update(&cctx->xxhState, src, srcSize); 1993 1994 while (remaining) { 1995 ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 1996 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); 1997 1998 if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) 1999 return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ 2000 if (remaining < blockSize) blockSize = remaining; 2001 2002 if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { 2003 U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); 2004 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); 2005 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); 2006 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 2007 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 2008 2009 ZSTD_reduceIndex(cctx, correction); 2010 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; 2011 else ms->nextToUpdate -= correction; 2012 ms->loadedDictEnd = 0; 2013 } 2014 ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd); 2015 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; 2016 2017 { size_t 
cSize = ZSTD_compressBlock_internal(cctx, 2018 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, 2019 ip, blockSize); 2020 if (ZSTD_isError(cSize)) return cSize; 2021 2022 if (cSize == 0) { /* block is not compressible */ 2023 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); 2024 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); 2025 MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */ 2026 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); 2027 cSize = ZSTD_blockHeaderSize + blockSize; 2028 } else { 2029 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 2030 MEM_writeLE24(op, cBlockHeader24); 2031 cSize += ZSTD_blockHeaderSize; 2032 } 2033 2034 ip += blockSize; 2035 assert(remaining >= blockSize); 2036 remaining -= blockSize; 2037 op += cSize; 2038 assert(dstCapacity >= cSize); 2039 dstCapacity -= cSize; 2040 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", 2041 (U32)cSize); 2042 } } 2043 2044 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; 2045 return op-ostart; 2046 } 2047 2048 2049 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, 2050 ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) 2051 { BYTE* const op = (BYTE*)dst; 2052 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ 2053 U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ 2054 U32 const checksumFlag = params.fParams.checksumFlag>0; 2055 U32 const windowSize = (U32)1 << params.cParams.windowLog; 2056 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); 2057 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); 2058 U32 const fcsCode = params.fParams.contentSizeFlag ? 
2059 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ 2060 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); 2061 size_t pos=0; 2062 2063 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); 2064 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", 2065 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode); 2066 2067 if (params.format == ZSTD_f_zstd1) { 2068 MEM_writeLE32(dst, ZSTD_MAGICNUMBER); 2069 pos = 4; 2070 } 2071 op[pos++] = frameHeaderDecriptionByte; 2072 if (!singleSegment) op[pos++] = windowLogByte; 2073 switch(dictIDSizeCode) 2074 { 2075 default: assert(0); /* impossible */ 2076 case 0 : break; 2077 case 1 : op[pos] = (BYTE)(dictID); pos++; break; 2078 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; 2079 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; 2080 } 2081 switch(fcsCode) 2082 { 2083 default: assert(0); /* impossible */ 2084 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; 2085 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; 2086 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; 2087 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; 2088 } 2089 return pos; 2090 } 2091 2092 /* ZSTD_writeLastEmptyBlock() : 2093 * output an empty Block with end-of-frame mark to complete a frame 2094 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 2095 * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize) 2096 */ 2097 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) 2098 { 2099 if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall); 2100 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ 2101 MEM_writeLE24(dst, cBlockHeader24); 2102 return 
ZSTD_blockHeaderSize; 2103 } 2104 } 2105 2106 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 2107 { 2108 if (cctx->stage != ZSTDcs_init) 2109 return ERROR(stage_wrong); 2110 if (cctx->appliedParams.ldmParams.enableLdm) 2111 return ERROR(parameter_unsupported); 2112 cctx->externSeqStore.seq = seq; 2113 cctx->externSeqStore.size = nbSeq; 2114 cctx->externSeqStore.capacity = nbSeq; 2115 cctx->externSeqStore.pos = 0; 2116 return 0; 2117 } 2118 2119 2120 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, 2121 void* dst, size_t dstCapacity, 2122 const void* src, size_t srcSize, 2123 U32 frame, U32 lastFrameChunk) 2124 { 2125 ZSTD_matchState_t* ms = &cctx->blockState.matchState; 2126 size_t fhSize = 0; 2127 2128 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", 2129 cctx->stage, (U32)srcSize); 2130 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ 2131 2132 if (frame && (cctx->stage==ZSTDcs_init)) { 2133 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 2134 cctx->pledgedSrcSizePlusOne-1, cctx->dictID); 2135 if (ZSTD_isError(fhSize)) return fhSize; 2136 dstCapacity -= fhSize; 2137 dst = (char*)dst + fhSize; 2138 cctx->stage = ZSTDcs_ongoing; 2139 } 2140 2141 if (!srcSize) return fhSize; /* do not generate an empty block if no input */ 2142 2143 if (!ZSTD_window_update(&ms->window, src, srcSize)) { 2144 ms->nextToUpdate = ms->window.dictLimit; 2145 } 2146 if (cctx->appliedParams.ldmParams.enableLdm) 2147 ZSTD_window_update(&cctx->ldmState.window, src, srcSize); 2148 2149 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize); 2150 { size_t const cSize = frame ? 
2151 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : 2152 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); 2153 if (ZSTD_isError(cSize)) return cSize; 2154 cctx->consumedSrcSize += srcSize; 2155 cctx->producedCSize += (cSize + fhSize); 2156 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */ 2157 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) { 2158 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u", 2159 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize); 2160 return ERROR(srcSize_wrong); 2161 } 2162 } 2163 return cSize + fhSize; 2164 } 2165 } 2166 2167 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, 2168 void* dst, size_t dstCapacity, 2169 const void* src, size_t srcSize) 2170 { 2171 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize); 2172 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); 2173 } 2174 2175 2176 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 2177 { 2178 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; 2179 assert(!ZSTD_checkCParams(cParams)); 2180 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); 2181 } 2182 2183 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 2184 { 2185 size_t const blockSizeMax = ZSTD_getBlockSize(cctx); 2186 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); 2187 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); 2188 } 2189 2190 /*! 
ZSTD_loadDictionaryContent() : 2191 * @return : 0, or an error code 2192 */ 2193 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize) 2194 { 2195 const BYTE* const ip = (const BYTE*) src; 2196 const BYTE* const iend = ip + srcSize; 2197 ZSTD_compressionParameters const* cParams = ¶ms->cParams; 2198 2199 ZSTD_window_update(&ms->window, src, srcSize); 2200 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); 2201 2202 if (srcSize <= HASH_READ_SIZE) return 0; 2203 2204 switch(params->cParams.strategy) 2205 { 2206 case ZSTD_fast: 2207 ZSTD_fillHashTable(ms, cParams, iend); 2208 break; 2209 case ZSTD_dfast: 2210 ZSTD_fillDoubleHashTable(ms, cParams, iend); 2211 break; 2212 2213 case ZSTD_greedy: 2214 case ZSTD_lazy: 2215 case ZSTD_lazy2: 2216 if (srcSize >= HASH_READ_SIZE) 2217 ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE); 2218 break; 2219 2220 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ 2221 case ZSTD_btopt: 2222 case ZSTD_btultra: 2223 if (srcSize >= HASH_READ_SIZE) 2224 ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend); 2225 break; 2226 2227 default: 2228 assert(0); /* not possible : not a valid strategy id */ 2229 } 2230 2231 ms->nextToUpdate = (U32)(iend - ms->window.base); 2232 return 0; 2233 } 2234 2235 2236 /* Dictionaries that assign zero probability to symbols that show up causes problems 2237 when FSE encoding. Refuse dictionaries that assign zero probability to symbols 2238 that we may encounter during compression. 2239 NOTE: This behavior is not standard and could be improved in the future. 
*/ 2240 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { 2241 U32 s; 2242 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); 2243 for (s = 0; s <= maxSymbolValue; ++s) { 2244 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); 2245 } 2246 return 0; 2247 } 2248 2249 2250 /* Dictionary format : 2251 * See : 2252 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format 2253 */ 2254 /*! ZSTD_loadZstdDictionary() : 2255 * @return : dictID, or an error code 2256 * assumptions : magic number supposed already checked 2257 * dictSize supposed > 8 2258 */ 2259 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace) 2260 { 2261 const BYTE* dictPtr = (const BYTE*)dict; 2262 const BYTE* const dictEnd = dictPtr + dictSize; 2263 short offcodeNCount[MaxOff+1]; 2264 unsigned offcodeMaxValue = MaxOff; 2265 size_t dictID; 2266 2267 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 2268 2269 dictPtr += 4; /* skip magic number */ 2270 dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr); 2271 dictPtr += 4; 2272 2273 { unsigned maxSymbolValue = 255; 2274 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); 2275 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); 2276 if (maxSymbolValue < 255) return ERROR(dictionary_corrupted); 2277 dictPtr += hufHeaderSize; 2278 } 2279 2280 { unsigned offcodeLog; 2281 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); 2282 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); 2283 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); 2284 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ 2285 CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE), 2286 dictionary_corrupted); 2287 dictPtr += offcodeHeaderSize; 2288 } 2289 2290 { short matchlengthNCount[MaxML+1]; 2291 unsigned matchlengthMaxValue = MaxML, matchlengthLog; 2292 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); 2293 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); 2294 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); 2295 /* Every match length code must have non-zero probability */ 2296 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); 2297 CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE), 2298 dictionary_corrupted); 2299 dictPtr += matchlengthHeaderSize; 2300 } 2301 2302 { short litlengthNCount[MaxLL+1]; 2303 unsigned litlengthMaxValue = MaxLL, litlengthLog; 2304 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, 
dictPtr, dictEnd-dictPtr); 2305 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); 2306 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); 2307 /* Every literal length code must have non-zero probability */ 2308 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); 2309 CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE), 2310 dictionary_corrupted); 2311 dictPtr += litlengthHeaderSize; 2312 } 2313 2314 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); 2315 bs->rep[0] = MEM_readLE32(dictPtr+0); 2316 bs->rep[1] = MEM_readLE32(dictPtr+4); 2317 bs->rep[2] = MEM_readLE32(dictPtr+8); 2318 dictPtr += 12; 2319 2320 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 2321 U32 offcodeMax = MaxOff; 2322 if (dictContentSize <= ((U32)-1) - 128 KB) { 2323 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ 2324 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ 2325 } 2326 /* All offset values <= dictContentSize + 128 KB must be representable */ 2327 CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); 2328 /* All repCodes must be <= dictContentSize and != 0*/ 2329 { U32 u; 2330 for (u=0; u<3; u++) { 2331 if (bs->rep[u] == 0) return ERROR(dictionary_corrupted); 2332 if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); 2333 } } 2334 2335 bs->entropy.hufCTable_repeatMode = HUF_repeat_valid; 2336 bs->entropy.offcode_repeatMode = FSE_repeat_valid; 2337 bs->entropy.matchlength_repeatMode = FSE_repeat_valid; 2338 bs->entropy.litlength_repeatMode = FSE_repeat_valid; 2339 CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize)); 2340 return dictID; 2341 } 2342 } 2343 2344 /** ZSTD_compress_insertDictionary() : 2345 * @return : dictID, or an error 
code */
static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
                                             ZSTD_CCtx_params const* params,
                                       const void* dict, size_t dictSize,
                                             ZSTD_dictContentType_e dictContentType,
                                             void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);

    /* nothing to load : returns 0 without touching `bs` */
    if (dict==NULL) return 0;
    if (dictSize<=8) return 0;

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent) {
        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
    }

    {   int const hasZstdMagic = (MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);
        if (!hasZstdMagic) {
            switch (dictContentType)
            {
            case ZSTD_dct_auto:
                DEBUGLOG(4, "raw content dictionary detected");
                return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
            case ZSTD_dct_fullDict:
                return ERROR(dictionary_wrong);
            default:
                break;
            }
            assert(0);   /* impossible */
        }
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
}

/*!
ZSTD_compressBegin_internal() : 2376 * @return : 0, or an error code */ 2377 size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, 2378 const void* dict, size_t dictSize, 2379 ZSTD_dictContentType_e dictContentType, 2380 const ZSTD_CDict* cdict, 2381 ZSTD_CCtx_params params, U64 pledgedSrcSize, 2382 ZSTD_buffered_policy_e zbuff) 2383 { 2384 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); 2385 /* params are supposed to be fully validated at this point */ 2386 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 2387 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 2388 2389 if (cdict && cdict->dictContentSize>0) { 2390 cctx->requestedParams = params; 2391 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog, 2392 params.fParams, pledgedSrcSize, zbuff); 2393 } 2394 2395 CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, 2396 ZSTDcrp_continue, zbuff) ); 2397 { 2398 size_t const dictID = ZSTD_compress_insertDictionary( 2399 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 2400 ¶ms, dict, dictSize, dictContentType, cctx->entropyWorkspace); 2401 if (ZSTD_isError(dictID)) return dictID; 2402 assert(dictID <= (size_t)(U32)-1); 2403 cctx->dictID = (U32)dictID; 2404 } 2405 return 0; 2406 } 2407 2408 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, 2409 const void* dict, size_t dictSize, 2410 ZSTD_dictContentType_e dictContentType, 2411 const ZSTD_CDict* cdict, 2412 ZSTD_CCtx_params params, 2413 unsigned long long pledgedSrcSize) 2414 { 2415 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); 2416 /* compression parameters verification and optimization */ 2417 CHECK_F( ZSTD_checkCParams(params.cParams) ); 2418 return ZSTD_compressBegin_internal(cctx, 2419 dict, dictSize, dictContentType, 2420 cdict, 2421 params, pledgedSrcSize, 2422 ZSTDb_not_buffered); 2423 } 2424 2425 /*! 
ZSTD_compressBegin_advanced() : 2426 * @return : 0, or an error code */ 2427 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, 2428 const void* dict, size_t dictSize, 2429 ZSTD_parameters params, unsigned long long pledgedSrcSize) 2430 { 2431 ZSTD_CCtx_params const cctxParams = 2432 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 2433 return ZSTD_compressBegin_advanced_internal(cctx, 2434 dict, dictSize, ZSTD_dct_auto, 2435 NULL /*cdict*/, 2436 cctxParams, pledgedSrcSize); 2437 } 2438 2439 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 2440 { 2441 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); 2442 ZSTD_CCtx_params const cctxParams = 2443 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 2444 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize); 2445 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL, 2446 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); 2447 } 2448 2449 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) 2450 { 2451 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); 2452 } 2453 2454 2455 /*! ZSTD_writeEpilogue() : 2456 * Ends a frame. 
2457 * @return : nb of bytes written into dst (or an error code) */ 2458 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) 2459 { 2460 BYTE* const ostart = (BYTE*)dst; 2461 BYTE* op = ostart; 2462 size_t fhSize = 0; 2463 2464 DEBUGLOG(4, "ZSTD_writeEpilogue"); 2465 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ 2466 2467 /* special case : empty frame */ 2468 if (cctx->stage == ZSTDcs_init) { 2469 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); 2470 if (ZSTD_isError(fhSize)) return fhSize; 2471 dstCapacity -= fhSize; 2472 op += fhSize; 2473 cctx->stage = ZSTDcs_ongoing; 2474 } 2475 2476 if (cctx->stage != ZSTDcs_ending) { 2477 /* write one last empty block, make it the "last" block */ 2478 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; 2479 if (dstCapacity<4) return ERROR(dstSize_tooSmall); 2480 MEM_writeLE32(op, cBlockHeader24); 2481 op += ZSTD_blockHeaderSize; 2482 dstCapacity -= ZSTD_blockHeaderSize; 2483 } 2484 2485 if (cctx->appliedParams.fParams.checksumFlag) { 2486 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); 2487 if (dstCapacity<4) return ERROR(dstSize_tooSmall); 2488 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum); 2489 MEM_writeLE32(op, checksum); 2490 op += 4; 2491 } 2492 2493 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ 2494 return op-ostart; 2495 } 2496 2497 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, 2498 void* dst, size_t dstCapacity, 2499 const void* src, size_t srcSize) 2500 { 2501 size_t endResult; 2502 size_t const cSize = ZSTD_compressContinue_internal(cctx, 2503 dst, dstCapacity, src, srcSize, 2504 1 /* frame mode */, 1 /* last chunk */); 2505 if (ZSTD_isError(cSize)) return cSize; 2506 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); 2507 if (ZSTD_isError(endResult)) return endResult; 2508 if (cctx->appliedParams.fParams.contentSizeFlag) { 
/* control src size */ 2509 DEBUGLOG(4, "end of frame : controlling src size"); 2510 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { 2511 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", 2512 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize); 2513 return ERROR(srcSize_wrong); 2514 } } 2515 return cSize + endResult; 2516 } 2517 2518 2519 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, 2520 void* dst, size_t dstCapacity, 2521 const void* src, size_t srcSize, 2522 const void* dict,size_t dictSize, 2523 ZSTD_parameters params) 2524 { 2525 ZSTD_CCtx_params const cctxParams = 2526 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 2527 DEBUGLOG(4, "ZSTD_compress_internal"); 2528 return ZSTD_compress_advanced_internal(cctx, 2529 dst, dstCapacity, 2530 src, srcSize, 2531 dict, dictSize, 2532 cctxParams); 2533 } 2534 2535 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, 2536 void* dst, size_t dstCapacity, 2537 const void* src, size_t srcSize, 2538 const void* dict,size_t dictSize, 2539 ZSTD_parameters params) 2540 { 2541 DEBUGLOG(4, "ZSTD_compress_advanced"); 2542 CHECK_F(ZSTD_checkCParams(params.cParams)); 2543 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); 2544 } 2545 2546 /* Internal */ 2547 size_t ZSTD_compress_advanced_internal( 2548 ZSTD_CCtx* cctx, 2549 void* dst, size_t dstCapacity, 2550 const void* src, size_t srcSize, 2551 const void* dict,size_t dictSize, 2552 ZSTD_CCtx_params params) 2553 { 2554 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", 2555 (U32)srcSize); 2556 CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL, 2557 params, srcSize, ZSTDb_not_buffered) ); 2558 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 2559 } 2560 2561 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, 2562 const void* dict, size_t dictSize, int compressionLevel) 
2563 { 2564 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0); 2565 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); 2566 assert(params.fParams.contentSizeFlag == 1); 2567 ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0); 2568 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); 2569 } 2570 2571 size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) 2572 { 2573 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize); 2574 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); 2575 } 2576 2577 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) 2578 { 2579 size_t result; 2580 ZSTD_CCtx ctxBody; 2581 memset(&ctxBody, 0, sizeof(ctxBody)); 2582 ctxBody.customMem = ZSTD_defaultCMem; 2583 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); 2584 ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */ 2585 return result; 2586 } 2587 2588 2589 /* ===== Dictionary API ===== */ 2590 2591 /*! ZSTD_estimateCDictSize_advanced() : 2592 * Estimate amount of memory that will be needed to create a dictionary with following arguments */ 2593 size_t ZSTD_estimateCDictSize_advanced( 2594 size_t dictSize, ZSTD_compressionParameters cParams, 2595 ZSTD_dictLoadMethod_e dictLoadMethod) 2596 { 2597 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict)); 2598 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) 2599 + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize); 2600 } 2601 2602 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) 2603 { 2604 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 2605 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); 2606 } 2607 2608 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) 2609 { 2610 if (cdict==NULL) return 0; /* support sizeof on NULL */ 2611 DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict)); 2612 return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); 2613 } 2614 2615 static size_t ZSTD_initCDict_internal( 2616 ZSTD_CDict* cdict, 2617 const void* dictBuffer, size_t dictSize, 2618 ZSTD_dictLoadMethod_e dictLoadMethod, 2619 ZSTD_dictContentType_e dictContentType, 2620 ZSTD_compressionParameters cParams) 2621 { 2622 DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType); 2623 assert(!ZSTD_checkCParams(cParams)); 2624 cdict->cParams = cParams; 2625 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { 2626 cdict->dictBuffer = NULL; 2627 cdict->dictContent = dictBuffer; 2628 } else { 2629 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); 2630 cdict->dictBuffer = internalBuffer; 2631 cdict->dictContent = internalBuffer; 2632 if (!internalBuffer) return ERROR(memory_allocation); 2633 memcpy(internalBuffer, dictBuffer, dictSize); 2634 } 2635 cdict->dictContentSize = dictSize; 2636 2637 /* Reset the state to no dictionary */ 2638 ZSTD_reset_compressedBlockState(&cdict->cBlockState); 2639 { void* const end = ZSTD_reset_matchState( 2640 &cdict->matchState, 2641 (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, 2642 &cParams, ZSTDcrp_continue, /* forCCtx */ 0); 2643 assert(end == (char*)cdict->workspace + cdict->workspaceSize); 2644 (void)end; 2645 } 2646 /* (Maybe) load the dictionary 2647 * Skips loading the dictionary if it is <= 8 bytes. 
2648 */ 2649 { ZSTD_CCtx_params params; 2650 memset(¶ms, 0, sizeof(params)); 2651 params.compressionLevel = ZSTD_CLEVEL_DEFAULT; 2652 params.fParams.contentSizeFlag = 1; 2653 params.cParams = cParams; 2654 { size_t const dictID = ZSTD_compress_insertDictionary( 2655 &cdict->cBlockState, &cdict->matchState, ¶ms, 2656 cdict->dictContent, cdict->dictContentSize, 2657 dictContentType, cdict->workspace); 2658 if (ZSTD_isError(dictID)) return dictID; 2659 assert(dictID <= (size_t)(U32)-1); 2660 cdict->dictID = (U32)dictID; 2661 } 2662 } 2663 2664 return 0; 2665 } 2666 2667 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, 2668 ZSTD_dictLoadMethod_e dictLoadMethod, 2669 ZSTD_dictContentType_e dictContentType, 2670 ZSTD_compressionParameters cParams, ZSTD_customMem customMem) 2671 { 2672 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType); 2673 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 2674 2675 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); 2676 size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); 2677 void* const workspace = ZSTD_malloc(workspaceSize, customMem); 2678 2679 if (!cdict || !workspace) { 2680 ZSTD_free(cdict, customMem); 2681 ZSTD_free(workspace, customMem); 2682 return NULL; 2683 } 2684 cdict->customMem = customMem; 2685 cdict->workspace = workspace; 2686 cdict->workspaceSize = workspaceSize; 2687 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 2688 dictBuffer, dictSize, 2689 dictLoadMethod, dictContentType, 2690 cParams) )) { 2691 ZSTD_freeCDict(cdict); 2692 return NULL; 2693 } 2694 2695 return cdict; 2696 } 2697 } 2698 2699 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) 2700 { 2701 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 2702 return ZSTD_createCDict_advanced(dict, dictSize, 2703 ZSTD_dlm_byCopy, ZSTD_dct_auto, 2704 
cParams, ZSTD_defaultCMem); 2705 } 2706 2707 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) 2708 { 2709 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); 2710 return ZSTD_createCDict_advanced(dict, dictSize, 2711 ZSTD_dlm_byRef, ZSTD_dct_auto, 2712 cParams, ZSTD_defaultCMem); 2713 } 2714 2715 size_t ZSTD_freeCDict(ZSTD_CDict* cdict) 2716 { 2717 if (cdict==NULL) return 0; /* support free on NULL */ 2718 { ZSTD_customMem const cMem = cdict->customMem; 2719 ZSTD_free(cdict->workspace, cMem); 2720 ZSTD_free(cdict->dictBuffer, cMem); 2721 ZSTD_free(cdict, cMem); 2722 return 0; 2723 } 2724 } 2725 2726 /*! ZSTD_initStaticCDict_advanced() : 2727 * Generate a digested dictionary in provided memory area. 2728 * workspace: The memory area to emplace the dictionary into. 2729 * Provided pointer must 8-bytes aligned. 2730 * It must outlive dictionary usage. 2731 * workspaceSize: Use ZSTD_estimateCDictSize() 2732 * to determine how large workspace must be. 2733 * cParams : use ZSTD_getCParams() to transform a compression level 2734 * into its relevants cParams. 2735 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) 2736 * Note : there is no corresponding "free" function. 2737 * Since workspace was allocated externally, it must be freed externally. 2738 */ 2739 const ZSTD_CDict* ZSTD_initStaticCDict( 2740 void* workspace, size_t workspaceSize, 2741 const void* dict, size_t dictSize, 2742 ZSTD_dictLoadMethod_e dictLoadMethod, 2743 ZSTD_dictContentType_e dictContentType, 2744 ZSTD_compressionParameters cParams) 2745 { 2746 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); 2747 size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize) 2748 + HUF_WORKSPACE_SIZE + matchStateSize; 2749 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; 2750 void* ptr; 2751 if ((size_t)workspace & 7) return NULL; /* 8-aligned */ 2752 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", 2753 (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize)); 2754 if (workspaceSize < neededSize) return NULL; 2755 2756 if (dictLoadMethod == ZSTD_dlm_byCopy) { 2757 memcpy(cdict+1, dict, dictSize); 2758 dict = cdict+1; 2759 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; 2760 } else { 2761 ptr = cdict+1; 2762 } 2763 cdict->workspace = ptr; 2764 cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; 2765 2766 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 2767 dict, dictSize, 2768 ZSTD_dlm_byRef, dictContentType, 2769 cParams) )) 2770 return NULL; 2771 2772 return cdict; 2773 } 2774 2775 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) 2776 { 2777 assert(cdict != NULL); 2778 return cdict->cParams; 2779 } 2780 2781 /* ZSTD_compressBegin_usingCDict_advanced() : 2782 * cdict must be != NULL */ 2783 size_t ZSTD_compressBegin_usingCDict_advanced( 2784 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, 2785 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) 2786 { 2787 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); 2788 if (cdict==NULL) return ERROR(dictionary_wrong); 2789 { ZSTD_CCtx_params params = cctx->requestedParams; 2790 params.cParams = ZSTD_getCParamsFromCDict(cdict); 2791 /* Increase window log to fit the entire dictionary and source if the 2792 * source size is known. Limit the increase to 19, which is the 2793 * window log for compression level 1 with the largest source size. 2794 */ 2795 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { 2796 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); 2797 U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; 2798 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); 2799 } 2800 params.fParams = fParams; 2801 return ZSTD_compressBegin_internal(cctx, 2802 NULL, 0, ZSTD_dct_auto, 2803 cdict, 2804 params, pledgedSrcSize, 2805 ZSTDb_not_buffered); 2806 } 2807 } 2808 2809 /* ZSTD_compressBegin_usingCDict() : 2810 * pledgedSrcSize=0 means "unknown" 2811 * if pledgedSrcSize>0, it will enable contentSizeFlag */ 2812 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 2813 { 2814 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 2815 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); 2816 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0); 2817 } 2818 2819 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 2820 void* dst, size_t dstCapacity, 2821 const void* src, size_t srcSize, 2822 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 2823 { 2824 CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ 2825 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 2826 } 2827 2828 /*! ZSTD_compress_usingCDict() : 2829 * Compression using a digested Dictionary. 2830 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
2831 * Note that compression parameters are decided at CDict creation time 2832 * while frame parameters are hardcoded */ 2833 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, 2834 void* dst, size_t dstCapacity, 2835 const void* src, size_t srcSize, 2836 const ZSTD_CDict* cdict) 2837 { 2838 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 2839 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); 2840 } 2841 2842 2843 2844 /* ****************************************************************** 2845 * Streaming 2846 ********************************************************************/ 2847 2848 ZSTD_CStream* ZSTD_createCStream(void) 2849 { 2850 DEBUGLOG(3, "ZSTD_createCStream"); 2851 return ZSTD_createCStream_advanced(ZSTD_defaultCMem); 2852 } 2853 2854 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) 2855 { 2856 return ZSTD_initStaticCCtx(workspace, workspaceSize); 2857 } 2858 2859 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) 2860 { /* CStream and CCtx are now same object */ 2861 return ZSTD_createCCtx_advanced(customMem); 2862 } 2863 2864 size_t ZSTD_freeCStream(ZSTD_CStream* zcs) 2865 { 2866 return ZSTD_freeCCtx(zcs); /* same object */ 2867 } 2868 2869 2870 2871 /*====== Initialization ======*/ 2872 2873 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } 2874 2875 size_t ZSTD_CStreamOutSize(void) 2876 { 2877 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; 2878 } 2879 2880 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, 2881 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, 2882 const ZSTD_CDict* const cdict, 2883 ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) 2884 { 2885 DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)", 2886 params.disableLiteralCompression); 2887 /* params are 
supposed to be fully validated at this point */ 2888 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 2889 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 2890 2891 CHECK_F( ZSTD_compressBegin_internal(cctx, 2892 dict, dictSize, dictContentType, 2893 cdict, 2894 params, pledgedSrcSize, 2895 ZSTDb_buffered) ); 2896 2897 cctx->inToCompress = 0; 2898 cctx->inBuffPos = 0; 2899 cctx->inBuffTarget = cctx->blockSize 2900 + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ 2901 cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; 2902 cctx->streamStage = zcss_load; 2903 cctx->frameEnded = 0; 2904 return 0; /* ready to go */ 2905 } 2906 2907 /* ZSTD_resetCStream(): 2908 * pledgedSrcSize == 0 means "unknown" */ 2909 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) 2910 { 2911 ZSTD_CCtx_params params = zcs->requestedParams; 2912 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize); 2913 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; 2914 params.fParams.contentSizeFlag = 1; 2915 params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, 0); 2916 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); 2917 } 2918 2919 /*! ZSTD_initCStream_internal() : 2920 * Note : for lib/compress only. Used by zstdmt_compress.c. 
2921 * Assumption 1 : params are valid 2922 * Assumption 2 : either dict, or cdict, is defined, not both */ 2923 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 2924 const void* dict, size_t dictSize, const ZSTD_CDict* cdict, 2925 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) 2926 { 2927 DEBUGLOG(4, "ZSTD_initCStream_internal"); 2928 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); 2929 assert(!((dict) && (cdict))); /* either dict or cdict, not both */ 2930 2931 if (dict && dictSize >= 8) { 2932 DEBUGLOG(4, "loading dictionary of size %u", (U32)dictSize); 2933 if (zcs->staticSize) { /* static CCtx : never uses malloc */ 2934 /* incompatible with internal cdict creation */ 2935 return ERROR(memory_allocation); 2936 } 2937 ZSTD_freeCDict(zcs->cdictLocal); 2938 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 2939 ZSTD_dlm_byCopy, ZSTD_dct_auto, 2940 params.cParams, zcs->customMem); 2941 zcs->cdict = zcs->cdictLocal; 2942 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); 2943 } else { 2944 if (cdict) { 2945 params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ 2946 } 2947 ZSTD_freeCDict(zcs->cdictLocal); 2948 zcs->cdictLocal = NULL; 2949 zcs->cdict = cdict; 2950 } 2951 2952 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); 2953 } 2954 2955 /* ZSTD_initCStream_usingCDict_advanced() : 2956 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ 2957 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, 2958 const ZSTD_CDict* cdict, 2959 ZSTD_frameParameters fParams, 2960 unsigned long long pledgedSrcSize) 2961 { 2962 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); 2963 if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ 2964 { ZSTD_CCtx_params params = zcs->requestedParams; 2965 params.cParams = ZSTD_getCParamsFromCDict(cdict); 
2966 params.fParams = fParams; 2967 return ZSTD_initCStream_internal(zcs, 2968 NULL, 0, cdict, 2969 params, pledgedSrcSize); 2970 } 2971 } 2972 2973 /* note : cdict must outlive compression session */ 2974 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) 2975 { 2976 ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; 2977 DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); 2978 return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ 2979 } 2980 2981 2982 /* ZSTD_initCStream_advanced() : 2983 * pledgedSrcSize must be exact. 2984 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 2985 * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ 2986 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, 2987 const void* dict, size_t dictSize, 2988 ZSTD_parameters params, unsigned long long pledgedSrcSize) 2989 { 2990 DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", 2991 (U32)pledgedSrcSize, params.fParams.contentSizeFlag); 2992 CHECK_F( ZSTD_checkCParams(params.cParams) ); 2993 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. 
*/ 2994 { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); 2995 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); 2996 } 2997 } 2998 2999 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) 3000 { 3001 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); 3002 ZSTD_CCtx_params const cctxParams = 3003 ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); 3004 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); 3005 } 3006 3007 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) 3008 { 3009 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ 3010 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); 3011 ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); 3012 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize); 3013 } 3014 3015 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) 3016 { 3017 DEBUGLOG(4, "ZSTD_initCStream"); 3018 return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); 3019 } 3020 3021 /*====== Compression ======*/ 3022 3023 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, 3024 const void* src, size_t srcSize) 3025 { 3026 size_t const length = MIN(dstCapacity, srcSize); 3027 if (length) memcpy(dst, src, length); 3028 return length; 3029 } 3030 3031 /** ZSTD_compressStream_generic(): 3032 * internal function for all *compressStream*() variants and *compress_generic() 3033 * non-static, because can be called from 
zstdmt_compress.c 3034 * @return : hint size for next input */ 3035 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, 3036 ZSTD_outBuffer* output, 3037 ZSTD_inBuffer* input, 3038 ZSTD_EndDirective const flushMode) 3039 { 3040 const char* const istart = (const char*)input->src; 3041 const char* const iend = istart + input->size; 3042 const char* ip = istart + input->pos; 3043 char* const ostart = (char*)output->dst; 3044 char* const oend = ostart + output->size; 3045 char* op = ostart + output->pos; 3046 U32 someMoreWork = 1; 3047 3048 /* check expectations */ 3049 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode); 3050 assert(zcs->inBuff != NULL); 3051 assert(zcs->inBuffSize > 0); 3052 assert(zcs->outBuff != NULL); 3053 assert(zcs->outBuffSize > 0); 3054 assert(output->pos <= output->size); 3055 assert(input->pos <= input->size); 3056 3057 while (someMoreWork) { 3058 switch(zcs->streamStage) 3059 { 3060 case zcss_init: 3061 /* call ZSTD_initCStream() first ! */ 3062 return ERROR(init_missing); 3063 3064 case zcss_load: 3065 if ( (flushMode == ZSTD_e_end) 3066 && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ 3067 && (zcs->inBuffPos == 0) ) { 3068 /* shortcut to compression pass directly into output buffer */ 3069 size_t const cSize = ZSTD_compressEnd(zcs, 3070 op, oend-op, ip, iend-ip); 3071 DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize); 3072 if (ZSTD_isError(cSize)) return cSize; 3073 ip = iend; 3074 op += cSize; 3075 zcs->frameEnded = 1; 3076 ZSTD_startNewCompression(zcs); 3077 someMoreWork = 0; break; 3078 } 3079 /* complete loading into inBuffer */ 3080 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; 3081 size_t const loaded = ZSTD_limitCopy( 3082 zcs->inBuff + zcs->inBuffPos, toLoad, 3083 ip, iend-ip); 3084 zcs->inBuffPos += loaded; 3085 ip += loaded; 3086 if ( (flushMode == ZSTD_e_continue) 3087 && (zcs->inBuffPos < zcs->inBuffTarget) ) { 3088 /* not enough input to fill full block : stop here 
*/ 3089 someMoreWork = 0; break; 3090 } 3091 if ( (flushMode == ZSTD_e_flush) 3092 && (zcs->inBuffPos == zcs->inToCompress) ) { 3093 /* empty */ 3094 someMoreWork = 0; break; 3095 } 3096 } 3097 /* compress current block (note : this stage cannot be stopped in the middle) */ 3098 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); 3099 { void* cDst; 3100 size_t cSize; 3101 size_t const iSize = zcs->inBuffPos - zcs->inToCompress; 3102 size_t oSize = oend-op; 3103 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); 3104 if (oSize >= ZSTD_compressBound(iSize)) 3105 cDst = op; /* compress into output buffer, to skip flush stage */ 3106 else 3107 cDst = zcs->outBuff, oSize = zcs->outBuffSize; 3108 cSize = lastBlock ? 3109 ZSTD_compressEnd(zcs, cDst, oSize, 3110 zcs->inBuff + zcs->inToCompress, iSize) : 3111 ZSTD_compressContinue(zcs, cDst, oSize, 3112 zcs->inBuff + zcs->inToCompress, iSize); 3113 if (ZSTD_isError(cSize)) return cSize; 3114 zcs->frameEnded = lastBlock; 3115 /* prepare next block */ 3116 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 3117 if (zcs->inBuffTarget > zcs->inBuffSize) 3118 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 3119 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 3120 (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize); 3121 if (!lastBlock) 3122 assert(zcs->inBuffTarget <= zcs->inBuffSize); 3123 zcs->inToCompress = zcs->inBuffPos; 3124 if (cDst == op) { /* no need to flush */ 3125 op += cSize; 3126 if (zcs->frameEnded) { 3127 DEBUGLOG(5, "Frame completed directly in outBuffer"); 3128 someMoreWork = 0; 3129 ZSTD_startNewCompression(zcs); 3130 } 3131 break; 3132 } 3133 zcs->outBuffContentSize = cSize; 3134 zcs->outBuffFlushedSize = 0; 3135 zcs->streamStage = zcss_flush; /* pass-through to flush stage */ 3136 } 3137 /* fall-through */ 3138 case zcss_flush: 3139 DEBUGLOG(5, "flush stage"); 3140 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; 3141 size_t const flushed = 
ZSTD_limitCopy(op, oend-op, 3142 zcs->outBuff + zcs->outBuffFlushedSize, toFlush); 3143 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", 3144 (U32)toFlush, (U32)(oend-op), (U32)flushed); 3145 op += flushed; 3146 zcs->outBuffFlushedSize += flushed; 3147 if (toFlush!=flushed) { 3148 /* flush not fully completed, presumably because dst is too small */ 3149 assert(op==oend); 3150 someMoreWork = 0; 3151 break; 3152 } 3153 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; 3154 if (zcs->frameEnded) { 3155 DEBUGLOG(5, "Frame completed on flush"); 3156 someMoreWork = 0; 3157 ZSTD_startNewCompression(zcs); 3158 break; 3159 } 3160 zcs->streamStage = zcss_load; 3161 break; 3162 } 3163 3164 default: /* impossible */ 3165 assert(0); 3166 } 3167 } 3168 3169 input->pos = ip - istart; 3170 output->pos = op - ostart; 3171 if (zcs->frameEnded) return 0; 3172 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; 3173 if (hintInSize==0) hintInSize = zcs->blockSize; 3174 return hintInSize; 3175 } 3176 } 3177 3178 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 3179 { 3180 /* check conditions */ 3181 if (output->pos > output->size) return ERROR(GENERIC); 3182 if (input->pos > input->size) return ERROR(GENERIC); 3183 3184 return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue); 3185 } 3186 3187 3188 size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, 3189 ZSTD_outBuffer* output, 3190 ZSTD_inBuffer* input, 3191 ZSTD_EndDirective endOp) 3192 { 3193 DEBUGLOG(5, "ZSTD_compress_generic, endOp=%u ", (U32)endOp); 3194 /* check conditions */ 3195 if (output->pos > output->size) return ERROR(GENERIC); 3196 if (input->pos > input->size) return ERROR(GENERIC); 3197 assert(cctx!=NULL); 3198 3199 /* transparent initialization stage */ 3200 if (cctx->streamStage == zcss_init) { 3201 ZSTD_CCtx_params params = cctx->requestedParams; 3202 ZSTD_prefixDict const prefixDict = cctx->prefixDict; 3203 memset(&cctx->prefixDict, 0, 
sizeof(cctx->prefixDict)); /* single usage */ 3204 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ 3205 DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); 3206 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ 3207 params.cParams = ZSTD_getCParamsFromCCtxParams( 3208 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); 3209 3210 #ifdef ZSTD_MULTITHREAD 3211 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { 3212 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ 3213 } 3214 if (params.nbWorkers > 0) { 3215 /* mt context creation */ 3216 if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) { 3217 DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u", 3218 params.nbWorkers); 3219 if (cctx->mtctx != NULL) 3220 DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u", 3221 ZSTDMT_getNbWorkers(cctx->mtctx)); 3222 ZSTDMT_freeCCtx(cctx->mtctx); 3223 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); 3224 if (cctx->mtctx == NULL) return ERROR(memory_allocation); 3225 } 3226 /* mt compression */ 3227 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); 3228 CHECK_F( ZSTDMT_initCStream_internal( 3229 cctx->mtctx, 3230 prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, 3231 cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); 3232 cctx->streamStage = zcss_load; 3233 cctx->appliedParams.nbWorkers = params.nbWorkers; 3234 } else 3235 #endif 3236 { CHECK_F( ZSTD_resetCStream_internal(cctx, 3237 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, 3238 cctx->cdict, 3239 params, cctx->pledgedSrcSizePlusOne-1) ); 3240 assert(cctx->streamStage == zcss_load); 3241 assert(cctx->appliedParams.nbWorkers == 0); 3242 } } 3243 3244 /* compression stage */ 3245 #ifdef ZSTD_MULTITHREAD 3246 if 
(cctx->appliedParams.nbWorkers > 0) { 3247 if (cctx->cParamsChanged) { 3248 ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); 3249 cctx->cParamsChanged = 0; 3250 } 3251 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); 3252 if ( ZSTD_isError(flushMin) 3253 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ 3254 ZSTD_startNewCompression(cctx); 3255 } 3256 return flushMin; 3257 } } 3258 #endif 3259 CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) ); 3260 DEBUGLOG(5, "completed ZSTD_compress_generic"); 3261 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ 3262 } 3263 3264 size_t ZSTD_compress_generic_simpleArgs ( 3265 ZSTD_CCtx* cctx, 3266 void* dst, size_t dstCapacity, size_t* dstPos, 3267 const void* src, size_t srcSize, size_t* srcPos, 3268 ZSTD_EndDirective endOp) 3269 { 3270 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; 3271 ZSTD_inBuffer input = { src, srcSize, *srcPos }; 3272 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ 3273 size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp); 3274 *dstPos = output.pos; 3275 *srcPos = input.pos; 3276 return cErr; 3277 } 3278 3279 3280 /*====== Finalize ======*/ 3281 3282 /*! 
ZSTD_flushStream() : 3283 * @return : amount of data remaining to flush */ 3284 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 3285 { 3286 ZSTD_inBuffer input = { NULL, 0, 0 }; 3287 if (output->pos > output->size) return ERROR(GENERIC); 3288 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) ); 3289 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ 3290 } 3291 3292 3293 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 3294 { 3295 ZSTD_inBuffer input = { NULL, 0, 0 }; 3296 if (output->pos > output->size) return ERROR(GENERIC); 3297 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) ); 3298 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; 3299 size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; 3300 size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize; 3301 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush); 3302 return toFlush; 3303 } 3304 } 3305 3306 3307 /*-===== Pre-defined compression levels =====-*/ 3308 3309 #define ZSTD_MAX_CLEVEL 22 3310 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } 3311 3312 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { 3313 { /* "default" - guarantees a monotonically increasing memory budget */ 3314 /* W, C, H, S, L, TL, strat */ 3315 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ 3316 { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */ 3317 { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */ 3318 { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */ 3319 { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */ 3320 { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */ 3321 { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ 3322 { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */ 3323 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ 3324 
/*-=====  Pre-defined compression levels  =====-*/

/* Highest compression level; also the last valid row index of each table below. */
#define ZSTD_MAX_CLEVEL     22
/*! ZSTD_maxCLevel() :
 * @return : ZSTD_MAX_CLEVEL */
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }

/* ZSTD_defaultCParameters[tableID][row] :
 * 4 tables of pre-defined parameters, one per source size class,
 * each holding ZSTD_MAX_CLEVEL+1 rows.
 * As used by ZSTD_getCParams() :
 * - tableID counts how many of the size thresholds (256 KB, 128 KB, 16 KB) the estimated size fits in,
 *   so table 0 serves large or unknown sizes and table 3 the smallest ones;
 * - row is the compression level; row 0 is the baseline for negative levels, in every table.
 * Column letters presumably abbreviate the fields of ZSTD_compressionParameters
 * in declaration order (windowLog, chainLog, hashLog, searchLog, searchLength,
 * targetLength, strategy) - TODO confirm against zstd.h */
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - guarantees a monotonically increasing memory budget */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  1, ZSTD_fast    },  /* level  1 */
    { 19, 15, 16,  1,  6,  1, ZSTD_fast    },  /* level  2 */
    { 20, 16, 17,  1,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 20, 17, 18,  1,  5,  8, ZSTD_dfast   },  /* level  4 */
    { 20, 17, 18,  2,  5, 16, ZSTD_greedy  },  /* level  5 */
    { 21, 17, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
    { 21, 18, 19,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
    { 21, 18, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 19, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 22, 22,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 21, 22,  4,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 22, 22,  4,  4, 48, ZSTD_btopt   },  /* level 17 */
    { 23, 22, 22,  5,  3, 64, ZSTD_btopt   },  /* level 18 */
    { 23, 23, 22,  7,  3,128, ZSTD_btopt   },  /* level 19 */
    { 25, 25, 23,  7,  3,128, ZSTD_btultra },  /* level 20 */
    { 26, 26, 24,  7,  3,256, ZSTD_btultra },  /* level 21 */
    { 27, 27, 25,  9,  3,512, ZSTD_btultra },  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  1, ZSTD_fast    },  /* level  1 */
    { 18, 14, 13,  1,  5,  8, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 15,  1,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 18, 15, 17,  1,  5,  8, ZSTD_greedy  },  /* level  4.*/
    { 18, 16, 17,  4,  5,  8, ZSTD_greedy  },  /* level  5.*/
    { 18, 16, 17,  3,  5,  8, ZSTD_lazy    },  /* level  6.*/
    { 18, 17, 17,  4,  4,  8, ZSTD_lazy    },  /* level  7 */
    { 18, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 11.*/
    { 18, 18, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 19, 17,  7,  4,  8, ZSTD_btlazy2 },  /* level 13 */
    { 18, 18, 18,  4,  4, 16, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 18,  4,  3, 16, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 18,  6,  3, 32, ZSTD_btopt   },  /* level 16.*/
    { 18, 19, 18,  8,  3, 64, ZSTD_btopt   },  /* level 17.*/
    { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
    { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 18, 19, 18, 11,  3,512, ZSTD_btultra },  /* level 20.*/
    { 18, 19, 18, 12,  3,512, ZSTD_btultra },  /* level 21.*/
    { 18, 19, 18, 13,  3,512, ZSTD_btultra },  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  1, ZSTD_fast    },  /* level  1 */
    { 17, 13, 16,  1,  5,  1, ZSTD_fast    },  /* level  2 */
    { 17, 16, 16,  2,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 17, 13, 15,  3,  4,  8, ZSTD_greedy  },  /* level  4 */
    { 17, 15, 17,  4,  4,  8, ZSTD_greedy  },  /* level  5 */
    { 17, 16, 17,  3,  4,  8, ZSTD_lazy    },  /* level  6 */
    { 17, 15, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  7,  4,  8, ZSTD_lazy2   },  /* level 11 */
    { 17, 17, 17,  8,  4,  8, ZSTD_lazy2   },  /* level 12 */
    { 17, 18, 17,  6,  4,  8, ZSTD_btlazy2 },  /* level 13.*/
    { 17, 17, 17,  7,  3,  8, ZSTD_btopt   },  /* level 14.*/
    { 17, 17, 17,  7,  3, 16, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  7,  3, 32, ZSTD_btopt   },  /* level 16.*/
    { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
    { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 17, 18, 17,  9,  3,256, ZSTD_btultra },  /* level 20.*/
    { 17, 18, 17, 10,  3,256, ZSTD_btultra },  /* level 21.*/
    { 17, 18, 17, 11,  3,512, ZSTD_btultra },  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 14,  1,  6,  1, ZSTD_fast    },  /* level  1 */
    { 14, 14, 14,  1,  4,  1, ZSTD_fast    },  /* level  2 */
    { 14, 14, 14,  1,  4,  6, ZSTD_dfast   },  /* level  3.*/
    { 14, 14, 14,  4,  4,  6, ZSTD_greedy  },  /* level  4.*/
    { 14, 14, 14,  3,  4,  6, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  6, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  5,  4,  6, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  6,  4,  6, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  6,  4,  6, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  3,  3,  6, ZSTD_btopt   },  /* level 10.*/
    { 14, 15, 14,  6,  3,  8, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  6,  3, 16, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  6,  3, 24, ZSTD_btopt   },  /* level 13.*/
    { 14, 15, 15,  6,  3, 48, ZSTD_btopt   },  /* level 14.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btopt   },  /* level 15.*/
    { 14, 15, 15,  6,  3, 96, ZSTD_btopt   },  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
    { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra },  /* level 20.*/
    { 14, 15, 15,  9,  3,256, ZSTD_btultra },  /* level 21.*/
    { 14, 15, 15, 10,  3,256, ZSTD_btultra },  /* level 22.*/
},
};
/* level 4.*/ 3398 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ 3399 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */ 3400 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */ 3401 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/ 3402 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/ 3403 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ 3404 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ 3405 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ 3406 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ 3407 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ 3408 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ 3409 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ 3410 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ 3411 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ 3412 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ 3413 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/ 3414 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/ 3415 { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/ 3416 }, 3417 }; 3418 3419 /*! ZSTD_getCParams() : 3420 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 3421 * Size values are optional, provide 0 if not known or unused */ 3422 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) 3423 { 3424 size_t const addedSize = srcSizeHint ? 0 : 500; 3425 U64 const rSize = srcSizeHint+dictSize ? 
srcSizeHint+dictSize+addedSize : (U64)-1; 3426 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ 3427 int row = compressionLevel; 3428 DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); 3429 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ 3430 if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ 3431 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; 3432 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; 3433 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ 3434 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } 3435 3436 } 3437 3438 /*! ZSTD_getParams() : 3439 * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). 3440 * All fields of `ZSTD_frameParameters` are set to default (0) */ 3441 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { 3442 ZSTD_parameters params; 3443 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); 3444 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); 3445 memset(¶ms, 0, sizeof(params)); 3446 params.cParams = cParams; 3447 params.fParams.contentSizeFlag = 1; 3448 return params; 3449 } 3450