/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
#  include "zstdmt_compress.h"
#endif

#if defined (__cplusplus)
extern "C" {
#endif

/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
/***********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, of, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize;  /* This is to account for a bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy statistics for the block.
 *  @return : 0 on success, or an error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);
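/* Illustrative sketch (not part of the library) : one way a caller inside lib/compress
 * might use ZSTD_buildBlockEntropyStats() and then consult the returned metadata.
 * The helper name is hypothetical; the arguments are assumed to have been prepared by
 * the compression context, and the workspace must be large enough for entropy building. */
MEM_STATIC size_t ZSTD_example_buildEntropyStats(seqStore_t* seqStorePtr,
                                           const ZSTD_entropyCTables_t* prevEntropy,
                                                 ZSTD_entropyCTables_t* nextEntropy,
                                           const ZSTD_CCtx_params* cctxParams,
                                                 ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                                 void* workspace, size_t wkspSize)
{
    size_t const err = ZSTD_buildBlockEntropyStats(seqStorePtr, prevEntropy, nextEntropy,
                                                   cctxParams, entropyMetadata,
                                                   workspace, wkspSize);
    if (ZSTD_isError(err)) return err;
    /* entropyMetadata->hufMetadata.hType now says whether literals would be stored raw
     * (set_basic), as RLE (set_rle), by re-using previous tables (set_repeat), or
     * compressed with the freshly built table (set_compressed). */
    return 0;
}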
/*********************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
    rawSeq* seq;          /* The start of the sequences */
    size_t pos;           /* The index in seq where reading stopped. pos <= size. */
    size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                             stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
    size_t size;          /* The number of sequences. <= capacity. */
    size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32 litSum;                  /* nb of literals */
    U32 litLengthSum;            /* nb of litLength codes */
    U32 matchLengthSum;          /* nb of matchLength codes */
    U32 offCodeSum;              /* nb of offset codes */
    U32 litSumBasePrice;         /* to compare to log2(litfreq) */
    U32 litLengthSumBasePrice;   /* to compare to log2(llfreq) */
    U32 matchLengthSumBasePrice; /* to compare to log2(mlfreq) */
    U32 offCodeSumBasePrice;     /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
    ZSTD_entropyCTables_t entropy;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2
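/* Illustrative sketch (not part of the library) : how the fields of ZSTD_window_t are
 * meant to be read. Match indexes are expressed relative to `base`; an index below
 * `dictLimit` refers to a byte of the extDict segment (addressed from `dictBase`),
 * while a larger index refers to the current prefix (addressed from `base`).
 * The helper name is hypothetical; `index` is assumed to be >= window->lowLimit. */
MEM_STATIC const BYTE* ZSTD_example_indexToPtr(const ZSTD_window_t* window, U32 index)
{
    return (index < window->dictLimit) ? window->dictBase + index
                                       : window->base + index;
}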
typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8   /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;
struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;    /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;              /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;  /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;    /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
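/* Illustrative sketch (not part of the library) : how lib/compress typically selects and
 * invokes a block compressor. The returned function appends sequences into `seqStore`
 * and returns the number of trailing literals that did not form a match ("last literals").
 * The helper name is hypothetical; the match state, seqStore and repcode array are
 * assumed to have been prepared by the compression context for the current block. */
MEM_STATIC size_t ZSTD_example_compressOneBlock(ZSTD_matchState_t* ms,
                                                seqStore_t* seqStore,
                                                U32 rep[ZSTD_REP_NUM],
                                                const void* src, size_t srcSize,
                                                ZSTD_strategy strategy,
                                                ZSTD_paramSwitch_e rowMatchFinderMode,
                                                ZSTD_dictMode_e dictMode)
{
    ZSTD_blockCompressor const blockCompressor =
            ZSTD_selectBlockCompressor(strategy, rowMatchFinderMode, dictMode);
    /* size of the literals left over after the last stored match */
    size_t const lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
    return lastLLSize;
}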
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH,
 * because that's the format stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
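/* Worked example (illustrative, not part of the library) :
 * - ZSTD_LLcode(12)  == 12 : literal lengths 0..15 map to their own code.
 * - ZSTD_LLcode(100) == 25 : 100 > 63, so the code is ZSTD_highbit32(100) + 19 = 6 + 19.
 * - ZSTD_MLcode(97)  == 42 : for a match of length 100, mlBase = 100 - MINMATCH = 97,
 *                            which falls in the last bucket of ML_Code.
 * The hypothetical helper below just re-states those identities as assertions. */
MEM_STATIC void ZSTD_example_lengthCodes(void)
{
    assert(ZSTD_LLcode(12) == 12);
    assert(ZSTD_LLcode(100) == 25);
    assert(ZSTD_MLcode(97) == 42);
}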
/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)   /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
 *  @offBase_minus1 : Users should employ the macros STORE_REPCODE_X and STORE_OFFSET().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to overread literals up to litLimit.
 */
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase_minus1,
              size_t matchLength)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
                 pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);

    /* match Length */
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (mlBase>0xFFFF) {
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}
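/* Illustrative sketch (not part of the library) : how a block compressor records
 * sequences. A fresh offset is stored through STORE_OFFSET(), a repcode through
 * STORE_REPCODE_1/2/3. The helper name is hypothetical; `seqStore` is assumed to have
 * room for both sequences and `litLimit` to allow the permitted literal over-read. */
MEM_STATIC void ZSTD_example_storeTwoSequences(seqStore_t* seqStore,
                                               const BYTE* literals, const BYTE* litLimit)
{
    /* 5 literals, then a 9-byte match at a fresh offset of 1024 bytes */
    ZSTD_storeSeq(seqStore, 5, literals, litLimit, STORE_OFFSET(1024), 9);
    /* no literals, then a 4-byte match re-using the most recent offset (repcode 1) */
    ZSTD_storeSeq(seqStore, 0, literals + 5 + 9, litLimit, STORE_REPCODE_1, 4);
}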
/* ZSTD_updateRep() :
 * updates in-place @rep (array of repeat offsets)
 * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
 */
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = STORED_OFFSET(offBase_minus1);
    } else {   /* repcode */
        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            /* nothing to do */
        }
    }
}

typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    repcodes_t newReps;
    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
    return newReps;
}
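/* Worked example (illustrative, not part of the library) : repcode history updates.
 * Starting from rep = {8, 16, 24} :
 * - storing a fresh offset of 100 (STORE_OFFSET(100)) shifts the history to {100, 8, 16};
 * - storing repcode 1 with litLength != 0 (ll0 == 0) leaves it unchanged at {8, 16, 24};
 * - storing repcode 1 with litLength == 0 (ll0 == 1) actually selects repcode 2,
 *   giving {16, 8, 24}.
 * The hypothetical helper below re-states the first case. */
MEM_STATIC repcodes_t ZSTD_example_pushNewOffset(void)
{
    U32 const rep[ZSTD_REP_NUM] = { 8, 16, 24 };
    return ZSTD_newRep(rep, STORE_OFFSET(100), 0 /* ll0 */);   /* == {100, 8, 16} */
}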
/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _tzcnt_u64(val) >> 3;
#           else
                if (val != 0) {
                    unsigned long r;
                    _BitScanForward64(&r, (U64)val);
                    return (unsigned)(r >> 3);
                } else {
                    /* Should not reach this code path */
                    __assume(0);
                }
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanForward(&r, (U32)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _lzcnt_u64(val) >> 3;
#           else
                if (val != 0) {
                    unsigned long r;
                    _BitScanReverse64(&r, (U64)val);
                    return (unsigned)(r >> 3);
                } else {
                    /* Should not reach this code path */
                    __assume(0);
                }
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanReverse(&r, (unsigned long)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}

/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}
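/* Illustrative sketch (not part of the library) : measuring a match that starts in the
 * extDict segment. If the match runs up to dictEnd, counting continues from prefixStart,
 * because the prefix immediately follows the dictionary in index space. The helper name
 * is hypothetical; all pointers are assumed to come from the same window. */
MEM_STATIC size_t ZSTD_example_matchLengthExtDict(const BYTE* ip, const BYTE* iEnd,
                                                  const BYTE* match, const BYTE* dictEnd,
                                                  const BYTE* prefixStart)
{
    /* within a single segment, a plain ZSTD_count(ip, match, iEnd) would suffice */
    return ZSTD_count_2segments(ip, match, iEnd, dictEnd, prefixStart);
}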
/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
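/* Illustrative sketch (not part of the library) : filling a hash table. A table of
 * (1 << hBits) entries is indexed by ZSTD_hashPtr(), which hashes `mls` bytes (4..8)
 * starting at `ip`. The helper name is hypothetical; `hashTable` is assumed to hold
 * (1 << hBits) entries and `ip` to point inside the window described by `base`. */
MEM_STATIC void ZSTD_example_insertPosition(U32* hashTable, U32 hBits, U32 mls,
                                            const BYTE* base, const BYTE* ip)
{
    size_t const h = ZSTD_hashPtr(ip, hBits, mls);   /* h < (1 << hBits) */
    hashTable[h] = (U32)(ip - base);                 /* store the position as a window index */
}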
/** ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
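/* Illustrative sketch (not part of the library) : maintaining a rolling hash over a
 * fixed-size window, as the long-distance matcher does. After priming the hash on the
 * first `windowSize` bytes, each step removes the oldest byte and adds the next one;
 * the result equals a from-scratch ZSTD_rollingHash_compute() over the shifted window.
 * The helper name is hypothetical; `buf` is assumed to hold at least windowSize+1 bytes. */
MEM_STATIC U64 ZSTD_example_rollOneByte(const BYTE* buf, size_t windowSize)
{
    U64 const primePower = ZSTD_rollingHash_primePower((U32)windowSize);
    U64 hash = ZSTD_rollingHash_compute(buf, windowSize);   /* hash of buf[0..windowSize-1] */
    hash = ZSTD_rollingHash_rotate(hash, buf[0], buf[windowSize], primePower);
    /* hash now equals ZSTD_rollingHash_compute(buf+1, windowSize) */
    return hash;
}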
/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}

/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
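/* Illustrative sketch (not part of the library) : how the overflow-correction helpers fit
 * together. Before compressing a block, the caller checks whether indexes are close to
 * overflowing; if so, it rescales the window and must then apply the returned correction
 * to every stored index (hash tables, chain tables, nextToUpdate, ...). The helper name
 * is hypothetical and the table-rescaling step is only indicated by a comment, since it
 * lives in the per-strategy code. */
MEM_STATIC void ZSTD_example_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                                     U32 cycleLog, U32 maxDist,
                                                     const void* ip, const void* iend)
{
    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist,
                                           ms->loadedDictEnd, ip, iend)) {
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        /* every index stored in the match-finding tables must now be reduced by `correction` */
        ms->nextToUpdate = (ms->nextToUpdate < correction) ? 0 : ms->nextToUpdate - correction;
    }
}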
/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;               /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;                /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;  /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the previous extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
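/* Illustrative sketch (not part of the library) : the window lifecycle across inputs.
 * The window is initialized once, then extended with each new chunk of input; the
 * current index of any position is simply its distance from `base`. The helper name
 * is hypothetical; `src` is assumed to stay valid while the window references it. */
MEM_STATIC U32 ZSTD_example_windowLifecycle(ZSTD_window_t* window,
                                            const void* src, size_t srcSize)
{
    ZSTD_window_init(window);                      /* indexes start at ZSTD_WINDOW_START_INDEX */
    ZSTD_window_update(window, src, srcSize, 0);   /* returns non-zero if the segment is contiguous */
    return (U32)((const BYTE*)src + srcSize - window->base);   /* index of the end of the input */
}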
/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.dictLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
/* Begin FreeBSD - This symbol is needed by dll-linked CLI zstd(1). */
ZSTDLIB_API
/* End FreeBSD */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/** ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */