/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif

#if defined (__cplusplus)
extern "C" {
#endif


/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    U32 CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

typedef struct {
    U32 off;
    U32 len;
} ZSTD_match_t;

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;

typedef struct {
    ZSTD_entropyCTables_t entropy;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;    /* next block here to continue on current prefix */
    BYTE const* base;       /* All regular indexes relative to this position */
    BYTE const* dictBase;   /* extDict indexes relative to this position */
    U32 dictLimit;          /* below that point, need extDict */
    U32 lowLimit;           /* below that point, no more valid data */
} ZSTD_window_t;

typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    U64 hashPower;          /* Used to compute the rolling hash.
                             * Depends on ldmParams.minMatchLength */
} ldmState_t;

typedef struct {
    U32 enableLdm;          /* 1 to enable long distance matching */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    U32 offset;
    U32 litLength;
    U32 matchLength;
} rawSeq;

typedef struct {
    rawSeq* seq;     /* The start of the sequences */
    size_t pos;      /* The position where reading stopped. <= size. */
    size_t size;     /* The number of sequences. <= capacity. */
    size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;    /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;              /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    U32   dictID;

    ZSTD_cwksp workspace;  /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;

    seqStore_t seqStore;       /* sequences storage ptrs */
    ldmState_t ldmState;       /* long distance matching state */
    rawSeq* ldmSequences;      /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore;  /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;     /* entropy workspace of HUF_WORKSPACE_SIZE bytes */

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;


typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 * because that's the format in which it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
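
/* Illustrative sketch (not part of the library) : how lengths map to codes.
 * Values below the table cut-off map 1:1 through the table; larger values
 * are binned logarithmically via ZSTD_highbit32() plus a delta. */
#if 0
{   /* litLength 10 is below 64 : direct table lookup */
    assert(ZSTD_LLcode(10) == 10);
    /* litLength 100 is above 63 : highbit32(100)==6, 6+19==25 */
    assert(ZSTD_LLcode(100) == 25);
    /* mlBase 200 is above 127 : highbit32(200)==7, 7+36==43 */
    assert(ZSTD_MLcode(200) == 43);
}
#endif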

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}
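
/* Worked example (illustrative only) : for a 4 KB input, strategies below
 * ZSTD_btultra use minlog==6, so at least (4096>>6)+2 == 66 bytes must be
 * saved for the block to be emitted as compressed; with ZSTD_btultra2 (==9),
 * minlog==8, lowering the requirement to (4096>>8)+2 == 18 bytes. */
#if 0
assert(ZSTD_minGain(4096, ZSTD_fast)     == 66);
assert(ZSTD_minGain(4096, ZSTD_btultra2) == 18);
#endif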

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
 *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Allowed to overread literals up to litLimit.
 */
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
                 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 1;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 2;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}
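
/* Illustrative sketch (not part of the library) : the offCode convention.
 * Values 0..ZSTD_REP_MOVE select repeat codes 1..ZSTD_REP_NUM ;
 * a real distance `d` is passed as `d + ZSTD_REP_MOVE`.
 * Hypothetical calls, assuming `seqStore`, `lits` and `litLimit` are set up : */
#if 0
/* 3 literals, then a match of length 7 (mlBase = 7 - MINMATCH) at distance 64 */
ZSTD_storeSeq(seqStore, 3, lits, litLimit, 64 + ZSTD_REP_MOVE, 7 - MINMATCH);
/* same literals, but the match reuses the most recent offset (repCode 1) */
ZSTD_storeSeq(seqStore, 3, lits, litLimit, 0, 7 - MINMATCH);
#endif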


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
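
/* Illustrative sketch (not part of the library) :
 * ZSTD_count() returns the length of the common prefix of two buffers,
 * reading at most up to pInLimit on the `pIn` side. */
#if 0
{   static const BYTE a[] = "zstandard";
    static const BYTE b[] = "zstdlib";
    /* common prefix is "zst" : 3 bytes */
    assert(ZSTD_count(a, b, a + sizeof(a)-1) == 3);
}
#endif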

/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
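
/* Illustrative sketch (not part of the library) : hashing the first `mls`
 * bytes at `ip` into a table of (1 << hBits) entries. `hashTable`, `base`,
 * `hBits` and `mls` are hypothetical names standing in for matchState fields. */
#if 0
{   size_t const h = ZSTD_hashPtr(ip, hBits, mls);   /* h < (1 << hBits) */
    hashTable[h] = (U32)(ip - base);                 /* store current index */
}
#endif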

/** ZSTD_ipow() :
 * Return base^exponent
 * (computed by exponentiation by squaring, implicitly modulo 2^64).
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
        if (exponent & 1) power *= base;
        exponent >>= 1;
        base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
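
/* Illustrative sketch (not part of the library) : maintaining a rolling hash
 * over a sliding window. Assumes `data` holds at least windowSize+1 bytes. */
#if 0
{   size_t const windowSize = 16;
    U64 const primePower = ZSTD_rollingHash_primePower((U32)windowSize);
    /* hash of data[0..windowSize-1] */
    U64 hash = ZSTD_rollingHash_compute(data, windowSize);
    /* slide by one byte : now the hash of data[1..windowSize] */
    hash = ZSTD_rollingHash_rotate(hash, data[0], data[windowSize], primePower);
    assert(hash == ZSTD_rollingHash_compute(data + 1, windowSize));
}
#endif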

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            ZSTD_dictMatchState :
            ZSTD_noDict;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  void const* srcEnd)
{
    U32 const current = (U32)((BYTE const*)srcEnd - window.base);
    return current > ZSTD_CURRENT_MAX;
}

/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 * NOTE: (maxDist & cycleMask) must be zero.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleMask = (1U << cycleLog) - 1;
    U32 const current = (U32)((BYTE const*)src - window->base);
    U32 const newCurrent = (current & cycleMask) + maxDist;
    U32 const correction = current - newCurrent;
    assert((maxDist & cycleMask) == 0);
    assert(current > newCurrent);
    /* Loose bound, should be around 1<<29 (see above) */
    assert(correction > 1<<28);

    window->base += correction;
    window->dictBase += correction;
    window->lowLimit -= correction;
    window->dictLimit -= correction;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
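
/* Worked example (illustrative values) : with cycleLog==16 and
 * maxDist==1<<17, a current index of 0x70000123 yields
 * newCurrent == (0x70000123 & 0xFFFF) + 0x20000 == 0x20123,
 * i.e. a correction of 0x6FFE0000, while the low 16 bits are preserved,
 * so all chain-table cycle positions stay consistent. */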

/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                           const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                           const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                       U32   maxDist,
                       U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}
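
/* Illustrative sketch (assumed values) : with maxDist == 1<<17 == 131072
 * and loadedDictEnd == 4096, the dictionary stays referenceable while
 * blockEndIdx <= 135168 ; the first block ending beyond that index
 * raises lowLimit to blockEndIdx - maxDist and invalidates the dictionary
 * (*loadedDictEndPtr = 0, *dictMatchStatePtr = NULL). */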

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the old extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    /* Check if blocks follow each other */
    if (src != window->nextSrc) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
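
/* Worked example (illustrative values) : with windowLog==19 (maxDistance==524288),
 * current==1000000 and lowLimit==1000, the window has outgrown lowLimit :
 * current - lowestValid == 999000 > maxDistance, so without a dictionary the
 * lowest usable match index is current - maxDistance == 475712.
 * With a dictionary loaded (loadedDictEnd != 0), lowestValid (1000) is kept,
 * and ZSTD_window_enforceMaxDist() is relied upon to raise lowLimit instead. */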


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif


/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);


#endif /* ZSTD_COMPRESS_H */