/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "zstd_internal.h"
#ifdef ZSTD_MULTITHREAD
#  include "zstdmt_compress.h"
#endif

#if defined (__cplusplus)
extern "C" {
#endif


/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For the btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused with a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : the candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    U32 CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

typedef struct {
    U32 off;
    U32 len;
} ZSTD_match_t;

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;    /* next block here to continue on current prefix */
    BYTE const* base;       /* All regular indexes relative to this position */
    BYTE const* dictBase;   /* extDict indexes relative to this position */
    U32 dictLimit;          /* below that point, need extDict */
    U32 lowLimit;           /* below that point, no more valid data */
} ZSTD_window_t;

typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table : larger == faster, more memory */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    U64 hashPower;          /* Used to compute the rolling hash.
                             * Depends on ldmParams.minMatchLength */
} ldmState_t;

typedef struct {
    U32 enableLdm;          /* 1 if long distance matching is enabled */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    U32 offset;
    U32 litLength;
    U32 matchLength;
} rawSeq;

typedef struct {
  rawSeq* seq;     /* The start of the sequences */
  size_t pos;      /* The position where reading stopped. <= size. */
  size_t size;     /* The number of sequences. <= capacity. */
  size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    U32   dictID;

    int workSpaceOversizedDuration;
    void* workSpace;
    size_t workSpaceSize;
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    size_t staticSize;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;


typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because that's the format in which it is stored within seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
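
/* Worked examples (derived directly from the two tables above) :
 * - litLength 10  : below 64, so LL code = LL_Code[10] = 10.
 * - litLength 100 : above 63, so LL code = ZSTD_highbit32(100) + 19 = 6 + 19 = 25.
 * - matchLength 40   : mlBase = 40 - MINMATCH = 37, below 128, so ML code = ML_Code[37] = 34.
 * - matchLength 1000 : mlBase = 997, above 127, so ML code = ZSTD_highbit32(997) + 36 = 9 + 36 = 45.
 * Codes past the identity range carry extra bits in the compressed stream
 * to recover the exact length. */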

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_minGain() :
 * minimum compression gain required
 * to generate a compressed block or a compressed literals section.
 * note : the same formula is used for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}
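
/* Worked example : for a 128 KB block (srcSize == 1<<17),
 * - strategies below ZSTD_btultra use minlog == 6, so minGain == (131072>>6) + 2 == 2050 bytes,
 * - ZSTD_btultra (== 8) uses minlog == 7, so minGain == (131072>>7) + 2 == 1026 bytes.
 * Stronger strategies therefore accept smaller savings before keeping the
 * compressed form. */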

/*! ZSTD_storeSeq() :
 *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
 *  `offsetCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *                 It is stored below as offsetCode+1, so stored values 1-3 denote repCodes.
 *  `mlBase` : matchLength - MINMATCH
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
{
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 1;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offsetCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 2;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}
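
/* Illustrative sketch (not compiled) : how a block compressor in lib/compress
 * typically emits a match. `ip`, `match`, `anchor` and `iend` stand for the
 * caller's locals and are named here only for illustration. */
#if 0
    {   size_t const mLength = ZSTD_count(ip, match, iend);     /* forward match length */
        U32 const offset = (U32)(ip - match);                   /* raw match distance */
        ZSTD_storeSeq(seqStore, (size_t)(ip - anchor), anchor,  /* pending literals */
                      offset + ZSTD_REP_MOVE,   /* real match : distance + ZSTD_REP_MOVE */
                      mLength - MINMATCH);      /* mlBase convention */
    }
#endif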


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* computed indirectly, to avoid a compiler warning about an oversized shift in 32-bit mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}
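
/* Worked example (little-endian 64-bit) : comparing "abcdefgh" with "abcdXYZW",
 * the XOR of the two loaded words has its lowest set bit somewhere in byte 4
 * (bits 32-39), so ctz(diff)>>3 == 4 : the first 4 bytes match in memory order. */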


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
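
/* ZSTD_count() returns the length of the common prefix of pIn and pMatch,
 * comparing word-at-a-time and finishing the last partial word with
 * ZSTD_NbCommonBytes() on the first XOR difference, so the typical cost is
 * one load+XOR per sizeof(size_t) matching bytes. */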

/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues starting from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end of dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end of buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
 *  Hashes
 ***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
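
/* How these hashes work : the left shift keeps only the lowest N bytes of the
 * loaded word (no shift is needed when N equals the word size), the multiply
 * by a large odd constant mixes those bytes into the top bits, and the right
 * shift keeps the hBits highest bits of the product as the table index.
 * For example, ZSTD_hash5(v, 20) uses 5 input bytes and returns a 20-bit index,
 * suitable for a table of 1<<20 entries. */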

/** ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}
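
/* Example : ZSTD_ipow(3, 5) == 243, computed by square-and-multiply in
 * O(log2(exponent)) steps. The arithmetic intentionally wraps modulo 2^64 :
 * the rolling hash below performs all of its operations modulo 2^64 as well. */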

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
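
/* Sliding invariant (illustrative, not compiled) : the hash is a polynomial in
 * prime8bytes over the offset bytes, evaluated mod 2^64, so advancing a window
 * of length `len` by one byte via rotate equals a full recomputation. `buf` and
 * `len` are hypothetical; ldmState_t.hashPower caches such a primePower,
 * derived from ldmParams.minMatchLength. */
#if 0
    {   U64 const pp = ZSTD_rollingHash_primePower(len);
        U64 hash = ZSTD_rollingHash_compute(buf, len);
        hash = ZSTD_rollingHash_rotate(hash, buf[0], buf[len], pp);
        assert(hash == ZSTD_rollingHash_compute(buf+1, len));
    }
#endif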

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            ZSTD_dictMatchState :
            ZSTD_noDict;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  void const* srcEnd)
{
    U32 const current = (U32)((BYTE const*)srcEnd - window.base);
    return current > ZSTD_CURRENT_MAX;
}

/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same
 * after the correction (cycleLog may be 0).
 * Every index up to maxDist in the past must remain valid.
 * NOTE: (maxDist & cycleMask) must be zero.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleMask = (1U << cycleLog) - 1;
    U32 const current = (U32)((BYTE const*)src - window->base);
    U32 const newCurrent = (current & cycleMask) + maxDist;
    U32 const correction = current - newCurrent;
    assert((maxDist & cycleMask) == 0);
    assert(current > newCurrent);
    /* Loose bound, should be around 1<<29 (see above) */
    assert(correction > 1<<28);

    window->base += correction;
    window->dictBase += correction;
    window->lowLimit -= correction;
    window->dictLimit -= correction;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
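
/* Worked example : with cycleLog == 20 and maxDist == 1<<20,
 * a current index of 0xE0000100 gives :
 *     cycleMask  == 0xFFFFF
 *     newCurrent == (0xE0000100 & 0xFFFFF) + (1<<20) == 0x00100100
 *     correction == 0xE0000100 - 0x00100100 == 0xDFF00000
 * correction is a multiple of 1<<cycleLog, so every corrected index keeps its
 * low cycleLog bits, preserving the structure of chain and binary tree tables. */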

/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures that an index remains valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for the current compression.
 * As the name implies, loadedDictEnd represents the index at the end of the dictionary.
 * The value lies within the context's referential, so it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond the window size, the dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates the dictionary
 * once input progresses beyond the window size. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the previous extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    /* Check if blocks follow each other */
    if (src != window->nextSrc) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        // ms->nextToUpdate = window->dictLimit;
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
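
/* Illustrative scenario : a streaming compressor processes a block from buffer A,
 * then receives the next block in a different buffer B. The second call to
 * ZSTD_window_update() sees src != window->nextSrc, so the old prefix
 * [base, nextSrc) becomes the extDict (tracked by dictBase/dictLimit), base is
 * rebased so that B continues the same index space, and 0 is returned to signal
 * that subsequent matches may span two segments. */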


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}
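
/* ZSTD_fWeight() computes a fixed-point approximation of log2(rawStat+1) + 1 :
 * BWeight is the integer part (position of the highest set bit, scaled by
 * fp_multiplier) and FWeight a linear approximation of the remainder.
 * Example : rawStat == 255 gives newStat == 256, hb == 8,
 * weight == 8*256 + 256 == 2304, i.e. 9.0 after division by fp_multiplier.
 * The constant +1 bias cancels in the fWeight(sum) - fWeight(table[u])
 * differences printed by ZSTD_debugTable() below. */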

/* display a table's content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif


/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  may receive dict, or cdict, or neither, but never both at the same time.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    ZSTD_CCtx_params params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 ZSTD_CCtx_params params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);


#endif /* ZSTD_COMPRESS_H */