xref: /linux/lib/zstd/compress/zstd_compress_internal.h (revision e61f33273ca755b3e2ebee4520a76097199dc7a8)
1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
2 /*
3  * Copyright (c) Meta Platforms, Inc. and affiliates.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 /* This header contains definitions
13  * that shall **only** be used by modules within lib/compress.
14  */
15 
16 #ifndef ZSTD_COMPRESS_H
17 #define ZSTD_COMPRESS_H
18 
19 /*-*************************************
20 *  Dependencies
21 ***************************************/
22 #include "../common/zstd_internal.h"
23 #include "zstd_cwksp.h"
24 #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
25 #include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
26 
27 /*-*************************************
28 *  Constants
29 ***************************************/
30 #define kSearchStrength      8
31 #define HASH_READ_SIZE       8
32 #define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
33                                        It could be confused with a real successor at index "1", if sorted as larger than its predecessor.
34                                        That is not a big deal : the candidate will simply be sorted again.
35                                        Additionally, candidate position 1 will be lost.
36                                        But candidate 1 cannot hide a large tree of candidates, so the loss is minimal.
37                                        The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
38                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
39 
40 
41 /*-*************************************
42 *  Context memory management
43 ***************************************/
44 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
45 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
46 
47 typedef struct ZSTD_prefixDict_s {
48     const void* dict;
49     size_t dictSize;
50     ZSTD_dictContentType_e dictContentType;
51 } ZSTD_prefixDict;
52 
53 typedef struct {
54     void* dictBuffer;
55     void const* dict;
56     size_t dictSize;
57     ZSTD_dictContentType_e dictContentType;
58     ZSTD_CDict* cdict;
59 } ZSTD_localDict;
60 
61 typedef struct {
62     HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
63     HUF_repeat repeatMode;
64 } ZSTD_hufCTables_t;
65 
66 typedef struct {
67     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
68     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
69     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
70     FSE_repeat offcode_repeatMode;
71     FSE_repeat matchlength_repeatMode;
72     FSE_repeat litlength_repeatMode;
73 } ZSTD_fseCTables_t;
74 
75 typedef struct {
76     ZSTD_hufCTables_t huf;
77     ZSTD_fseCTables_t fse;
78 } ZSTD_entropyCTables_t;
79 
80 /* *********************************************
81 *  Sequences *
82 ***********************************************/
83 typedef struct SeqDef_s {
84     U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
85     U16 litLength;
86     U16 mlBase;    /* mlBase == matchLength - MINMATCH */
87 } SeqDef;
88 
89 /* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
90 typedef enum {
91     ZSTD_llt_none = 0,             /* no longLengthType */
92     ZSTD_llt_literalLength = 1,    /* represents a long literal */
93     ZSTD_llt_matchLength = 2       /* represents a long match */
94 } ZSTD_longLengthType_e;
95 
96 typedef struct {
97     SeqDef* sequencesStart;
98     SeqDef* sequences;      /* ptr to end of sequences */
99     BYTE*  litStart;
100     BYTE*  lit;             /* ptr to end of literals */
101     BYTE*  llCode;
102     BYTE*  mlCode;
103     BYTE*  ofCode;
104     size_t maxNbSeq;
105     size_t maxNbLit;
106 
107     /* longLengthPos and longLengthType allow us to represent a single litLength or matchLength
108      * in the seqStore whose value is too large for a U16 (if such a sequence exists). To recover it,
109      * readers add 0x10000 to the stored litLength or matchLength (see ZSTD_getSequenceLength()).
110      */
111     ZSTD_longLengthType_e longLengthType;
112     U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
113 } SeqStore_t;
114 
115 typedef struct {
116     U32 litLength;
117     U32 matchLength;
118 } ZSTD_SequenceLength;
119 
120 /*
121  * Returns the ZSTD_SequenceLength for the given sequence. It handles the decoding of long sequences
122  * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
123  */
124 MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
125 {
126     ZSTD_SequenceLength seqLen;
127     seqLen.litLength = seq->litLength;
128     seqLen.matchLength = seq->mlBase + MINMATCH;
129     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
130         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
131             seqLen.litLength += 0x10000;
132         }
133         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
134             seqLen.matchLength += 0x10000;
135         }
136     }
137     return seqLen;
138 }
139 
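/* Illustrative sketch (not part of the original source, kept under #if 0):
 * one way a reader of a SeqStore_t could walk its sequences and recover full
 * lengths via ZSTD_getSequenceLength(). The helper name is hypothetical, and
 * the sum ignores the literals that follow the last sequence. */
#if 0
static size_t sumSeqStoreLengths(const SeqStore_t* seqStore)
{
    size_t total = 0;
    const SeqDef* seq;
    for (seq = seqStore->sequencesStart; seq < seqStore->sequences; seq++) {
        ZSTD_SequenceLength const sl = ZSTD_getSequenceLength(seqStore, seq);
        total += sl.litLength + sl.matchLength;   /* bytes regenerated by this sequence */
    }
    return total;
}
#endif
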
140 const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
141 int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
142 
143 
144 /* *********************************************
145 *  Entropy buffer statistics structs and funcs *
146 ***********************************************/
147 /* ZSTD_hufCTablesMetadata_t :
148  *  Stores Literals Block Type for a super-block in hType, and
149  *  huffman tree description in hufDesBuffer.
150  *  hufDesSize refers to the size of huffman tree description in bytes.
151  *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
152 typedef struct {
153     SymbolEncodingType_e hType;
154     BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
155     size_t hufDesSize;
156 } ZSTD_hufCTablesMetadata_t;
157 
158 /* ZSTD_fseCTablesMetadata_t :
159  *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
160  *  fse tables in fseTablesBuffer.
161  *  fseTablesSize refers to the size of fse tables in bytes.
162  *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
163 typedef struct {
164     SymbolEncodingType_e llType;
165     SymbolEncodingType_e ofType;
166     SymbolEncodingType_e mlType;
167     BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
168     size_t fseTablesSize;
169     size_t lastCountSize; /* This is to account for a bug in zstd v1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
170 } ZSTD_fseCTablesMetadata_t;
171 
172 typedef struct {
173     ZSTD_hufCTablesMetadata_t hufMetadata;
174     ZSTD_fseCTablesMetadata_t fseMetadata;
175 } ZSTD_entropyCTablesMetadata_t;
176 
177 /* ZSTD_buildBlockEntropyStats() :
178  *  Builds entropy for the block.
179  *  @return : 0 on success or error code */
180 size_t ZSTD_buildBlockEntropyStats(
181                     const SeqStore_t* seqStorePtr,
182                     const ZSTD_entropyCTables_t* prevEntropy,
183                           ZSTD_entropyCTables_t* nextEntropy,
184                     const ZSTD_CCtx_params* cctxParams,
185                           ZSTD_entropyCTablesMetadata_t* entropyMetadata,
186                           void* workspace, size_t wkspSize);
187 
188 /* *******************************
189 *  Compression internals structs *
190 *********************************/
191 
192 typedef struct {
193     U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
194     U32 len;            /* Raw length of match */
195 } ZSTD_match_t;
196 
197 typedef struct {
198     U32 offset;         /* Offset of sequence */
199     U32 litLength;      /* Length of literals prior to match */
200     U32 matchLength;    /* Raw length of match */
201 } rawSeq;
202 
203 typedef struct {
204   rawSeq* seq;          /* The start of the sequences */
205   size_t pos;           /* The index in seq where reading stopped. pos <= size. */
206   size_t posInSequence; /* The position within the sequence at seq[pos] where reading
207                            stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
208   size_t size;          /* The number of sequences. <= capacity. */
209   size_t capacity;      /* The capacity starting from `seq` pointer */
210 } RawSeqStore_t;
211 
212 UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
213 
214 typedef struct {
215     int price;  /* price from beginning of segment to this position */
216     U32 off;    /* offset of previous match */
217     U32 mlen;   /* length of previous match */
218     U32 litlen; /* nb of literals since previous match */
219     U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
220 } ZSTD_optimal_t;
221 
222 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
223 
224 #define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
225 typedef struct {
226     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
227     unsigned* litFreq;           /* table of literals statistics, of size 256 */
228     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
229     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
230     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
231     ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
232     ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
233 
234     U32  litSum;                 /* nb of literals */
235     U32  litLengthSum;           /* nb of litLength codes */
236     U32  matchLengthSum;         /* nb of matchLength codes */
237     U32  offCodeSum;             /* nb of offset codes */
238     U32  litSumBasePrice;        /* to compare to log2(litfreq) */
239     U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
240     U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
241     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
242     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
243     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
244     ZSTD_ParamSwitch_e literalCompressionMode;
245 } optState_t;
246 
247 typedef struct {
248   ZSTD_entropyCTables_t entropy;
249   U32 rep[ZSTD_REP_NUM];
250 } ZSTD_compressedBlockState_t;
251 
252 typedef struct {
253     BYTE const* nextSrc;       /* next block here to continue on current prefix */
254     BYTE const* base;          /* All regular indexes relative to this position */
255     BYTE const* dictBase;      /* extDict indexes relative to this position */
256     U32 dictLimit;             /* below that point, need extDict */
257     U32 lowLimit;              /* below that point, no more valid data */
258     U32 nbOverflowCorrections; /* Number of times overflow correction has run since
259                                 * ZSTD_window_init(). Useful for debugging coredumps
260                                 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
261                                 */
262 } ZSTD_window_t;
263 
264 #define ZSTD_WINDOW_START_INDEX 2
265 
266 typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;
267 
268 #define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
269 
270 struct ZSTD_MatchState_t {
271     ZSTD_window_t window;   /* State for window round buffer management */
272     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
273                              * When loadedDictEnd != 0, a dictionary is in use, and still valid.
274                              * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
275                              * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
276                              * When dict referential is copied into active context (i.e. not attached),
277                              * loadedDictEnd == dictSize, since referential starts from zero.
278                              */
279     U32 nextToUpdate;       /* index from which to continue table update */
280     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
281 
282     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
283     BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
284     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
285     U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
286     U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
287 
288     U32* hashTable;
289     U32* hashTable3;
290     U32* chainTable;
291 
292     int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
293 
294     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
295                                * dedicated dictionary search structure.
296                                */
297     optState_t opt;         /* optimal parser state */
298     const ZSTD_MatchState_t* dictMatchState;
299     ZSTD_compressionParameters cParams;
300     const RawSeqStore_t* ldmSeqStore;
301 
302     /* Controls prefetching in some dictMatchState matchfinders.
303      * This behavior is controlled from the cctx ms.
304      * This parameter has no effect in the cdict ms. */
305     int prefetchCDictTables;
306 
307     /* When == 0, lazy match finders insert every position.
308      * When != 0, lazy match finders only insert positions they search.
309      * This allows them to skip much faster over incompressible data,
310      * at a small cost to compression ratio.
311      */
312     int lazySkipping;
313 };
314 
315 typedef struct {
316     ZSTD_compressedBlockState_t* prevCBlock;
317     ZSTD_compressedBlockState_t* nextCBlock;
318     ZSTD_MatchState_t matchState;
319 } ZSTD_blockState_t;
320 
321 typedef struct {
322     U32 offset;
323     U32 checksum;
324 } ldmEntry_t;
325 
326 typedef struct {
327     BYTE const* split;
328     U32 hash;
329     U32 checksum;
330     ldmEntry_t* bucket;
331 } ldmMatchCandidate_t;
332 
333 #define LDM_BATCH_SIZE 64
334 
335 typedef struct {
336     ZSTD_window_t window;   /* State for the window round buffer management */
337     ldmEntry_t* hashTable;
338     U32 loadedDictEnd;
339     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
340     size_t splitIndices[LDM_BATCH_SIZE];
341     ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
342 } ldmState_t;
343 
344 typedef struct {
345     ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
346     U32 hashLog;            /* Log size of hashTable */
347     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
348     U32 minMatchLength;     /* Minimum match length */
349     U32 hashRateLog;       /* Log number of entries to skip */
350     U32 windowLog;          /* Window log for the LDM */
351 } ldmParams_t;
352 
353 typedef struct {
354     int collectSequences;
355     ZSTD_Sequence* seqStart;
356     size_t seqIndex;
357     size_t maxSequences;
358 } SeqCollector;
359 
360 struct ZSTD_CCtx_params_s {
361     ZSTD_format_e format;
362     ZSTD_compressionParameters cParams;
363     ZSTD_frameParameters fParams;
364 
365     int compressionLevel;
366     int forceWindow;           /* force back-references to respect limit of
367                                 * 1<<wLog, even for dictionary */
368     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
369                                 * No target when targetCBlockSize == 0.
370                                 * There is no guarantee on compressed block size */
371     int srcSizeHint;           /* User's best guess of source size.
372                                 * Hint is not valid when srcSizeHint == 0.
373                                 * There is no guarantee that hint is close to actual source size */
374 
375     ZSTD_dictAttachPref_e attachDictPref;
376     ZSTD_ParamSwitch_e literalCompressionMode;
377 
378     /* Multithreading: used to pass parameters to mtctx */
379     int nbWorkers;
380     size_t jobSize;
381     int overlapLog;
382     int rsyncable;
383 
384     /* Long distance matching parameters */
385     ldmParams_t ldmParams;
386 
387     /* Dedicated dict search algorithm trigger */
388     int enableDedicatedDictSearch;
389 
390     /* Input/output buffer modes */
391     ZSTD_bufferMode_e inBufferMode;
392     ZSTD_bufferMode_e outBufferMode;
393 
394     /* Sequence compression API */
395     ZSTD_SequenceFormat_e blockDelimiters;
396     int validateSequences;
397 
398     /* Block splitting
399      * @postBlockSplitter executes split analysis after sequences are produced,
400      * it's more accurate but consumes more resources.
401      * @preBlockSplitter_level splits before knowing sequences,
402      * it's more approximative but also cheaper.
403      * Valid @preBlockSplitter_level values range from 0 to 6 (included).
404      * 0 means auto, 1 means do not split,
405      * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
406      * Highest @preBlockSplitter_level combines well with @postBlockSplitter.
407      */
408     ZSTD_ParamSwitch_e postBlockSplitter;
409     int preBlockSplitter_level;
410 
411     /* Adjust the max block size */
412     size_t maxBlockSize;
413 
414     /* Param for deciding whether to use row-based matchfinder */
415     ZSTD_ParamSwitch_e useRowMatchFinder;
416 
417     /* Always load a dictionary in ext-dict mode (not prefix mode)? */
418     int deterministicRefPrefix;
419 
420     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
421     ZSTD_customMem customMem;
422 
423     /* Controls prefetching in some dictMatchState matchfinders */
424     ZSTD_ParamSwitch_e prefetchCDictTables;
425 
426     /* Controls whether zstd will fall back to an internal matchfinder
427      * if the external matchfinder returns an error code. */
428     int enableMatchFinderFallback;
429 
430     /* Parameters for the external sequence producer API.
431      * Users set these parameters through ZSTD_registerSequenceProducer().
432      * It is not possible to set these parameters individually through the public API. */
433     void* extSeqProdState;
434     ZSTD_sequenceProducer_F extSeqProdFunc;
435 
436     /* Controls repcode search in external sequence parsing */
437     ZSTD_ParamSwitch_e searchForExternalRepcodes;
438 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
439 
440 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
441 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
442 #define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
443 
444 /*
445  * Indicates whether this compression proceeds directly from user-provided
446  * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
447  * whether the context needs to buffer the input/output (ZSTDb_buffered).
448  */
449 typedef enum {
450     ZSTDb_not_buffered,
451     ZSTDb_buffered
452 } ZSTD_buffered_policy_e;
453 
454 /*
455  * Struct that contains all elements of block splitter that should be allocated
456  * in a wksp.
457  */
458 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
459 typedef struct {
460     SeqStore_t fullSeqStoreChunk;
461     SeqStore_t firstHalfSeqStore;
462     SeqStore_t secondHalfSeqStore;
463     SeqStore_t currSeqStore;
464     SeqStore_t nextSeqStore;
465 
466     U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
467     ZSTD_entropyCTablesMetadata_t entropyMetadata;
468 } ZSTD_blockSplitCtx;
469 
470 struct ZSTD_CCtx_s {
471     ZSTD_compressionStage_e stage;
472     int cParamsChanged;                  /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
473     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
474     ZSTD_CCtx_params requestedParams;
475     ZSTD_CCtx_params appliedParams;
476     ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
477     U32   dictID;
478     size_t dictContentSize;
479 
480     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
481     size_t blockSizeMax;
482     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
483     unsigned long long consumedSrcSize;
484     unsigned long long producedCSize;
485     struct xxh64_state xxhState;
486     ZSTD_customMem customMem;
487     ZSTD_threadPool* pool;
488     size_t staticSize;
489     SeqCollector seqCollector;
490     int isFirstBlock;
491     int initialized;
492 
493     SeqStore_t seqStore;      /* sequences storage ptrs */
494     ldmState_t ldmState;      /* long distance matching state */
495     rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
496     size_t maxNbLdmSequences;
497     RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
498     ZSTD_blockState_t blockState;
499     void* tmpWorkspace;  /* used as substitute of stack space - must be aligned for S64 type */
500     size_t tmpWkspSize;
501 
502     /* Whether we are streaming or not */
503     ZSTD_buffered_policy_e bufferedPolicy;
504 
505     /* streaming */
506     char*  inBuff;
507     size_t inBuffSize;
508     size_t inToCompress;
509     size_t inBuffPos;
510     size_t inBuffTarget;
511     char*  outBuff;
512     size_t outBuffSize;
513     size_t outBuffContentSize;
514     size_t outBuffFlushedSize;
515     ZSTD_cStreamStage streamStage;
516     U32    frameEnded;
517 
518     /* Stable in/out buffer verification */
519     ZSTD_inBuffer expectedInBuffer;
520     size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
521     size_t expectedOutBufferSize;
522 
523     /* Dictionary */
524     ZSTD_localDict localDict;
525     const ZSTD_CDict* cdict;
526     ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
527 
528     /* Multi-threading */
529 
530     /* Tracing */
531 
532     /* Workspace for block splitter */
533     ZSTD_blockSplitCtx blockSplitCtx;
534 
535     /* Buffer for output from external sequence producer */
536     ZSTD_Sequence* extSeqBuf;
537     size_t extSeqBufCapacity;
538 };
539 
540 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
541 typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
542 
543 typedef enum {
544     ZSTD_noDict = 0,
545     ZSTD_extDict = 1,
546     ZSTD_dictMatchState = 2,
547     ZSTD_dedicatedDictSearch = 3
548 } ZSTD_dictMode_e;
549 
550 typedef enum {
551     ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
552                                  * In this mode we use both the srcSize and the dictSize
553                                  * when selecting and adjusting parameters.
554                                  */
555     ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
556                                  * In this mode we only take the srcSize into account when selecting
557                                  * and adjusting parameters.
558                                  */
559     ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
560                                  * In this mode we take both the source size and the dictionary size
561                                  * into account when selecting and adjusting the parameters.
562                                  */
563     ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
564                                  * We don't know what these parameters are for. We default to the legacy
565                                  * behavior of taking both the source size and the dict size into account
566                                  * when selecting and adjusting parameters.
567                                  */
568 } ZSTD_CParamMode_e;
569 
570 typedef size_t (*ZSTD_BlockCompressor_f) (
571         ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
572         void const* src, size_t srcSize);
573 ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
574 
575 
576 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
577 {
578     static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
579                                        8,  9, 10, 11, 12, 13, 14, 15,
580                                       16, 16, 17, 17, 18, 18, 19, 19,
581                                       20, 20, 20, 20, 21, 21, 21, 21,
582                                       22, 22, 22, 22, 22, 22, 22, 22,
583                                       23, 23, 23, 23, 23, 23, 23, 23,
584                                       24, 24, 24, 24, 24, 24, 24, 24,
585                                       24, 24, 24, 24, 24, 24, 24, 24 };
586     static const U32 LL_deltaCode = 19;
587     return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
588 }
589 
590 /* ZSTD_MLcode() :
591  * note : mlBase = matchLength - MINMATCH;
592  *        because it's the format it's stored in seqStore->sequences */
593 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
594 {
595     static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
596                                       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
597                                       32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
598                                       38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
599                                       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
600                                       41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
601                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
602                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
603     static const U32 ML_deltaCode = 36;
604     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
605 }
606 
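/* Illustrative sketch (not part of the original source, kept under #if 0):
 * short lengths map 1:1 through the small tables above, while longer lengths
 * fall back to a log2-based code, as the assertions below spell out. */
#if 0
static void lengthCode_example(void)
{
    assert(ZSTD_LLcode(0)   == 0);                 /* short litLengths go through LL_Code[] */
    assert(ZSTD_LLcode(64)  == ZSTD_highbit32(64)  + 19 /* LL_deltaCode */);
    assert(ZSTD_MLcode(0)   == 0);                 /* remember : mlBase = matchLength - MINMATCH */
    assert(ZSTD_MLcode(128) == ZSTD_highbit32(128) + 36 /* ML_deltaCode */);
}
#endif
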
607 /* ZSTD_cParam_withinBounds:
608  * @return 1 if value is within cParam bounds,
609  * 0 otherwise */
610 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
611 {
612     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
613     if (ZSTD_isError(bounds.error)) return 0;
614     if (value < bounds.lowerBound) return 0;
615     if (value > bounds.upperBound) return 0;
616     return 1;
617 }
618 
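/* Illustrative sketch (not part of the original source, kept under #if 0):
 * typical bounds check before applying a user-supplied parameter value;
 * the function and `requestedLevel` are hypothetical names. */
#if 0
static size_t boundsCheck_example(int requestedLevel)
{
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(ZSTD_c_compressionLevel, requestedLevel),
                    parameter_outOfBound, "compression level out of range");
    return 0;
}
#endif
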
619 /* ZSTD_selectAddr:
620  * @return index >= lowLimit ? candidate : backup,
621  * tries to force branchless codegen. */
622 MEM_STATIC const BYTE*
623 ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
624 {
625 #if defined(__x86_64__)
626     __asm__ (
627         "cmp %1, %2\n"
628         "cmova %3, %0\n"
629         : "+r"(candidate)
630         : "r"(index), "r"(lowLimit), "r"(backup)
631         );
632     return candidate;
633 #else
634     return index >= lowLimit ? candidate : backup;
635 #endif
636 }
637 
638 /* ZSTD_noCompressBlock() :
639  * Writes uncompressed block to dst buffer from given src.
640  * Returns the size of the block */
641 MEM_STATIC size_t
642 ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
643 {
644     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
645     DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
646     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
647                     dstSize_tooSmall, "dst buf too small for uncompressed block");
648     MEM_writeLE24(dst, cBlockHeader24);
649     ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
650     return ZSTD_blockHeaderSize + srcSize;
651 }
652 
653 MEM_STATIC size_t
654 ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
655 {
656     BYTE* const op = (BYTE*)dst;
657     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
658     RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
659     MEM_writeLE24(op, cBlockHeader);
660     op[3] = src;
661     return 4;
662 }
663 
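/* Illustrative sketch (not part of the original source, kept under #if 0):
 * the 3-byte block header written above packs lastBlock (1 bit), the block
 * type (2 bits) and the block size (remaining bits), stored little-endian. */
#if 0
static void blockHeader_example(void)
{
    /* a raw (uncompressed) final block of 100 bytes : */
    U32 const header = 1 /* lastBlock */ + (((U32)bt_raw) << 1) + (100u << 3);
    /* MEM_writeLE24(dst, header) emits these 3 bytes, then the 100 raw bytes follow */
    (void)header;
}
#endif
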
664 
665 /* ZSTD_minGain() :
666  * minimum compression required
667  * to generate a compressed block or a compressed literals section.
668  * note : use same formula for both situations */
669 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
670 {
671     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
672     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
673     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
674     return (srcSize >> minlog) + 2;
675 }
676 
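/* Illustrative sketch (not part of the original source, kept under #if 0):
 * for strategies below ZSTD_btultra, minlog is 6, so a 64 KB block must save
 * at least (65536 >> 6) + 2 == 1026 bytes before its compressed form is
 * preferred over the raw block. */
#if 0
static void minGain_example(void)
{
    assert(ZSTD_minGain(64 KB, ZSTD_fast) == 1026);
}
#endif
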
677 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
678 {
679     switch (cctxParams->literalCompressionMode) {
680     case ZSTD_ps_enable:
681         return 0;
682     case ZSTD_ps_disable:
683         return 1;
684     default:
685         assert(0 /* impossible: pre-validated */);
686         ZSTD_FALLTHROUGH;
687     case ZSTD_ps_auto:
688         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
689     }
690 }
691 
692 /*! ZSTD_safecopyLiterals() :
693  *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
694  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
695  *  large copies.
696  */
697 static void
698 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
699 {
700     assert(iend > ilimit_w);
701     if (ip <= ilimit_w) {
702         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
703         op += ilimit_w - ip;
704         ip = ilimit_w;
705     }
706     while (ip < iend) *op++ = *ip++;
707 }
708 
709 
710 #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
711 #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
712 #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
713 #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
714 #define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
715 #define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
716 #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
717 #define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
718 #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
719 
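/* Illustrative sketch (not part of the original source, kept under #if 0):
 * how the offBase sum-type macros fit together. Values 1..ZSTD_REP_NUM are
 * repcode IDs, larger values are real offsets shifted by ZSTD_REP_NUM. */
#if 0
static void offBase_example(void)
{
    U32 const fromOffset  = OFFSET_TO_OFFBASE(1024);   /* == 1024 + ZSTD_REP_NUM */
    U32 const fromRepcode = REPCODE1_TO_OFFBASE;       /* == 1 */
    assert(OFFBASE_IS_OFFSET(fromOffset)   && OFFBASE_TO_OFFSET(fromOffset)   == 1024);
    assert(OFFBASE_IS_REPCODE(fromRepcode) && OFFBASE_TO_REPCODE(fromRepcode) == 1);
}
#endif
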
720 /*! ZSTD_storeSeqOnly() :
721  *  Store a sequence (litLength, offBase and matchLength) into SeqStore_t.
722  *  Literals themselves are neither copied nor referenced; only the literal length is recorded.
723  *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
724  *  @matchLength : must be >= MINMATCH
725 */
726 HINT_INLINE UNUSED_ATTR void
727 ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
728               size_t litLength,
729               U32 offBase,
730               size_t matchLength)
731 {
732     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
733 
734     /* literal Length */
735     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
736     if (UNLIKELY(litLength>0xFFFF)) {
737         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
738         seqStorePtr->longLengthType = ZSTD_llt_literalLength;
739         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
740     }
741     seqStorePtr->sequences[0].litLength = (U16)litLength;
742 
743     /* match offset */
744     seqStorePtr->sequences[0].offBase = offBase;
745 
746     /* match Length */
747     assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
748     assert(matchLength >= MINMATCH);
749     {   size_t const mlBase = matchLength - MINMATCH;
750         if (UNLIKELY(mlBase>0xFFFF)) {
751             assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
752             seqStorePtr->longLengthType = ZSTD_llt_matchLength;
753             seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
754         }
755         seqStorePtr->sequences[0].mlBase = (U16)mlBase;
756     }
757 
758     seqStorePtr->sequences++;
759 }
760 
761 /*! ZSTD_storeSeq() :
762  *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
763  *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
764  *  @matchLength : must be >= MINMATCH
765  *  Allowed to over-read literals up to litLimit.
766 */
767 HINT_INLINE UNUSED_ATTR void
768 ZSTD_storeSeq(SeqStore_t* seqStorePtr,
769               size_t litLength, const BYTE* literals, const BYTE* litLimit,
770               U32 offBase,
771               size_t matchLength)
772 {
773     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
774     BYTE const* const litEnd = literals + litLength;
775 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
776     static const BYTE* g_start = NULL;
777     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
778     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
779         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
780                pos, (U32)litLength, (U32)matchLength, (U32)offBase);
781     }
782 #endif
783     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
784     /* copy Literals */
785     assert(seqStorePtr->maxNbLit <= 128 KB);
786     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
787     assert(literals + litLength <= litLimit);
788     if (litEnd <= litLimit_w) {
789         /* Common case we can use wildcopy.
790          * First copy 16 bytes, because literals are likely short.
791          */
792         ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
793         ZSTD_copy16(seqStorePtr->lit, literals);
794         if (litLength > 16) {
795             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
796         }
797     } else {
798         ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
799     }
800     seqStorePtr->lit += litLength;
801 
802     ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength);
803 }
804 
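/* Illustrative sketch (not part of the original source, kept under #if 0):
 * how a match finder typically records a match it just found. All parameter
 * names below are hypothetical. */
#if 0
static void storeSeq_example(SeqStore_t* seqStore,
                             const BYTE* anchor, const BYTE* iend,
                             size_t litLength, U32 offset, size_t mLength)
{
    /* `litLength` literals starting at `anchor`, then a match of `mLength` bytes at distance `offset` */
    ZSTD_storeSeq(seqStore, litLength, anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
    /* a repeat-offset match would pass e.g. REPCODE1_TO_OFFBASE instead of OFFSET_TO_OFFBASE(offset) */
}
#endif
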
805 /* ZSTD_updateRep() :
806  * updates in-place @rep (array of repeat offsets)
807  * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
808  */
809 MEM_STATIC void
810 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
811 {
812     if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
813         rep[2] = rep[1];
814         rep[1] = rep[0];
815         rep[0] = OFFBASE_TO_OFFSET(offBase);
816     } else {   /* repcode */
817         U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
818         if (repCode > 0) {  /* note : if repCode==0, no change */
819             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
820             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
821             rep[1] = rep[0];
822             rep[0] = currentOffset;
823         } else {   /* repCode == 0 */
824             /* nothing to do */
825         }
826     }
827 }
828 
829 typedef struct repcodes_s {
830     U32 rep[3];
831 } Repcodes_t;
832 
833 MEM_STATIC Repcodes_t
834 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
835 {
836     Repcodes_t newReps;
837     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
838     ZSTD_updateRep(newReps.rep, offBase, ll0);
839     return newReps;
840 }
841 
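/* Illustrative sketch (not part of the original source, kept under #if 0):
 * how the repeat-offset history evolves; the starting values are arbitrary. */
#if 0
static void updateRep_example(void)
{
    U32 rep[ZSTD_REP_NUM] = { 1, 4, 8 };
    ZSTD_updateRep(rep, OFFSET_TO_OFFBASE(100), 0 /* ll0 */);   /* real offset : rep == { 100, 1, 4 } */
    ZSTD_updateRep(rep, REPCODE1_TO_OFFBASE, 0);                /* repcode 1, ll0==0 : no change */
    ZSTD_updateRep(rep, REPCODE2_TO_OFFBASE, 0);                /* repcode 2 : rep == { 1, 100, 4 } */
    assert(rep[0] == 1 && rep[1] == 100 && rep[2] == 4);
}
#endif
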
842 
843 /*-*************************************
844 *  Match length counter
845 ***************************************/
846 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
847 {
848     const BYTE* const pStart = pIn;
849     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
850 
851     if (pIn < pInLoopLimit) {
852         { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
853           if (diff) return ZSTD_NbCommonBytes(diff); }
854         pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
855         while (pIn < pInLoopLimit) {
856             size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
857             if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
858             pIn += ZSTD_NbCommonBytes(diff);
859             return (size_t)(pIn - pStart);
860     }   }
861     if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
862     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
863     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
864     return (size_t)(pIn - pStart);
865 }
866 
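/* Illustrative sketch (not part of the original source, kept under #if 0):
 * ZSTD_count() returns how many leading bytes two positions have in common,
 * never reading at or beyond pInLimit. */
#if 0
static void count_example(void)
{
    static const BYTE buf[] = "abcdefgh" "abcdefgX";
    size_t const len = ZSTD_count(buf + 8, buf + 0, buf + 16);
    assert(len == 7);   /* "abcdefg" matches, 'h' vs 'X' stops the count */
    (void)len;
}
#endif
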
867 /* ZSTD_count_2segments() :
868  *  can count match length with `ip` & `match` in 2 different segments.
869  *  convention : on reaching mEnd, the match count continues starting from iStart
870  */
871 MEM_STATIC size_t
872 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
873                      const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
874 {
875     const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
876     size_t const matchLength = ZSTD_count(ip, match, vEnd);
877     if (match + matchLength != mEnd) return matchLength;
878     DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
879     DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
880     DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
881     DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
882     DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
883     return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
884 }
885 
886 
887 /*-*************************************
888  *  Hashes
889  ***************************************/
890 static const U32 prime3bytes = 506832829U;
891 static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
892 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
893 MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
894 
895 static const U32 prime4bytes = 2654435761U;
896 static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
897 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
898 static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
899 
900 static const U64 prime5bytes = 889523592379ULL;
901 static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
902 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
903 static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
904 
905 static const U64 prime6bytes = 227718039650203ULL;
906 static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
907 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
908 static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
909 
910 static const U64 prime7bytes = 58295818150454627ULL;
911 static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
912 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
913 static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
914 
915 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
916 static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
917 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
918 static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
919 
920 
921 MEM_STATIC FORCE_INLINE_ATTR
922 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
923 {
924     /* Although some of these hashes do support hBits up to 64, some do not.
925      * To be on the safe side, always avoid hBits > 32. */
926     assert(hBits <= 32);
927 
928     switch(mls)
929     {
930     default:
931     case 4: return ZSTD_hash4Ptr(p, hBits);
932     case 5: return ZSTD_hash5Ptr(p, hBits);
933     case 6: return ZSTD_hash6Ptr(p, hBits);
934     case 7: return ZSTD_hash7Ptr(p, hBits);
935     case 8: return ZSTD_hash8Ptr(p, hBits);
936     }
937 }
938 
939 MEM_STATIC FORCE_INLINE_ATTR
940 size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
941     /* Although some of these hashes do support hBits up to 64, some do not.
942      * To be on the safe side, always avoid hBits > 32. */
943     assert(hBits <= 32);
944 
945     switch(mls)
946     {
947         default:
948         case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
949         case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
950         case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
951         case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
952         case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
953     }
954 }
955 
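/* Illustrative sketch (not part of the original source, kept under #if 0):
 * how a match finder typically uses ZSTD_hashPtr() to index its hash table.
 * All parameter names below are hypothetical; hashLog and mls come from the
 * compression parameters. */
#if 0
static void hashPtr_example(U32* hashTable, const BYTE* base, const BYTE* ip, U32 hashLog, U32 mls)
{
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);  /* hash of the next mls bytes at ip */
    U32 const matchIndex = hashTable[h];              /* candidate from a previous occurrence */
    hashTable[h] = (U32)(ip - base);                  /* record the current position */
    (void)matchIndex;
}
#endif
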
956 
957 /* ZSTD_ipow() :
958  * Return base^exponent.
959  */
960 static U64 ZSTD_ipow(U64 base, U64 exponent)
961 {
962     U64 power = 1;
963     while (exponent) {
964       if (exponent & 1) power *= base;
965       exponent >>= 1;
966       base *= base;
967     }
968     return power;
969 }
970 
971 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
972 
973 /* ZSTD_rollingHash_append() :
974  * Add the buffer to the hash value.
975  */
976 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
977 {
978     BYTE const* istart = (BYTE const*)buf;
979     size_t pos;
980     for (pos = 0; pos < size; ++pos) {
981         hash *= prime8bytes;
982         hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
983     }
984     return hash;
985 }
986 
987 /* ZSTD_rollingHash_compute() :
988  * Compute the rolling hash value of the buffer.
989  */
990 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
991 {
992     return ZSTD_rollingHash_append(0, buf, size);
993 }
994 
995 /* ZSTD_rollingHash_primePower() :
996  * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
997  * over a window of length bytes.
998  */
999 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
1000 {
1001     return ZSTD_ipow(prime8bytes, length - 1);
1002 }
1003 
1004 /* ZSTD_rollingHash_rotate() :
1005  * Rotate the rolling hash by one byte.
1006  */
1007 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
1008 {
1009     hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
1010     hash *= prime8bytes;
1011     hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
1012     return hash;
1013 }
1014 
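/* Illustrative sketch (not part of the original source, kept under #if 0):
 * the rolling-hash invariant: hashing bytes 1..4 directly gives the same
 * value as hashing bytes 0..3 and then rotating byte 0 out and byte 4 in. */
#if 0
static void rollingHash_example(void)
{
    static const BYTE data[] = { 10, 20, 30, 40, 50 };
    U32 const windowSize = 4;
    U64 const primePower = ZSTD_rollingHash_primePower(windowSize);
    U64 hash = ZSTD_rollingHash_compute(data, windowSize);               /* hash of bytes 0..3 */
    hash = ZSTD_rollingHash_rotate(hash, data[0], data[4], primePower);  /* slide the window by one byte */
    assert(hash == ZSTD_rollingHash_compute(data + 1, windowSize));      /* now equals hash of bytes 1..4 */
    (void)hash;
}
#endif
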
1015 /*-*************************************
1016 *  Round buffer management
1017 ***************************************/
1018 /* Max @current value allowed:
1019  * In 32-bit mode: we want to avoid crossing the 2 GB limit,
1020  *                 reducing risks of side effects in case of signed operations on indexes.
1021  * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
1022  *                 doesn't overflow U32 index capacity (4 GB) */
1023 #define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
1024 /* Maximum chunk size before overflow correction needs to be called again */
1025 #define ZSTD_CHUNKSIZE_MAX                                                     \
1026     ( ((U32)-1)                  /* Maximum ending current index */            \
1027     - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
1028 
1029 /*
1030  * ZSTD_window_clear():
1031  * Clears the window containing the history by simply setting it to empty.
1032  */
1033 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
1034 {
1035     size_t const endT = (size_t)(window->nextSrc - window->base);
1036     U32 const end = (U32)endT;
1037 
1038     window->lowLimit = end;
1039     window->dictLimit = end;
1040 }
1041 
1042 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
1043 {
1044     return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
1045            window.lowLimit == ZSTD_WINDOW_START_INDEX &&
1046            (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
1047 }
1048 
1049 /*
1050  * ZSTD_window_hasExtDict():
1051  * Returns non-zero if the window has a non-empty extDict.
1052  */
1053 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
1054 {
1055     return window.lowLimit < window.dictLimit;
1056 }
1057 
1058 /*
1059  * ZSTD_matchState_dictMode():
1060  * Inspects the provided matchState and figures out what dictMode should be
1061  * passed to the compressor.
1062  */
1063 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
1064 {
1065     return ZSTD_window_hasExtDict(ms->window) ?
1066         ZSTD_extDict :
1067         ms->dictMatchState != NULL ?
1068             (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
1069             ZSTD_noDict;
1070 }
1071 
1072 /* Defining this macro to non-zero tells zstd to run the overflow correction
1073  * code much more frequently. This is very inefficient, and should only be
1074  * used for tests and fuzzers.
1075  */
1076 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
1077 #  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1078 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
1079 #  else
1080 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
1081 #  endif
1082 #endif
1083 
1084 /*
1085  * ZSTD_window_canOverflowCorrect():
1086  * Returns non-zero if the indices are large enough for overflow correction
1087  * to work correctly without impacting compression ratio.
1088  */
1089 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
1090                                               U32 cycleLog,
1091                                               U32 maxDist,
1092                                               U32 loadedDictEnd,
1093                                               void const* src)
1094 {
1095     U32 const cycleSize = 1u << cycleLog;
1096     U32 const curr = (U32)((BYTE const*)src - window.base);
1097     U32 const minIndexToOverflowCorrect = cycleSize
1098                                         + MAX(maxDist, cycleSize)
1099                                         + ZSTD_WINDOW_START_INDEX;
1100 
1101     /* Adjust the min index to back off the overflow correction frequency,
1102      * so we don't waste too much CPU in overflow correction. If this
1103      * computation overflows we don't really care, we just need to make
1104      * sure it is at least minIndexToOverflowCorrect.
1105      */
1106     U32 const adjustment = window.nbOverflowCorrections + 1;
1107     U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
1108                                   minIndexToOverflowCorrect);
1109     U32 const indexLargeEnough = curr > adjustedIndex;
1110 
1111     /* Only overflow correct early if the dictionary is invalidated already,
1112      * so we don't hurt compression ratio.
1113      */
1114     U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1115 
1116     return indexLargeEnough && dictionaryInvalidated;
1117 }
1118 
1119 /*
1120  * ZSTD_window_needOverflowCorrection():
1121  * Returns non-zero if the indices are getting too large and need overflow
1122  * protection.
1123  */
1124 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1125                                                   U32 cycleLog,
1126                                                   U32 maxDist,
1127                                                   U32 loadedDictEnd,
1128                                                   void const* src,
1129                                                   void const* srcEnd)
1130 {
1131     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1132     if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1133         if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1134             return 1;
1135         }
1136     }
1137     return curr > ZSTD_CURRENT_MAX;
1138 }
1139 
1140 /*
1141  * ZSTD_window_correctOverflow():
1142  * Reduces the indices to protect from index overflow.
1143  * Returns the correction made to the indices, which must be applied to every
1144  * stored index.
1145  *
1146  * The least significant cycleLog bits of the indices must remain the same,
1147  * which may be 0. Every index up to maxDist in the past must be valid.
1148  */
1149 MEM_STATIC
1150 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1151 U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1152                                            U32 maxDist, void const* src)
1153 {
1154     /* preemptive overflow correction:
1155      * 1. correction is large enough:
1156      *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1157      *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1158      *
1159      *    current - newCurrent
1160      *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1161      *    > (3<<29) - (1<<chainLog)
1162      *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
1163      *    > 1<<29
1164      *
1165      * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1166      *    After correction, current is less than (1<<chainLog + 1<<windowLog).
1167      *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1168      *    In 32-bit mode we are safe, because (chainLog <= 29), so
1169      *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1170      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1171      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1172      */
1173     U32 const cycleSize = 1u << cycleLog;
1174     U32 const cycleMask = cycleSize - 1;
1175     U32 const curr = (U32)((BYTE const*)src - window->base);
1176     U32 const currentCycle = curr & cycleMask;
1177     /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1178     U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1179                                      ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1180                                      : 0;
1181     U32 const newCurrent = currentCycle
1182                          + currentCycleCorrection
1183                          + MAX(maxDist, cycleSize);
1184     U32 const correction = curr - newCurrent;
1185     /* maxDist must be a power of two so that:
1186      *   (newCurrent & cycleMask) == (curr & cycleMask)
1187      * This is required to not corrupt the chains / binary tree.
1188      */
1189     assert((maxDist & (maxDist - 1)) == 0);
1190     assert((curr & cycleMask) == (newCurrent & cycleMask));
1191     assert(curr > newCurrent);
1192     if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1193         /* Loose bound, should be around 1<<29 (see above) */
1194         assert(correction > 1<<28);
1195     }
1196 
1197     window->base += correction;
1198     window->dictBase += correction;
1199     if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1200         window->lowLimit = ZSTD_WINDOW_START_INDEX;
1201     } else {
1202         window->lowLimit -= correction;
1203     }
1204     if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1205         window->dictLimit = ZSTD_WINDOW_START_INDEX;
1206     } else {
1207         window->dictLimit -= correction;
1208     }
1209 
1210     /* Ensure we can still reference the full window. */
1211     assert(newCurrent >= maxDist);
1212     assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1213     /* Ensure that lowLimit and dictLimit didn't underflow. */
1214     assert(window->lowLimit <= newCurrent);
1215     assert(window->dictLimit <= newCurrent);
1216 
1217     ++window->nbOverflowCorrections;
1218 
1219     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1220              window->lowLimit);
1221     return correction;
1222 }
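
/* Worked example (illustrative values only) :
 * with cycleLog = 17 (cycleSize = 0x20000), maxDist = 1<<27 and curr = 0x80000123 :
 *   currentCycle           = curr & 0x1FFFF              = 0x123
 *   currentCycleCorrection = 0    (0x123 >= ZSTD_WINDOW_START_INDEX)
 *   newCurrent             = 0x123 + MAX(1<<27, 0x20000) = 0x08000123
 *   correction             = 0x80000123 - 0x08000123     = 0x78000000
 * The low cycleLog bits of every index are preserved (both values end in 0x123),
 * so hash chains and binary trees stay consistent once `correction` is
 * subtracted from all stored indices. */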
1223 
1224 /*
1225  * ZSTD_window_enforceMaxDist():
1226  * Updates lowLimit so that:
1227  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1228  *
1229  * It ensures index is valid as long as index >= lowLimit.
1230  * This must be called before a block compression call.
1231  *
1232  * loadedDictEnd is only defined if a dictionary is in use for the current compression.
1233  * As the name implies, loadedDictEnd represents the index at the end of the dictionary.
1234  * The value lies within the context's referential, so it can be directly compared to blockEndIdx.
1235  *
1236  * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1237  * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1238  * This is because dictionaries are allowed to be referenced fully
1239  * as long as the last byte of the dictionary is in the window.
1240  * Once input has progressed beyond the window size, the dictionary cannot be referenced anymore.
1241  *
1242  * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1243  * In dictMatchState mode, lowLimit and dictLimit are the same,
1244  * and the dictionary is below them.
1245  * forceWindow and dictMatchState are therefore incompatible.
1246  */
1247 MEM_STATIC void
1248 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1249                      const void* blockEnd,
1250                            U32   maxDist,
1251                            U32*  loadedDictEndPtr,
1252                      const ZSTD_MatchState_t** dictMatchStatePtr)
1253 {
1254     U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1255     U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
1256     DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1257                 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1258 
1259     /* - When there is no dictionary : loadedDictEnd == 0.
1260          In this case, the test (blockEndIdx > maxDist) is merely there to avoid
1261          overflowing the next operation `newLowLimit = blockEndIdx - maxDist`.
1262        - When there is a standard dictionary :
1263          the index referential is copied from the dictionary,
1264          which means it starts from 0.
1265          In which case, loadedDictEnd == dictSize,
1266          and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1267          since `blockEndIdx` also starts from zero.
1268        - When there is an attached dictionary :
1269          loadedDictEnd is expressed within the referential of the context,
1270          so it can be directly compared against blockEndIdx.
1271     */
1272     if (blockEndIdx > maxDist + loadedDictEnd) {
1273         U32 const newLowLimit = blockEndIdx - maxDist;
1274         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1275         if (window->dictLimit < window->lowLimit) {
1276             DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
1277                         (unsigned)window->dictLimit, (unsigned)window->lowLimit);
1278             window->dictLimit = window->lowLimit;
1279         }
1280         /* On reaching window size, dictionaries are invalidated */
1281         if (loadedDictEndPtr) *loadedDictEndPtr = 0;
1282         if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1283     }
1284 }
1285 
1286 /* Similar to ZSTD_window_enforceMaxDist(),
1287  * but only invalidates dictionary
1288  * when input progresses beyond window size.
1289  * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non-NULL)
1290  *              loadedDictEnd uses same referential as window->base
1291  *              maxDist is the window size */
1292 MEM_STATIC void
1293 ZSTD_checkDictValidity(const ZSTD_window_t* window,
1294                        const void* blockEnd,
1295                              U32   maxDist,
1296                              U32*  loadedDictEndPtr,
1297                        const ZSTD_MatchState_t** dictMatchStatePtr)
1298 {
1299     assert(loadedDictEndPtr != NULL);
1300     assert(dictMatchStatePtr != NULL);
1301     {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1302         U32 const loadedDictEnd = *loadedDictEndPtr;
1303         DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1304                     (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1305         assert(blockEndIdx >= loadedDictEnd);
1306 
1307         if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
1308             /* On reaching window size, dictionaries are invalidated.
1309              * For simplification, if the window size is reached anywhere within the next block,
1310              * the dictionary is invalidated for the full block.
1311              *
1312              * We also have to invalidate the dictionary if ZSTD_window_update() has detected
1313              * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
1314              * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
1315              * dictMatchState, so setting it to NULL is not a problem.
1316              */
1317             DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1318             *loadedDictEndPtr = 0;
1319             *dictMatchStatePtr = NULL;
1320         } else {
1321             if (*loadedDictEndPtr != 0) {
1322                 DEBUGLOG(6, "dictionary considered valid for current block");
1323     }   }   }
1324 }
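
/* Illustrative sketch (not part of the build) : the rough shape of a per-block
 * driver using the window helpers above. `ms`, `ip`, `blockSize`, `maxDist` and
 * `cycleLog` are assumed to be provided by the caller; the real call sites in
 * zstd_compress.c differ in detail.
 *
 *   if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist,
 *                                          ms->loadedDictEnd, ip, ip + blockSize)) {
 *       U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
 *       ... subtract `correction` from every index stored in the match tables ...
 *   }
 *   ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist,
 *                              &ms->loadedDictEnd, &ms->dictMatchState);
 *   ... compress the block ...
 */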
1325 
1326 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1327     ZSTD_memset(window, 0, sizeof(*window));
1328     window->base = (BYTE const*)" ";
1329     window->dictBase = (BYTE const*)" ";
1330     ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1331     window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
1332     window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
1333     window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
1334     window->nbOverflowCorrections = 0;
1335 }
1336 
1337 /*
1338  * ZSTD_window_update():
1339  * Updates the window by appending [src, src + srcSize) to the window.
1340  * If it is not contiguous, the current prefix becomes the extDict, and we
1341  * forget about the old extDict. Handles overlap of the prefix and extDict.
1342  * Returns non-zero if the segment is contiguous.
1343  */
1344 MEM_STATIC
1345 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1346 U32 ZSTD_window_update(ZSTD_window_t* window,
1347                  const void* src, size_t srcSize,
1348                        int forceNonContiguous)
1349 {
1350     BYTE const* const ip = (BYTE const*)src;
1351     U32 contiguous = 1;
1352     DEBUGLOG(5, "ZSTD_window_update");
1353     if (srcSize == 0)
1354         return contiguous;
1355     assert(window->base != NULL);
1356     assert(window->dictBase != NULL);
1357     /* Check if blocks follow each other */
1358     if (src != window->nextSrc || forceNonContiguous) {
1359         /* not contiguous */
1360         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1361         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1362         window->lowLimit = window->dictLimit;
1363         assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
1364         window->dictLimit = (U32)distanceFromBase;
1365         window->dictBase = window->base;
1366         window->base = ip - distanceFromBase;
1367         /* ms->nextToUpdate = window->dictLimit; */
1368         if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
1369         contiguous = 0;
1370     }
1371     window->nextSrc = ip + srcSize;
1372     /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
1373     if ( (ip+srcSize > window->dictBase + window->lowLimit)
1374        & (ip < window->dictBase + window->dictLimit)) {
1375         size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
1376         U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1377         assert(highInputIdx < UINT_MAX);
1378         window->lowLimit = lowLimitMax;
1379         DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1380     }
1381     return contiguous;
1382 }
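
/* Illustrative sketch (not part of the build) : two updates from non-adjacent
 * buffers. `buf1`, `buf2`, `size1` and `size2` are hypothetical caller-owned values.
 *
 *   ZSTD_window_t w;
 *   ZSTD_window_init(&w);
 *   ZSTD_window_update(&w, buf1, size1, 0);   buf1 becomes the current prefix
 *   ZSTD_window_update(&w, buf2, size2, 0);   if buf2 != w.nextSrc, the former
 *                                             prefix becomes the extDict and the
 *                                             call returns 0 (non-contiguous)
 */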
1383 
1384 /*
1385  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1386  */
1387 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1388 {
1389     U32 const maxDistance = 1U << windowLog;
1390     U32 const lowestValid = ms->window.lowLimit;
1391     U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1392     U32 const isDictionary = (ms->loadedDictEnd != 0);
1393     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1394      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1395      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1396      */
1397     U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1398     return matchLowest;
1399 }
1400 
1401 /*
1402  * Returns the lowest allowed match index in the prefix.
1403  */
1404 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1405 {
1406     U32    const maxDistance = 1U << windowLog;
1407     U32    const lowestValid = ms->window.dictLimit;
1408     U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1409     U32    const isDictionary = (ms->loadedDictEnd != 0);
1410     /* When computing the lowest prefix index we need to take the dictionary into account to handle
1411      * the edge case where the dictionary and the source are contiguous in memory.
1412      */
1413     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
1414     return matchLowest;
1415 }
1416 
1417 /* index_safety_check:
1418  * intentional underflow : ensure repIndex isn't overlapping dict + prefix
1419  * @return 1 if values are not overlapping,
1420  * 0 otherwise */
1421 MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
1422     return ((U32)((prefixLowestIndex-1)  - repIndex) >= 3);
1423 }
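
/* Concretely : the unsigned subtraction wraps when repIndex >= prefixLowestIndex,
 * so the check returns 0 only when repIndex lies in
 * [prefixLowestIndex-3, prefixLowestIndex-1], i.e. exactly when a 4-byte match
 * read starting at repIndex would straddle the extDict / prefix boundary. */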
1424 
1425 
1426 /* debug functions */
1427 #if (DEBUGLEVEL>=2)
1428 
1429 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1430 {
1431     U32 const fp_accuracy = 8;
1432     U32 const fp_multiplier = (1 << fp_accuracy);
1433     U32 const newStat = rawStat + 1;
1434     U32 const hb = ZSTD_highbit32(newStat);
1435     U32 const BWeight = hb * fp_multiplier;
1436     U32 const FWeight = (newStat << fp_accuracy) >> hb;
1437     U32 const weight = BWeight + FWeight;
1438     assert(hb + fp_accuracy < 31);
1439     return (double)weight / fp_multiplier;
1440 }
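
/* ZSTD_fWeight() is a fixed-point approximation of log2(rawStat+1), up to a
 * constant offset and with the fractional part linearly interpolated.
 * ZSTD_debugTable() below uses ZSTD_fWeight(sum) - ZSTD_fWeight(table[u])
 * as an estimate of the symbol's Shannon bit cost -log2(table[u]/sum). */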
1441 
1442 /* display a table content,
1443  * listing each element, its frequency, and its predicted bit cost */
1444 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1445 {
1446     unsigned u, sum;
1447     for (u=0, sum=0; u<=max; u++) sum += table[u];
1448     DEBUGLOG(2, "total nb elts: %u", sum);
1449     for (u=0; u<=max; u++) {
1450         DEBUGLOG(2, "%2u: %5u  (%.2f)",
1451                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1452     }
1453 }
1454 
1455 #endif
1456 
1457 /* Short Cache */
1458 
1459 /* Normally, zstd matchfinders follow this flow:
1460  *     1. Compute hash at ip
1461  *     2. Load index from hashTable[hash]
1462  *     3. Check if *ip == *(base + index)
1463  * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
1464  *
1465  * Short cache is an optimization which allows us to avoid step 3 most of the time
1466  * when the data doesn't actually match. With short cache, the flow becomes:
1467  *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
1468  *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
1469  *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
1470  *
1471  * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
1472  * dictMatchState matchfinders.
1473  */
1474 #define ZSTD_SHORT_CACHE_TAG_BITS 8
1475 #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
1476 
1477 /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
1478  * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
1479 MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
1480     size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
1481     U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
1482     assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
1483     hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
1484 }
1485 
1486 /* Helper function for short cache matchfinders.
1487  * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
1488 MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
1489     U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
1490     U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
1491     return tag1 == tag2;
1492 }
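
/* Illustrative sketch (not part of the build) : probing a short-cache hash table.
 * `dictHashTable`, `dictHashLog`, `mls`, `ip` and `dictBase` are assumed to be
 * provided by the caller; the real dictMatchState matchfinders differ in detail.
 *
 *   size_t const hashAndTag = ZSTD_hashPtr(ip, dictHashLog + ZSTD_SHORT_CACHE_TAG_BITS, mls);
 *   U32 const packed        = dictHashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS];
 *   U32 const matchIndex    = packed >> ZSTD_SHORT_CACHE_TAG_BITS;
 *   if (ZSTD_comparePackedTags(packed, hashAndTag)) {
 *       ... only now compare *ip with *(dictBase + matchIndex) ...
 *   }
 * At CDict load time, the table entries are produced with
 * ZSTD_writeTaggedIndex(dictHashTable, hashAndTag, index). */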
1493 
1494 /* ===============================================================
1495  * Shared internal declarations
1496  * These prototypes may be called from sources not in lib/compress
1497  * =============================================================== */
1498 
1499 /* ZSTD_loadCEntropy() :
1500  * dict : must point at beginning of a valid zstd dictionary.
1501  * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1502  * assumptions : the magic number has already been checked,
1503  *               and dictSize >= 8 */
1504 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1505                          const void* const dict, size_t dictSize);
1506 
1507 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1508 
1509 typedef struct {
1510     U32 idx;            /* Index in array of ZSTD_Sequence */
1511     U32 posInSequence;  /* Position within sequence at idx */
1512     size_t posInSrc;    /* Number of bytes given by sequences provided so far */
1513 } ZSTD_SequencePosition;
1514 
1515 /* for benchmark */
1516 size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
1517                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
1518                         int const repcodeResolution);
1519 
1520 typedef struct {
1521     size_t nbSequences;
1522     size_t blockSize;
1523     size_t litSize;
1524 } BlockSummary;
1525 
1526 BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);
1527 
1528 /* ==============================================================
1529  * Private declarations
1530  * These prototypes shall only be called from within lib/compress
1531  * ============================================================== */
1532 
1533 /* ZSTD_getCParamsFromCCtxParams() :
1534  * cParams are built depending on compressionLevel, src size hints,
1535  * LDM and manually set compression parameters.
1536  * Note: srcSizeHint == 0 means 0!
1537  */
1538 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1539         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);
1540 
1541 /*! ZSTD_initCStream_internal() :
1542  *  Private use only. Init streaming operation.
1543  *  expects params to be valid.
1544  *  must receive dict, or cdict, or none, but not both.
1545  *  @return : 0, or an error code */
1546 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1547                      const void* dict, size_t dictSize,
1548                      const ZSTD_CDict* cdict,
1549                      const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1550 
1551 void ZSTD_resetSeqStore(SeqStore_t* ssPtr);
1552 
1553 /*! ZSTD_getCParamsFromCDict() :
1554  *  as the name implies */
1555 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1556 
1557 /* ZSTD_compressBegin_advanced_internal() :
1558  * Private use only. To be called from zstdmt_compress.c. */
1559 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1560                                     const void* dict, size_t dictSize,
1561                                     ZSTD_dictContentType_e dictContentType,
1562                                     ZSTD_dictTableLoadMethod_e dtlm,
1563                                     const ZSTD_CDict* cdict,
1564                                     const ZSTD_CCtx_params* params,
1565                                     unsigned long long pledgedSrcSize);
1566 
1567 /* ZSTD_compress_advanced_internal() :
1568  * Private use only. To be called from zstdmt_compress.c. */
1569 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1570                                        void* dst, size_t dstCapacity,
1571                                  const void* src, size_t srcSize,
1572                                  const void* dict,size_t dictSize,
1573                                  const ZSTD_CCtx_params* params);
1574 
1575 
1576 /* ZSTD_writeLastEmptyBlock() :
1577  * output an empty Block with end-of-frame mark to complete a frame
1578  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1579  *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1580  */
1581 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
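
/* For reference : in the zstd block format, the empty last block written by
 * ZSTD_writeLastEmptyBlock() is a 3-byte Raw block header with Last_Block == 1
 * and Block_Size == 0, i.e. bytes { 0x01, 0x00, 0x00 }. */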
1582 
1583 
1584 /* ZSTD_referenceExternalSequences() :
1585  * Must be called before starting a compression operation.
1586  * seqs must parse a prefix of the source.
1587  * This cannot be used when long range matching is enabled.
1588  * Zstd will use these sequences, and pass the literals to a secondary block
1589  * compressor.
1590  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1591  * access and data corruption.
1592  */
1593 void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1594 
1595 /* ZSTD_cycleLog() :
1596  *  condition for correct operation : hashLog > 1 */
1597 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1598 
1599 /* ZSTD_CCtx_trace() :
1600  *  Trace the end of a compression call.
1601  */
1602 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1603 
1604 /* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
1605 MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
1606     return params->extSeqProdFunc != NULL;
1607 }
1608 
1609 /* ===============================================================
1610  * Deprecated definitions that are still used internally to avoid
1611  * deprecation warnings. These functions are exactly equivalent to
1612  * their public variants, but avoid the deprecation warnings.
1613  * =============================================================== */
1614 
1615 size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
1616 
1617 size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
1618                                     void* dst, size_t dstCapacity,
1619                               const void* src, size_t srcSize);
1620 
1621 size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
1622                                void* dst, size_t dstCapacity,
1623                          const void* src, size_t srcSize);
1624 
1625 size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
1626 
1627 
1628 #endif /* ZSTD_COMPRESS_H */
1629