xref: /freebsd/sys/contrib/zstd/lib/common/zstd_internal.h (revision b37f6c9805edb4b89f0a8c2b78f78a3dcfc0647b)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13 
14 
15 /*-*************************************
16 *  Dependencies
17 ***************************************/
18 #include "compiler.h"
19 #include "mem.h"
20 #include "error_private.h"
21 #define ZSTD_STATIC_LINKING_ONLY
22 #include "zstd.h"
23 #define FSE_STATIC_LINKING_ONLY
24 #include "fse.h"
25 #define HUF_STATIC_LINKING_ONLY
26 #include "huf.h"
27 #ifndef XXH_STATIC_LINKING_ONLY
28 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
29 #endif
30 #include "xxhash.h"                /* XXH_reset, update, digest */
31 
32 
33 #if defined (__cplusplus)
34 extern "C" {
35 #endif
36 
37 
38 /*-*************************************
39 *  Debug
40 ***************************************/
41 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
42 #  include <assert.h>
43 #else
44 #  ifndef assert
45 #    define assert(condition) ((void)0)
46 #  endif
47 #endif
48 
49 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
50 
51 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
52 #  include <stdio.h>
53 /* recommended values for ZSTD_DEBUG display levels :
54  * 1 : no display, enables assert() only
55  * 2 : reserved for currently active debugging path
56  * 3 : events once per object lifetime (CCtx, CDict)
57  * 4 : events once per frame
58  * 5 : events once per block
59  * 6 : events once per sequence (*very* verbose) */
60 #  define DEBUGLOG(l, ...) {                         \
61                 if (l<=ZSTD_DEBUG) {                 \
62                     fprintf(stderr, __FILE__ ": ");  \
63                     fprintf(stderr, __VA_ARGS__);    \
64                     fprintf(stderr, " \n");          \
65             }   }
66 #else
67 #  define DEBUGLOG(l, ...)      {}    /* disabled */
68 #endif
69 
70 
71 /*-*************************************
72 *  shared macros
73 ***************************************/
74 #undef MIN
75 #undef MAX
76 #define MIN(a,b) ((a)<(b) ? (a) : (b))
77 #define MAX(a,b) ((a)>(b) ? (a) : (b))
78 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }  /* check and Forward error code */
79 #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); }  /* check and send Error code */
80 
81 
82 /*-*************************************
83 *  Common constants
84 ***************************************/
85 #define ZSTD_OPT_NUM    (1<<12)
86 
87 #define ZSTD_REP_NUM      3                 /* number of repcodes */
88 #define ZSTD_REP_CHECK    (ZSTD_REP_NUM)    /* number of repcodes to check by the optimal parser */
89 #define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
90 #define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
91 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
92 
93 #define KB *(1 <<10)
94 #define MB *(1 <<20)
95 #define GB *(1U<<30)
96 
97 #define BIT7 128
98 #define BIT6  64
99 #define BIT5  32
100 #define BIT4  16
101 #define BIT1   2
102 #define BIT0   1
103 
104 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
105 #define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
106 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
107 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
108 
109 #define ZSTD_FRAMEIDSIZE 4
110 static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE;  /* magic number size */
111 
112 #define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
113 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
114 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
115 
116 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
117 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
118 
119 #define HufLog 12
120 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
121 
122 #define LONGNBSEQ 0x7F00
123 
124 #define MINMATCH 3
125 
126 #define Litbits  8
127 #define MaxLit ((1<<Litbits) - 1)
128 #define MaxML  52
129 #define MaxLL  35
130 #define DefaultMaxOff 28
131 #define MaxOff 31
132 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
133 #define MLFSELog    9
134 #define LLFSELog    9
135 #define OffFSELog   8
136 
137 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138                                       1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
139                                      13,14,15,16 };
140 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
141                                              2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
142                                             -1,-1,-1,-1 };
143 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
144 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
145 
146 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147                                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
148                                       1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
149                                      12,13,14,15,16 };
150 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
151                                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152                                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
153                                             -1,-1,-1,-1,-1 };
154 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
155 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
156 
157 static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
158                                                      1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
159 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
160 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
161 
162 
163 /*-*******************************************
164 *  Shared functions to include for inlining
165 *********************************************/
166 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
167 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
168 
169 /*! ZSTD_wildcopy() :
170 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
171 #define WILDCOPY_OVERLENGTH 8
172 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
173 {
174     const BYTE* ip = (const BYTE*)src;
175     BYTE* op = (BYTE*)dst;
176     BYTE* const oend = op + length;
177     do
178         COPY8(op, ip)
179     while (op < oend);
180 }
181 
182 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
183 {
184     const BYTE* ip = (const BYTE*)src;
185     BYTE* op = (BYTE*)dst;
186     BYTE* const oend = (BYTE*)dstEnd;
187     do
188         COPY8(op, ip)
189     while (op < oend);
190 }
191 
192 
193 /*-*******************************************
194 *  Private interfaces
195 *********************************************/
196 typedef struct ZSTD_stats_s ZSTD_stats_t;
197 
198 typedef struct seqDef_s {
199     U32 offset;
200     U16 litLength;
201     U16 matchLength;
202 } seqDef;
203 
204 
205 typedef struct {
206     seqDef* sequencesStart;
207     seqDef* sequences;
208     BYTE* litStart;
209     BYTE* lit;
210     BYTE* llCode;
211     BYTE* mlCode;
212     BYTE* ofCode;
213     U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
214     U32   longLengthPos;
215     U32   rep[ZSTD_REP_NUM];
216     U32   repToConfirm[ZSTD_REP_NUM];
217 } seqStore_t;
218 
219 typedef struct {
220     U32 off;
221     U32 len;
222 } ZSTD_match_t;
223 
224 typedef struct {
225     U32 price;
226     U32 off;
227     U32 mlen;
228     U32 litlen;
229     U32 rep[ZSTD_REP_NUM];
230 } ZSTD_optimal_t;
231 
232 typedef struct {
233     U32* litFreq;
234     U32* litLengthFreq;
235     U32* matchLengthFreq;
236     U32* offCodeFreq;
237     ZSTD_match_t* matchTable;
238     ZSTD_optimal_t* priceTable;
239 
240     U32  matchLengthSum;
241     U32  matchSum;
242     U32  litLengthSum;
243     U32  litSum;
244     U32  offCodeSum;
245     U32  log2matchLengthSum;
246     U32  log2matchSum;
247     U32  log2litLengthSum;
248     U32  log2litSum;
249     U32  log2offCodeSum;
250     U32  factor;
251     U32  staticPrices;
252     U32  cachedPrice;
253     U32  cachedLitLength;
254     const BYTE* cachedLiterals;
255 } optState_t;
256 
257 typedef struct {
258     U32 offset;
259     U32 checksum;
260 } ldmEntry_t;
261 
262 typedef struct {
263     ldmEntry_t* hashTable;
264     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
265     U64 hashPower;          /* Used to compute the rolling hash.
266                              * Depends on ldmParams.minMatchLength */
267 } ldmState_t;
268 
269 typedef struct {
270     U32 enableLdm;          /* 1 if enable long distance matching */
271     U32 hashLog;            /* Log size of hashTable */
272     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
273     U32 minMatchLength;     /* Minimum match length */
274     U32 hashEveryLog;       /* Log number of entries to skip */
275 } ldmParams_t;
276 
277 typedef struct {
278     U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
279     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
280     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
281     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
282     U32 workspace[HUF_WORKSPACE_SIZE_U32];
283     HUF_repeat hufCTable_repeatMode;
284     FSE_repeat offcode_repeatMode;
285     FSE_repeat matchlength_repeatMode;
286     FSE_repeat litlength_repeatMode;
287 } ZSTD_entropyCTables_t;
288 
289 struct ZSTD_CCtx_params_s {
290     ZSTD_format_e format;
291     ZSTD_compressionParameters cParams;
292     ZSTD_frameParameters fParams;
293 
294     int compressionLevel;
295     U32 forceWindow;           /* force back-references to respect limit of
296                                 * 1<<wLog, even for dictionary */
297 
298     /* Multithreading: used to pass parameters to mtctx */
299     U32 nbThreads;
300     unsigned jobSize;
301     unsigned overlapSizeLog;
302 
303     /* Long distance matching parameters */
304     ldmParams_t ldmParams;
305 
306     /* For use with createCCtxParams() and freeCCtxParams() only */
307     ZSTD_customMem customMem;
308 
309 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
310 
311 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
312 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
313 
314 /* custom memory allocation functions */
315 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
316 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
317 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
318 
319 
320 /*======  common function  ======*/
321 
322 MEM_STATIC U32 ZSTD_highbit32(U32 val)
323 {
324     assert(val != 0);
325     {
326 #   if defined(_MSC_VER)   /* Visual */
327         unsigned long r=0;
328         _BitScanReverse(&r, val);
329         return (unsigned)r;
330 #   elif defined(__GNUC__) && (__GNUC__ >= 3) && __has_builtin(__builtin_clz)   /* GCC Intrinsic */
331         return 31 - __builtin_clz(val);
332 #   else   /* Software version */
333         static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
334         U32 v = val;
335         int r;
336         v |= v >> 1;
337         v |= v >> 2;
338         v |= v >> 4;
339         v |= v >> 8;
340         v |= v >> 16;
341         r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
342         return r;
343 #   endif
344     }
345 }
346 
347 
348 /* hidden functions */
349 
350 /* ZSTD_invalidateRepCodes() :
351  * ensures next compression will not use repcodes from previous block.
352  * Note : only works with regular variant;
353  *        do not use with extDict variant ! */
354 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
355 
356 
357 /*! ZSTD_initCStream_internal() :
358  *  Private use only. Init streaming operation.
359  *  expects params to be valid.
360  *  must receive dict, or cdict, or none, but not both.
361  *  @return : 0, or an error code */
362 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
363                      const void* dict, size_t dictSize,
364                      const ZSTD_CDict* cdict,
365                      ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);
366 
367 /*! ZSTD_compressStream_generic() :
368  *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
369 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
370                                    ZSTD_outBuffer* output,
371                                    ZSTD_inBuffer* input,
372                                    ZSTD_EndDirective const flushMode);
373 
374 /*! ZSTD_getCParamsFromCDict() :
375  *  as the name implies */
376 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
377 
378 /* ZSTD_compressBegin_advanced_internal() :
379  * Private use only. To be called from zstdmt_compress.c. */
380 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
381                                     const void* dict, size_t dictSize,
382                                     ZSTD_dictMode_e dictMode,
383                                     ZSTD_CCtx_params params,
384                                     unsigned long long pledgedSrcSize);
385 
386 /* ZSTD_compress_advanced_internal() :
387  * Private use only. To be called from zstdmt_compress.c. */
388 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
389                                        void* dst, size_t dstCapacity,
390                                  const void* src, size_t srcSize,
391                                  const void* dict,size_t dictSize,
392                                  ZSTD_CCtx_params params);
393 
394 typedef struct {
395     blockType_e blockType;
396     U32 lastBlock;
397     U32 origSize;
398 } blockProperties_t;
399 
400 /*! ZSTD_getcBlockSize() :
401 *   Provides the size of compressed block from block header `src` */
402 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
403                           blockProperties_t* bpPtr);
404 
405 #if defined (__cplusplus)
406 }
407 #endif
408 
409 #endif   /* ZSTD_CCOMMON_H_MODULE */
410