xref: /freebsd/sys/contrib/zstd/lib/common/zstd_internal.h (revision 130d950cafcd29c6a32cf5357bf600dcd9c1d998)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13 
14 /* this module contains definitions which must be identical
15  * across compression, decompression and dictBuilder.
16  * It also contains a few functions useful to at least 2 of them
17  * and which benefit from being inlined */
18 
19 /*-*************************************
20 *  Dependencies
21 ***************************************/
22 #include "compiler.h"
23 #include "mem.h"
24 #include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
25 #include "error_private.h"
26 #define ZSTD_STATIC_LINKING_ONLY
27 #include "zstd.h"
28 #define FSE_STATIC_LINKING_ONLY
29 #include "fse.h"
30 #define HUF_STATIC_LINKING_ONLY
31 #include "huf.h"
32 #ifndef XXH_STATIC_LINKING_ONLY
33 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
34 #endif
35 #include "xxhash.h"                /* XXH_reset, update, digest */
36 
37 #if defined (__cplusplus)
38 extern "C" {
39 #endif
40 
/* ---- static assert (debug) and error-test aliases ---- */
/* ZSTD_STATIC_ASSERT(c) forwards its condition to DEBUG_STATIC_ASSERT(). */
#define ZSTD_STATIC_ASSERT(c)   DEBUG_STATIC_ASSERT(c)
/* All three *_isError names below resolve to ERR_isError (for inlining). */
#define ZSTD_isError            ERR_isError
#define FSE_isError             ERR_isError
#define HUF_isError             ERR_isError
46 
47 
48 /*-*************************************
49 *  shared macros
50 ***************************************/
/* Discard any earlier MIN/MAX definition, then install the local versions.
 * Both are plain macros : an argument may be evaluated twice, so do not pass
 * expressions with side effects. When both arguments compare equal,
 * the second one (b) is the result. */
#undef MIN
#undef MAX
#define MIN(a,b)   ( ((a) < (b)) ? (a) : (b) )
#define MAX(a,b)   ( ((a) > (b)) ? (a) : (b) )
55 
/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 *
 * @param cond  condition to test; evaluated once by the `if`, and also handed
 *              to ZSTD_QUOTE() for the log line.
 * @param err   error name, wrapped as ERROR(err) for the returned value.
 * @param ...   optional message : a string literal format followed by its
 *              arguments. It is pasted after ": ", so the first variadic
 *              argument must be a string literal.
 *
 * The `if` is enclosed in do { } while(0) so that an `else` written after an
 * invocation can never silently attach to the macro's internal `if`.
 * The expansion ends with `;`, like RETURN_ERROR() and FORWARD_IF_ERROR(),
 * so invocations compile with or without their own terminating semicolon.
 */
#define RETURN_ERROR_IF(cond, err, ...) \
  do { \
    if (cond) { \
      RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return ERROR(err); \
    } \
  } while(0);
70 
/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 *
 * @param err   error name, wrapped as ERROR(err) for the returned value.
 * @param ...   optional message : a string literal format followed by its
 *              arguments; it is pasted after ": " in the log line.
 *
 * Note : the expansion already ends with `;`. An invocation followed by its
 * own `;` therefore produces an extra empty statement, which means this macro
 * cannot be the un-braced body of an `if` that has an `else`.
 */
#define RETURN_ERROR(err, ...)                                              \
    do {                                                                    \
        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
        RAWLOG(3, ": " __VA_ARGS__);                                        \
        RAWLOG(3, "\n");                                                    \
        return ERROR(err);                                                  \
    } while(0);
83 
/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 *
 * @param err   expression producing a size_t result. It is evaluated exactly
 *              once, into a local named `err_code`, and tested with
 *              ERR_isError().
 * @param ...   optional message : a string literal format followed by its
 *              arguments; it is pasted after ": " in the log line.
 *
 * Note : the expansion already ends with `;`. An invocation followed by its
 * own `;` therefore produces an extra empty statement, which means this macro
 * cannot be the un-braced body of an `if` that has an `else`.
 */
#define FORWARD_IF_ERROR(err, ...)                                          \
    do {                                                                    \
        size_t const err_code = (err);                                      \
        if (ERR_isError(err_code)) {                                        \
            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
            RAWLOG(3, ": " __VA_ARGS__);                                    \
            RAWLOG(3, "\n");                                                \
            return err_code;                                                \
        }                                                                   \
    } while(0);
99 
100 
101 /*-*************************************
102 *  Common constants
103 ***************************************/
104 #define ZSTD_OPT_NUM    (1<<12)
105 
106 #define ZSTD_REP_NUM      3                 /* number of repcodes */
107 #define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
108 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
109 
/* Size suffixes, written after a number : `4 KB` expands to `4 *(1<<10)`.
 * The expansion starts with `*`, so parenthesize the whole `N KB` term when it
 * sits next to operators of equal or higher precedence.
 * KB and MB multiply by an int; GB multiplies by an unsigned (1U<<30). */
#define KB   *(1<<10)
#define MB   *(1<<20)
#define GB   *(1U<<30)
113 
/* Single-bit masks : BITn has only bit n set.
 * No macro is provided here for bits 2 and 3. */
#define BIT7 0x80   /* 128 */
#define BIT6 0x40   /*  64 */
#define BIT5 0x20   /*  32 */
#define BIT4 0x10   /*  16 */
#define BIT1 0x02   /*   2 */
#define BIT0 0x01   /*   1 */
120 
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10

/* Field-size lookup tables, 4 entries each.
 * NOTE(review): presumably byte sizes selected by a 2-bit frame-header code
 * (fcs = frame content size, did = dictionary ID) - confirm against users. */
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };

#define ZSTD_FRAMEIDSIZE 4   /* magic number size */

/* Kept as a macro : the C standard doesn't allow a `static const` variable
 * to be initialized from another `static const` variable. */
#define ZSTD_BLOCKHEADERSIZE 3
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;

typedef enum {
    bt_raw,          /* == 0 */
    bt_rle,          /* == 1 */
    bt_compressed,   /* == 2 */
    bt_reserved      /* == 3 */
} blockType_e;
130 
#define MIN_SEQUENCES_SIZE 1   /* nbSeq==0 */
/* smallest size for a non-null block : 1 + 1 + MIN_SEQUENCES_SIZE == 3 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */

#define HufLog 12
typedef enum {
    set_basic,        /* == 0 */
    set_rle,          /* == 1 */
    set_compressed,   /* == 2 */
    set_repeat        /* == 3 */
} symbolEncodingType_e;

#define LONGNBSEQ 0x7F00

#define MINMATCH 3

#define Litbits         8
#define MaxLit          ((1<<Litbits) - 1)   /* == 255 */
#define MaxML           52
#define MaxLL           35
#define DefaultMaxOff   28
#define MaxOff          31
#define MaxSeq          MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog        9
#define LLFSELog        9
#define OffFSELog       8
#define MaxFSELog       MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
152 
153 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
154                                       0, 0, 0, 0, 0, 0, 0, 0,
155                                       1, 1, 1, 1, 2, 2, 3, 3,
156                                       4, 6, 7, 8, 9,10,11,12,
157                                      13,14,15,16 };
158 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
159                                              2, 2, 2, 2, 2, 1, 1, 1,
160                                              2, 2, 2, 2, 2, 2, 2, 2,
161                                              2, 3, 2, 1, 1, 1, 1, 1,
162                                             -1,-1,-1,-1 };
163 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
164 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
165 
166 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
167                                       0, 0, 0, 0, 0, 0, 0, 0,
168                                       0, 0, 0, 0, 0, 0, 0, 0,
169                                       0, 0, 0, 0, 0, 0, 0, 0,
170                                       1, 1, 1, 1, 2, 2, 3, 3,
171                                       4, 4, 5, 7, 8, 9,10,11,
172                                      12,13,14,15,16 };
173 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
174                                              2, 1, 1, 1, 1, 1, 1, 1,
175                                              1, 1, 1, 1, 1, 1, 1, 1,
176                                              1, 1, 1, 1, 1, 1, 1, 1,
177                                              1, 1, 1, 1, 1, 1, 1, 1,
178                                              1, 1, 1, 1, 1, 1,-1,-1,
179                                             -1,-1,-1,-1,-1 };
180 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
181 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
182 
183 static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
184                                                      2, 1, 1, 1, 1, 1, 1, 1,
185                                                      1, 1, 1, 1, 1, 1, 1, 1,
186                                                     -1,-1,-1,-1,-1 };
187 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
188 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
189 
190 
191 /*-*******************************************
192 *  Shared functions to include for inlining
193 *********************************************/
/* ZSTD_copy8() : copies exactly 8 bytes from src to dst, through memcpy(). */
static void ZSTD_copy8(void* dst, const void* src)
{
    memcpy(dst, src, 8);
}

/* COPY8(d,s) : copies 8 bytes from s to d, then advances both d and s by 8.
 * Expands to a brace-enclosed block (no trailing `;` needed);
 * d and s must be modifiable pointers, and each is named twice. */
#define COPY8(d,s)   { ZSTD_copy8(d,s); d+=8; s+=8; }
/* ZSTD_copy16() : copies exactly 16 bytes from src to dst, through memcpy(). */
static void ZSTD_copy16(void* dst, const void* src)
{
    memcpy(dst, src, 16);
}
/* COPY16(d,s) : copies 16 bytes from s to d, then advances both d and s by 16.
 * Expands to a brace-enclosed block; d and s must be modifiable pointers,
 * and each is named twice. */
#define COPY16(d,s)   { ZSTD_copy16(d,s); d+=16; s+=16; }
199 
#define WILDCOPY_OVERLENGTH   32
#define WILDCOPY_VECLEN       16

/* Overlap mode handed to ZSTD_wildcopy() */
typedef enum {
    ZSTD_no_overlap,               /* == 0 */
    ZSTD_overlap_src_before_dst    /* == 1 */
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
208 
209 /*! ZSTD_wildcopy() :
210  *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
211  *  @param ovtype controls the overlap detection
212  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
213  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
214  *           The src buffer must be before the dst buffer.
215  */
216 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
217 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
218 {
219     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
220     const BYTE* ip = (const BYTE*)src;
221     BYTE* op = (BYTE*)dst;
222     BYTE* const oend = op + length;
223 
224     assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
225 
226     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
227         /* Handle short offset copies. */
228         do {
229             COPY8(op, ip)
230         } while (op < oend);
231     } else {
232         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
233         /* Separate out the first two COPY16() calls because the copy length is
234          * almost certain to be short, so the branches have different
235          * probabilities.
236          * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
237          * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
238          */
239         COPY16(op, ip);
240         COPY16(op, ip);
241         if (op >= oend) return;
242         do {
243             COPY16(op, ip);
244             COPY16(op, ip);
245         }
246         while (op < oend);
247     }
248 }
249 
250 
251 /*-*******************************************
252 *  Private declarations
253 *********************************************/
254 typedef struct seqDef_s {
255     U32 offset;
256     U16 litLength;
257     U16 matchLength;
258 } seqDef;
259 
260 typedef struct {
261     seqDef* sequencesStart;
262     seqDef* sequences;
263     BYTE* litStart;
264     BYTE* lit;
265     BYTE* llCode;
266     BYTE* mlCode;
267     BYTE* ofCode;
268     size_t maxNbSeq;
269     size_t maxNbLit;
270     U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
271     U32   longLengthPos;
272 } seqStore_t;
273 
/**
 * Pairs the compressed size of a frame with an upper bound on its decompressed size.
 * Used by : decompress & legacy.
 * Note: `compressedSize` may carry an error code : test it with ZSTD_isError() before use.
 *       Likewise `decompressedBound` may carry an error marker : before use, check
 *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
 */
typedef struct {
    size_t             compressedSize;
    unsigned long long decompressedBound;
} ZSTD_frameSizeInfo;   /* decompress & legacy */
284 
285 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
286 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
287 
288 /* custom memory allocation functions */
289 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
290 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
291 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
292 
293 
294 MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
295 {
296     assert(val != 0);
297     {
298 #   if defined(_MSC_VER)   /* Visual */
299         unsigned long r=0;
300         _BitScanReverse(&r, val);
301         return (unsigned)r;
302 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
303         return __builtin_clz (val) ^ 31;
304 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
305         return 31 - __CLZ(val);
306 #   else   /* Software version */
307         static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
308         U32 v = val;
309         v |= v >> 1;
310         v |= v >> 2;
311         v |= v >> 4;
312         v |= v >> 8;
313         v |= v >> 16;
314         return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
315 #   endif
316     }
317 }
318 
319 
320 /* ZSTD_invalidateRepCodes() :
321  * ensures next compression will not use repcodes from previous block.
322  * Note : only works with regular variant;
323  *        do not use with extDict variant ! */
324 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
325 
326 
327 typedef struct {
328     blockType_e blockType;
329     U32 lastBlock;
330     U32 origSize;
331 } blockProperties_t;   /* declared here for decompress and fullbench */
332 
333 /*! ZSTD_getcBlockSize() :
334  *  Provides the size of compressed block from block header `src` */
335 /* Used by: decompress, fullbench (does not get its definition from here) */
336 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
337                           blockProperties_t* bpPtr);
338 
339 /*! ZSTD_decodeSeqHeaders() :
340  *  decode sequence header from src */
341 /* Used by: decompress, fullbench (does not get its definition from here) */
342 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
343                        const void* src, size_t srcSize);
344 
345 
346 #if defined (__cplusplus)
347 }
348 #endif
349 
350 #endif   /* ZSTD_CCOMMON_H_MODULE */
351