xref: /freebsd/sys/contrib/zstd/lib/common/zstd_internal.h (revision 036d2e814bf0f5d88ffb4b24c159320894541757)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13 
14 /* this module contains definitions which must be identical
15  * across compression, decompression and dictBuilder.
16  * It also contains a few functions useful to at least 2 of them
17  * and which benefit from being inlined */
18 
19 /*-*************************************
20 *  Dependencies
21 ***************************************/
22 #include "compiler.h"
23 #include "mem.h"
24 #include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
25 #include "error_private.h"
26 #define ZSTD_STATIC_LINKING_ONLY
27 #include "zstd.h"
28 #define FSE_STATIC_LINKING_ONLY
29 #include "fse.h"
30 #define HUF_STATIC_LINKING_ONLY
31 #include "huf.h"
32 #ifndef XXH_STATIC_LINKING_ONLY
33 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
34 #endif
35 #include "xxhash.h"                /* XXH_reset, update, digest */
36 
37 #if defined (__cplusplus)
38 extern "C" {
39 #endif
40 
41 /* ---- static assert (debug) --- */
42 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
43 #define ZSTD_isError ERR_isError   /* for inlining */
44 #define FSE_isError  ERR_isError
45 #define HUF_isError  ERR_isError
46 
47 
48 /*-*************************************
49 *  shared macros
50 ***************************************/
51 #undef MIN
52 #undef MAX
53 #define MIN(a,b) ((a)<(b) ? (a) : (b))
54 #define MAX(a,b) ((a)>(b) ? (a) : (b))
55 
/**
 * RETURN_ERROR_IF(cond, err, ...) :
 * When `cond` evaluates to true, leave the enclosing function by returning
 * ERROR(err).
 *
 * Before returning, three RAWLOG() calls at level 3 emit: the file and line,
 * the stringified condition and the stringified ERROR(err); then ": " followed
 * by the optional message given through `...`; then a newline. The message is
 * pasted onto the ": " literal, so when present it must begin with a string
 * literal. Logging the stringified condition is the reason this macro is
 * spelled out instead of wrapping RETURN_ERROR().
 *
 * NOTE(review): the expansion is a bare `if (cond) { ... }` statement. An
 * `else` written after an invocation can bind to this hidden `if`, and
 * `if (x) RETURN_ERROR_IF(...); else ...` does not parse. Use it only as a
 * standalone statement.
 */
#define RETURN_ERROR_IF(cond, err, ...)                                   \
  if (cond) {                                                             \
    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",             \
           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
    RAWLOG(3, ": " __VA_ARGS__);                                          \
    RAWLOG(3, "\n");                                                      \
    return ERROR(err);                                                    \
  }
70 
/**
 * RETURN_ERROR(err, ...) :
 * Leave the enclosing function immediately by returning ERROR(err).
 *
 * Before returning, three RAWLOG() calls at level 3 emit: the file, line and
 * stringified ERROR(err); then ": " followed by the optional message given
 * through `...` (which, when present, must begin with a string literal);
 * then a newline.
 *
 * NOTE(review): the expansion itself ends with `;` after `while(0)`, so
 * `RETURN_ERROR(e);` produces an extra empty statement and
 * `if (x) RETURN_ERROR(e); else ...` does not parse. Use it only as a
 * standalone statement.
 */
#define RETURN_ERROR(err, ...)                                            \
  do {                                                                    \
    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s",  \
           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err)));                   \
    RAWLOG(3, ": " __VA_ARGS__);                                          \
    RAWLOG(3, "\n");                                                      \
    return ERROR(err);                                                    \
  } while(0);
83 
/**
 * FORWARD_IF_ERROR(err, ...) :
 * Evaluate the expression `err` exactly once into a size_t; when
 * ERR_isError() reports it as an error code, return that same code from the
 * enclosing function. Otherwise execution continues after the macro.
 *
 * On the error path, three RAWLOG() calls at level 3 emit: the file, line,
 * stringified expression and ERR_getErrorName() of the code; then ": "
 * followed by the optional message given through `...` (which, when present,
 * must begin with a string literal); then a newline.
 *
 * NOTE(review): the expansion itself ends with `;` after `while(0)`, so
 * `if (x) FORWARD_IF_ERROR(e); else ...` does not parse. Use it only as a
 * standalone statement.
 */
#define FORWARD_IF_ERROR(err, ...)                                        \
  do {                                                                    \
    size_t const err_code = (err);                                        \
    if (ERR_isError(err_code)) {                                          \
      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s",              \
             __FILE__, __LINE__, ZSTD_QUOTE(err),                         \
             ERR_getErrorName(err_code));                                 \
      RAWLOG(3, ": " __VA_ARGS__);                                        \
      RAWLOG(3, "\n");                                                    \
      return err_code;                                                    \
    }                                                                     \
  } while(0);
99 
100 
101 /*-*************************************
102 *  Common constants
103 ***************************************/
104 #define ZSTD_OPT_NUM    (1<<12)
105 
106 #define ZSTD_REP_NUM      3                 /* number of repcodes */
107 #define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
108 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
109 
/* Postfix size multipliers: written after a value, so `4 KB` expands to
 * `4 *(1 <<10)`. The expansion is not wrapped in parentheses; only the operand
 * immediately to the left is scaled (`a+b KB` means `a + b*(1<<10)`).
 * KB and MB multiply by a signed int constant, GB by an unsigned one (1U). */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
113 
114 #define BIT7 128
115 #define BIT6  64
116 #define BIT5  32
117 #define BIT4  16
118 #define BIT1   2
119 #define BIT0   1
120 
121 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
122 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
123 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
124 
125 #define ZSTD_FRAMEIDSIZE 4   /* magic number size */
126 
127 #define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
128 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
129 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
130 
131 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
132 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
133 
134 #define HufLog 12
135 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
136 
137 #define LONGNBSEQ 0x7F00
138 
139 #define MINMATCH 3
140 
141 #define Litbits  8
142 #define MaxLit ((1<<Litbits) - 1)
143 #define MaxML   52
144 #define MaxLL   35
145 #define DefaultMaxOff 28
146 #define MaxOff  31
147 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
148 #define MLFSELog    9
149 #define LLFSELog    9
150 #define OffFSELog   8
151 #define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
152 
/* LL_bits : one entry per symbol 0..MaxLL (36 entries).
 * NOTE(review): presumably the number of extra bits attached to each
 * literal-length code - confirm against the format specification. */
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3,
                                      4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
/* LL_defaultNorm : one entry per symbol 0..MaxLL (36 entries).
 * The absolute values add up to 64 == 1 << LL_DEFAULTNORMLOG.
 * NOTE(review): -1 is presumably the FSE "low probability" marker (counted
 * as 1) - confirm in fse.h. */
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
                                             2, 2, 2, 2, 2, 1, 1, 1,
                                             2, 2, 2, 2, 2, 2, 2, 2,
                                             2, 3, 2, 1, 1, 1, 1, 1,
                                            -1,-1,-1,-1 };
#define LL_DEFAULTNORMLOG 6  /* macro form, usable where a constant expression is required (static allocation) */
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;   /* same value as a typed variable */
165 
/* ML_bits : one entry per symbol 0..MaxML (53 entries).
 * NOTE(review): presumably the number of extra bits attached to each
 * match-length code - confirm against the format specification. */
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3,
                                      4, 4, 5, 7, 8, 9,10,11,
                                     12,13,14,15,16 };
/* ML_defaultNorm : one entry per symbol 0..MaxML (53 entries).
 * The absolute values add up to 64 == 1 << ML_DEFAULTNORMLOG.
 * NOTE(review): -1 is presumably the FSE "low probability" marker (counted
 * as 1) - confirm in fse.h. */
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
                                             2, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1,-1,-1,
                                            -1,-1,-1,-1,-1 };
#define ML_DEFAULTNORMLOG 6  /* macro form, usable where a constant expression is required (static allocation) */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;   /* same value as a typed variable */
182 
/* OF_defaultNorm : one entry per symbol 0..DefaultMaxOff (29 entries); note the
 * table is sized by DefaultMaxOff, not MaxOff.
 * The absolute values add up to 32 == 1 << OF_DEFAULTNORMLOG.
 * NOTE(review): -1 is presumably the FSE "low probability" marker (counted
 * as 1) - confirm in fse.h. */
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
                                                     2, 1, 1, 1, 1, 1, 1, 1,
                                                     1, 1, 1, 1, 1, 1, 1, 1,
                                                    -1,-1,-1,-1,-1 };
#define OF_DEFAULTNORMLOG 5  /* macro form, usable where a constant expression is required (static allocation) */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;   /* same value as a typed variable */
189 
190 
191 /*-*******************************************
192 *  Shared functions to include for inlining
193 *********************************************/
/* ZSTD_copy8() :
 * Copy exactly 8 bytes from `src` to `dst` through memcpy(). */
static void ZSTD_copy8(void* dst, const void* src)
{
    memcpy(dst, src, 8);
}

/* COPY8() :
 * Copy 8 bytes from `s` to `d`, then advance both pointers by 8.
 * `d` and `s` must be modifiable pointer lvalues. The expansion is a
 * brace-enclosed block, so it is a complete statement even without `;`. */
#define COPY8(d,s) {  \
    ZSTD_copy8(d,s);  \
    d+=8;             \
    s+=8;             \
}

/* ZSTD_copy16() :
 * Copy exactly 16 bytes from `src` to `dst` through memcpy(). */
static void ZSTD_copy16(void* dst, const void* src)
{
    memcpy(dst, src, 16);
}

/* COPY16() :
 * Same as COPY8(), with a 16-byte copy and a 16-byte advance. */
#define COPY16(d,s) {  \
    ZSTD_copy16(d,s);  \
    d+=16;             \
    s+=16;             \
}
199 
200 #define WILDCOPY_OVERLENGTH 8
201 #define VECLEN 16
202 
/* ZSTD_overlap_e :
 * Overlap hint passed to ZSTD_wildcopy() / ZSTD_wildcopy_16min().
 *  - ZSTD_no_overlap (0)             : source and destination do not overlap.
 *  - ZSTD_overlap_src_before_dst (1) : the regions may overlap, with `src`
 *    located before `dst`; the copy routines then fall back to 8-byte steps
 *    when the two pointers are less than VECLEN bytes apart.
 * No comma after the last enumerator: a trailing comma is not valid in
 * ISO C90 nor in C++98/03, and this header is compiled as both. */
typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
208 
209 /*! ZSTD_wildcopy() :
210  *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
211 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
212 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
213 {
214     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
215     const BYTE* ip = (const BYTE*)src;
216     BYTE* op = (BYTE*)dst;
217     BYTE* const oend = op + length;
218 
219     assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
220     if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
221       do
222           COPY8(op, ip)
223       while (op < oend);
224     }
225     else {
226       if ((length & 8) == 0)
227         COPY8(op, ip);
228       do {
229         COPY16(op, ip);
230       }
231       while (op < oend);
232     }
233 }
234 
235 /*! ZSTD_wildcopy_16min() :
236  *  same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
237 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
238 void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
239 {
240     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
241     const BYTE* ip = (const BYTE*)src;
242     BYTE* op = (BYTE*)dst;
243     BYTE* const oend = op + length;
244 
245     assert(length >= 8);
246     assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
247 
248     if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
249       do
250           COPY8(op, ip)
251       while (op < oend);
252     }
253     else {
254       if ((length & 8) == 0)
255         COPY8(op, ip);
256       do {
257         COPY16(op, ip);
258       }
259       while (op < oend);
260     }
261 }
262 
263 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
264 {
265     const BYTE* ip = (const BYTE*)src;
266     BYTE* op = (BYTE*)dst;
267     BYTE* const oend = (BYTE*)dstEnd;
268     do
269         COPY8(op, ip)
270     while (op < oend);
271 }
272 
273 
274 /*-*******************************************
275 *  Private declarations
276 *********************************************/
/* seqDef : one stored sequence, 8 bytes of payload (one 32-bit field and two
 * 16-bit fields).
 * NOTE(review): the exact encoding of each field (any bias applied to offset
 * or matchLength, handling of lengths that do not fit in 16 bits) is not
 * visible in this header - confirm against the compressor. */
typedef struct seqDef_s {
    U32 offset;
    U16 litLength;
    U16 matchLength;
} seqDef;
282 
/* seqStore_t : bookkeeping for a set of sequences (seqDef) plus the associated
 * literal and code buffers. Read through ZSTD_getSeqStore() and consumed by
 * ZSTD_seqToCodes().
 * NOTE(review): the field roles below are inferred from their names; confirm
 * against the code that fills this structure. */
typedef struct {
    seqDef* sequencesStart;   /* presumably first element of the sequence array */
    seqDef* sequences;        /* presumably current position within that array */
    BYTE* litStart;           /* presumably start of the literals buffer */
    BYTE* lit;                /* presumably current position within the literals buffer */
    BYTE* llCode;             /* byte buffers for literal-length, */
    BYTE* mlCode;             /*   match-length */
    BYTE* ofCode;             /*   and offset codes (presumably one entry per sequence) */
    size_t maxNbSeq;          /* presumably capacity of the sequence array */
    size_t maxNbLit;          /* presumably capacity of the literals buffer */
    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
    U32   longLengthPos;      /* presumably index of the sequence carrying the long length; meaningful only when longLengthID != 0 - TODO confirm */
} seqStore_t;
296 
/**
 * Contains the compressed frame size and an upper bound for the decompressed frame size.
 * Both fields can carry an error instead of a value, so check before use:
 *  - `compressedSize`    : test it with ZSTD_isError();
 *  - `decompressedBound` : compare it against ZSTD_CONTENTSIZE_ERROR
 *                          (valid when `decompressedBound != ZSTD_CONTENTSIZE_ERROR`).
 */
typedef struct {
    size_t compressedSize;                  /* compressed frame size, or an error code */
    unsigned long long decompressedBound;   /* upper bound of the decompressed size, or ZSTD_CONTENTSIZE_ERROR */
} ZSTD_frameSizeInfo;   /* decompress & legacy */
307 
308 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
309 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
310 
311 /* custom memory allocation functions */
312 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
313 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
314 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
315 
316 
317 MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
318 {
319     assert(val != 0);
320     {
321 #   if defined(_MSC_VER)   /* Visual */
322         unsigned long r=0;
323         _BitScanReverse(&r, val);
324         return (unsigned)r;
325 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
326         return 31 - __builtin_clz(val);
327 #   else   /* Software version */
328         static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
329         U32 v = val;
330         v |= v >> 1;
331         v |= v >> 2;
332         v |= v >> 4;
333         v |= v >> 8;
334         v |= v >> 16;
335         return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
336 #   endif
337     }
338 }
339 
340 
341 /* ZSTD_invalidateRepCodes() :
342  * ensures next compression will not use repcodes from previous block.
343  * Note : only works with regular variant;
344  *        do not use with extDict variant ! */
345 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
346 
347 
/* blockProperties_t : block description filled by ZSTD_getcBlockSize()
 * through its `bpPtr` argument.
 * NOTE(review): the field meanings below are inferred from their names;
 * confirm against the ZSTD_getcBlockSize() implementation. */
typedef struct {
    blockType_e blockType;   /* one of bt_raw, bt_rle, bt_compressed, bt_reserved */
    U32 lastBlock;           /* presumably non-zero when this is the final block of the frame */
    U32 origSize;            /* presumably the size field carried by the block header - TODO confirm units/meaning per block type */
} blockProperties_t;   /* declared here for decompress and fullbench */
353 
354 /*! ZSTD_getcBlockSize() :
355  *  Provides the size of compressed block from block header `src` */
356 /* Used by: decompress, fullbench (does not get its definition from here) */
357 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
358                           blockProperties_t* bpPtr);
359 
360 /*! ZSTD_decodeSeqHeaders() :
361  *  decode sequence header from src */
362 /* Used by: decompress, fullbench (does not get its definition from here) */
363 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
364                        const void* src, size_t srcSize);
365 
366 
367 #if defined (__cplusplus)
368 }
369 #endif
370 
371 #endif   /* ZSTD_CCOMMON_H_MODULE */
372