xref: /freebsd/sys/contrib/zstd/lib/common/zstd_internal.h (revision c0a4a7bb942fd3302f0093e4353820916d3661d1)
1 /*
2  * Copyright (c) Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13 
14 /* this module contains definitions which must be identical
15  * across compression, decompression and dictBuilder.
16  * It also contains a few functions useful to at least 2 of them
17  * and which benefit from being inlined */
18 
19 /*-*************************************
20 *  Dependencies
21 ***************************************/
22 #include "compiler.h"
23 #include "cpu.h"
24 #include "mem.h"
25 #include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
26 #include "error_private.h"
27 #define ZSTD_STATIC_LINKING_ONLY
28 #include "../zstd.h"
29 #define FSE_STATIC_LINKING_ONLY
30 #include "fse.h"
31 #define HUF_STATIC_LINKING_ONLY
32 #include "huf.h"
33 #ifndef XXH_STATIC_LINKING_ONLY
34 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
35 #endif
36 #include "xxhash.h"                /* XXH_reset, update, digest */
37 #ifndef ZSTD_NO_TRACE
38 #  include "zstd_trace.h"
39 #else
40 #  define ZSTD_TRACE 0
41 #endif
42 
43 #if defined (__cplusplus)
44 extern "C" {
45 #endif
46 
47 /* ---- static assert (debug) --- */
48 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
49 #define ZSTD_isError ERR_isError   /* for inlining */
50 #define FSE_isError  ERR_isError
51 #define HUF_isError  ERR_isError
52 
53 
54 /*-*************************************
55 *  shared macros
56 ***************************************/
57 #undef MIN
58 #undef MAX
59 #define MIN(a,b) ((a)<(b) ? (a) : (b))
60 #define MAX(a,b) ((a)>(b) ? (a) : (b))
61 #define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
62 
63 
64 /*-*************************************
65 *  Common constants
66 ***************************************/
67 #define ZSTD_OPT_NUM    (1<<12)
68 
69 #define ZSTD_REP_NUM      3                 /* number of repcodes */
70 static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
71 
72 #define KB *(1 <<10)
73 #define MB *(1 <<20)
74 #define GB *(1U<<30)
75 
76 #define BIT7 128
77 #define BIT6  64
78 #define BIT5  32
79 #define BIT4  16
80 #define BIT1   2
81 #define BIT0   1
82 
83 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
84 static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
85 static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
86 
87 #define ZSTD_FRAMEIDSIZE 4   /* magic number size */
88 
89 #define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
90 static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
91 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
92 
93 #define ZSTD_FRAMECHECKSUMSIZE 4
94 
95 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
96 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
97 
98 #define HufLog 12
99 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
100 
101 #define LONGNBSEQ 0x7F00
102 
103 #define MINMATCH 3
104 
105 #define Litbits  8
106 #define MaxLit ((1<<Litbits) - 1)
107 #define MaxML   52
108 #define MaxLL   35
109 #define DefaultMaxOff 28
110 #define MaxOff  31
111 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
112 #define MLFSELog    9
113 #define LLFSELog    9
114 #define OffFSELog   8
115 #define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
116 
117 #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
118 /* Each table cannot take more than #symbols * FSELog bits */
119 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
120 
121 static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
122      0, 0, 0, 0, 0, 0, 0, 0,
123      0, 0, 0, 0, 0, 0, 0, 0,
124      1, 1, 1, 1, 2, 2, 3, 3,
125      4, 6, 7, 8, 9,10,11,12,
126     13,14,15,16
127 };
128 static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
129      4, 3, 2, 2, 2, 2, 2, 2,
130      2, 2, 2, 2, 2, 1, 1, 1,
131      2, 2, 2, 2, 2, 2, 2, 2,
132      2, 3, 2, 1, 1, 1, 1, 1,
133     -1,-1,-1,-1
134 };
135 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
136 static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
137 
138 static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
139      0, 0, 0, 0, 0, 0, 0, 0,
140      0, 0, 0, 0, 0, 0, 0, 0,
141      0, 0, 0, 0, 0, 0, 0, 0,
142      0, 0, 0, 0, 0, 0, 0, 0,
143      1, 1, 1, 1, 2, 2, 3, 3,
144      4, 4, 5, 7, 8, 9,10,11,
145     12,13,14,15,16
146 };
147 static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
148      1, 4, 3, 2, 2, 2, 2, 2,
149      2, 1, 1, 1, 1, 1, 1, 1,
150      1, 1, 1, 1, 1, 1, 1, 1,
151      1, 1, 1, 1, 1, 1, 1, 1,
152      1, 1, 1, 1, 1, 1, 1, 1,
153      1, 1, 1, 1, 1, 1,-1,-1,
154     -1,-1,-1,-1,-1
155 };
156 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
157 static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
158 
159 static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
160      1, 1, 1, 1, 1, 1, 2, 2,
161      2, 1, 1, 1, 1, 1, 1, 1,
162      1, 1, 1, 1, 1, 1, 1, 1,
163     -1,-1,-1,-1,-1
164 };
165 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
166 static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
167 
168 
169 /*-*******************************************
170 *  Shared functions to include for inlining
171 *********************************************/
172 static void ZSTD_copy8(void* dst, const void* src) {
173 #if defined(ZSTD_ARCH_ARM_NEON)
174     vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
175 #else
176     ZSTD_memcpy(dst, src, 8);
177 #endif
178 }
179 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
180 
181 /* Need to use memmove here since the literal buffer can now be located within
182    the dst buffer. In circumstances where the op "catches up" to where the
183    literal buffer is, there can be partial overlaps in this call on the final
184    copy if the literal is being shifted by less than 16 bytes. */
185 static void ZSTD_copy16(void* dst, const void* src) {
186 #if defined(ZSTD_ARCH_ARM_NEON)
187     vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
188 #elif defined(ZSTD_ARCH_X86_SSE2)
189     _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
190 #elif defined(__clang__)
191     ZSTD_memmove(dst, src, 16);
192 #else
193     /* ZSTD_memmove is not inlined properly by gcc */
194     BYTE copy16_buf[16];
195     ZSTD_memcpy(copy16_buf, src, 16);
196     ZSTD_memcpy(dst, copy16_buf, 16);
197 #endif
198 }
199 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
200 
201 #define WILDCOPY_OVERLENGTH 32
202 #define WILDCOPY_VECLEN 16
203 
204 typedef enum {
205     ZSTD_no_overlap,
206     ZSTD_overlap_src_before_dst
207     /*  ZSTD_overlap_dst_before_src, */
208 } ZSTD_overlap_e;
209 
210 /*! ZSTD_wildcopy() :
211  *  Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
212  *  @param ovtype controls the overlap detection
213  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
214  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
215  *           The src buffer must be before the dst buffer.
216  */
217 MEM_STATIC FORCE_INLINE_ATTR
218 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
219 {
220     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
221     const BYTE* ip = (const BYTE*)src;
222     BYTE* op = (BYTE*)dst;
223     BYTE* const oend = op + length;
224 
225     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
226         /* Handle short offset copies. */
227         do {
228             COPY8(op, ip)
229         } while (op < oend);
230     } else {
231         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
232         /* Separate out the first COPY16() call because the copy length is
233          * almost certain to be short, so the branches have different
234          * probabilities. Since it is almost certain to be short, only do
235          * one COPY16() in the first call. Then, do two calls per loop since
236          * at that point it is more likely to have a high trip count.
237          */
238 #ifdef __aarch64__
239         do {
240             COPY16(op, ip);
241         }
242         while (op < oend);
243 #else
244         ZSTD_copy16(op, ip);
245         if (16 >= length) return;
246         op += 16;
247         ip += 16;
248         do {
249             COPY16(op, ip);
250             COPY16(op, ip);
251         }
252         while (op < oend);
253 #endif
254     }
255 }
256 
257 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
258 {
259     size_t const length = MIN(dstCapacity, srcSize);
260     if (length > 0) {
261         ZSTD_memcpy(dst, src, length);
262     }
263     return length;
264 }
265 
266 /* define "workspace is too large" as this number of times larger than needed */
267 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
268 
269 /* when workspace is continuously too large
270  * during at least this number of times,
271  * context's memory usage is considered wasteful,
272  * because it's sized to handle a worst case scenario which rarely happens.
273  * In which case, resize it down to free some memory */
274 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
275 
276 /* Controls whether the input/output buffer is buffered or stable. */
277 typedef enum {
278     ZSTD_bm_buffered = 0,  /* Buffer the input/output */
279     ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
280 } ZSTD_bufferMode_e;
281 
282 
283 /*-*******************************************
284 *  Private declarations
285 *********************************************/
286 typedef struct seqDef_s {
287     U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
288     U16 litLength;
289     U16 mlBase;    /* mlBase == matchLength - MINMATCH */
290 } seqDef;
291 
292 /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
293 typedef enum {
294     ZSTD_llt_none = 0,             /* no longLengthType */
295     ZSTD_llt_literalLength = 1,    /* represents a long literal */
296     ZSTD_llt_matchLength = 2       /* represents a long match */
297 } ZSTD_longLengthType_e;
298 
299 typedef struct {
300     seqDef* sequencesStart;
301     seqDef* sequences;      /* ptr to end of sequences */
302     BYTE* litStart;
303     BYTE* lit;              /* ptr to end of literals */
304     BYTE* llCode;
305     BYTE* mlCode;
306     BYTE* ofCode;
307     size_t maxNbSeq;
308     size_t maxNbLit;
309 
310     /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
311      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
312      * the existing value of the litLength or matchLength by 0x10000.
313      */
314     ZSTD_longLengthType_e   longLengthType;
315     U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
316 } seqStore_t;
317 
318 typedef struct {
319     U32 litLength;
320     U32 matchLength;
321 } ZSTD_sequenceLength;
322 
323 /**
324  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
325  * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
326  */
327 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
328 {
329     ZSTD_sequenceLength seqLen;
330     seqLen.litLength = seq->litLength;
331     seqLen.matchLength = seq->mlBase + MINMATCH;
332     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
333         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
334             seqLen.litLength += 0xFFFF;
335         }
336         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
337             seqLen.matchLength += 0xFFFF;
338         }
339     }
340     return seqLen;
341 }
342 
343 /**
344  * Contains the compressed frame size and an upper-bound for the decompressed frame size.
345  * Note: before using `compressedSize`, check for errors using ZSTD_isError().
346  *       similarly, before using `decompressedBound`, check for errors using:
347  *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
348  */
349 typedef struct {
350     size_t compressedSize;
351     unsigned long long decompressedBound;
352 } ZSTD_frameSizeInfo;   /* decompress & legacy */
353 
354 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
355 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
356 
357 /* custom memory allocation functions */
358 void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
359 void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
360 void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
361 
362 
363 MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
364 {
365     assert(val != 0);
366     {
367 #   if defined(_MSC_VER)   /* Visual */
368 #       if STATIC_BMI2 == 1
369             return _lzcnt_u32(val)^31;
370 #       else
371             if (val != 0) {
372                 unsigned long r;
373                 _BitScanReverse(&r, val);
374                 return (unsigned)r;
375             } else {
376                 /* Should not reach this code path */
377                 __assume(0);
378             }
379 #       endif
380 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
381         return __builtin_clz (val) ^ 31;
382 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
383         return 31 - __CLZ(val);
384 #   else   /* Software version */
385         static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
386         U32 v = val;
387         v |= v >> 1;
388         v |= v >> 2;
389         v |= v >> 4;
390         v |= v >> 8;
391         v |= v >> 16;
392         return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
393 #   endif
394     }
395 }
396 
397 /**
398  * Counts the number of trailing zeros of a `size_t`.
399  * Most compilers should support CTZ as a builtin. A backup
400  * implementation is provided if the builtin isn't supported, but
401  * it may not be terribly efficient.
402  */
403 MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
404 {
405     if (MEM_64bits()) {
406 #       if defined(_MSC_VER) && defined(_WIN64)
407 #           if STATIC_BMI2
408                 return _tzcnt_u64(val);
409 #           else
410                 if (val != 0) {
411                     unsigned long r;
412                     _BitScanForward64(&r, (U64)val);
413                     return (unsigned)r;
414                 } else {
415                     /* Should not reach this code path */
416                     __assume(0);
417                 }
418 #           endif
419 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
420             return __builtin_ctzll((U64)val);
421 #       else
422             static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
423                                                       4, 25, 14, 28,  9, 34, 20, 56,
424                                                       5, 17, 26, 54, 15, 41, 29, 43,
425                                                       10, 31, 38, 35, 21, 45, 49, 57,
426                                                       63,  6, 12, 18, 24, 27, 33, 55,
427                                                       16, 53, 40, 42, 30, 37, 44, 48,
428                                                       62, 11, 23, 32, 52, 39, 36, 47,
429                                                       61, 22, 51, 46, 60, 50, 59, 58 };
430             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
431 #       endif
432     } else { /* 32 bits */
433 #       if defined(_MSC_VER)
434             if (val != 0) {
435                 unsigned long r;
436                 _BitScanForward(&r, (U32)val);
437                 return (unsigned)r;
438             } else {
439                 /* Should not reach this code path */
440                 __assume(0);
441             }
442 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
443             return __builtin_ctz((U32)val);
444 #       else
445             static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
446                                                      30, 22, 20, 15, 25, 17,  4,  8,
447                                                      31, 27, 13, 23, 21, 19, 16,  7,
448                                                      26, 12, 18,  6, 11,  5, 10,  9 };
449             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
450 #       endif
451     }
452 }
453 
454 
455 /* ZSTD_invalidateRepCodes() :
456  * ensures next compression will not use repcodes from previous block.
457  * Note : only works with regular variant;
458  *        do not use with extDict variant ! */
459 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
460 
461 
462 typedef struct {
463     blockType_e blockType;
464     U32 lastBlock;
465     U32 origSize;
466 } blockProperties_t;   /* declared here for decompress and fullbench */
467 
468 /*! ZSTD_getcBlockSize() :
469  *  Provides the size of compressed block from block header `src` */
470 /* Used by: decompress, fullbench (does not get its definition from here) */
471 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
472                           blockProperties_t* bpPtr);
473 
474 /*! ZSTD_decodeSeqHeaders() :
475  *  decode sequence header from src */
476 /* Used by: decompress, fullbench (does not get its definition from here) */
477 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
478                        const void* src, size_t srcSize);
479 
480 /**
481  * @returns true iff the CPU supports dynamic BMI2 dispatch.
482  */
483 MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
484 {
485     ZSTD_cpuid_t cpuid = ZSTD_cpuid();
486     return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
487 }
488 
489 #if defined (__cplusplus)
490 }
491 #endif
492 
493 #endif   /* ZSTD_CCOMMON_H_MODULE */
494