/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE

/* This module contains definitions which must be identical
 * across compression, decompression and dictBuilder.
 * It also contains a few functions useful to at least 2 of them
 * and which benefit from being inlined */

/*-*************************************
*  Dependencies
***************************************/
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
#include "mem.h"
#include "debug.h"           /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "../zstd.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#ifndef XXH_STATIC_LINKING_ONLY
#  define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
#endif
#include "xxhash.h"          /* XXH_reset, update, digest */

#if defined (__cplusplus)
extern "C" {
#endif

/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define FSE_isError  ERR_isError
#define HUF_isError  ERR_isError


/*-*************************************
*  shared macros
***************************************/
#undef MIN
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))

/**
 * Ignore: this is an internal helper.
 *
 * This is a helper function used to force C99-correctness during compilation.
 * Under strict compilation modes, variadic macro arguments can't be empty.
 * However, variadic function arguments can be. Using a function therefore lets
 * us statically check that at least one (string) argument was passed,
 * independent of the compilation flags.
 */
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
  (void)format;
}

/**
 * Ignore: this is an internal helper.
 *
 * We want to force this function invocation to be syntactically correct, but
 * we don't want to force runtime evaluation of its arguments.
 */
#define _FORCE_HAS_FORMAT_STRING(...) \
  if (0) { \
    _force_has_format_string(__VA_ARGS__); \
  }

/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 */
#define RETURN_ERROR_IF(cond, err, ...) \
  if (cond) { \
    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  }

/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 */
#define RETURN_ERROR(err, ...) \
  do { \
    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  } while(0)

/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 */
#define FORWARD_IF_ERROR(err, ...) \
  do { \
    size_t const err_code = (err); \
    if (ERR_isError(err_code)) { \
      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return err_code; \
    } \
  } while(0)
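
/* Illustrative sketch (not part of the library) :
 * a typical way the three macros above combine inside an internal function
 * returning a size_t error code. The functions ZSTD_exampleStage() and
 * ZSTD_exampleInternalStep() are hypothetical, shown only to demonstrate the
 * calling convention; dstSize_tooSmall and srcSize_wrong are real error codes
 * accepted by ERROR().
 *
 *   static size_t ZSTD_exampleStage(void* dst, size_t dstCapacity,
 *                                   const void* src, size_t srcSize)
 *   {
 *       RETURN_ERROR_IF(dstCapacity < srcSize, dstSize_tooSmall,
 *                       "need %u bytes, have %u",
 *                       (unsigned)srcSize, (unsigned)dstCapacity);
 *       if (src == NULL) RETURN_ERROR(srcSize_wrong, "NULL source buffer");
 *       {   size_t const r = ZSTD_exampleInternalStep(dst, dstCapacity, src, srcSize);
 *           FORWARD_IF_ERROR(r, "internal step failed");
 *           return r;
 *       }
 *   }
 */
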
/*-*************************************
*  Common constants
***************************************/
#define ZSTD_OPT_NUM    (1<<12)

#define ZSTD_REP_NUM      3                 /* number of repcodes */
#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define BIT7 128
#define BIT6  64
#define BIT5  32
#define BIT4  16
#define BIT1   2
#define BIT0   1

#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };

#define ZSTD_FRAMEIDSIZE 4   /* magic number size */

#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;

#define ZSTD_FRAMECHECKSUMSIZE 4

#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */

#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;

#define LONGNBSEQ 0x7F00

#define MINMATCH 3

#define Litbits  8
#define MaxLit ((1<<Litbits) - 1)
#define MaxML   52
#define MaxLL   35
#define DefaultMaxOff 28
#define MaxOff  31
#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog    9
#define LLFSELog    9
#define OffFSELog   8
#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)

static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3,
                                      4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
                                             2, 2, 2, 2, 2, 1, 1, 1,
                                             2, 2, 2, 2, 2, 2, 2, 2,
                                             2, 3, 2, 1, 1, 1, 1, 1,
                                            -1,-1,-1,-1 };
#define LL_DEFAULTNORMLOG 6  /* for static allocation */
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;

static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3,
                                      4, 4, 5, 7, 8, 9,10,11,
                                     12,13,14,15,16 };
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
                                             2, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1,-1,-1,
                                            -1,-1,-1,-1,-1 };
#define ML_DEFAULTNORMLOG 6  /* for static allocation */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;

static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
                                                     2, 1, 1, 1, 1, 1, 1, 1,
                                                     1, 1, 1, 1, 1, 1, 1, 1,
                                                    -1,-1,-1,-1,-1 };
#define OF_DEFAULTNORMLOG 5  /* for static allocation */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;

/*-*******************************************
*  Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
    memcpy(dst, src, 8);
#endif
}

#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#else
    memcpy(dst, src, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16

typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;

/*! ZSTD_wildcopy() :
 *  Custom version of memcpy(), can over-read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
 *           The src buffer must be before the dst buffer.
 */
MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;

    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));

    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
        /* Handle short offset copies. */
        do {
            COPY8(op, ip)
        } while (op < oend);
    } else {
        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
        /* Separate out the first COPY16() call because the copy length is
         * almost certain to be short, so the branches have different
         * probabilities. Since it is almost certain to be short, only do
         * one COPY16() in the first call. Then, do two calls per loop since
         * at that point it is more likely to have a high trip count.
         */
#ifndef __aarch64__
        do {
            COPY16(op, ip);
        }
        while (op < oend);
#else
        COPY16(op, ip);
        if (op >= oend) return;
        do {
            COPY16(op, ip);
            COPY16(op, ip);
        }
        while (op < oend);
#endif
    }
}
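
/* Illustrative sketch (not part of the library) :
 * how a caller might invoke ZSTD_wildcopy(). Both buffers reserve
 * WILDCOPY_OVERLENGTH bytes of slack, since the copy proceeds in 8- or 16-byte
 * chunks and may read/write past `length`. Buffer names and sizes below are
 * hypothetical.
 *
 *   static void ZSTD_wildcopyExample(void)
 *   {
 *       BYTE srcBuf[128 + WILDCOPY_OVERLENGTH];
 *       BYTE dstBuf[128 + WILDCOPY_OVERLENGTH];
 *       memset(srcBuf, 7, sizeof(srcBuf));
 *       memset(dstBuf, 0, sizeof(dstBuf));
 *
 *       // distinct buffers => at least WILDCOPY_VECLEN bytes apart
 *       ZSTD_wildcopy(dstBuf, srcBuf, 128, ZSTD_no_overlap);
 *
 *       // src sits 8 bytes before dst within the same buffer
 *       ZSTD_wildcopy(dstBuf + 8, dstBuf, 64, ZSTD_overlap_src_before_dst);
 *   }
 */
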
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t const length = MIN(dstCapacity, srcSize);
    if (length > 0) {
        memcpy(dst, src, length);
    }
    return length;
}

/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3

/* when the workspace has been too large
 * for at least this many consecutive invocations,
 * the context's memory usage is considered wasteful,
 * because it is sized for a worst-case scenario which rarely happens;
 * in that case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128


/*-*******************************************
*  Private declarations
*********************************************/
typedef struct seqDef_s {
    U32 offset;
    U16 litLength;
    U16 matchLength;
} seqDef;

typedef struct {
    seqDef* sequencesStart;
    seqDef* sequences;
    BYTE* litStart;
    BYTE* lit;
    BYTE* llCode;
    BYTE* mlCode;
    BYTE* ofCode;
    size_t maxNbSeq;
    size_t maxNbLit;
    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
    U32   longLengthPos;
} seqStore_t;

typedef struct {
    U32 litLength;
    U32 matchLength;
} ZSTD_sequenceLength;

/**
 * Returns the ZSTD_sequenceLength for the given sequence. It handles the decoding of long sequences
 * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
 */
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
    ZSTD_sequenceLength seqLen;
    seqLen.litLength = seq->litLength;
    seqLen.matchLength = seq->matchLength + MINMATCH;
    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
        if (seqStore->longLengthID == 1) {
            seqLen.litLength += 0xFFFF;
        }
        if (seqStore->longLengthID == 2) {
            seqLen.matchLength += 0xFFFF;
        }
    }
    return seqLen;
}
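
/* Illustrative sketch (not part of the library) :
 * walking a filled seqStore_t and retrieving fully decoded lengths.
 * `store` is a hypothetical, already populated sequence store;
 * `sequences` is the write cursor, so it points one past the last stored seqDef.
 *
 *   static size_t ZSTD_exampleTotalMatchLength(const seqStore_t* store)
 *   {
 *       size_t total = 0;
 *       const seqDef* seq = store->sequencesStart;
 *       const seqDef* const end = store->sequences;
 *       for ( ; seq < end ; seq++) {
 *           ZSTD_sequenceLength const sl = ZSTD_getSequenceLength(store, seq);
 *           total += sl.matchLength;   // already includes MINMATCH
 *       }
 *       return total;
 *   }
 */
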
/**
 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
 *       Similarly, before using `decompressedBound`, check for errors using:
 *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
 */
typedef struct {
    size_t compressedSize;
    unsigned long long decompressedBound;
} ZSTD_frameSizeInfo;   /* decompress & legacy */

const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */

/* custom memory allocation functions */
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
void ZSTD_free(void* ptr, ZSTD_customMem customMem);


/*! ZSTD_highbit32() :
 * @return the position of the highest set bit of @val, in the range 0-31.
 *         For example, ZSTD_highbit32(1) == 0 and ZSTD_highbit32(1u<<31) == 31.
 *         @val must be non-zero. */
MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    assert(val != 0);
    {
#   if defined(_MSC_VER)   /* Visual */
        unsigned long r=0;
        return _BitScanReverse(&r, val) ? (unsigned)r : 0;
#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
        return __builtin_clz (val) ^ 31;
#   elif defined(__ICCARM__)    /* IAR Intrinsic */
        return 31 - __CLZ(val);
#   else   /* Software version */
        static const U32 DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                            11, 14, 16, 18, 22, 25,  3, 30,
                                             8, 12, 20, 28, 15, 17, 24,  7,
                                            19, 27, 23,  6, 26,  5,  4, 31 };
        U32 v = val;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
#   endif
    }
}


/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */


typedef struct {
    blockType_e blockType;
    U32 lastBlock;
    U32 origSize;
} blockProperties_t;   /* declared here for decompress and fullbench */

/*! ZSTD_getcBlockSize() :
 *  Provides the size of compressed block from block header `src` */
/*  Used by: decompress, fullbench (does not get its definition from here) */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr);

/*! ZSTD_decodeSeqHeaders() :
 *  decode sequence header from src */
/*  Used by: decompress, fullbench (does not get its definition from here) */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize);


#if defined (__cplusplus)
}
#endif

#endif   /* ZSTD_CCOMMON_H_MODULE */