1 /* ******************************************************************
2 * huff0 huffman decoder,
3 * part of Finite State Entropy library
4 * Copyright (c) Yann Collet, Facebook, Inc.
5 *
6 * You can contact the author at :
7 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8 *
9 * This source code is licensed under both the BSD-style license (found in the
10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 * in the COPYING file in the root directory of this source tree).
12 * You may select, at your option, one of the above-listed licenses.
13 ****************************************************************** */
14
15 /* **************************************************************
16 * Dependencies
17 ****************************************************************/
18 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
19 #include "../common/compiler.h"
20 #include "../common/bitstream.h" /* BIT_* */
21 #include "../common/fse.h" /* to compress headers */
22 #define HUF_STATIC_LINKING_ONLY
23 #include "../common/huf.h"
24 #include "../common/error_private.h"
25 #include "../common/zstd_internal.h"
26
27 /* **************************************************************
28 * Constants
29 ****************************************************************/
30
/* tableLog at which DTables are built whenever possible, so that the
 * fast (assembly) decoder loop can be used (see HUF_DecompressAsmArgs_init(),
 * which falls back to the generic decoder for any other tableLog). */
#define HUF_DECODER_FAST_TABLELOG 11
32
33 /* **************************************************************
34 * Macros
35 ****************************************************************/
36
37 /* These two optional macros force the use one way or another of the two
38 * Huffman decompression implementations. You can't force in both directions
39 * at the same time.
40 */
41 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
42 defined(HUF_FORCE_DECOMPRESS_X2)
43 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
44 #endif
45
/* Attribute applied to C wrappers around the BMI2 assembly loop :
 * needed only when BMI2 code is selected at runtime. */
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
#else
# define HUF_ASM_X86_64_BMI2_ATTRS
#endif

/* External assembly symbols must use C linkage when compiled as C++. */
#ifdef __cplusplus
# define HUF_EXTERN_C extern "C"
#else
# define HUF_EXTERN_C
#endif
#define HUF_ASM_DECL HUF_EXTERN_C

/* A BMI2-targeted instantiation of the decoder body is required when BMI2
 * is selected at runtime, or when building with asm + compile-time BMI2. */
#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_BMI2_FUNCTION 1
#else
# define HUF_NEED_BMI2_FUNCTION 0
#endif

/* The portable (default) instantiation is only skippable when the asm loop
 * with compile-time BMI2 fully covers the non-dynamic path. */
#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_DEFAULT_FUNCTION 1
#else
# define HUF_NEED_DEFAULT_FUNCTION 0
#endif
70
71 /* **************************************************************
72 * Error Management
73 ****************************************************************/
74 #define HUF_isError ERR_isError
75
76
77 /* **************************************************************
78 * Byte alignment for workSpace management
79 ****************************************************************/
/* HUF_ALIGN(x, a) : round x up to the next multiple of a (a must be a power of 2). */
#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
82
83
84 /* **************************************************************
85 * BMI2 Variant Wrappers
86 ****************************************************************/
#if DYNAMIC_BMI2

/* HUF_DGEN(fn) :
 * Generates a runtime-dispatching wrapper `fn` around an existing `fn##_body`
 * implementation. Two copies of the body are instantiated : `fn##_default`
 * (portable codegen) and `fn##_bmi2` (compiled with BMI2_TARGET_ATTRIBUTE),
 * and `fn` selects between them based on its trailing `bmi2` flag. */
#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

/* Single-target build : `fn` ignores the bmi2 flag and calls fn##_body directly. */
#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
127
128
/*-***************************/
/* generic DTableDesc        */
/*-***************************/
/* DTableDesc mirrors the 4-byte header stored in DTable[0] :
 * maxTableLog = table capacity, tableType = 0 for single-symbol (X1) tables,
 * tableLog = log2 of the actual table size. */
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

/* Reads the DTableDesc header out of the first DTable entry.
 * memcpy rather than a pointer cast : safe w.r.t. strict aliasing. */
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    ZSTD_memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}
140
141 #if ZSTD_ENABLE_ASM_X86_64_BMI2
142
/* Loads the initial 8-byte bit container of one stream.
 * The highest set bit of the top byte delimits the padding : it is shifted
 * out, and a sentinel 1 is OR-ed into the bottom so that counting trailing
 * zeros later reveals how many bits have been consumed. */
static size_t HUF_initDStream(BYTE const* ip) {
    BYTE const last = ip[7];
    size_t container = MEM_readLEST(ip) | 1;
    size_t shift = 0;
    if (last != 0)
        shift = 8 - BIT_highbit32(last);
    assert(shift <= 8);
    return container << shift;
}
/* State shared with the external assembly decoding loop (4 streams). */
typedef struct {
    BYTE const* ip[4];   /* per-stream read position currently loaded into bits[] */
    BYTE* op[4];         /* per-stream output write position */
    U64 bits[4];         /* bit containers, read MSB down to LSB, sentinel 1 after lowest valid bit */
    void const* dt;      /* decoding table (first entry, past the DTableDesc header) */
    BYTE const* ilimit;  /* lowest input address from which an 8-byte reload is safe */
    BYTE* oend;          /* end of the output buffer */
    BYTE const* iend[4]; /* end of each input stream */
} HUF_DecompressAsmArgs;
159
/**
 * Initializes args for the asm decoding loop.
 * @returns 0 on success
 *          1 if the fallback implementation should be used.
 *          Or an error code on failure.
 */
static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
{
    void const* dt = DTable + 1;   /* skip the DTableDesc header */
    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;

    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;   /* jump table (6 bytes) + one full bit container (8 bytes) */

    BYTE* const oend = (BYTE*)dst + dstSize;

    /* The following condition is false on x32 platform,
     * but HUF_asm is not compatible with this ABI */
    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;

    /* strict minimum : jump table + 1 byte per stream */
    if (srcSize < 10)
        return ERROR(corruption_detected);

    /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
     * If table log is not correct at this point, fallback to the old decoder.
     * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
     */
    if (dtLog != HUF_DECODER_FAST_TABLELOG)
        return 1;

    /* Read the jump table. */
    {
        const BYTE* const istart = (const BYTE*)src;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
        args->iend[0] = istart + 6;  /* jumpTable */
        args->iend[1] = args->iend[0] + length1;
        args->iend[2] = args->iend[1] + length2;
        args->iend[3] = args->iend[2] + length3;

        /* HUF_initDStream() requires this, and this small of an input
         * won't benefit from the ASM loop anyways.
         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
         * starts.
         */
        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
            return 1;
        if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow : length1+2+3 exceeded srcSize, so length4 wrapped */
    }
    /* ip[] contains the position that is currently loaded into bits[]. */
    args->ip[0] = args->iend[1] - sizeof(U64);
    args->ip[1] = args->iend[2] - sizeof(U64);
    args->ip[2] = args->iend[3] - sizeof(U64);
    args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);

    /* op[] contains the output pointers. */
    args->op[0] = (BYTE*)dst;
    args->op[1] = args->op[0] + (dstSize+3)/4;
    args->op[2] = args->op[1] + (dstSize+3)/4;
    args->op[3] = args->op[2] + (dstSize+3)/4;

    /* No point to call the ASM loop for tiny outputs. */
    if (args->op[3] >= oend)
        return 1;

    /* bits[] is the bit container.
     * It is read from the MSB down to the LSB.
     * It is shifted left as it is read, and zeros are
     * shifted in. After the lowest valid bit a 1 is
     * set, so that CountTrailingZeros(bits[]) can be used
     * to count how many bits we've consumed.
     */
    args->bits[0] = HUF_initDStream(args->ip[0]);
    args->bits[1] = HUF_initDStream(args->ip[1]);
    args->bits[2] = HUF_initDStream(args->ip[2]);
    args->bits[3] = HUF_initDStream(args->ip[3]);

    /* If ip[] >= ilimit, it is guaranteed to be safe to
     * reload bits[]. It may be beyond its section, but is
     * guaranteed to be valid (>= istart).
     */
    args->ilimit = ilimit;

    args->oend = oend;
    args->dt = dt;

    return 0;
}
250
/* Rebuilds a scalar BIT_DStream_t for stream `stream` from the asm-loop
 * state, after validating the loop's input/output bounds, so that the
 * C decoder can finish the tail of the stream. @return 0 or an error code. */
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
{
    /* Validate that we haven't overwritten. */
    if (args->op[stream] > segmentEnd)
        return ERROR(corruption_detected);
    /* Validate that we haven't read beyond iend[].
     * Note that ip[] may be < iend[] because the MSB is
     * the next bit to read, and we may have consumed 100%
     * of the stream, so down to iend[i] - 8 is valid.
     */
    if (args->ip[stream] < args->iend[stream] - 8)
        return ERROR(corruption_detected);

    /* Construct the BIT_DStream_t. */
    bit->bitContainer = MEM_readLE64(args->ip[stream]);
    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
    bit->start = (const char*)args->iend[0];   /* lowest valid input position : just past the jump table */
    bit->limitPtr = bit->start + sizeof(size_t);
    bit->ptr = (const char*)args->ip[stream];

    return 0;
}
273 #endif
274
275
276 #ifndef HUF_FORCE_DECOMPRESS_X2
277
278 /*-***************************/
279 /* single-symbol decoding */
280 /*-***************************/
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decoding : one table entry = (bit length, decoded byte) */
282
283 /**
284 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
285 * a time.
286 */
HUF_DEltX1_set4(BYTE symbol,BYTE nbBits)287 static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
288 U64 D4;
289 if (MEM_isLittleEndian()) {
290 D4 = (symbol << 8) + nbBits;
291 } else {
292 D4 = symbol + (nbBits << 8);
293 }
294 D4 *= 0x0001000100010001ULL;
295 return D4;
296 }
297
298 /**
299 * Increase the tableLog to targetTableLog and rescales the stats.
300 * If tableLog > targetTableLog this is a no-op.
301 * @returns New tableLog
302 */
HUF_rescaleStats(BYTE * huffWeight,U32 * rankVal,U32 nbSymbols,U32 tableLog,U32 targetTableLog)303 static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
304 {
305 if (tableLog > targetTableLog)
306 return tableLog;
307 if (tableLog < targetTableLog) {
308 U32 const scale = targetTableLog - tableLog;
309 U32 s;
310 /* Increase the weight for all non-zero probability symbols by scale. */
311 for (s = 0; s < nbSymbols; ++s) {
312 huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
313 }
314 /* Update rankVal to reflect the new weights.
315 * All weights except 0 get moved to weight + scale.
316 * Weights [1, scale] are empty.
317 */
318 for (s = targetTableLog; s > scale; --s) {
319 rankVal[s] = rankVal[s - scale];
320 }
321 for (s = scale; s > 0; --s) {
322 rankVal[s] = 0;
323 }
324 }
325 return targetTableLog;
326 }
327
/* Scratch space for HUF_readDTableX1_wksp_bmi2() ;
 * must fit within the caller-provided workSpace (checked at entry). */
typedef struct {
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];        /* number of symbols of each weight */
    U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];      /* running insertion offset per weight while sorting */
    U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; /* scratch for HUF_readStats_wksp() */
    BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];            /* symbols ordered by weight */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];         /* weight of each symbol */
} HUF_ReadDTableX1_Workspace;
335
336
/* HUF_readDTableX1_wksp() :
 * Same as HUF_readDTableX1_wksp_bmi2(), with BMI2 acceleration disabled. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
341
/* HUF_readDTableX1_wksp_bmi2() :
 * Reads the Huffman weights from `src` and builds a single-symbol (X1)
 * decoding table into DTable, using `workSpace` as scratch.
 * @return : size of the compressed table description, or an error code. */
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* table entries start after the DTableDesc header */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;

    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* Decode weights : fills huffWeight[], rankVal[], nbSymbols and tableLog. */
    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;


    /* Table header */
    { DTableDesc dtd = HUF_getDTableDesc(DTable);
        U32 const maxTableLog = dtd.maxTableLog + 1;
        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
        /* Grow the stats to targetTableLog so the fast decoder can be used when possible. */
        tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Compute symbols and rankStart given rankVal:
     *
     * rankVal already contains the number of values of each weight.
     *
     * symbols contains the symbols ordered by weight. First are the rankVal[0]
     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
     * symbols[0] is filled (but unused) to avoid a branch.
     *
     * rankStart contains the offset where each rank belongs in the DTable.
     * rankStart[0] is not filled because there are no entries in the table for
     * weight 0.
     */
    {
        int n;
        int nextRankStart = 0;
        int const unroll = 4;
        int const nLimit = (int)nbSymbols - unroll + 1;
        /* prefix-sum rankVal[] into rankStart[] */
        for (n=0; n<(int)tableLog+1; n++) {
            U32 const curr = nextRankStart;
            nextRankStart += wksp->rankVal[n];
            wksp->rankStart[n] = curr;
        }
        /* counting sort of symbols by weight, unrolled by 4 */
        for (n=0; n < nLimit; n += unroll) {
            int u;
            for (u=0; u < unroll; ++u) {
                size_t const w = wksp->huffWeight[n+u];
                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
            }
        }
        for (; n < (int)nbSymbols; ++n) {
            size_t const w = wksp->huffWeight[n];
            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
        }
    }

    /* fill DTable
     * We fill all entries of each weight in order.
     * That way length is a constant for each iteration of the outer loop.
     * We can switch based on the length to a different inner loop which is
     * optimized for that particular case.
     */
    {
        U32 w;
        int symbol=wksp->rankVal[0];   /* skip past the (unused) weight-0 symbols */
        int rankStart=0;
        for (w=1; w<tableLog+1; ++w) {
            int const symbolCount = wksp->rankVal[w];
            int const length = (1 << w) >> 1;   /* each weight-w symbol fills 2^(w-1) entries */
            int uStart = rankStart;
            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
            int s;
            int u;
            switch (length) {
            case 1:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart] = D;
                    uStart += 1;
                }
                break;
            case 2:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart+0] = D;
                    dt[uStart+1] = D;
                    uStart += 2;
                }
                break;
            case 4:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    uStart += 4;
                }
                break;
            case 8:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    MEM_write64(dt + uStart + 4, D4);
                    uStart += 8;
                }
                break;
            default:
                /* length >= 16 here : write 16 entries (4x8 bytes) per iteration */
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    for (u=0; u < length; u += 16) {
                        MEM_write64(dt + uStart + u + 0, D4);
                        MEM_write64(dt + uStart + u + 4, D4);
                        MEM_write64(dt + uStart + u + 8, D4);
                        MEM_write64(dt + uStart + u + 12, D4);
                    }
                    assert(u == length);
                    uStart += length;
                }
                break;
            }
            symbol += symbolCount;
            rankStart += symbolCount * length;
        }
    }
    return iSize;
}
479
/* Decodes one symbol : peeks dtLog bits to index the table, emits the byte,
 * then consumes that entry's nbBits from the bitstream. */
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}
488
/* Emit one symbol unconditionally. */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* Emit one symbol when the bit container is guaranteed to still hold enough
 * bits : always on 64-bit, or on 32-bit when tables are small enough. */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* Emit one symbol on 64-bit platforms only. */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
499
/* Decodes symbols from bitDPtr into [p, pEnd) using single-symbol table dt.
 * @return : number of bytes decoded (pEnd - pStart). */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    if ((pEnd - p) > 3) {
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
        }
    } else {
        BIT_reloadDStream(bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}
528
/* Decompresses a single-stream Huffman block with an X1 table.
 * @return : dstSize on success, or an error code. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* skip the DTableDesc header */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* the bitstream must be entirely consumed, or the input was corrupted */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
551
/* Decompresses a 4-stream Huffman block with an X1 table.
 * The input begins with a 6-byte jump table holding the sizes of the first
 * 3 streams ; the 4th stream runs to the end of the input.
 * @return : dstSize on success, or an error code. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    { const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - 3;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;
        U32 endSignal = 1;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        if ((size_t)(oend - op4) >= sizeof(size_t)) {
            for ( ; (endSignal) & (op4 < olimit) ; ) {
                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
            }
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
648
#if HUF_NEED_BMI2_FUNCTION
/* BMI2-targeted instantiation of the 4-stream X1 decoder body. */
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif

#if HUF_NEED_DEFAULT_FUNCTION
/* Portable instantiation of the 4-stream X1 decoder body. */
static
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
664
#if ZSTD_ENABLE_ASM_X86_64_BMI2

HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

/* 4-stream X1 decompression driven by the external assembly inner loop.
 * Falls back to the C BMI2 implementation when HUF_DecompressAsmArgs_init()
 * reports the input is unsuitable (returns 1). */
static HUF_ASM_X86_64_BMI2_ATTRS
size_t
HUF_decompress4X1_usingDTable_internal_bmi2_asm(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    void const* dt = DTable + 1;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;
    {
        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(ret, "Failed to init asm args");
        if (ret != 0)
            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);

    /* Our loop guarantees that ip[] >= ilimit and that we haven't
     * overwritten any op[].
     */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;

    /* finish bit streams one by one. */
    {
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segmentEnd = (BYTE*)dst;
        int i;
        for (i = 0; i < 4; ++i) {
            BIT_DStream_t bit;
            if (segmentSize <= (size_t)(oend - segmentEnd))
                segmentEnd += segmentSize;
            else
                segmentEnd = oend;   /* last segment may be shorter */
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
            /* Decompress and validate that we've produced exactly the expected length. */
            args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
            if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
722
/* Function-pointer type shared by all usingDTable decoder variants. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

/* Instantiates HUF_decompress1X1_usingDTable_internal() (with bmi2 dispatch). */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
729
/* Dispatches the 4-stream X1 decode to the best available implementation :
 * asm (when enabled) > C bmi2 > portable default, honoring the runtime
 * `bmi2` flag when dynamic dispatch is compiled in. */
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
# if ZSTD_ENABLE_ASM_X86_64_BMI2
        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
# else
        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
# endif
    }
#else
    (void)bmi2;
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
#else
    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
#endif
}
751
752
HUF_decompress1X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)753 size_t HUF_decompress1X1_usingDTable(
754 void* dst, size_t dstSize,
755 const void* cSrc, size_t cSrcSize,
756 const HUF_DTable* DTable)
757 {
758 DTableDesc dtd = HUF_getDTableDesc(DTable);
759 if (dtd.tableType != 0) return ERROR(GENERIC);
760 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
761 }
762
/* HUF_decompress1X1_DCtx_wksp() :
 * Builds the X1 table from the compressed header, then decodes the
 * single-stream payload that follows it. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for a payload */
    {   const BYTE* const ip = (const BYTE*)cSrc + hSize;
        return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize - hSize, DCtx, /* bmi2 */ 0);
    }
}
776
777
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)778 size_t HUF_decompress4X1_usingDTable(
779 void* dst, size_t dstSize,
780 const void* cSrc, size_t cSrcSize,
781 const HUF_DTable* DTable)
782 {
783 DTableDesc dtd = HUF_getDTableDesc(DTable);
784 if (dtd.tableType != 0) return ERROR(GENERIC);
785 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
786 }
787
/* Builds the X1 table from the compressed header, then decodes the
 * 4-stream payload that follows it, with optional BMI2 acceleration. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for a payload */
    {   const BYTE* const ip = (const BYTE*)cSrc + hSize;
        return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize - hSize, dctx, bmi2);
    }
}
801
/* HUF_decompress4X1_DCtx_wksp() :
 * Same as HUF_decompress4X1_DCtx_wksp_bmi2(), with BMI2 acceleration disabled. */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
808
809
810 #endif /* HUF_FORCE_DECOMPRESS_X2 */
811
812
813 #ifndef HUF_FORCE_DECOMPRESS_X1
814
815 /* *************************/
816 /* double-symbols decoding */
817 /* *************************/
818
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding : up to 2 decoded bytes per entry */
typedef struct { BYTE symbol; } sortedSymbol_t;                          /* element of the weight-sorted symbol list */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
823
/**
 * Constructs a HUF_DEltX2 in a U32, laid out to match the struct's in-memory
 * representation on the current endianness (offsets checked below).
 */
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
{
    U32 seq;
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
    if (MEM_isLittleEndian()) {
        /* level 1 : single symbol ; level 2 : baseSeq first, then symbol */
        seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
        return seq + (nbBits << 16) + ((U32)level << 24);
    } else {
        seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
        return (seq << 16) + (nbBits << 8) + (U32)level;
    }
}
842
/**
 * Constructs a HUF_DEltX2.
 */
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
{
    HUF_DEltX2 DElt;
    U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
    DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
    /* memcpy rather than a pointer cast : safe w.r.t. strict aliasing */
    ZSTD_memcpy(&DElt, &val, sizeof(val));
    return DElt;
}
854
855 /**
856 * Constructs 2 HUF_DEltX2s and packs them into a U64.
857 */
HUF_buildDEltX2U64(U32 symbol,U32 nbBits,U16 baseSeq,int level)858 static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
859 {
860 U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
861 return (U64)DElt + ((U64)DElt << 32);
862 }
863
864 /**
865 * Fills the DTable rank with all the symbols from [begin, end) that are each
866 * nbBits long.
867 *
868 * @param DTableRank The start of the rank in the DTable.
869 * @param begin The first symbol to fill (inclusive).
870 * @param end The last symbol to fill (exclusive).
871 * @param nbBits Each symbol is nbBits long.
872 * @param tableLog The table log.
873 * @param baseSeq If level == 1 { 0 } else { the first level symbol }
874 * @param level The level in the table. Must be 1 or 2.
875 */
HUF_fillDTableX2ForWeight(HUF_DEltX2 * DTableRank,sortedSymbol_t const * begin,sortedSymbol_t const * end,U32 nbBits,U32 tableLog,U16 baseSeq,int const level)876 static void HUF_fillDTableX2ForWeight(
877 HUF_DEltX2* DTableRank,
878 sortedSymbol_t const* begin, sortedSymbol_t const* end,
879 U32 nbBits, U32 tableLog,
880 U16 baseSeq, int const level)
881 {
882 U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
883 const sortedSymbol_t* ptr;
884 assert(level >= 1 && level <= 2);
885 switch (length) {
886 case 1:
887 for (ptr = begin; ptr != end; ++ptr) {
888 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
889 *DTableRank++ = DElt;
890 }
891 break;
892 case 2:
893 for (ptr = begin; ptr != end; ++ptr) {
894 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
895 DTableRank[0] = DElt;
896 DTableRank[1] = DElt;
897 DTableRank += 2;
898 }
899 break;
900 case 4:
901 for (ptr = begin; ptr != end; ++ptr) {
902 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
903 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
904 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
905 DTableRank += 4;
906 }
907 break;
908 case 8:
909 for (ptr = begin; ptr != end; ++ptr) {
910 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
911 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
912 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
913 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
914 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
915 DTableRank += 8;
916 }
917 break;
918 default:
919 for (ptr = begin; ptr != end; ++ptr) {
920 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
921 HUF_DEltX2* const DTableRankEnd = DTableRank + length;
922 for (; DTableRank != DTableRankEnd; DTableRank += 8) {
923 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
924 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
925 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
926 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
927 }
928 }
929 break;
930 }
931 }
932
/* HUF_fillDTableX2Level2() :
 * Fills one second-level sub-table of 2^(targetLog - consumedBits) entries,
 * rooted at a first-level symbol `baseSeq` which consumed `consumedBits` bits.
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
                           const U32* rankVal, const int minWeight, const int maxWeight1,
                           const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    /* Fill skipped values (all positions up to rankVal[minWeight]).
     * These positions only get a single symbol because the combined weight
     * would be too large to fit a second one.
     */
    if (minWeight>1) {
        U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
        /* single-symbol entry : emits baseSeq, consuming consumedBits bits */
        U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
        int const skipSize = rankVal[minWeight];
        assert(length > 1);
        assert((U32)skipSize < length);
        switch (length) {
        case 2:
            assert(skipSize == 1);
            ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
            break;
        case 4:
            assert(skipSize <= 4);
            ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
            break;
        default:
            {
                int i;
                /* rounds up to a multiple of 8; any overshoot past skipSize is
                 * overwritten by the per-weight fill below */
                for (i = 0; i < skipSize; i += 8) {
                    ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
                }
            }
        }
    }

    /* Fill each of the second level symbols by weight. */
    {
        int w;
        for (w = minWeight; w < maxWeight1; ++w) {
            int const begin = rankStart[w];
            int const end = rankStart[w+1];
            U32 const nbBits = nbBitsBaseline - w;
            /* total bits consumed = first-level bits + second-level bits */
            U32 const totalBits = nbBits + consumedBits;
            HUF_fillDTableX2ForWeight(
                DTable + rankVal[w],
                sortedSymbols + begin, sortedSymbols + end,
                totalBits, targetLog,
                baseSeq, /* level */ 2);
        }
    }
}
989
/* HUF_fillDTableX2() :
 * Builds the complete double-symbol DTable from symbols sorted by weight.
 * Symbols short enough to leave room for a second symbol get a second-level
 * fill; the rest get single-symbol entries. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32* const rankVal = rankValOrigin[0];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;   /* shortest code length present */
    int w;
    int const wEnd = (int)maxWeight + 1;

    /* Fill DTable in order of weight. */
    for (w = 1; w < wEnd; ++w) {
        int const begin = (int)rankStart[w];
        int const end = (int)rankStart[w+1];
        U32 const nbBits = nbBitsBaseline - w;

        if (targetLog-nbBits >= minBits) {
            /* Enough room for a second symbol. */
            int start = rankVal[w];
            U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
            int minWeight = nbBits + scaleLog;
            int s;
            if (minWeight < 1) minWeight = 1;
            /* Fill the DTable for every symbol of weight w.
             * These symbols get at least 1 second symbol.
             */
            for (s = begin; s != end; ++s) {
                HUF_fillDTableX2Level2(
                    DTable + start, targetLog, nbBits,
                    rankValOrigin[nbBits], minWeight, wEnd,
                    sortedList, rankStart,
                    nbBitsBaseline, sortedList[s].symbol);
                start += length;   /* next symbol's sub-table starts `length` entries later */
            }
        } else {
            /* Only a single symbol. */
            HUF_fillDTableX2ForWeight(
                DTable + rankVal[w],
                sortedList + begin, sortedList + end,
                nbBits, targetLog,
                /* baseSeq */ 0, /* level */ 1);
        }
    }
}
1035
/* Scratch space layout for HUF_readDTableX2_wksp_bmi2(). */
typedef struct {
    rankValCol_t rankVal[HUF_TABLELOG_MAX];                /* rank start positions, one column per consumed-bits count */
    U32 rankStats[HUF_TABLELOG_MAX + 1];                   /* number of symbols of each weight */
    U32 rankStart0[HUF_TABLELOG_MAX + 3];                  /* start index of each weight within sortedSymbol (+ guard slots) */
    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];  /* symbols sorted by weight */
    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];              /* per-symbol weight as decoded from the header */
    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];     /* scratch for HUF_readStats_wksp() */
} HUF_ReadDTableX2_Workspace;
1044
/** HUF_readDTableX2_wksp() :
 *  Non-BMI2 entry point for building an X2 DTable from a compressed header. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                             const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    int const bmi2 = 0;  /* header decoding without BMI2 acceleration */
    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, bmi2);
}
1051
/* HUF_readDTableX2_wksp_bmi2() :
 * Reads a Huffman table header from `src` and builds the double-symbol
 * DTable in `DTable`, using `workSpace` for all transient allocations.
 * @return : size of the consumed header, or an error code */
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                       void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog, maxW, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;

    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);

    rankStart = wksp->rankStart0 + 1;
    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    /* decode symbol weights from the header */
    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
    /* shrink table to the fast-decoder size when the codes allow it */
    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;

    /* find maxWeight */
    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 curr = nextRankStart;
            nextRankStart += wksp->rankStats[w];
            rankStart[w] = curr;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        rankStart[maxW+1] = nextRankStart;
    }

    /* sort symbols by weight (counting sort using rankStart as cursors) */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = wksp->weightList[s];
            U32 const r = rankStart[w]++;
            wksp->sortedSymbol[r].symbol = (BYTE)s;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = wksp->rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 curr = nextRankVal;
                nextRankVal += wksp->rankStats[w] << (w+rescale);
                rankVal0[w] = curr;
        }   }
        /* derive per-consumed-bits columns by shifting the base column */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = wksp->rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   wksp->sortedSymbol,
                   wksp->rankStart0, wksp->rankVal, maxW,
                   tableLog+1);

    /* publish the table descriptor (type 1 == double-symbol) */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
1136
1137
1138 FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)1139 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1140 {
1141 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
1142 ZSTD_memcpy(op, &dt[val].sequence, 2);
1143 BIT_skipBits(DStream, dt[val].nbBits);
1144 return dt[val].length;
1145 }
1146
/* Decodes the final symbol of a stream, writing exactly 1 byte even when the
 * table entry packs 2 symbols (only the first one fits in the output).
 * @return : always 1 (bytes written) */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, &dt[val].sequence, 1);
    if (dt[val].length==1) {
        /* single-symbol entry : consume its bits normally */
        BIT_skipBits(DStream, dt[val].nbBits);
    } else {
        /* double-symbol entry but only 1 output byte remains :
         * skip the bits, then clamp bitsConsumed to the container size */
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
        }
    }
    return 1;
}
1164
/* Decode one entry unconditionally. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode one entry when the bit accumulator is guaranteed to still hold
 * enough bits : always on 64-bit targets, or when HUF_TABLELOG_MAX <= 12. */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode one entry on 64-bit targets only. */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1175
/* HUF_decodeStreamX2() :
 * Decodes one bitstream into [p, pEnd) using the double-symbol table.
 * @return : number of bytes decoded */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* fast loops, only while at least one full bit-container of margin remains */
    if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
        if (dtLog <= 11 && MEM_64bits()) {
            /* up to 10 symbols at a time (5 entries x up to 2 bytes each) */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        } else {
            /* up to 8 symbols at a time */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        }
    } else {
        BIT_reloadDStream(bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    if ((size_t)(pEnd - p) >= 2) {
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

        while (p <= pEnd-2)
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
    }

    /* at most 1 byte remains : decode it without overrunning the output */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
1220
/* HUF_decompress1X2_usingDTable_internal_body() :
 * Decompresses a single bitstream with the double-symbol table.
 * @return : dstSize, or an error code (corruption_detected if the input
 *           is not consumed exactly) */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst, size_t dstSize,
          const void* cSrc, size_t cSrcSize,
          const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check : stream must end exactly at the last consumed bit */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
/* HUF_decompress4X2_usingDTable_internal_body() :
 * Decompresses 4 interleaved bitstreams (zstd's 4-streams format : a 6-byte
 * jump table giving the first 3 stream sizes, then the streams back-to-back).
 * Each stream decodes one quarter of the output.
 * @return : dstSize, or an error code */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst, size_t dstSize,
          const void* cSrc, size_t cSrcSize,
          const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - (sizeof(size_t)-1);   /* fast loop must keep a full word of output margin */
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* last stream = remainder */
        const BYTE* const istart1 = istart + 6;   /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        if ((size_t)(oend - op4) >= sizeof(size_t)) {
            for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
                /* clang/x86 : pair the streams 2-by-2 to improve scheduling */
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
                /* generic : interleave one decode per stream to hide latency */
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal = (U32)LIKELY((U32)
                            (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
            }
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must end exactly */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
1364
#if HUF_NEED_BMI2_FUNCTION
/* BMI2-compiled instantiation of the shared 4-streams X2 decoder body. */
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable)
{
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
1372
#if HUF_NEED_DEFAULT_FUNCTION
/* Portable (non-BMI2) instantiation of the shared 4-streams X2 decoder body. */
static
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable)
{
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
1380
#if ZSTD_ENABLE_ASM_X86_64_BMI2

HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

/* HUF_decompress4X2_usingDTable_internal_bmi2_asm() :
 * Runs the hand-written x86-64 BMI2 assembly hot loop, then finishes each of
 * the 4 streams with the C decoder. Falls back to the C BMI2 implementation
 * when the asm args init reports the input is unsuitable (ret != 0). */
static HUF_ASM_X86_64_BMI2_ATTRS size_t
HUF_decompress4X2_usingDTable_internal_bmi2_asm(
          void* dst, size_t dstSize,
          const void* cSrc, size_t cSrcSize,
          const HUF_DTable* DTable) {
    void const* dt = DTable + 1;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;
    {
        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(ret, "Failed to init asm args");
        if (ret != 0)
            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);

    /* note : op4 already verified within main loop */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;

    /* finish bitStreams one by one */
    {
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segmentEnd = (BYTE*)dst;
        int i;
        for (i = 0; i < 4; ++i) {
            BIT_DStream_t bit;
            /* the last segment may be shorter than segmentSize */
            if (segmentSize <= (size_t)(oend - segmentEnd))
                segmentEnd += segmentSize;
            else
                segmentEnd = oend;
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
            /* decode the remaining symbols of stream i in C */
            args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
            if (args.op[i] != segmentEnd)
                return ERROR(corruption_detected);
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
1434
/* HUF_decompress4X2_usingDTable_internal() :
 * Dispatches to the fastest available 4-streams X2 implementation
 * (BMI2 asm, BMI2 C, or portable default) depending on compile-time
 * support and the runtime `bmi2` flag. */
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
# if ZSTD_ENABLE_ASM_X86_64_BMI2
        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
# else
        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
# endif
    }
#else
    (void)bmi2;
#endif

    /* static selection : asm path only when the compiler targets BMI2 directly */
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
#else
    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
#endif
}
1456
/* Instantiate the 1-stream X2 decoder dispatch (default + bmi2 variants). */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1458
1459 size_t HUF_decompress1X2_usingDTable(
1460 void* dst, size_t dstSize,
1461 const void* cSrc, size_t cSrcSize,
1462 const HUF_DTable* DTable)
1463 {
1464 DTableDesc dtd = HUF_getDTableDesc(DTable);
1465 if (dtd.tableType != 1) return ERROR(GENERIC);
1466 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1467 }
1468
/** HUF_decompress1X2_DCtx_wksp() :
 *  Reads the table header from cSrc, builds the DTable into DCtx using the
 *  provided workspace, then decodes the single remaining bitstream. */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for the stream */
    ip += hSize;
    cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}
1483
1484
/** HUF_decompress4X2_usingDTable() :
 *  Public 4-streams X2 entry point.
 *  Rejects tables that were not built by the X2 (double-symbol) builder. */
size_t HUF_decompress4X2_usingDTable(
          void* dst, size_t dstSize,
          const void* cSrc, size_t cSrcSize,
          const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);   /* not an X2 table */
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
1494
/** HUF_decompress4X2_DCtx_wksp_bmi2() :
 *  Reads the table header, builds the DTable into dctx, then decodes the
 *  4 remaining bitstreams; `bmi2` selects the accelerated code path. */
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for the streams */
    ip += hSize;
    cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
1509
/** HUF_decompress4X2_DCtx_wksp() :
 *  Non-BMI2 public entry point for the 4-streams X2 decoder. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    int const bmi2 = 0;  /* plain-C code path */
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize,
                                            workSpace, wkspSize, bmi2);
}
1516
1517
1518 #endif /* HUF_FORCE_DECOMPRESS_X1 */
1519
1520
1521 /* ***********************************/
1522 /* Universal decompression selectors */
1523 /* ***********************************/
1524
/** HUF_decompress1X_usingDTable() :
 *  Single-stream decoder selector : routes to the X1 or X2 implementation
 *  according to the table's type descriptor (or the forced build mode). */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    if (dtd.tableType)
        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}
1543
/** HUF_decompress4X_usingDTable() :
 *  4-streams decoder selector : routes to the X1 or X2 implementation
 *  according to the table's type descriptor (or the forced build mode). */
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    if (dtd.tableType)
        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}
1562
1563
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
/* Pre-measured decoding cost model used by HUF_selectDecoder() :
 * tableTime = fixed table-build cost, decode256Time = cost per 256 decoded
 * bytes. Rows are indexed by compression-ratio quantile Q (cSrcSize*16/dstSize),
 * columns by decoder flavor (0 = single-symbol X1, 1 = double-symbol X2). */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}},  /* Q==0 : impossible */
    {{0,0}, {1,1}},  /* Q==1 : impossible */
    {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
    {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
    {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
    {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
    {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
    {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
    {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
    {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
    {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
    {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
    {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
    {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
    {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
    {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
};
#endif
1587
1588 /** HUF_selectDecoder() :
1589 * Tells which decoder is likely to decode faster,
1590 * based on a set of pre-computed metrics.
1591 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1592 * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1593 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1594 {
1595 assert(dstSize > 0);
1596 assert(dstSize <= 128*1024);
1597 #if defined(HUF_FORCE_DECOMPRESS_X1)
1598 (void)dstSize;
1599 (void)cSrcSize;
1600 return 0;
1601 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1602 (void)dstSize;
1603 (void)cSrcSize;
1604 return 1;
1605 #else
1606 /* decoder timing evaluation */
1607 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1608 U32 const D256 = (U32)(dstSize >> 8);
1609 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1610 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1611 DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
1612 return DTime1 < DTime0;
1613 }
1614 #endif
1615 }
1616
1617
/** HUF_decompress4X_hufOnly_wksp() :
 *  4-streams decoder for Huffman-only blocks : picks the likely-faster
 *  table flavor via HUF_selectDecoder(), then builds + decodes in one call. */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        if (algoNb)
            return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#endif
    }
}
1643
/* Single-stream decompression with automatic decoder selection.
 * Handles the trivial raw and RLE cases without building any table. */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)choice;
        assert(choice == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)choice;
        assert(choice == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#else
        if (choice) {
            return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                               cSrcSize, workSpace, wkspSize);
        }
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#endif
    }
}
1673
1674
/* Single-stream decompression with a pre-built DTable :
 * dispatches on the table type recorded in the DTable header. */
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const desc = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)desc;
    assert(desc.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)desc;
    assert(desc.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    if (desc.tableType) {
        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
    }
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}
1691
1692 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1693 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1694 {
1695 const BYTE* ip = (const BYTE*) cSrc;
1696
1697 size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1698 if (HUF_isError(hSize)) return hSize;
1699 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1700 ip += hSize; cSrcSize -= hSize;
1701
1702 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1703 }
1704 #endif
1705
/* 4-stream decompression with a pre-built DTable :
 * dispatches on the table type recorded in the DTable header. */
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const desc = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)desc;
    assert(desc.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)desc;
    assert(desc.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    if (desc.tableType) {
        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
    }
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}
1722
/* Same as HUF_decompress4X_hufOnly_wksp(), with an explicit bmi2 flag
 * propagated to the selected decoder. */
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* an empty output is never valid, and an empty input cannot carry a table */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)choice;
        assert(choice == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)choice;
        assert(choice == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        if (choice) {
            return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
        }
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}
1744
1745 #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1746 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_readDTableX1(HUF_DTable * DTable,const void * src,size_t srcSize)1747 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
1748 {
1749 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1750 return HUF_readDTableX1_wksp(DTable, src, srcSize,
1751 workSpace, sizeof(workSpace));
1752 }
1753
HUF_decompress1X1_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1754 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1755 const void* cSrc, size_t cSrcSize)
1756 {
1757 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1758 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1759 workSpace, sizeof(workSpace));
1760 }
1761
HUF_decompress1X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1762 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1763 {
1764 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1765 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1766 }
1767 #endif
1768
1769 #ifndef HUF_FORCE_DECOMPRESS_X1
HUF_readDTableX2(HUF_DTable * DTable,const void * src,size_t srcSize)1770 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
1771 {
1772 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1773 return HUF_readDTableX2_wksp(DTable, src, srcSize,
1774 workSpace, sizeof(workSpace));
1775 }
1776
HUF_decompress1X2_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1777 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1778 const void* cSrc, size_t cSrcSize)
1779 {
1780 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1781 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1782 workSpace, sizeof(workSpace));
1783 }
1784
HUF_decompress1X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1785 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1786 {
1787 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1788 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1789 }
1790 #endif
1791
1792 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress4X1_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1793 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1794 {
1795 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1796 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1797 workSpace, sizeof(workSpace));
1798 }
HUF_decompress4X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1799 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1800 {
1801 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1802 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1803 }
1804 #endif
1805
1806 #ifndef HUF_FORCE_DECOMPRESS_X1
HUF_decompress4X2_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1807 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1808 const void* cSrc, size_t cSrcSize)
1809 {
1810 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1811 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1812 workSpace, sizeof(workSpace));
1813 }
1814
HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1815 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1816 {
1817 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1818 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1819 }
1820 #endif
1821
1822 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1823
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1824 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1825 {
1826 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1827 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1828 #endif
1829
1830 /* validation checks */
1831 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1832 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1833 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1834 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1835
1836 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1837 #if defined(HUF_FORCE_DECOMPRESS_X1)
1838 (void)algoNb;
1839 assert(algoNb == 0);
1840 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1841 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1842 (void)algoNb;
1843 assert(algoNb == 1);
1844 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1845 #else
1846 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1847 #endif
1848 }
1849 }
1850
/* Legacy 4-stream entry point with caller-provided DCtx :
 * raw/RLE shortcuts, then automatic decoder selection. */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)choice;
        assert(choice == 0);
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)choice;
        assert(choice == 1);
        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
        if (choice) {
            return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
        }
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#endif
    }
}
1874
/* Workspace-free convenience wrapper over HUF_decompress4X_hufOnly_wksp(). */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         wksp, sizeof(wksp));
}
1881
/* Workspace-free convenience wrapper over HUF_decompress1X_DCtx_wksp(). */
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      wksp, sizeof(wksp));
}
1889 #endif
1890