xref: /freebsd/sys/contrib/zstd/lib/decompress/huf_decompress.c (revision 86aa9539fef591a363b06a0ebd3aa7a07f4c1579)
1 /* ******************************************************************
2    huff0 huffman decoder,
3    part of Finite State Entropy library
4    Copyright (C) 2013-present, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31     You can contact the author at :
32     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Dependencies
37 ****************************************************************/
38 #include <string.h>     /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h"  /* BIT_* */
41 #include "fse.h"        /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45 
46 /* **************************************************************
47 *  Macros
48 ****************************************************************/
49 
50 /* These two optional macros force the use one way or another of the two
51  * Huffman decompression implementations. You can't force in both directions
52  * at the same time.
53  */
54 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
55     defined(HUF_FORCE_DECOMPRESS_X2)
56 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
57 #endif
58 
59 
60 /* **************************************************************
61 *  Error Management
62 ****************************************************************/
63 #define HUF_isError ERR_isError
64 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
65 
66 
67 /* **************************************************************
68 *  Byte alignment for workSpace management
69 ****************************************************************/
70 #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
71 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
72 
73 
74 /* **************************************************************
75 *  BMI2 Variant Wrappers
76 ****************************************************************/
77 #if DYNAMIC_BMI2
78 
79 #define HUF_DGEN(fn)                                                        \
80                                                                             \
81     static size_t fn##_default(                                             \
82                   void* dst,  size_t dstSize,                               \
83             const void* cSrc, size_t cSrcSize,                              \
84             const HUF_DTable* DTable)                                       \
85     {                                                                       \
86         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
87     }                                                                       \
88                                                                             \
89     static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
90                   void* dst,  size_t dstSize,                               \
91             const void* cSrc, size_t cSrcSize,                              \
92             const HUF_DTable* DTable)                                       \
93     {                                                                       \
94         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
95     }                                                                       \
96                                                                             \
97     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
98                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
99     {                                                                       \
100         if (bmi2) {                                                         \
101             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
102         }                                                                   \
103         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
104     }
105 
106 #else
107 
108 #define HUF_DGEN(fn)                                                        \
109     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
110                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
111     {                                                                       \
112         (void)bmi2;                                                         \
113         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
114     }
115 
116 #endif
117 
118 
119 /*-***************************/
120 /*  generic DTableDesc       */
121 /*-***************************/
122 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
123 
124 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
125 {
126     DTableDesc dtd;
127     memcpy(&dtd, table, sizeof(dtd));
128     return dtd;
129 }
130 
131 
132 #ifndef HUF_FORCE_DECOMPRESS_X2
133 
134 /*-***************************/
135 /*  single-symbol decoding   */
136 /*-***************************/
137 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
138 
139 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
140 {
141     U32 tableLog = 0;
142     U32 nbSymbols = 0;
143     size_t iSize;
144     void* const dtPtr = DTable + 1;
145     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
146 
147     U32* rankVal;
148     BYTE* huffWeight;
149     size_t spaceUsed32 = 0;
150 
151     rankVal = (U32 *)workSpace + spaceUsed32;
152     spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
153     huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
154     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
155 
156     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
157 
158     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
159     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
160 
161     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
162     if (HUF_isError(iSize)) return iSize;
163 
164     /* Table header */
165     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
166         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
167         dtd.tableType = 0;
168         dtd.tableLog = (BYTE)tableLog;
169         memcpy(DTable, &dtd, sizeof(dtd));
170     }
171 
172     /* Calculate starting value for each rank */
173     {   U32 n, nextRankStart = 0;
174         for (n=1; n<tableLog+1; n++) {
175             U32 const current = nextRankStart;
176             nextRankStart += (rankVal[n] << (n-1));
177             rankVal[n] = current;
178     }   }
179 
180     /* fill DTable */
181     {   U32 n;
182         for (n=0; n<nbSymbols; n++) {
183             U32 const w = huffWeight[n];
184             U32 const length = (1 << w) >> 1;
185             U32 u;
186             HUF_DEltX1 D;
187             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
188             for (u = rankVal[w]; u < rankVal[w] + length; u++)
189                 dt[u] = D;
190             rankVal[w] += length;
191     }   }
192 
193     return iSize;
194 }
195 
196 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
197 {
198     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
199     return HUF_readDTableX1_wksp(DTable, src, srcSize,
200                                  workSpace, sizeof(workSpace));
201 }
202 
203 FORCE_INLINE_TEMPLATE BYTE
204 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
205 {
206     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
207     BYTE const c = dt[val].byte;
208     BIT_skipBits(Dstream, dt[val].nbBits);
209     return c;
210 }
211 
212 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
213     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
214 
215 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
216     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
217         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
218 
219 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
220     if (MEM_64bits()) \
221         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
222 
223 HINT_INLINE size_t
224 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
225 {
226     BYTE* const pStart = p;
227 
228     /* up to 4 symbols at a time */
229     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
230         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
231         HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
232         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
233         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
234     }
235 
236     /* [0-3] symbols remaining */
237     if (MEM_32bits())
238         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
239             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
240 
241     /* no more data to retrieve from bitstream, no need to reload */
242     while (p < pEnd)
243         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
244 
245     return pEnd-pStart;
246 }
247 
248 FORCE_INLINE_TEMPLATE size_t
249 HUF_decompress1X1_usingDTable_internal_body(
250           void* dst,  size_t dstSize,
251     const void* cSrc, size_t cSrcSize,
252     const HUF_DTable* DTable)
253 {
254     BYTE* op = (BYTE*)dst;
255     BYTE* const oend = op + dstSize;
256     const void* dtPtr = DTable + 1;
257     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
258     BIT_DStream_t bitD;
259     DTableDesc const dtd = HUF_getDTableDesc(DTable);
260     U32 const dtLog = dtd.tableLog;
261 
262     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
263 
264     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
265 
266     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
267 
268     return dstSize;
269 }
270 
271 FORCE_INLINE_TEMPLATE size_t
272 HUF_decompress4X1_usingDTable_internal_body(
273           void* dst,  size_t dstSize,
274     const void* cSrc, size_t cSrcSize,
275     const HUF_DTable* DTable)
276 {
277     /* Check */
278     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
279 
280     {   const BYTE* const istart = (const BYTE*) cSrc;
281         BYTE* const ostart = (BYTE*) dst;
282         BYTE* const oend = ostart + dstSize;
283         const void* const dtPtr = DTable + 1;
284         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
285 
286         /* Init */
287         BIT_DStream_t bitD1;
288         BIT_DStream_t bitD2;
289         BIT_DStream_t bitD3;
290         BIT_DStream_t bitD4;
291         size_t const length1 = MEM_readLE16(istart);
292         size_t const length2 = MEM_readLE16(istart+2);
293         size_t const length3 = MEM_readLE16(istart+4);
294         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
295         const BYTE* const istart1 = istart + 6;  /* jumpTable */
296         const BYTE* const istart2 = istart1 + length1;
297         const BYTE* const istart3 = istart2 + length2;
298         const BYTE* const istart4 = istart3 + length3;
299         const size_t segmentSize = (dstSize+3) / 4;
300         BYTE* const opStart2 = ostart + segmentSize;
301         BYTE* const opStart3 = opStart2 + segmentSize;
302         BYTE* const opStart4 = opStart3 + segmentSize;
303         BYTE* op1 = ostart;
304         BYTE* op2 = opStart2;
305         BYTE* op3 = opStart3;
306         BYTE* op4 = opStart4;
307         U32 endSignal = BIT_DStream_unfinished;
308         DTableDesc const dtd = HUF_getDTableDesc(DTable);
309         U32 const dtLog = dtd.tableLog;
310 
311         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
312         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
313         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
314         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
315         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
316 
317         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
318         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
319         while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
320             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324             HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
325             HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
326             HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
327             HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
328             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
329             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
330             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
331             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
332             HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
333             HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
334             HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
335             HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
336             BIT_reloadDStream(&bitD1);
337             BIT_reloadDStream(&bitD2);
338             BIT_reloadDStream(&bitD3);
339             BIT_reloadDStream(&bitD4);
340         }
341 
342         /* check corruption */
343         /* note : should not be necessary : op# advance in lock step, and we control op4.
344          *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
345         if (op1 > opStart2) return ERROR(corruption_detected);
346         if (op2 > opStart3) return ERROR(corruption_detected);
347         if (op3 > opStart4) return ERROR(corruption_detected);
348         /* note : op4 supposed already verified within main loop */
349 
350         /* finish bitStreams one by one */
351         HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
352         HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
353         HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
354         HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
355 
356         /* check */
357         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
358           if (!endCheck) return ERROR(corruption_detected); }
359 
360         /* decoded size */
361         return dstSize;
362     }
363 }
364 
365 
366 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
367                                                const void *cSrc,
368                                                size_t cSrcSize,
369                                                const HUF_DTable *DTable);
370 
371 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
372 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
373 
374 
375 
376 size_t HUF_decompress1X1_usingDTable(
377           void* dst,  size_t dstSize,
378     const void* cSrc, size_t cSrcSize,
379     const HUF_DTable* DTable)
380 {
381     DTableDesc dtd = HUF_getDTableDesc(DTable);
382     if (dtd.tableType != 0) return ERROR(GENERIC);
383     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
384 }
385 
386 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
387                                    const void* cSrc, size_t cSrcSize,
388                                    void* workSpace, size_t wkspSize)
389 {
390     const BYTE* ip = (const BYTE*) cSrc;
391 
392     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
393     if (HUF_isError(hSize)) return hSize;
394     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
395     ip += hSize; cSrcSize -= hSize;
396 
397     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
398 }
399 
400 
401 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
402                               const void* cSrc, size_t cSrcSize)
403 {
404     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
405     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
406                                        workSpace, sizeof(workSpace));
407 }
408 
409 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
410 {
411     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
412     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
413 }
414 
415 size_t HUF_decompress4X1_usingDTable(
416           void* dst,  size_t dstSize,
417     const void* cSrc, size_t cSrcSize,
418     const HUF_DTable* DTable)
419 {
420     DTableDesc dtd = HUF_getDTableDesc(DTable);
421     if (dtd.tableType != 0) return ERROR(GENERIC);
422     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
423 }
424 
425 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
426                                    const void* cSrc, size_t cSrcSize,
427                                    void* workSpace, size_t wkspSize, int bmi2)
428 {
429     const BYTE* ip = (const BYTE*) cSrc;
430 
431     size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
432                                                 workSpace, wkspSize);
433     if (HUF_isError(hSize)) return hSize;
434     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
435     ip += hSize; cSrcSize -= hSize;
436 
437     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
438 }
439 
440 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
441                                    const void* cSrc, size_t cSrcSize,
442                                    void* workSpace, size_t wkspSize)
443 {
444     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
445 }
446 
447 
448 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
449 {
450     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
451     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
452                                        workSpace, sizeof(workSpace));
453 }
454 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
455 {
456     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
457     return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
458 }
459 
460 #endif /* HUF_FORCE_DECOMPRESS_X2 */
461 
462 
463 #ifndef HUF_FORCE_DECOMPRESS_X1
464 
465 /* *************************/
466 /* double-symbols decoding */
467 /* *************************/
468 
469 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
470 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
471 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
472 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
473 
474 
475 /* HUF_fillDTableX2Level2() :
476  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
477 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
478                            const U32* rankValOrigin, const int minWeight,
479                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
480                            U32 nbBitsBaseline, U16 baseSeq)
481 {
482     HUF_DEltX2 DElt;
483     U32 rankVal[HUF_TABLELOG_MAX + 1];
484 
485     /* get pre-calculated rankVal */
486     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
487 
488     /* fill skipped values */
489     if (minWeight>1) {
490         U32 i, skipSize = rankVal[minWeight];
491         MEM_writeLE16(&(DElt.sequence), baseSeq);
492         DElt.nbBits   = (BYTE)(consumed);
493         DElt.length   = 1;
494         for (i = 0; i < skipSize; i++)
495             DTable[i] = DElt;
496     }
497 
498     /* fill DTable */
499     {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
500             const U32 symbol = sortedSymbols[s].symbol;
501             const U32 weight = sortedSymbols[s].weight;
502             const U32 nbBits = nbBitsBaseline - weight;
503             const U32 length = 1 << (sizeLog-nbBits);
504             const U32 start = rankVal[weight];
505             U32 i = start;
506             const U32 end = start + length;
507 
508             MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
509             DElt.nbBits = (BYTE)(nbBits + consumed);
510             DElt.length = 2;
511             do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
512 
513             rankVal[weight] += length;
514     }   }
515 }
516 
517 
518 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
519                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
520                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
521                            const U32 nbBitsBaseline)
522 {
523     U32 rankVal[HUF_TABLELOG_MAX + 1];
524     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
525     const U32 minBits  = nbBitsBaseline - maxWeight;
526     U32 s;
527 
528     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
529 
530     /* fill DTable */
531     for (s=0; s<sortedListSize; s++) {
532         const U16 symbol = sortedList[s].symbol;
533         const U32 weight = sortedList[s].weight;
534         const U32 nbBits = nbBitsBaseline - weight;
535         const U32 start = rankVal[weight];
536         const U32 length = 1 << (targetLog-nbBits);
537 
538         if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
539             U32 sortedRank;
540             int minWeight = nbBits + scaleLog;
541             if (minWeight < 1) minWeight = 1;
542             sortedRank = rankStart[minWeight];
543             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
544                            rankValOrigin[nbBits], minWeight,
545                            sortedList+sortedRank, sortedListSize-sortedRank,
546                            nbBitsBaseline, symbol);
547         } else {
548             HUF_DEltX2 DElt;
549             MEM_writeLE16(&(DElt.sequence), symbol);
550             DElt.nbBits = (BYTE)(nbBits);
551             DElt.length = 1;
552             {   U32 const end = start + length;
553                 U32 u;
554                 for (u = start; u < end; u++) DTable[u] = DElt;
555         }   }
556         rankVal[weight] += length;
557     }
558 }
559 
560 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
561                        const void* src, size_t srcSize,
562                              void* workSpace, size_t wkspSize)
563 {
564     U32 tableLog, maxW, sizeOfSort, nbSymbols;
565     DTableDesc dtd = HUF_getDTableDesc(DTable);
566     U32 const maxTableLog = dtd.maxTableLog;
567     size_t iSize;
568     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
569     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
570     U32 *rankStart;
571 
572     rankValCol_t* rankVal;
573     U32* rankStats;
574     U32* rankStart0;
575     sortedSymbol_t* sortedSymbol;
576     BYTE* weightList;
577     size_t spaceUsed32 = 0;
578 
579     rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
580     spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
581     rankStats = (U32 *)workSpace + spaceUsed32;
582     spaceUsed32 += HUF_TABLELOG_MAX + 1;
583     rankStart0 = (U32 *)workSpace + spaceUsed32;
584     spaceUsed32 += HUF_TABLELOG_MAX + 2;
585     sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
586     spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
587     weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
588     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
589 
590     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
591 
592     rankStart = rankStart0 + 1;
593     memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
594 
595     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
596     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
597     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
598 
599     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
600     if (HUF_isError(iSize)) return iSize;
601 
602     /* check result */
603     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
604 
605     /* find maxWeight */
606     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
607 
608     /* Get start index of each weight */
609     {   U32 w, nextRankStart = 0;
610         for (w=1; w<maxW+1; w++) {
611             U32 current = nextRankStart;
612             nextRankStart += rankStats[w];
613             rankStart[w] = current;
614         }
615         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
616         sizeOfSort = nextRankStart;
617     }
618 
619     /* sort symbols by weight */
620     {   U32 s;
621         for (s=0; s<nbSymbols; s++) {
622             U32 const w = weightList[s];
623             U32 const r = rankStart[w]++;
624             sortedSymbol[r].symbol = (BYTE)s;
625             sortedSymbol[r].weight = (BYTE)w;
626         }
627         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
628     }
629 
630     /* Build rankVal */
631     {   U32* const rankVal0 = rankVal[0];
632         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
633             U32 nextRankVal = 0;
634             U32 w;
635             for (w=1; w<maxW+1; w++) {
636                 U32 current = nextRankVal;
637                 nextRankVal += rankStats[w] << (w+rescale);
638                 rankVal0[w] = current;
639         }   }
640         {   U32 const minBits = tableLog+1 - maxW;
641             U32 consumed;
642             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
643                 U32* const rankValPtr = rankVal[consumed];
644                 U32 w;
645                 for (w = 1; w < maxW+1; w++) {
646                     rankValPtr[w] = rankVal0[w] >> consumed;
647     }   }   }   }
648 
649     HUF_fillDTableX2(dt, maxTableLog,
650                    sortedSymbol, sizeOfSort,
651                    rankStart0, rankVal, maxW,
652                    tableLog+1);
653 
654     dtd.tableLog = (BYTE)maxTableLog;
655     dtd.tableType = 1;
656     memcpy(DTable, &dtd, sizeof(dtd));
657     return iSize;
658 }
659 
660 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
661 {
662   U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
663   return HUF_readDTableX2_wksp(DTable, src, srcSize,
664                                workSpace, sizeof(workSpace));
665 }
666 
667 
668 FORCE_INLINE_TEMPLATE U32
669 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
670 {
671     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
672     memcpy(op, dt+val, 2);
673     BIT_skipBits(DStream, dt[val].nbBits);
674     return dt[val].length;
675 }
676 
677 FORCE_INLINE_TEMPLATE U32
678 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
679 {
680     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
681     memcpy(op, dt+val, 1);
682     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
683     else {
684         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
685             BIT_skipBits(DStream, dt[val].nbBits);
686             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
687                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
688                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
689     }   }
690     return 1;
691 }
692 
693 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
694     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
695 
696 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
697     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
698         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
699 
700 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
701     if (MEM_64bits()) \
702         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
703 
704 HINT_INLINE size_t
705 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
706                 const HUF_DEltX2* const dt, const U32 dtLog)
707 {
708     BYTE* const pStart = p;
709 
710     /* up to 8 symbols at a time */
711     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
712         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
713         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
714         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
715         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
716     }
717 
718     /* closer to end : up to 2 symbols at a time */
719     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
720         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
721 
722     while (p <= pEnd-2)
723         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
724 
725     if (p < pEnd)
726         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
727 
728     return p-pStart;
729 }
730 
731 FORCE_INLINE_TEMPLATE size_t
732 HUF_decompress1X2_usingDTable_internal_body(
733           void* dst,  size_t dstSize,
734     const void* cSrc, size_t cSrcSize,
735     const HUF_DTable* DTable)
736 {
737     BIT_DStream_t bitD;
738 
739     /* Init */
740     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
741 
742     /* decode */
743     {   BYTE* const ostart = (BYTE*) dst;
744         BYTE* const oend = ostart + dstSize;
745         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
746         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
747         DTableDesc const dtd = HUF_getDTableDesc(DTable);
748         HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
749     }
750 
751     /* check */
752     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
753 
754     /* decoded size */
755     return dstSize;
756 }
757 
758 
759 FORCE_INLINE_TEMPLATE size_t
760 HUF_decompress4X2_usingDTable_internal_body(
761           void* dst,  size_t dstSize,
762     const void* cSrc, size_t cSrcSize,
763     const HUF_DTable* DTable)
764 {
765     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
766 
767     {   const BYTE* const istart = (const BYTE*) cSrc;
768         BYTE* const ostart = (BYTE*) dst;
769         BYTE* const oend = ostart + dstSize;
770         const void* const dtPtr = DTable+1;
771         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
772 
773         /* Init */
774         BIT_DStream_t bitD1;
775         BIT_DStream_t bitD2;
776         BIT_DStream_t bitD3;
777         BIT_DStream_t bitD4;
778         size_t const length1 = MEM_readLE16(istart);
779         size_t const length2 = MEM_readLE16(istart+2);
780         size_t const length3 = MEM_readLE16(istart+4);
781         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
782         const BYTE* const istart1 = istart + 6;  /* jumpTable */
783         const BYTE* const istart2 = istart1 + length1;
784         const BYTE* const istart3 = istart2 + length2;
785         const BYTE* const istart4 = istart3 + length3;
786         size_t const segmentSize = (dstSize+3) / 4;
787         BYTE* const opStart2 = ostart + segmentSize;
788         BYTE* const opStart3 = opStart2 + segmentSize;
789         BYTE* const opStart4 = opStart3 + segmentSize;
790         BYTE* op1 = ostart;
791         BYTE* op2 = opStart2;
792         BYTE* op3 = opStart3;
793         BYTE* op4 = opStart4;
794         U32 endSignal;
795         DTableDesc const dtd = HUF_getDTableDesc(DTable);
796         U32 const dtLog = dtd.tableLog;
797 
798         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
799         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
800         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
801         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
802         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
803 
804         /* 16-32 symbols per loop (4-8 symbols per stream) */
805         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
806         for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
807             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
808             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
809             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
811             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
812             HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
813             HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
814             HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
816             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
817             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
818             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
819             HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
820             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
821             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
822             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
823 
824             endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
825         }
826 
827         /* check corruption */
828         if (op1 > opStart2) return ERROR(corruption_detected);
829         if (op2 > opStart3) return ERROR(corruption_detected);
830         if (op3 > opStart4) return ERROR(corruption_detected);
831         /* note : op4 already verified within main loop */
832 
833         /* finish bitStreams one by one */
834         HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
835         HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
836         HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
837         HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
838 
839         /* check */
840         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
841           if (!endCheck) return ERROR(corruption_detected); }
842 
843         /* decoded size */
844         return dstSize;
845     }
846 }
847 
848 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
849 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
850 
851 size_t HUF_decompress1X2_usingDTable(
852           void* dst,  size_t dstSize,
853     const void* cSrc, size_t cSrcSize,
854     const HUF_DTable* DTable)
855 {
856     DTableDesc dtd = HUF_getDTableDesc(DTable);
857     if (dtd.tableType != 1) return ERROR(GENERIC);
858     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
859 }
860 
861 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
862                                    const void* cSrc, size_t cSrcSize,
863                                    void* workSpace, size_t wkspSize)
864 {
865     const BYTE* ip = (const BYTE*) cSrc;
866 
867     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
868                                                workSpace, wkspSize);
869     if (HUF_isError(hSize)) return hSize;
870     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
871     ip += hSize; cSrcSize -= hSize;
872 
873     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
874 }
875 
876 
877 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
878                               const void* cSrc, size_t cSrcSize)
879 {
880     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
881     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
882                                        workSpace, sizeof(workSpace));
883 }
884 
885 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
886 {
887     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
888     return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
889 }
890 
891 size_t HUF_decompress4X2_usingDTable(
892           void* dst,  size_t dstSize,
893     const void* cSrc, size_t cSrcSize,
894     const HUF_DTable* DTable)
895 {
896     DTableDesc dtd = HUF_getDTableDesc(DTable);
897     if (dtd.tableType != 1) return ERROR(GENERIC);
898     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
899 }
900 
901 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
902                                    const void* cSrc, size_t cSrcSize,
903                                    void* workSpace, size_t wkspSize, int bmi2)
904 {
905     const BYTE* ip = (const BYTE*) cSrc;
906 
907     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
908                                          workSpace, wkspSize);
909     if (HUF_isError(hSize)) return hSize;
910     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
911     ip += hSize; cSrcSize -= hSize;
912 
913     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
914 }
915 
916 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
917                                    const void* cSrc, size_t cSrcSize,
918                                    void* workSpace, size_t wkspSize)
919 {
920     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
921 }
922 
923 
924 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
925                               const void* cSrc, size_t cSrcSize)
926 {
927     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
928     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
929                                        workSpace, sizeof(workSpace));
930 }
931 
932 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
933 {
934     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
935     return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
936 }
937 
938 #endif /* HUF_FORCE_DECOMPRESS_X1 */
939 
940 
941 /* ***********************************/
942 /* Universal decompression selectors */
943 /* ***********************************/
944 
945 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
946                                     const void* cSrc, size_t cSrcSize,
947                                     const HUF_DTable* DTable)
948 {
949     DTableDesc const dtd = HUF_getDTableDesc(DTable);
950 #if defined(HUF_FORCE_DECOMPRESS_X1)
951     (void)dtd;
952     assert(dtd.tableType == 0);
953     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954 #elif defined(HUF_FORCE_DECOMPRESS_X2)
955     (void)dtd;
956     assert(dtd.tableType == 1);
957     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
958 #else
959     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
960                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
961 #endif
962 }
963 
964 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
965                                     const void* cSrc, size_t cSrcSize,
966                                     const HUF_DTable* DTable)
967 {
968     DTableDesc const dtd = HUF_getDTableDesc(DTable);
969 #if defined(HUF_FORCE_DECOMPRESS_X1)
970     (void)dtd;
971     assert(dtd.tableType == 0);
972     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
973 #elif defined(HUF_FORCE_DECOMPRESS_X2)
974     (void)dtd;
975     assert(dtd.tableType == 1);
976     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977 #else
978     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
979                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
980 #endif
981 }
982 
983 
984 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
985 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
986 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
987 {
988     /* single, double, quad */
989     {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
990     {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
991     {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
992     {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
993     {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
994     {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
995     {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
996     {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
997     {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
998     {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
999     {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
1000     {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
1001     {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
1002     {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
1003     {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
1004     {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
1005 };
1006 #endif
1007 
1008 /** HUF_selectDecoder() :
1009  *  Tells which decoder is likely to decode faster,
1010  *  based on a set of pre-computed metrics.
1011  * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1012  *  Assumption : 0 < dstSize <= 128 KB */
1013 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1014 {
1015     assert(dstSize > 0);
1016     assert(dstSize <= 128*1024);
1017 #if defined(HUF_FORCE_DECOMPRESS_X1)
1018     (void)dstSize;
1019     (void)cSrcSize;
1020     return 0;
1021 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1022     (void)dstSize;
1023     (void)cSrcSize;
1024     return 1;
1025 #else
1026     /* decoder timing evaluation */
1027     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
1028         U32 const D256 = (U32)(dstSize >> 8);
1029         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1030         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1031         DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
1032         return DTime1 < DTime0;
1033     }
1034 #endif
1035 }
1036 
1037 
1038 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1039 
1040 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1041 {
1042 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1043     static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1044 #endif
1045 
1046     /* validation checks */
1047     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1048     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1049     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1050     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1051 
1052     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1053 #if defined(HUF_FORCE_DECOMPRESS_X1)
1054         (void)algoNb;
1055         assert(algoNb == 0);
1056         return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1057 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1058         (void)algoNb;
1059         assert(algoNb == 1);
1060         return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1061 #else
1062         return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1063 #endif
1064     }
1065 }
1066 
1067 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1068 {
1069     /* validation checks */
1070     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1071     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1072     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1073     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1074 
1075     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1076 #if defined(HUF_FORCE_DECOMPRESS_X1)
1077         (void)algoNb;
1078         assert(algoNb == 0);
1079         return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1080 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1081         (void)algoNb;
1082         assert(algoNb == 1);
1083         return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1084 #else
1085         return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1086                         HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1087 #endif
1088     }
1089 }
1090 
1091 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1092 {
1093     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1094     return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1095                                          workSpace, sizeof(workSpace));
1096 }
1097 
1098 
1099 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1100                                      size_t dstSize, const void* cSrc,
1101                                      size_t cSrcSize, void* workSpace,
1102                                      size_t wkspSize)
1103 {
1104     /* validation checks */
1105     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1106     if (cSrcSize == 0) return ERROR(corruption_detected);
1107 
1108     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1109 #if defined(HUF_FORCE_DECOMPRESS_X1)
1110         (void)algoNb;
1111         assert(algoNb == 0);
1112         return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1113 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1114         (void)algoNb;
1115         assert(algoNb == 1);
1116         return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1117 #else
1118         return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1119                             cSrcSize, workSpace, wkspSize):
1120                         HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1121 #endif
1122     }
1123 }
1124 
1125 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1126                                   const void* cSrc, size_t cSrcSize,
1127                                   void* workSpace, size_t wkspSize)
1128 {
1129     /* validation checks */
1130     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1131     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1132     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1133     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1134 
1135     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1136 #if defined(HUF_FORCE_DECOMPRESS_X1)
1137         (void)algoNb;
1138         assert(algoNb == 0);
1139         return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1140                                 cSrcSize, workSpace, wkspSize);
1141 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1142         (void)algoNb;
1143         assert(algoNb == 1);
1144         return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1145                                 cSrcSize, workSpace, wkspSize);
1146 #else
1147         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1148                                 cSrcSize, workSpace, wkspSize):
1149                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1150                                 cSrcSize, workSpace, wkspSize);
1151 #endif
1152     }
1153 }
1154 
1155 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1156                              const void* cSrc, size_t cSrcSize)
1157 {
1158     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1159     return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1160                                       workSpace, sizeof(workSpace));
1161 }
1162 
1163 
1164 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1165 {
1166     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1167 #if defined(HUF_FORCE_DECOMPRESS_X1)
1168     (void)dtd;
1169     assert(dtd.tableType == 0);
1170     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1171 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1172     (void)dtd;
1173     assert(dtd.tableType == 1);
1174     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1175 #else
1176     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1177                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178 #endif
1179 }
1180 
1181 #ifndef HUF_FORCE_DECOMPRESS_X2
1182 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1183 {
1184     const BYTE* ip = (const BYTE*) cSrc;
1185 
1186     size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1187     if (HUF_isError(hSize)) return hSize;
1188     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1189     ip += hSize; cSrcSize -= hSize;
1190 
1191     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1192 }
1193 #endif
1194 
1195 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1196 {
1197     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1198 #if defined(HUF_FORCE_DECOMPRESS_X1)
1199     (void)dtd;
1200     assert(dtd.tableType == 0);
1201     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1202 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1203     (void)dtd;
1204     assert(dtd.tableType == 1);
1205     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1206 #else
1207     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1208                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1209 #endif
1210 }
1211 
1212 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1213 {
1214     /* validation checks */
1215     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1216     if (cSrcSize == 0) return ERROR(corruption_detected);
1217 
1218     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1219 #if defined(HUF_FORCE_DECOMPRESS_X1)
1220         (void)algoNb;
1221         assert(algoNb == 0);
1222         return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1223 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1224         (void)algoNb;
1225         assert(algoNb == 1);
1226         return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1227 #else
1228         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1229                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1230 #endif
1231     }
1232 }
1233