xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/decompress/huf_decompress.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /* ******************************************************************
3  * huff0 huffman decoder,
4  * part of Finite State Entropy library
5  * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
6  *
7  *  You can contact the author at :
8  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
9  *
10  * This source code is licensed under both the BSD-style license (found in the
11  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12  * in the COPYING file in the root directory of this source tree).
13  * You may select, at your option, one of the above-listed licenses.
14 ****************************************************************** */
15 
16 /* **************************************************************
17 *  Dependencies
18 ****************************************************************/
19 #include <string.h>     /* memcpy, memset */
20 #include "../common/compiler.h"
21 #include "../common/bitstream.h"  /* BIT_* */
22 #include "../common/fse.h"        /* to compress headers */
23 #define HUF_STATIC_LINKING_ONLY
24 #include "../common/huf.h"
25 #include "../common/error_private.h"
26 
27 /* **************************************************************
28 *  Macros
29 ****************************************************************/
30 
31 /* These two optional macros force the use one way or another of the two
32  * Huffman decompression implementations. You can't force in both directions
33  * at the same time.
34  */
35 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
36     defined(HUF_FORCE_DECOMPRESS_X2)
37 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
38 #endif
39 
40 
41 /* **************************************************************
42 *  Error Management
43 ****************************************************************/
44 #define HUF_isError ERR_isError
45 
46 
47 /* **************************************************************
48 *  Byte alignment for workSpace management
49 ****************************************************************/
50 #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
51 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
52 
53 
54 /* **************************************************************
55 *  BMI2 Variant Wrappers
56 ****************************************************************/
57 #if DYNAMIC_BMI2
58 
59 #define HUF_DGEN(fn)                                                        \
60                                                                             \
61     static size_t fn##_default(                                             \
62                   void* dst,  size_t dstSize,                               \
63             const void* cSrc, size_t cSrcSize,                              \
64             const HUF_DTable* DTable)                                       \
65     {                                                                       \
66         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
67     }                                                                       \
68                                                                             \
69     static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
70                   void* dst,  size_t dstSize,                               \
71             const void* cSrc, size_t cSrcSize,                              \
72             const HUF_DTable* DTable)                                       \
73     {                                                                       \
74         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
75     }                                                                       \
76                                                                             \
77     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
78                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
79     {                                                                       \
80         if (bmi2) {                                                         \
81             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
82         }                                                                   \
83         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
84     }
85 
86 #else
87 
88 #define HUF_DGEN(fn)                                                        \
89     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
90                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
91     {                                                                       \
92         (void)bmi2;                                                         \
93         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
94     }
95 
96 #endif
97 
98 
99 /*-***************************/
100 /*  generic DTableDesc       */
101 /*-***************************/
102 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
103 
HUF_getDTableDesc(const HUF_DTable * table)104 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
105 {
106     DTableDesc dtd;
107     memcpy(&dtd, table, sizeof(dtd));
108     return dtd;
109 }
110 
111 
112 #ifndef HUF_FORCE_DECOMPRESS_X2
113 
114 /*-***************************/
115 /*  single-symbol decoding   */
116 /*-***************************/
117 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
118 
HUF_readDTableX1_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)119 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
120 {
121     U32 tableLog = 0;
122     U32 nbSymbols = 0;
123     size_t iSize;
124     void* const dtPtr = DTable + 1;
125     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
126 
127     U32* rankVal;
128     BYTE* huffWeight;
129     size_t spaceUsed32 = 0;
130 
131     rankVal = (U32 *)workSpace + spaceUsed32;
132     spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
133     huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
134     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
135 
136     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
137 
138     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
139     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
140 
141     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
142     if (HUF_isError(iSize)) return iSize;
143 
144     /* Table header */
145     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
146         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
147         dtd.tableType = 0;
148         dtd.tableLog = (BYTE)tableLog;
149         memcpy(DTable, &dtd, sizeof(dtd));
150     }
151 
152     /* Calculate starting value for each rank */
153     {   U32 n, nextRankStart = 0;
154         for (n=1; n<tableLog+1; n++) {
155             U32 const current = nextRankStart;
156             nextRankStart += (rankVal[n] << (n-1));
157             rankVal[n] = current;
158     }   }
159 
160     /* fill DTable */
161     {   U32 n;
162         size_t const nEnd = nbSymbols;
163         for (n=0; n<nEnd; n++) {
164             size_t const w = huffWeight[n];
165             size_t const length = (1 << w) >> 1;
166             size_t const uStart = rankVal[w];
167             size_t const uEnd = uStart + length;
168             size_t u;
169             HUF_DEltX1 D;
170             D.byte = (BYTE)n;
171             D.nbBits = (BYTE)(tableLog + 1 - w);
172             rankVal[w] = (U32)uEnd;
173             if (length < 4) {
174                 /* Use length in the loop bound so the compiler knows it is short. */
175                 for (u = 0; u < length; ++u)
176                     dt[uStart + u] = D;
177             } else {
178                 /* Unroll the loop 4 times, we know it is a power of 2. */
179                 for (u = uStart; u < uEnd; u += 4) {
180                     dt[u + 0] = D;
181                     dt[u + 1] = D;
182                     dt[u + 2] = D;
183                     dt[u + 3] = D;
184     }   }   }   }
185     return iSize;
186 }
187 
HUF_readDTableX1(HUF_DTable * DTable,const void * src,size_t srcSize)188 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
189 {
190     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
191     return HUF_readDTableX1_wksp(DTable, src, srcSize,
192                                  workSpace, sizeof(workSpace));
193 }
194 
195 FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)196 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
197 {
198     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
199     BYTE const c = dt[val].byte;
200     BIT_skipBits(Dstream, dt[val].nbBits);
201     return c;
202 }
203 
204 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
205     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
206 
207 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
208     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
209         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
210 
211 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
212     if (MEM_64bits()) \
213         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
214 
215 HINT_INLINE size_t
HUF_decodeStreamX1(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX1 * const dt,const U32 dtLog)216 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
217 {
218     BYTE* const pStart = p;
219 
220     /* up to 4 symbols at a time */
221     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
222         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
223         HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
224         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
225         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
226     }
227 
228     /* [0-3] symbols remaining */
229     if (MEM_32bits())
230         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
231             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
232 
233     /* no more data to retrieve from bitstream, no need to reload */
234     while (p < pEnd)
235         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
236 
237     return pEnd-pStart;
238 }
239 
240 FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)241 HUF_decompress1X1_usingDTable_internal_body(
242           void* dst,  size_t dstSize,
243     const void* cSrc, size_t cSrcSize,
244     const HUF_DTable* DTable)
245 {
246     BYTE* op = (BYTE*)dst;
247     BYTE* const oend = op + dstSize;
248     const void* dtPtr = DTable + 1;
249     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
250     BIT_DStream_t bitD;
251     DTableDesc const dtd = HUF_getDTableDesc(DTable);
252     U32 const dtLog = dtd.tableLog;
253 
254     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
255 
256     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
257 
258     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
259 
260     return dstSize;
261 }
262 
263 FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)264 HUF_decompress4X1_usingDTable_internal_body(
265           void* dst,  size_t dstSize,
266     const void* cSrc, size_t cSrcSize,
267     const HUF_DTable* DTable)
268 {
269     /* Check */
270     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
271 
272     {   const BYTE* const istart = (const BYTE*) cSrc;
273         BYTE* const ostart = (BYTE*) dst;
274         BYTE* const oend = ostart + dstSize;
275         BYTE* const olimit = oend - 3;
276         const void* const dtPtr = DTable + 1;
277         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
278 
279         /* Init */
280         BIT_DStream_t bitD1;
281         BIT_DStream_t bitD2;
282         BIT_DStream_t bitD3;
283         BIT_DStream_t bitD4;
284         size_t const length1 = MEM_readLE16(istart);
285         size_t const length2 = MEM_readLE16(istart+2);
286         size_t const length3 = MEM_readLE16(istart+4);
287         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
288         const BYTE* const istart1 = istart + 6;  /* jumpTable */
289         const BYTE* const istart2 = istart1 + length1;
290         const BYTE* const istart3 = istart2 + length2;
291         const BYTE* const istart4 = istart3 + length3;
292         const size_t segmentSize = (dstSize+3) / 4;
293         BYTE* const opStart2 = ostart + segmentSize;
294         BYTE* const opStart3 = opStart2 + segmentSize;
295         BYTE* const opStart4 = opStart3 + segmentSize;
296         BYTE* op1 = ostart;
297         BYTE* op2 = opStart2;
298         BYTE* op3 = opStart3;
299         BYTE* op4 = opStart4;
300         DTableDesc const dtd = HUF_getDTableDesc(DTable);
301         U32 const dtLog = dtd.tableLog;
302         U32 endSignal = 1;
303 
304         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
305         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
306         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
307         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
308         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
309 
310         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
311         for ( ; (endSignal) & (op4 < olimit) ; ) {
312             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
313             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
314             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
315             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
316             HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
317             HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
318             HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
319             HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
320             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324             HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
325             HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
326             HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
327             HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
328             endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
329             endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
330             endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
331             endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
332         }
333 
334         /* check corruption */
335         /* note : should not be necessary : op# advance in lock step, and we control op4.
336          *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
337         if (op1 > opStart2) return ERROR(corruption_detected);
338         if (op2 > opStart3) return ERROR(corruption_detected);
339         if (op3 > opStart4) return ERROR(corruption_detected);
340         /* note : op4 supposed already verified within main loop */
341 
342         /* finish bitStreams one by one */
343         HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
344         HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
345         HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
346         HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
347 
348         /* check */
349         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
350           if (!endCheck) return ERROR(corruption_detected); }
351 
352         /* decoded size */
353         return dstSize;
354     }
355 }
356 
357 
358 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
359                                                const void *cSrc,
360                                                size_t cSrcSize,
361                                                const HUF_DTable *DTable);
362 
363 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)364 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
365 
366 
367 
368 size_t HUF_decompress1X1_usingDTable(
369           void* dst,  size_t dstSize,
370     const void* cSrc, size_t cSrcSize,
371     const HUF_DTable* DTable)
372 {
373     DTableDesc dtd = HUF_getDTableDesc(DTable);
374     if (dtd.tableType != 0) return ERROR(GENERIC);
375     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
376 }
377 
HUF_decompress1X1_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)378 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
379                                    const void* cSrc, size_t cSrcSize,
380                                    void* workSpace, size_t wkspSize)
381 {
382     const BYTE* ip = (const BYTE*) cSrc;
383 
384     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
385     if (HUF_isError(hSize)) return hSize;
386     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
387     ip += hSize; cSrcSize -= hSize;
388 
389     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
390 }
391 
392 
HUF_decompress1X1_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)393 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
394                               const void* cSrc, size_t cSrcSize)
395 {
396     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
397     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
398                                        workSpace, sizeof(workSpace));
399 }
400 
HUF_decompress1X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)401 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
402 {
403     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
404     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
405 }
406 
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)407 size_t HUF_decompress4X1_usingDTable(
408           void* dst,  size_t dstSize,
409     const void* cSrc, size_t cSrcSize,
410     const HUF_DTable* DTable)
411 {
412     DTableDesc dtd = HUF_getDTableDesc(DTable);
413     if (dtd.tableType != 0) return ERROR(GENERIC);
414     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
415 }
416 
HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)417 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
418                                    const void* cSrc, size_t cSrcSize,
419                                    void* workSpace, size_t wkspSize, int bmi2)
420 {
421     const BYTE* ip = (const BYTE*) cSrc;
422 
423     size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
424                                                 workSpace, wkspSize);
425     if (HUF_isError(hSize)) return hSize;
426     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
427     ip += hSize; cSrcSize -= hSize;
428 
429     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
430 }
431 
HUF_decompress4X1_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)432 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
433                                    const void* cSrc, size_t cSrcSize,
434                                    void* workSpace, size_t wkspSize)
435 {
436     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
437 }
438 
439 
HUF_decompress4X1_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)440 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
441 {
442     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
443     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
444                                        workSpace, sizeof(workSpace));
445 }
HUF_decompress4X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)446 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
447 {
448     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
449     return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
450 }
451 
452 #endif /* HUF_FORCE_DECOMPRESS_X2 */
453 
454 
455 #ifndef HUF_FORCE_DECOMPRESS_X1
456 
457 /* *************************/
458 /* double-symbols decoding */
459 /* *************************/
460 
461 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
462 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
463 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
464 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
465 
466 
467 /* HUF_fillDTableX2Level2() :
468  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX2Level2(HUF_DEltX2 * DTable,U32 sizeLog,const U32 consumed,const U32 * rankValOrigin,const int minWeight,const sortedSymbol_t * sortedSymbols,const U32 sortedListSize,U32 nbBitsBaseline,U16 baseSeq)469 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
470                            const U32* rankValOrigin, const int minWeight,
471                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
472                            U32 nbBitsBaseline, U16 baseSeq)
473 {
474     HUF_DEltX2 DElt;
475     U32 rankVal[HUF_TABLELOG_MAX + 1];
476 
477     /* get pre-calculated rankVal */
478     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
479 
480     /* fill skipped values */
481     if (minWeight>1) {
482         U32 i, skipSize = rankVal[minWeight];
483         MEM_writeLE16(&(DElt.sequence), baseSeq);
484         DElt.nbBits   = (BYTE)(consumed);
485         DElt.length   = 1;
486         for (i = 0; i < skipSize; i++)
487             DTable[i] = DElt;
488     }
489 
490     /* fill DTable */
491     {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
492             const U32 symbol = sortedSymbols[s].symbol;
493             const U32 weight = sortedSymbols[s].weight;
494             const U32 nbBits = nbBitsBaseline - weight;
495             const U32 length = 1 << (sizeLog-nbBits);
496             const U32 start = rankVal[weight];
497             U32 i = start;
498             const U32 end = start + length;
499 
500             MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
501             DElt.nbBits = (BYTE)(nbBits + consumed);
502             DElt.length = 2;
503             do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
504 
505             rankVal[weight] += length;
506     }   }
507 }
508 
509 
HUF_fillDTableX2(HUF_DEltX2 * DTable,const U32 targetLog,const sortedSymbol_t * sortedList,const U32 sortedListSize,const U32 * rankStart,rankVal_t rankValOrigin,const U32 maxWeight,const U32 nbBitsBaseline)510 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
511                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
512                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
513                            const U32 nbBitsBaseline)
514 {
515     U32 rankVal[HUF_TABLELOG_MAX + 1];
516     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
517     const U32 minBits  = nbBitsBaseline - maxWeight;
518     U32 s;
519 
520     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
521 
522     /* fill DTable */
523     for (s=0; s<sortedListSize; s++) {
524         const U16 symbol = sortedList[s].symbol;
525         const U32 weight = sortedList[s].weight;
526         const U32 nbBits = nbBitsBaseline - weight;
527         const U32 start = rankVal[weight];
528         const U32 length = 1 << (targetLog-nbBits);
529 
530         if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
531             U32 sortedRank;
532             int minWeight = nbBits + scaleLog;
533             if (minWeight < 1) minWeight = 1;
534             sortedRank = rankStart[minWeight];
535             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
536                            rankValOrigin[nbBits], minWeight,
537                            sortedList+sortedRank, sortedListSize-sortedRank,
538                            nbBitsBaseline, symbol);
539         } else {
540             HUF_DEltX2 DElt;
541             MEM_writeLE16(&(DElt.sequence), symbol);
542             DElt.nbBits = (BYTE)(nbBits);
543             DElt.length = 1;
544             {   U32 const end = start + length;
545                 U32 u;
546                 for (u = start; u < end; u++) DTable[u] = DElt;
547         }   }
548         rankVal[weight] += length;
549     }
550 }
551 
HUF_readDTableX2_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)552 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
553                        const void* src, size_t srcSize,
554                              void* workSpace, size_t wkspSize)
555 {
556     U32 tableLog, maxW, sizeOfSort, nbSymbols;
557     DTableDesc dtd = HUF_getDTableDesc(DTable);
558     U32 const maxTableLog = dtd.maxTableLog;
559     size_t iSize;
560     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
561     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
562     U32 *rankStart;
563 
564     rankValCol_t* rankVal;
565     U32* rankStats;
566     U32* rankStart0;
567     sortedSymbol_t* sortedSymbol;
568     BYTE* weightList;
569     size_t spaceUsed32 = 0;
570 
571     rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
572     spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
573     rankStats = (U32 *)workSpace + spaceUsed32;
574     spaceUsed32 += HUF_TABLELOG_MAX + 1;
575     rankStart0 = (U32 *)workSpace + spaceUsed32;
576     spaceUsed32 += HUF_TABLELOG_MAX + 2;
577     sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
578     spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
579     weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
580     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
581 
582     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
583 
584     rankStart = rankStart0 + 1;
585     memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
586 
587     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
588     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
589     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
590 
591     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
592     if (HUF_isError(iSize)) return iSize;
593 
594     /* check result */
595     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
596 
597     /* find maxWeight */
598     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
599 
600     /* Get start index of each weight */
601     {   U32 w, nextRankStart = 0;
602         for (w=1; w<maxW+1; w++) {
603             U32 current = nextRankStart;
604             nextRankStart += rankStats[w];
605             rankStart[w] = current;
606         }
607         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
608         sizeOfSort = nextRankStart;
609     }
610 
611     /* sort symbols by weight */
612     {   U32 s;
613         for (s=0; s<nbSymbols; s++) {
614             U32 const w = weightList[s];
615             U32 const r = rankStart[w]++;
616             sortedSymbol[r].symbol = (BYTE)s;
617             sortedSymbol[r].weight = (BYTE)w;
618         }
619         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
620     }
621 
622     /* Build rankVal */
623     {   U32* const rankVal0 = rankVal[0];
624         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
625             U32 nextRankVal = 0;
626             U32 w;
627             for (w=1; w<maxW+1; w++) {
628                 U32 current = nextRankVal;
629                 nextRankVal += rankStats[w] << (w+rescale);
630                 rankVal0[w] = current;
631         }   }
632         {   U32 const minBits = tableLog+1 - maxW;
633             U32 consumed;
634             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
635                 U32* const rankValPtr = rankVal[consumed];
636                 U32 w;
637                 for (w = 1; w < maxW+1; w++) {
638                     rankValPtr[w] = rankVal0[w] >> consumed;
639     }   }   }   }
640 
641     HUF_fillDTableX2(dt, maxTableLog,
642                    sortedSymbol, sizeOfSort,
643                    rankStart0, rankVal, maxW,
644                    tableLog+1);
645 
646     dtd.tableLog = (BYTE)maxTableLog;
647     dtd.tableType = 1;
648     memcpy(DTable, &dtd, sizeof(dtd));
649     return iSize;
650 }
651 
HUF_readDTableX2(HUF_DTable * DTable,const void * src,size_t srcSize)652 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
653 {
654   U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
655   return HUF_readDTableX2_wksp(DTable, src, srcSize,
656                                workSpace, sizeof(workSpace));
657 }
658 
659 
660 FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)661 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
662 {
663     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
664     memcpy(op, dt+val, 2);
665     BIT_skipBits(DStream, dt[val].nbBits);
666     return dt[val].length;
667 }
668 
669 FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)670 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
671 {
672     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
673     memcpy(op, dt+val, 1);
674     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
675     else {
676         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
677             BIT_skipBits(DStream, dt[val].nbBits);
678             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
679                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
680                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
681     }   }
682     return 1;
683 }
684 
685 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
686     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
687 
688 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
689     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
690         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
691 
692 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
693     if (MEM_64bits()) \
694         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
695 
696 HINT_INLINE size_t
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)697 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
698                 const HUF_DEltX2* const dt, const U32 dtLog)
699 {
700     BYTE* const pStart = p;
701 
702     /* up to 8 symbols at a time */
703     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
704         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
705         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
706         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
707         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
708     }
709 
710     /* closer to end : up to 2 symbols at a time */
711     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
712         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
713 
714     while (p <= pEnd-2)
715         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
716 
717     if (p < pEnd)
718         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
719 
720     return p-pStart;
721 }
722 
723 FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)724 HUF_decompress1X2_usingDTable_internal_body(
725           void* dst,  size_t dstSize,
726     const void* cSrc, size_t cSrcSize,
727     const HUF_DTable* DTable)
728 {
729     BIT_DStream_t bitD;
730 
731     /* Init */
732     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
733 
734     /* decode */
735     {   BYTE* const ostart = (BYTE*) dst;
736         BYTE* const oend = ostart + dstSize;
737         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
738         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
739         DTableDesc const dtd = HUF_getDTableDesc(DTable);
740         HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
741     }
742 
743     /* check */
744     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
745 
746     /* decoded size */
747     return dstSize;
748 }
749 
750 FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)751 HUF_decompress4X2_usingDTable_internal_body(
752           void* dst,  size_t dstSize,
753     const void* cSrc, size_t cSrcSize,
754     const HUF_DTable* DTable)
755 {
756     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
757 
758     {   const BYTE* const istart = (const BYTE*) cSrc;
759         BYTE* const ostart = (BYTE*) dst;
760         BYTE* const oend = ostart + dstSize;
761         BYTE* const olimit = oend - (sizeof(size_t)-1);
762         const void* const dtPtr = DTable+1;
763         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
764 
765         /* Init */
766         BIT_DStream_t bitD1;
767         BIT_DStream_t bitD2;
768         BIT_DStream_t bitD3;
769         BIT_DStream_t bitD4;
770         size_t const length1 = MEM_readLE16(istart);
771         size_t const length2 = MEM_readLE16(istart+2);
772         size_t const length3 = MEM_readLE16(istart+4);
773         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
774         const BYTE* const istart1 = istart + 6;  /* jumpTable */
775         const BYTE* const istart2 = istart1 + length1;
776         const BYTE* const istart3 = istart2 + length2;
777         const BYTE* const istart4 = istart3 + length3;
778         size_t const segmentSize = (dstSize+3) / 4;
779         BYTE* const opStart2 = ostart + segmentSize;
780         BYTE* const opStart3 = opStart2 + segmentSize;
781         BYTE* const opStart4 = opStart3 + segmentSize;
782         BYTE* op1 = ostart;
783         BYTE* op2 = opStart2;
784         BYTE* op3 = opStart3;
785         BYTE* op4 = opStart4;
786         U32 endSignal = 1;
787         DTableDesc const dtd = HUF_getDTableDesc(DTable);
788         U32 const dtLog = dtd.tableLog;
789 
790         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
791         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
792         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
793         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
794         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
795 
796         /* 16-32 symbols per loop (4-8 symbols per stream) */
797         for ( ; (endSignal) & (op4 < olimit); ) {
798 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
799             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
800             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
801             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
802             HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
803             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
804             HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
805             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
806             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
807             endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
808             endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
809             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810             HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
811             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
812             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
813             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
814             HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
816             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
817             endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
818             endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
819 #else
820             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
821             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
822             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
823             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
824             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
825             HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
826             HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
827             HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
828             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
829             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
830             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
831             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
832             HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
833             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
834             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
835             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
836             endSignal = (U32)LIKELY(
837                         (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
838                       & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
839                       & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
840                       & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
841 #endif
842         }
843 
844         /* check corruption */
845         if (op1 > opStart2) return ERROR(corruption_detected);
846         if (op2 > opStart3) return ERROR(corruption_detected);
847         if (op3 > opStart4) return ERROR(corruption_detected);
848         /* note : op4 already verified within main loop */
849 
850         /* finish bitStreams one by one */
851         HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
852         HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
853         HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
854         HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
855 
856         /* check */
857         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
858           if (!endCheck) return ERROR(corruption_detected); }
859 
860         /* decoded size */
861         return dstSize;
862     }
863 }
864 
865 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)866 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
867 
868 size_t HUF_decompress1X2_usingDTable(
869           void* dst,  size_t dstSize,
870     const void* cSrc, size_t cSrcSize,
871     const HUF_DTable* DTable)
872 {
873     DTableDesc dtd = HUF_getDTableDesc(DTable);
874     if (dtd.tableType != 1) return ERROR(GENERIC);
875     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
876 }
877 
HUF_decompress1X2_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)878 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
879                                    const void* cSrc, size_t cSrcSize,
880                                    void* workSpace, size_t wkspSize)
881 {
882     const BYTE* ip = (const BYTE*) cSrc;
883 
884     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
885                                                workSpace, wkspSize);
886     if (HUF_isError(hSize)) return hSize;
887     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
888     ip += hSize; cSrcSize -= hSize;
889 
890     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
891 }
892 
893 
HUF_decompress1X2_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)894 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
895                               const void* cSrc, size_t cSrcSize)
896 {
897     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
898     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
899                                        workSpace, sizeof(workSpace));
900 }
901 
HUF_decompress1X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)902 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
903 {
904     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
905     return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
906 }
907 
HUF_decompress4X2_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)908 size_t HUF_decompress4X2_usingDTable(
909           void* dst,  size_t dstSize,
910     const void* cSrc, size_t cSrcSize,
911     const HUF_DTable* DTable)
912 {
913     DTableDesc dtd = HUF_getDTableDesc(DTable);
914     if (dtd.tableType != 1) return ERROR(GENERIC);
915     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
916 }
917 
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)918 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
919                                    const void* cSrc, size_t cSrcSize,
920                                    void* workSpace, size_t wkspSize, int bmi2)
921 {
922     const BYTE* ip = (const BYTE*) cSrc;
923 
924     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
925                                          workSpace, wkspSize);
926     if (HUF_isError(hSize)) return hSize;
927     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
928     ip += hSize; cSrcSize -= hSize;
929 
930     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
931 }
932 
HUF_decompress4X2_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)933 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
934                                    const void* cSrc, size_t cSrcSize,
935                                    void* workSpace, size_t wkspSize)
936 {
937     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
938 }
939 
940 
HUF_decompress4X2_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)941 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
942                               const void* cSrc, size_t cSrcSize)
943 {
944     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
945     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
946                                        workSpace, sizeof(workSpace));
947 }
948 
HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)949 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
950 {
951     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
952     return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
953 }
954 
955 #endif /* HUF_FORCE_DECOMPRESS_X1 */
956 
957 
958 /* ***********************************/
959 /* Universal decompression selectors */
960 /* ***********************************/
961 
HUF_decompress1X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)962 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
963                                     const void* cSrc, size_t cSrcSize,
964                                     const HUF_DTable* DTable)
965 {
966     DTableDesc const dtd = HUF_getDTableDesc(DTable);
967 #if defined(HUF_FORCE_DECOMPRESS_X1)
968     (void)dtd;
969     assert(dtd.tableType == 0);
970     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
971 #elif defined(HUF_FORCE_DECOMPRESS_X2)
972     (void)dtd;
973     assert(dtd.tableType == 1);
974     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
975 #else
976     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
977                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
978 #endif
979 }
980 
HUF_decompress4X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)981 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
982                                     const void* cSrc, size_t cSrcSize,
983                                     const HUF_DTable* DTable)
984 {
985     DTableDesc const dtd = HUF_getDTableDesc(DTable);
986 #if defined(HUF_FORCE_DECOMPRESS_X1)
987     (void)dtd;
988     assert(dtd.tableType == 0);
989     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
990 #elif defined(HUF_FORCE_DECOMPRESS_X2)
991     (void)dtd;
992     assert(dtd.tableType == 1);
993     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
994 #else
995     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
996                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
997 #endif
998 }
999 
1000 
1001 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1002 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1003 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1004 {
1005     /* single, double, quad */
1006     {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
1007     {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
1008     {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
1009     {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
1010     {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
1011     {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
1012     {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
1013     {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
1014     {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
1015     {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
1016     {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
1017     {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
1018     {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
1019     {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
1020     {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
1021     {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
1022 };
1023 #endif
1024 
1025 /** HUF_selectDecoder() :
1026  *  Tells which decoder is likely to decode faster,
1027  *  based on a set of pre-computed metrics.
1028  * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1029  *  Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1030 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1031 {
1032     assert(dstSize > 0);
1033     assert(dstSize <= 128*1024);
1034 #if defined(HUF_FORCE_DECOMPRESS_X1)
1035     (void)dstSize;
1036     (void)cSrcSize;
1037     return 0;
1038 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1039     (void)dstSize;
1040     (void)cSrcSize;
1041     return 1;
1042 #else
1043     /* decoder timing evaluation */
1044     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
1045         U32 const D256 = (U32)(dstSize >> 8);
1046         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1047         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1048         DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
1049         return DTime1 < DTime0;
1050     }
1051 #endif
1052 }
1053 
1054 
1055 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1056 
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1057 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1058 {
1059 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1060     static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1061 #endif
1062 
1063     /* validation checks */
1064     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1065     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1066     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1067     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1068 
1069     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1070 #if defined(HUF_FORCE_DECOMPRESS_X1)
1071         (void)algoNb;
1072         assert(algoNb == 0);
1073         return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1074 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1075         (void)algoNb;
1076         assert(algoNb == 1);
1077         return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1078 #else
1079         return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1080 #endif
1081     }
1082 }
1083 
HUF_decompress4X_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1084 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1085 {
1086     /* validation checks */
1087     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1088     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1089     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1090     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1091 
1092     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 #if defined(HUF_FORCE_DECOMPRESS_X1)
1094         (void)algoNb;
1095         assert(algoNb == 0);
1096         return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1097 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1098         (void)algoNb;
1099         assert(algoNb == 1);
1100         return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1101 #else
1102         return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1103                         HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1104 #endif
1105     }
1106 }
1107 
HUF_decompress4X_hufOnly(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1108 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1109 {
1110     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1111     return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1112                                          workSpace, sizeof(workSpace));
1113 }
1114 
1115 
HUF_decompress4X_hufOnly_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1116 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1117                                      size_t dstSize, const void* cSrc,
1118                                      size_t cSrcSize, void* workSpace,
1119                                      size_t wkspSize)
1120 {
1121     /* validation checks */
1122     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1123     if (cSrcSize == 0) return ERROR(corruption_detected);
1124 
1125     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1126 #if defined(HUF_FORCE_DECOMPRESS_X1)
1127         (void)algoNb;
1128         assert(algoNb == 0);
1129         return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1130 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1131         (void)algoNb;
1132         assert(algoNb == 1);
1133         return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1134 #else
1135         return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1136                             cSrcSize, workSpace, wkspSize):
1137                         HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1138 #endif
1139     }
1140 }
1141 
HUF_decompress1X_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1142 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1143                                   const void* cSrc, size_t cSrcSize,
1144                                   void* workSpace, size_t wkspSize)
1145 {
1146     /* validation checks */
1147     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1148     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1149     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1150     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1151 
1152     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1153 #if defined(HUF_FORCE_DECOMPRESS_X1)
1154         (void)algoNb;
1155         assert(algoNb == 0);
1156         return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1157                                 cSrcSize, workSpace, wkspSize);
1158 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1159         (void)algoNb;
1160         assert(algoNb == 1);
1161         return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1162                                 cSrcSize, workSpace, wkspSize);
1163 #else
1164         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1165                                 cSrcSize, workSpace, wkspSize):
1166                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1167                                 cSrcSize, workSpace, wkspSize);
1168 #endif
1169     }
1170 }
1171 
HUF_decompress1X_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1172 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1173                              const void* cSrc, size_t cSrcSize)
1174 {
1175     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1176     return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1177                                       workSpace, sizeof(workSpace));
1178 }
1179 
1180 
HUF_decompress1X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1181 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1182 {
1183     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1184 #if defined(HUF_FORCE_DECOMPRESS_X1)
1185     (void)dtd;
1186     assert(dtd.tableType == 0);
1187     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1188 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1189     (void)dtd;
1190     assert(dtd.tableType == 1);
1191     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1192 #else
1193     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1194                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1195 #endif
1196 }
1197 
1198 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1199 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1200 {
1201     const BYTE* ip = (const BYTE*) cSrc;
1202 
1203     size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1204     if (HUF_isError(hSize)) return hSize;
1205     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1206     ip += hSize; cSrcSize -= hSize;
1207 
1208     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1209 }
1210 #endif
1211 
HUF_decompress4X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1212 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1213 {
1214     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1215 #if defined(HUF_FORCE_DECOMPRESS_X1)
1216     (void)dtd;
1217     assert(dtd.tableType == 0);
1218     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1219 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1220     (void)dtd;
1221     assert(dtd.tableType == 1);
1222     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1223 #else
1224     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1225                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1226 #endif
1227 }
1228 
HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1229 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1230 {
1231     /* validation checks */
1232     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1233     if (cSrcSize == 0) return ERROR(corruption_detected);
1234 
1235     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1236 #if defined(HUF_FORCE_DECOMPRESS_X1)
1237         (void)algoNb;
1238         assert(algoNb == 0);
1239         return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1240 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1241         (void)algoNb;
1242         assert(algoNb == 1);
1243         return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1244 #else
1245         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1246                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1247 #endif
1248     }
1249 }
1250