1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /* ******************************************************************
3 * huff0 huffman decoder,
4 * part of Finite State Entropy library
5 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
6 *
7 * You can contact the author at :
8 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
9 *
10 * This source code is licensed under both the BSD-style license (found in the
11 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12 * in the COPYING file in the root directory of this source tree).
13 * You may select, at your option, one of the above-listed licenses.
14 ****************************************************************** */
15
16 /* **************************************************************
17 * Dependencies
18 ****************************************************************/
19 #include <string.h> /* memcpy, memset */
20 #include "../common/compiler.h"
21 #include "../common/bitstream.h" /* BIT_* */
22 #include "../common/fse.h" /* to compress headers */
23 #define HUF_STATIC_LINKING_ONLY
24 #include "../common/huf.h"
25 #include "../common/error_private.h"
26
27 /* **************************************************************
28 * Macros
29 ****************************************************************/
30
31 /* These two optional macros force the use one way or another of the two
32 * Huffman decompression implementations. You can't force in both directions
33 * at the same time.
34 */
35 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
36 defined(HUF_FORCE_DECOMPRESS_X2)
37 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
38 #endif
39
40
41 /* **************************************************************
42 * Error Management
43 ****************************************************************/
44 #define HUF_isError ERR_isError
45
46
47 /* **************************************************************
48 * Byte alignment for workSpace management
49 ****************************************************************/
50 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
51 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
52
53
54 /* **************************************************************
55 * BMI2 Variant Wrappers
56 ****************************************************************/
57 #if DYNAMIC_BMI2
58
59 #define HUF_DGEN(fn) \
60 \
61 static size_t fn##_default( \
62 void* dst, size_t dstSize, \
63 const void* cSrc, size_t cSrcSize, \
64 const HUF_DTable* DTable) \
65 { \
66 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
67 } \
68 \
69 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
70 void* dst, size_t dstSize, \
71 const void* cSrc, size_t cSrcSize, \
72 const HUF_DTable* DTable) \
73 { \
74 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
75 } \
76 \
77 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
78 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
79 { \
80 if (bmi2) { \
81 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
82 } \
83 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
84 }
85
86 #else
87
88 #define HUF_DGEN(fn) \
89 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
90 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
91 { \
92 (void)bmi2; \
93 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
94 }
95
96 #endif
97
98
99 /*-***************************/
100 /* generic DTableDesc */
101 /*-***************************/
102 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
103
HUF_getDTableDesc(const HUF_DTable * table)104 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
105 {
106 DTableDesc dtd;
107 memcpy(&dtd, table, sizeof(dtd));
108 return dtd;
109 }
110
111
112 #ifndef HUF_FORCE_DECOMPRESS_X2
113
114 /*-***************************/
115 /* single-symbol decoding */
116 /*-***************************/
117 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
118
HUF_readDTableX1_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)119 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
120 {
121 U32 tableLog = 0;
122 U32 nbSymbols = 0;
123 size_t iSize;
124 void* const dtPtr = DTable + 1;
125 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
126
127 U32* rankVal;
128 BYTE* huffWeight;
129 size_t spaceUsed32 = 0;
130
131 rankVal = (U32 *)workSpace + spaceUsed32;
132 spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
133 huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
134 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
135
136 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
137
138 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
139 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
140
141 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
142 if (HUF_isError(iSize)) return iSize;
143
144 /* Table header */
145 { DTableDesc dtd = HUF_getDTableDesc(DTable);
146 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
147 dtd.tableType = 0;
148 dtd.tableLog = (BYTE)tableLog;
149 memcpy(DTable, &dtd, sizeof(dtd));
150 }
151
152 /* Calculate starting value for each rank */
153 { U32 n, nextRankStart = 0;
154 for (n=1; n<tableLog+1; n++) {
155 U32 const current = nextRankStart;
156 nextRankStart += (rankVal[n] << (n-1));
157 rankVal[n] = current;
158 } }
159
160 /* fill DTable */
161 { U32 n;
162 size_t const nEnd = nbSymbols;
163 for (n=0; n<nEnd; n++) {
164 size_t const w = huffWeight[n];
165 size_t const length = (1 << w) >> 1;
166 size_t const uStart = rankVal[w];
167 size_t const uEnd = uStart + length;
168 size_t u;
169 HUF_DEltX1 D;
170 D.byte = (BYTE)n;
171 D.nbBits = (BYTE)(tableLog + 1 - w);
172 rankVal[w] = (U32)uEnd;
173 if (length < 4) {
174 /* Use length in the loop bound so the compiler knows it is short. */
175 for (u = 0; u < length; ++u)
176 dt[uStart + u] = D;
177 } else {
178 /* Unroll the loop 4 times, we know it is a power of 2. */
179 for (u = uStart; u < uEnd; u += 4) {
180 dt[u + 0] = D;
181 dt[u + 1] = D;
182 dt[u + 2] = D;
183 dt[u + 3] = D;
184 } } } }
185 return iSize;
186 }
187
HUF_readDTableX1(HUF_DTable * DTable,const void * src,size_t srcSize)188 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
189 {
190 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
191 return HUF_readDTableX1_wksp(DTable, src, srcSize,
192 workSpace, sizeof(workSpace));
193 }
194
195 FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)196 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
197 {
198 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
199 BYTE const c = dt[val].byte;
200 BIT_skipBits(Dstream, dt[val].nbBits);
201 return c;
202 }
203
204 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
205 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
206
207 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
208 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
209 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
210
211 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
212 if (MEM_64bits()) \
213 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
214
215 HINT_INLINE size_t
HUF_decodeStreamX1(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX1 * const dt,const U32 dtLog)216 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
217 {
218 BYTE* const pStart = p;
219
220 /* up to 4 symbols at a time */
221 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
222 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
223 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
224 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
225 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
226 }
227
228 /* [0-3] symbols remaining */
229 if (MEM_32bits())
230 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
231 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
232
233 /* no more data to retrieve from bitstream, no need to reload */
234 while (p < pEnd)
235 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
236
237 return pEnd-pStart;
238 }
239
240 FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)241 HUF_decompress1X1_usingDTable_internal_body(
242 void* dst, size_t dstSize,
243 const void* cSrc, size_t cSrcSize,
244 const HUF_DTable* DTable)
245 {
246 BYTE* op = (BYTE*)dst;
247 BYTE* const oend = op + dstSize;
248 const void* dtPtr = DTable + 1;
249 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
250 BIT_DStream_t bitD;
251 DTableDesc const dtd = HUF_getDTableDesc(DTable);
252 U32 const dtLog = dtd.tableLog;
253
254 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
255
256 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
257
258 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
259
260 return dstSize;
261 }
262
263 FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)264 HUF_decompress4X1_usingDTable_internal_body(
265 void* dst, size_t dstSize,
266 const void* cSrc, size_t cSrcSize,
267 const HUF_DTable* DTable)
268 {
269 /* Check */
270 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
271
272 { const BYTE* const istart = (const BYTE*) cSrc;
273 BYTE* const ostart = (BYTE*) dst;
274 BYTE* const oend = ostart + dstSize;
275 BYTE* const olimit = oend - 3;
276 const void* const dtPtr = DTable + 1;
277 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
278
279 /* Init */
280 BIT_DStream_t bitD1;
281 BIT_DStream_t bitD2;
282 BIT_DStream_t bitD3;
283 BIT_DStream_t bitD4;
284 size_t const length1 = MEM_readLE16(istart);
285 size_t const length2 = MEM_readLE16(istart+2);
286 size_t const length3 = MEM_readLE16(istart+4);
287 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
288 const BYTE* const istart1 = istart + 6; /* jumpTable */
289 const BYTE* const istart2 = istart1 + length1;
290 const BYTE* const istart3 = istart2 + length2;
291 const BYTE* const istart4 = istart3 + length3;
292 const size_t segmentSize = (dstSize+3) / 4;
293 BYTE* const opStart2 = ostart + segmentSize;
294 BYTE* const opStart3 = opStart2 + segmentSize;
295 BYTE* const opStart4 = opStart3 + segmentSize;
296 BYTE* op1 = ostart;
297 BYTE* op2 = opStart2;
298 BYTE* op3 = opStart3;
299 BYTE* op4 = opStart4;
300 DTableDesc const dtd = HUF_getDTableDesc(DTable);
301 U32 const dtLog = dtd.tableLog;
302 U32 endSignal = 1;
303
304 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
305 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
306 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
307 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
308 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
309
310 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
311 for ( ; (endSignal) & (op4 < olimit) ; ) {
312 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
313 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
314 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
315 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
316 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
317 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
318 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
319 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
320 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
325 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
326 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
327 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
328 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
329 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
330 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
331 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
332 }
333
334 /* check corruption */
335 /* note : should not be necessary : op# advance in lock step, and we control op4.
336 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
337 if (op1 > opStart2) return ERROR(corruption_detected);
338 if (op2 > opStart3) return ERROR(corruption_detected);
339 if (op3 > opStart4) return ERROR(corruption_detected);
340 /* note : op4 supposed already verified within main loop */
341
342 /* finish bitStreams one by one */
343 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
344 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
345 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
346 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
347
348 /* check */
349 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
350 if (!endCheck) return ERROR(corruption_detected); }
351
352 /* decoded size */
353 return dstSize;
354 }
355 }
356
357
358 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
359 const void *cSrc,
360 size_t cSrcSize,
361 const HUF_DTable *DTable);
362
363 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)364 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
365
366
367
368 size_t HUF_decompress1X1_usingDTable(
369 void* dst, size_t dstSize,
370 const void* cSrc, size_t cSrcSize,
371 const HUF_DTable* DTable)
372 {
373 DTableDesc dtd = HUF_getDTableDesc(DTable);
374 if (dtd.tableType != 0) return ERROR(GENERIC);
375 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
376 }
377
HUF_decompress1X1_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)378 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
379 const void* cSrc, size_t cSrcSize,
380 void* workSpace, size_t wkspSize)
381 {
382 const BYTE* ip = (const BYTE*) cSrc;
383
384 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
385 if (HUF_isError(hSize)) return hSize;
386 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
387 ip += hSize; cSrcSize -= hSize;
388
389 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
390 }
391
392
HUF_decompress1X1_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)393 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
394 const void* cSrc, size_t cSrcSize)
395 {
396 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
397 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
398 workSpace, sizeof(workSpace));
399 }
400
HUF_decompress1X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)401 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
402 {
403 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
404 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
405 }
406
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)407 size_t HUF_decompress4X1_usingDTable(
408 void* dst, size_t dstSize,
409 const void* cSrc, size_t cSrcSize,
410 const HUF_DTable* DTable)
411 {
412 DTableDesc dtd = HUF_getDTableDesc(DTable);
413 if (dtd.tableType != 0) return ERROR(GENERIC);
414 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
415 }
416
HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)417 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
418 const void* cSrc, size_t cSrcSize,
419 void* workSpace, size_t wkspSize, int bmi2)
420 {
421 const BYTE* ip = (const BYTE*) cSrc;
422
423 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
424 workSpace, wkspSize);
425 if (HUF_isError(hSize)) return hSize;
426 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
427 ip += hSize; cSrcSize -= hSize;
428
429 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
430 }
431
HUF_decompress4X1_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)432 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
433 const void* cSrc, size_t cSrcSize,
434 void* workSpace, size_t wkspSize)
435 {
436 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
437 }
438
439
HUF_decompress4X1_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)440 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
441 {
442 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
443 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
444 workSpace, sizeof(workSpace));
445 }
HUF_decompress4X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)446 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
447 {
448 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
449 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
450 }
451
452 #endif /* HUF_FORCE_DECOMPRESS_X2 */
453
454
455 #ifndef HUF_FORCE_DECOMPRESS_X1
456
457 /* *************************/
458 /* double-symbols decoding */
459 /* *************************/
460
461 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
462 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
463 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
464 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
465
466
467 /* HUF_fillDTableX2Level2() :
468 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX2Level2(HUF_DEltX2 * DTable,U32 sizeLog,const U32 consumed,const U32 * rankValOrigin,const int minWeight,const sortedSymbol_t * sortedSymbols,const U32 sortedListSize,U32 nbBitsBaseline,U16 baseSeq)469 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
470 const U32* rankValOrigin, const int minWeight,
471 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
472 U32 nbBitsBaseline, U16 baseSeq)
473 {
474 HUF_DEltX2 DElt;
475 U32 rankVal[HUF_TABLELOG_MAX + 1];
476
477 /* get pre-calculated rankVal */
478 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
479
480 /* fill skipped values */
481 if (minWeight>1) {
482 U32 i, skipSize = rankVal[minWeight];
483 MEM_writeLE16(&(DElt.sequence), baseSeq);
484 DElt.nbBits = (BYTE)(consumed);
485 DElt.length = 1;
486 for (i = 0; i < skipSize; i++)
487 DTable[i] = DElt;
488 }
489
490 /* fill DTable */
491 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
492 const U32 symbol = sortedSymbols[s].symbol;
493 const U32 weight = sortedSymbols[s].weight;
494 const U32 nbBits = nbBitsBaseline - weight;
495 const U32 length = 1 << (sizeLog-nbBits);
496 const U32 start = rankVal[weight];
497 U32 i = start;
498 const U32 end = start + length;
499
500 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
501 DElt.nbBits = (BYTE)(nbBits + consumed);
502 DElt.length = 2;
503 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
504
505 rankVal[weight] += length;
506 } }
507 }
508
509
HUF_fillDTableX2(HUF_DEltX2 * DTable,const U32 targetLog,const sortedSymbol_t * sortedList,const U32 sortedListSize,const U32 * rankStart,rankVal_t rankValOrigin,const U32 maxWeight,const U32 nbBitsBaseline)510 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
511 const sortedSymbol_t* sortedList, const U32 sortedListSize,
512 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
513 const U32 nbBitsBaseline)
514 {
515 U32 rankVal[HUF_TABLELOG_MAX + 1];
516 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
517 const U32 minBits = nbBitsBaseline - maxWeight;
518 U32 s;
519
520 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
521
522 /* fill DTable */
523 for (s=0; s<sortedListSize; s++) {
524 const U16 symbol = sortedList[s].symbol;
525 const U32 weight = sortedList[s].weight;
526 const U32 nbBits = nbBitsBaseline - weight;
527 const U32 start = rankVal[weight];
528 const U32 length = 1 << (targetLog-nbBits);
529
530 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
531 U32 sortedRank;
532 int minWeight = nbBits + scaleLog;
533 if (minWeight < 1) minWeight = 1;
534 sortedRank = rankStart[minWeight];
535 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
536 rankValOrigin[nbBits], minWeight,
537 sortedList+sortedRank, sortedListSize-sortedRank,
538 nbBitsBaseline, symbol);
539 } else {
540 HUF_DEltX2 DElt;
541 MEM_writeLE16(&(DElt.sequence), symbol);
542 DElt.nbBits = (BYTE)(nbBits);
543 DElt.length = 1;
544 { U32 const end = start + length;
545 U32 u;
546 for (u = start; u < end; u++) DTable[u] = DElt;
547 } }
548 rankVal[weight] += length;
549 }
550 }
551
HUF_readDTableX2_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)552 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
553 const void* src, size_t srcSize,
554 void* workSpace, size_t wkspSize)
555 {
556 U32 tableLog, maxW, sizeOfSort, nbSymbols;
557 DTableDesc dtd = HUF_getDTableDesc(DTable);
558 U32 const maxTableLog = dtd.maxTableLog;
559 size_t iSize;
560 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
561 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
562 U32 *rankStart;
563
564 rankValCol_t* rankVal;
565 U32* rankStats;
566 U32* rankStart0;
567 sortedSymbol_t* sortedSymbol;
568 BYTE* weightList;
569 size_t spaceUsed32 = 0;
570
571 rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
572 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
573 rankStats = (U32 *)workSpace + spaceUsed32;
574 spaceUsed32 += HUF_TABLELOG_MAX + 1;
575 rankStart0 = (U32 *)workSpace + spaceUsed32;
576 spaceUsed32 += HUF_TABLELOG_MAX + 2;
577 sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
578 spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
579 weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
580 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
581
582 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
583
584 rankStart = rankStart0 + 1;
585 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
586
587 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
588 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
589 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
590
591 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
592 if (HUF_isError(iSize)) return iSize;
593
594 /* check result */
595 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
596
597 /* find maxWeight */
598 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
599
600 /* Get start index of each weight */
601 { U32 w, nextRankStart = 0;
602 for (w=1; w<maxW+1; w++) {
603 U32 current = nextRankStart;
604 nextRankStart += rankStats[w];
605 rankStart[w] = current;
606 }
607 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
608 sizeOfSort = nextRankStart;
609 }
610
611 /* sort symbols by weight */
612 { U32 s;
613 for (s=0; s<nbSymbols; s++) {
614 U32 const w = weightList[s];
615 U32 const r = rankStart[w]++;
616 sortedSymbol[r].symbol = (BYTE)s;
617 sortedSymbol[r].weight = (BYTE)w;
618 }
619 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
620 }
621
622 /* Build rankVal */
623 { U32* const rankVal0 = rankVal[0];
624 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
625 U32 nextRankVal = 0;
626 U32 w;
627 for (w=1; w<maxW+1; w++) {
628 U32 current = nextRankVal;
629 nextRankVal += rankStats[w] << (w+rescale);
630 rankVal0[w] = current;
631 } }
632 { U32 const minBits = tableLog+1 - maxW;
633 U32 consumed;
634 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
635 U32* const rankValPtr = rankVal[consumed];
636 U32 w;
637 for (w = 1; w < maxW+1; w++) {
638 rankValPtr[w] = rankVal0[w] >> consumed;
639 } } } }
640
641 HUF_fillDTableX2(dt, maxTableLog,
642 sortedSymbol, sizeOfSort,
643 rankStart0, rankVal, maxW,
644 tableLog+1);
645
646 dtd.tableLog = (BYTE)maxTableLog;
647 dtd.tableType = 1;
648 memcpy(DTable, &dtd, sizeof(dtd));
649 return iSize;
650 }
651
HUF_readDTableX2(HUF_DTable * DTable,const void * src,size_t srcSize)652 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
653 {
654 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
655 return HUF_readDTableX2_wksp(DTable, src, srcSize,
656 workSpace, sizeof(workSpace));
657 }
658
659
660 FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)661 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
662 {
663 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
664 memcpy(op, dt+val, 2);
665 BIT_skipBits(DStream, dt[val].nbBits);
666 return dt[val].length;
667 }
668
669 FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)670 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
671 {
672 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
673 memcpy(op, dt+val, 1);
674 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
675 else {
676 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
677 BIT_skipBits(DStream, dt[val].nbBits);
678 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
679 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
680 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
681 } }
682 return 1;
683 }
684
685 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
686 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
687
688 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
689 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
690 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
691
692 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
693 if (MEM_64bits()) \
694 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
695
696 HINT_INLINE size_t
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)697 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
698 const HUF_DEltX2* const dt, const U32 dtLog)
699 {
700 BYTE* const pStart = p;
701
702 /* up to 8 symbols at a time */
703 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
704 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
705 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
706 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
707 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
708 }
709
710 /* closer to end : up to 2 symbols at a time */
711 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
712 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
713
714 while (p <= pEnd-2)
715 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
716
717 if (p < pEnd)
718 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
719
720 return p-pStart;
721 }
722
723 FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)724 HUF_decompress1X2_usingDTable_internal_body(
725 void* dst, size_t dstSize,
726 const void* cSrc, size_t cSrcSize,
727 const HUF_DTable* DTable)
728 {
729 BIT_DStream_t bitD;
730
731 /* Init */
732 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
733
734 /* decode */
735 { BYTE* const ostart = (BYTE*) dst;
736 BYTE* const oend = ostart + dstSize;
737 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
738 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
739 DTableDesc const dtd = HUF_getDTableDesc(DTable);
740 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
741 }
742
743 /* check */
744 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
745
746 /* decoded size */
747 return dstSize;
748 }
749
750 FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)751 HUF_decompress4X2_usingDTable_internal_body(
752 void* dst, size_t dstSize,
753 const void* cSrc, size_t cSrcSize,
754 const HUF_DTable* DTable)
755 {
756 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
757
758 { const BYTE* const istart = (const BYTE*) cSrc;
759 BYTE* const ostart = (BYTE*) dst;
760 BYTE* const oend = ostart + dstSize;
761 BYTE* const olimit = oend - (sizeof(size_t)-1);
762 const void* const dtPtr = DTable+1;
763 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
764
765 /* Init */
766 BIT_DStream_t bitD1;
767 BIT_DStream_t bitD2;
768 BIT_DStream_t bitD3;
769 BIT_DStream_t bitD4;
770 size_t const length1 = MEM_readLE16(istart);
771 size_t const length2 = MEM_readLE16(istart+2);
772 size_t const length3 = MEM_readLE16(istart+4);
773 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
774 const BYTE* const istart1 = istart + 6; /* jumpTable */
775 const BYTE* const istart2 = istart1 + length1;
776 const BYTE* const istart3 = istart2 + length2;
777 const BYTE* const istart4 = istart3 + length3;
778 size_t const segmentSize = (dstSize+3) / 4;
779 BYTE* const opStart2 = ostart + segmentSize;
780 BYTE* const opStart3 = opStart2 + segmentSize;
781 BYTE* const opStart4 = opStart3 + segmentSize;
782 BYTE* op1 = ostart;
783 BYTE* op2 = opStart2;
784 BYTE* op3 = opStart3;
785 BYTE* op4 = opStart4;
786 U32 endSignal = 1;
787 DTableDesc const dtd = HUF_getDTableDesc(DTable);
788 U32 const dtLog = dtd.tableLog;
789
790 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
791 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
792 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
793 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
794 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
795
796 /* 16-32 symbols per loop (4-8 symbols per stream) */
797 for ( ; (endSignal) & (op4 < olimit); ) {
798 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
799 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
800 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
801 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
802 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
803 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
804 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
805 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
806 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
807 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
808 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
809 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
811 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
812 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
813 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
814 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
816 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
817 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
818 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
819 #else
820 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
821 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
822 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
823 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
824 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
825 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
826 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
827 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
828 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
829 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
830 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
831 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
832 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
833 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
834 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
835 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
836 endSignal = (U32)LIKELY(
837 (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
838 & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
839 & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
840 & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
841 #endif
842 }
843
844 /* check corruption */
845 if (op1 > opStart2) return ERROR(corruption_detected);
846 if (op2 > opStart3) return ERROR(corruption_detected);
847 if (op3 > opStart4) return ERROR(corruption_detected);
848 /* note : op4 already verified within main loop */
849
850 /* finish bitStreams one by one */
851 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
852 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
853 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
854 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
855
856 /* check */
857 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
858 if (!endCheck) return ERROR(corruption_detected); }
859
860 /* decoded size */
861 return dstSize;
862 }
863 }
864
865 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)866 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
867
868 size_t HUF_decompress1X2_usingDTable(
869 void* dst, size_t dstSize,
870 const void* cSrc, size_t cSrcSize,
871 const HUF_DTable* DTable)
872 {
873 DTableDesc dtd = HUF_getDTableDesc(DTable);
874 if (dtd.tableType != 1) return ERROR(GENERIC);
875 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
876 }
877
HUF_decompress1X2_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)878 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
879 const void* cSrc, size_t cSrcSize,
880 void* workSpace, size_t wkspSize)
881 {
882 const BYTE* ip = (const BYTE*) cSrc;
883
884 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
885 workSpace, wkspSize);
886 if (HUF_isError(hSize)) return hSize;
887 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
888 ip += hSize; cSrcSize -= hSize;
889
890 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
891 }
892
893
HUF_decompress1X2_DCtx(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)894 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
895 const void* cSrc, size_t cSrcSize)
896 {
897 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
898 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
899 workSpace, sizeof(workSpace));
900 }
901
HUF_decompress1X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)902 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
903 {
904 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
905 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
906 }
907
HUF_decompress4X2_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)908 size_t HUF_decompress4X2_usingDTable(
909 void* dst, size_t dstSize,
910 const void* cSrc, size_t cSrcSize,
911 const HUF_DTable* DTable)
912 {
913 DTableDesc dtd = HUF_getDTableDesc(DTable);
914 if (dtd.tableType != 1) return ERROR(GENERIC);
915 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
916 }
917
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)918 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
919 const void* cSrc, size_t cSrcSize,
920 void* workSpace, size_t wkspSize, int bmi2)
921 {
922 const BYTE* ip = (const BYTE*) cSrc;
923
924 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
925 workSpace, wkspSize);
926 if (HUF_isError(hSize)) return hSize;
927 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
928 ip += hSize; cSrcSize -= hSize;
929
930 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
931 }
932
HUF_decompress4X2_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)933 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
934 const void* cSrc, size_t cSrcSize,
935 void* workSpace, size_t wkspSize)
936 {
937 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
938 }
939
940
HUF_decompress4X2_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)941 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
942 const void* cSrc, size_t cSrcSize)
943 {
944 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
945 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
946 workSpace, sizeof(workSpace));
947 }
948
HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)949 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
950 {
951 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
952 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
953 }
954
955 #endif /* HUF_FORCE_DECOMPRESS_X1 */
956
957
958 /* ***********************************/
959 /* Universal decompression selectors */
960 /* ***********************************/
961
HUF_decompress1X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)962 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
963 const void* cSrc, size_t cSrcSize,
964 const HUF_DTable* DTable)
965 {
966 DTableDesc const dtd = HUF_getDTableDesc(DTable);
967 #if defined(HUF_FORCE_DECOMPRESS_X1)
968 (void)dtd;
969 assert(dtd.tableType == 0);
970 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
971 #elif defined(HUF_FORCE_DECOMPRESS_X2)
972 (void)dtd;
973 assert(dtd.tableType == 1);
974 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
975 #else
976 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
977 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
978 #endif
979 }
980
HUF_decompress4X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)981 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
982 const void* cSrc, size_t cSrcSize,
983 const HUF_DTable* DTable)
984 {
985 DTableDesc const dtd = HUF_getDTableDesc(DTable);
986 #if defined(HUF_FORCE_DECOMPRESS_X1)
987 (void)dtd;
988 assert(dtd.tableType == 0);
989 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
990 #elif defined(HUF_FORCE_DECOMPRESS_X2)
991 (void)dtd;
992 assert(dtd.tableType == 1);
993 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
994 #else
995 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
996 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
997 #endif
998 }
999
1000
1001 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1002 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1003 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1004 {
1005 /* single, double, quad */
1006 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1007 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1008 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1009 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1010 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1011 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1012 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1013 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1014 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1015 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1016 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1017 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1018 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1019 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1020 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1021 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1022 };
1023 #endif
1024
1025 /** HUF_selectDecoder() :
1026 * Tells which decoder is likely to decode faster,
1027 * based on a set of pre-computed metrics.
1028 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1029 * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1030 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1031 {
1032 assert(dstSize > 0);
1033 assert(dstSize <= 128*1024);
1034 #if defined(HUF_FORCE_DECOMPRESS_X1)
1035 (void)dstSize;
1036 (void)cSrcSize;
1037 return 0;
1038 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1039 (void)dstSize;
1040 (void)cSrcSize;
1041 return 1;
1042 #else
1043 /* decoder timing evaluation */
1044 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1045 U32 const D256 = (U32)(dstSize >> 8);
1046 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1047 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1048 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1049 return DTime1 < DTime0;
1050 }
1051 #endif
1052 }
1053
1054
1055 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1056
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1057 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1058 {
1059 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1060 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1061 #endif
1062
1063 /* validation checks */
1064 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1065 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1066 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1067 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1068
1069 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1070 #if defined(HUF_FORCE_DECOMPRESS_X1)
1071 (void)algoNb;
1072 assert(algoNb == 0);
1073 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1074 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1075 (void)algoNb;
1076 assert(algoNb == 1);
1077 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1078 #else
1079 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1080 #endif
1081 }
1082 }
1083
HUF_decompress4X_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1084 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1085 {
1086 /* validation checks */
1087 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1088 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1089 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1090 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1091
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 #if defined(HUF_FORCE_DECOMPRESS_X1)
1094 (void)algoNb;
1095 assert(algoNb == 0);
1096 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1097 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1098 (void)algoNb;
1099 assert(algoNb == 1);
1100 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1101 #else
1102 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1103 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1104 #endif
1105 }
1106 }
1107
HUF_decompress4X_hufOnly(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1108 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1109 {
1110 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1111 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1112 workSpace, sizeof(workSpace));
1113 }
1114
1115
HUF_decompress4X_hufOnly_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1116 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1117 size_t dstSize, const void* cSrc,
1118 size_t cSrcSize, void* workSpace,
1119 size_t wkspSize)
1120 {
1121 /* validation checks */
1122 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1123 if (cSrcSize == 0) return ERROR(corruption_detected);
1124
1125 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1126 #if defined(HUF_FORCE_DECOMPRESS_X1)
1127 (void)algoNb;
1128 assert(algoNb == 0);
1129 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1130 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1131 (void)algoNb;
1132 assert(algoNb == 1);
1133 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1134 #else
1135 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1136 cSrcSize, workSpace, wkspSize):
1137 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1138 #endif
1139 }
1140 }
1141
HUF_decompress1X_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1142 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1143 const void* cSrc, size_t cSrcSize,
1144 void* workSpace, size_t wkspSize)
1145 {
1146 /* validation checks */
1147 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1148 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1149 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1150 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1151
1152 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1153 #if defined(HUF_FORCE_DECOMPRESS_X1)
1154 (void)algoNb;
1155 assert(algoNb == 0);
1156 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1157 cSrcSize, workSpace, wkspSize);
1158 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1159 (void)algoNb;
1160 assert(algoNb == 1);
1161 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1162 cSrcSize, workSpace, wkspSize);
1163 #else
1164 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1165 cSrcSize, workSpace, wkspSize):
1166 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1167 cSrcSize, workSpace, wkspSize);
1168 #endif
1169 }
1170 }
1171
HUF_decompress1X_DCtx(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1172 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1173 const void* cSrc, size_t cSrcSize)
1174 {
1175 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1176 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1177 workSpace, sizeof(workSpace));
1178 }
1179
1180
HUF_decompress1X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1181 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1182 {
1183 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1184 #if defined(HUF_FORCE_DECOMPRESS_X1)
1185 (void)dtd;
1186 assert(dtd.tableType == 0);
1187 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1188 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1189 (void)dtd;
1190 assert(dtd.tableType == 1);
1191 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1192 #else
1193 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1194 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1195 #endif
1196 }
1197
1198 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1199 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1200 {
1201 const BYTE* ip = (const BYTE*) cSrc;
1202
1203 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1204 if (HUF_isError(hSize)) return hSize;
1205 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1206 ip += hSize; cSrcSize -= hSize;
1207
1208 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1209 }
1210 #endif
1211
HUF_decompress4X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1212 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1213 {
1214 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1215 #if defined(HUF_FORCE_DECOMPRESS_X1)
1216 (void)dtd;
1217 assert(dtd.tableType == 0);
1218 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1219 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1220 (void)dtd;
1221 assert(dtd.tableType == 1);
1222 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1223 #else
1224 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1225 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1226 #endif
1227 }
1228
HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1229 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1230 {
1231 /* validation checks */
1232 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1233 if (cSrcSize == 0) return ERROR(corruption_detected);
1234
1235 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1236 #if defined(HUF_FORCE_DECOMPRESS_X1)
1237 (void)algoNb;
1238 assert(algoNb == 0);
1239 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1240 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1241 (void)algoNb;
1242 assert(algoNb == 1);
1243 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1244 #else
1245 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1246 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1247 #endif
1248 }
1249 }
1250