// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
 * huff0 huffman decoder,
 * part of Finite State Entropy library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include <string.h>     /* memcpy, memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"  /* BIT_* */
#include "../common/fse.h"        /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"

/* **************************************************************
*  Macros
****************************************************************/

/* These two optional macros force the use of one or the other of the two
 * Huffman decompression implementations. Forcing both at the same time
 * is contradictory and is rejected at compile time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif


/* **************************************************************
*  Error Management
****************************************************************/
/* Local alias : error test on a size_t result code. */
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* HUF_ALIGN(x, a) : rounds `x` up to the next multiple of `a`.
 * The mask arithmetic is only meaningful when `a` is a power of 2.
 * HUF_ALIGN_MASK(x, mask) : same, taking `a - 1` directly. */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))

/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
/* HUF_DGEN(fn) :
 * Generates a static dispatcher `fn(dst, dstSize, cSrc, cSrcSize, DTable, bmi2)`
 * around an existing `fn##_body()` implementation.
 * - When DYNAMIC_BMI2 is non-zero, two wrappers of the body are emitted,
 *   `fn##_default` and `fn##_bmi2` (the latter tagged TARGET_ATTRIBUTE("bmi2")),
 *   and `fn` selects between them at run time from its `bmi2` argument.
 * - Otherwise, `fn` ignores `bmi2` and calls the body directly.
 * `fn##_body` must be declared before the macro is instantiated. */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif


99c03c5b1cSMartin Matuska /*-***************************/
100c03c5b1cSMartin Matuska /* generic DTableDesc */
101c03c5b1cSMartin Matuska /*-***************************/
102c03c5b1cSMartin Matuska typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
103c03c5b1cSMartin Matuska
HUF_getDTableDesc(const HUF_DTable * table)104c03c5b1cSMartin Matuska static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
105c03c5b1cSMartin Matuska {
106c03c5b1cSMartin Matuska DTableDesc dtd;
107c03c5b1cSMartin Matuska memcpy(&dtd, table, sizeof(dtd));
108c03c5b1cSMartin Matuska return dtd;
109c03c5b1cSMartin Matuska }
110c03c5b1cSMartin Matuska
111c03c5b1cSMartin Matuska
112c03c5b1cSMartin Matuska #ifndef HUF_FORCE_DECOMPRESS_X2
113c03c5b1cSMartin Matuska
114c03c5b1cSMartin Matuska /*-***************************/
115c03c5b1cSMartin Matuska /* single-symbol decoding */
116c03c5b1cSMartin Matuska /*-***************************/
117c03c5b1cSMartin Matuska typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
118c03c5b1cSMartin Matuska
/* HUF_readDTableX1_wksp() :
 * Builds a single-symbol decoding table into `DTable` from the table
 * description found at the start of `src`.
 * `workSpace` receives the temporary rank counters and symbol weights;
 * it is accessed as an array of U32.
 * @return : the value returned by HUF_readStats() (callers in this file
 *           advance their input pointer by it),
 *           or an error code (testable with HUF_isError()) :
 *           tableLog_tooLarge when `wkspSize` is too small or when the table
 *           described by `src` does not fit within DTable's maxTableLog. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* cells start right after the descriptor */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;

    /* carve the workspace : rankVal[] first, then huffWeight[] (U32 units) */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank :
     * rankVal[n] goes from "symbol count of weight n" to "first cell index of weight n" */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;
    }   }

    /* fill DTable : a symbol of weight w occupies (1<<w)>>1 consecutive cells */
    {   U32 n;
        size_t const nEnd = nbSymbols;
        for (n=0; n<nEnd; n++) {
            size_t const w = huffWeight[n];
            size_t const length = (1 << w) >> 1;
            size_t const uStart = rankVal[w];
            size_t const uEnd = uStart + length;
            size_t u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n;
            D.nbBits = (BYTE)(tableLog + 1 - w);
            rankVal[w] = (U32)uEnd;
            if (length < 4) {
                /* Use length in the loop bound so the compiler knows it is short. */
                for (u = 0; u < length; ++u)
                    dt[uStart + u] = D;
            } else {
                /* Unroll the loop 4 times, we know it is a power of 2. */
                for (u = uStart; u < uEnd; u += 4) {
                    dt[u + 0] = D;
                    dt[u + 1] = D;
                    dt[u + 2] = D;
                    dt[u + 3] = D;
    }   }   }   }
    return iSize;
}

/* HUF_readDTableX1() :
 * Same as HUF_readDTableX1_wksp(), using a workspace of
 * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 allocated on the stack. */
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

195c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)196c03c5b1cSMartin Matuska HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
197c03c5b1cSMartin Matuska {
198c03c5b1cSMartin Matuska size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
199c03c5b1cSMartin Matuska BYTE const c = dt[val].byte;
200c03c5b1cSMartin Matuska BIT_skipBits(Dstream, dt[val].nbBits);
201c03c5b1cSMartin Matuska return c;
202c03c5b1cSMartin Matuska }
203c03c5b1cSMartin Matuska
/* HUF_DECODE_SYMBOLX1_{0,1,2}(ptr, DStreamPtr) :
 * Decode one symbol and store it at `*ptr++`.
 * All three rely on `dt` and `dtLog` being visible in the calling scope.
 * _0 : always decodes.
 * _1 : decodes only if MEM_64bits() or HUF_TABLELOG_MAX <= 12.
 * _2 : decodes only if MEM_64bits().
 * Caution : _1 and _2 expand to a bare `if` statement; do not follow an
 * invocation with `else`, and invoke them as full statements only. */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

215c03c5b1cSMartin Matuska HINT_INLINE size_t
HUF_decodeStreamX1(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX1 * const dt,const U32 dtLog)216c03c5b1cSMartin Matuska HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
217c03c5b1cSMartin Matuska {
218c03c5b1cSMartin Matuska BYTE* const pStart = p;
219c03c5b1cSMartin Matuska
220c03c5b1cSMartin Matuska /* up to 4 symbols at a time */
221c03c5b1cSMartin Matuska while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
222c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
223c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
224c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
225c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
226c03c5b1cSMartin Matuska }
227c03c5b1cSMartin Matuska
228c03c5b1cSMartin Matuska /* [0-3] symbols remaining */
229c03c5b1cSMartin Matuska if (MEM_32bits())
230c03c5b1cSMartin Matuska while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
231c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
232c03c5b1cSMartin Matuska
233c03c5b1cSMartin Matuska /* no more data to retrieve from bitstream, no need to reload */
234c03c5b1cSMartin Matuska while (p < pEnd)
235c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
236c03c5b1cSMartin Matuska
237c03c5b1cSMartin Matuska return pEnd-pStart;
238c03c5b1cSMartin Matuska }
239c03c5b1cSMartin Matuska
240c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)241c03c5b1cSMartin Matuska HUF_decompress1X1_usingDTable_internal_body(
242c03c5b1cSMartin Matuska void* dst, size_t dstSize,
243c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
244c03c5b1cSMartin Matuska const HUF_DTable* DTable)
245c03c5b1cSMartin Matuska {
246c03c5b1cSMartin Matuska BYTE* op = (BYTE*)dst;
247c03c5b1cSMartin Matuska BYTE* const oend = op + dstSize;
248c03c5b1cSMartin Matuska const void* dtPtr = DTable + 1;
249c03c5b1cSMartin Matuska const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
250c03c5b1cSMartin Matuska BIT_DStream_t bitD;
251c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
252c03c5b1cSMartin Matuska U32 const dtLog = dtd.tableLog;
253c03c5b1cSMartin Matuska
254c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
255c03c5b1cSMartin Matuska
256c03c5b1cSMartin Matuska HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
257c03c5b1cSMartin Matuska
258c03c5b1cSMartin Matuska if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
259c03c5b1cSMartin Matuska
260c03c5b1cSMartin Matuska return dstSize;
261c03c5b1cSMartin Matuska }
262c03c5b1cSMartin Matuska
263c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)264c03c5b1cSMartin Matuska HUF_decompress4X1_usingDTable_internal_body(
265c03c5b1cSMartin Matuska void* dst, size_t dstSize,
266c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
267c03c5b1cSMartin Matuska const HUF_DTable* DTable)
268c03c5b1cSMartin Matuska {
269c03c5b1cSMartin Matuska /* Check */
270c03c5b1cSMartin Matuska if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
271c03c5b1cSMartin Matuska
272c03c5b1cSMartin Matuska { const BYTE* const istart = (const BYTE*) cSrc;
273c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*) dst;
274c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
275c03c5b1cSMartin Matuska BYTE* const olimit = oend - 3;
276c03c5b1cSMartin Matuska const void* const dtPtr = DTable + 1;
277c03c5b1cSMartin Matuska const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
278c03c5b1cSMartin Matuska
279c03c5b1cSMartin Matuska /* Init */
280c03c5b1cSMartin Matuska BIT_DStream_t bitD1;
281c03c5b1cSMartin Matuska BIT_DStream_t bitD2;
282c03c5b1cSMartin Matuska BIT_DStream_t bitD3;
283c03c5b1cSMartin Matuska BIT_DStream_t bitD4;
284c03c5b1cSMartin Matuska size_t const length1 = MEM_readLE16(istart);
285c03c5b1cSMartin Matuska size_t const length2 = MEM_readLE16(istart+2);
286c03c5b1cSMartin Matuska size_t const length3 = MEM_readLE16(istart+4);
287c03c5b1cSMartin Matuska size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
288c03c5b1cSMartin Matuska const BYTE* const istart1 = istart + 6; /* jumpTable */
289c03c5b1cSMartin Matuska const BYTE* const istart2 = istart1 + length1;
290c03c5b1cSMartin Matuska const BYTE* const istart3 = istart2 + length2;
291c03c5b1cSMartin Matuska const BYTE* const istart4 = istart3 + length3;
292c03c5b1cSMartin Matuska const size_t segmentSize = (dstSize+3) / 4;
293c03c5b1cSMartin Matuska BYTE* const opStart2 = ostart + segmentSize;
294c03c5b1cSMartin Matuska BYTE* const opStart3 = opStart2 + segmentSize;
295c03c5b1cSMartin Matuska BYTE* const opStart4 = opStart3 + segmentSize;
296c03c5b1cSMartin Matuska BYTE* op1 = ostart;
297c03c5b1cSMartin Matuska BYTE* op2 = opStart2;
298c03c5b1cSMartin Matuska BYTE* op3 = opStart3;
299c03c5b1cSMartin Matuska BYTE* op4 = opStart4;
300c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
301c03c5b1cSMartin Matuska U32 const dtLog = dtd.tableLog;
302c03c5b1cSMartin Matuska U32 endSignal = 1;
303c03c5b1cSMartin Matuska
304c03c5b1cSMartin Matuska if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
305c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
306c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
307c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
308c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
309c03c5b1cSMartin Matuska
310c03c5b1cSMartin Matuska /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
311c03c5b1cSMartin Matuska for ( ; (endSignal) & (op4 < olimit) ; ) {
312c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
313c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
314c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
315c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
316c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
317c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
318c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
319c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
320c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
325c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
326c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
327c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
328c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
329c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
330c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
331c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
332c03c5b1cSMartin Matuska }
333c03c5b1cSMartin Matuska
334c03c5b1cSMartin Matuska /* check corruption */
335c03c5b1cSMartin Matuska /* note : should not be necessary : op# advance in lock step, and we control op4.
336c03c5b1cSMartin Matuska * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
337c03c5b1cSMartin Matuska if (op1 > opStart2) return ERROR(corruption_detected);
338c03c5b1cSMartin Matuska if (op2 > opStart3) return ERROR(corruption_detected);
339c03c5b1cSMartin Matuska if (op3 > opStart4) return ERROR(corruption_detected);
340c03c5b1cSMartin Matuska /* note : op4 supposed already verified within main loop */
341c03c5b1cSMartin Matuska
342c03c5b1cSMartin Matuska /* finish bitStreams one by one */
343c03c5b1cSMartin Matuska HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
344c03c5b1cSMartin Matuska HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
345c03c5b1cSMartin Matuska HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
346c03c5b1cSMartin Matuska HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
347c03c5b1cSMartin Matuska
348c03c5b1cSMartin Matuska /* check */
349c03c5b1cSMartin Matuska { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
350c03c5b1cSMartin Matuska if (!endCheck) return ERROR(corruption_detected); }
351c03c5b1cSMartin Matuska
352c03c5b1cSMartin Matuska /* decoded size */
353c03c5b1cSMartin Matuska return dstSize;
354c03c5b1cSMartin Matuska }
355c03c5b1cSMartin Matuska }
356c03c5b1cSMartin Matuska
357c03c5b1cSMartin Matuska
358c03c5b1cSMartin Matuska typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
359c03c5b1cSMartin Matuska const void *cSrc,
360c03c5b1cSMartin Matuska size_t cSrcSize,
361c03c5b1cSMartin Matuska const HUF_DTable *DTable);
362c03c5b1cSMartin Matuska
363c03c5b1cSMartin Matuska HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)364c03c5b1cSMartin Matuska HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
365c03c5b1cSMartin Matuska
366c03c5b1cSMartin Matuska
367c03c5b1cSMartin Matuska
368c03c5b1cSMartin Matuska size_t HUF_decompress1X1_usingDTable(
369c03c5b1cSMartin Matuska void* dst, size_t dstSize,
370c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
371c03c5b1cSMartin Matuska const HUF_DTable* DTable)
372c03c5b1cSMartin Matuska {
373c03c5b1cSMartin Matuska DTableDesc dtd = HUF_getDTableDesc(DTable);
374c03c5b1cSMartin Matuska if (dtd.tableType != 0) return ERROR(GENERIC);
375c03c5b1cSMartin Matuska return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
376c03c5b1cSMartin Matuska }
377c03c5b1cSMartin Matuska
/* HUF_decompress1X1_DCtx_wksp() :
 * Reads the table description at the start of `cSrc` into `DCtx`
 * (using `workSpace` as scratch area), then decodes the rest of `cSrc`
 * as a single stream into `dst`.
 * @return : result of the internal decoder (called with bmi2 == 0),
 *           or an error code : any error from HUF_readDTableX1_wksp(), or
 *           srcSize_wrong when nothing is left after the table description. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;   /* skip the table description */

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


/* HUF_decompress1X1_DCtx() :
 * Same as HUF_decompress1X1_DCtx_wksp(), using a workspace of
 * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 allocated on the stack. */
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

HUF_decompress1X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)401c03c5b1cSMartin Matuska size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
402c03c5b1cSMartin Matuska {
403c03c5b1cSMartin Matuska HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
404c03c5b1cSMartin Matuska return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
405c03c5b1cSMartin Matuska }
406c03c5b1cSMartin Matuska
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)407c03c5b1cSMartin Matuska size_t HUF_decompress4X1_usingDTable(
408c03c5b1cSMartin Matuska void* dst, size_t dstSize,
409c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
410c03c5b1cSMartin Matuska const HUF_DTable* DTable)
411c03c5b1cSMartin Matuska {
412c03c5b1cSMartin Matuska DTableDesc dtd = HUF_getDTableDesc(DTable);
413c03c5b1cSMartin Matuska if (dtd.tableType != 0) return ERROR(GENERIC);
414c03c5b1cSMartin Matuska return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
415c03c5b1cSMartin Matuska }
416c03c5b1cSMartin Matuska
/* HUF_decompress4X1_DCtx_wksp_bmi2() :
 * Reads the table description at the start of `cSrc` into `dctx`
 * (using `workSpace` as scratch area), then decodes the rest of `cSrc`
 * as 4 streams into `dst`. `bmi2` is forwarded to the internal dispatcher.
 * @return : result of the internal decoder,
 *           or an error code : any error from HUF_readDTableX1_wksp(), or
 *           srcSize_wrong when nothing is left after the table description. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
                                                workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;   /* skip the table description */

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

/* HUF_decompress4X1_DCtx_wksp() :
 * Same as HUF_decompress4X1_DCtx_wksp_bmi2() with bmi2 == 0. */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}


/* HUF_decompress4X1_DCtx() :
 * Same as HUF_decompress4X1_DCtx_wksp(), using a workspace of
 * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 allocated on the stack. */
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}
HUF_decompress4X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)446c03c5b1cSMartin Matuska size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
447c03c5b1cSMartin Matuska {
448c03c5b1cSMartin Matuska HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
449c03c5b1cSMartin Matuska return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
450c03c5b1cSMartin Matuska }
451c03c5b1cSMartin Matuska
452c03c5b1cSMartin Matuska #endif /* HUF_FORCE_DECOMPRESS_X2 */
453c03c5b1cSMartin Matuska
454c03c5b1cSMartin Matuska
455c03c5b1cSMartin Matuska #ifndef HUF_FORCE_DECOMPRESS_X1
456c03c5b1cSMartin Matuska
457c03c5b1cSMartin Matuska /* *************************/
458c03c5b1cSMartin Matuska /* double-symbols decoding */
459c03c5b1cSMartin Matuska /* *************************/
460c03c5b1cSMartin Matuska
461c03c5b1cSMartin Matuska typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
462c03c5b1cSMartin Matuska typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
463c03c5b1cSMartin Matuska typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
464c03c5b1cSMartin Matuska typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
465c03c5b1cSMartin Matuska
466c03c5b1cSMartin Matuska
467c03c5b1cSMartin Matuska /* HUF_fillDTableX2Level2() :
468c03c5b1cSMartin Matuska * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX2Level2(HUF_DEltX2 * DTable,U32 sizeLog,const U32 consumed,const U32 * rankValOrigin,const int minWeight,const sortedSymbol_t * sortedSymbols,const U32 sortedListSize,U32 nbBitsBaseline,U16 baseSeq)469c03c5b1cSMartin Matuska static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
470c03c5b1cSMartin Matuska const U32* rankValOrigin, const int minWeight,
471c03c5b1cSMartin Matuska const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
472c03c5b1cSMartin Matuska U32 nbBitsBaseline, U16 baseSeq)
473c03c5b1cSMartin Matuska {
474c03c5b1cSMartin Matuska HUF_DEltX2 DElt;
475c03c5b1cSMartin Matuska U32 rankVal[HUF_TABLELOG_MAX + 1];
476c03c5b1cSMartin Matuska
477c03c5b1cSMartin Matuska /* get pre-calculated rankVal */
478c03c5b1cSMartin Matuska memcpy(rankVal, rankValOrigin, sizeof(rankVal));
479c03c5b1cSMartin Matuska
480c03c5b1cSMartin Matuska /* fill skipped values */
481c03c5b1cSMartin Matuska if (minWeight>1) {
482c03c5b1cSMartin Matuska U32 i, skipSize = rankVal[minWeight];
483c03c5b1cSMartin Matuska MEM_writeLE16(&(DElt.sequence), baseSeq);
484c03c5b1cSMartin Matuska DElt.nbBits = (BYTE)(consumed);
485c03c5b1cSMartin Matuska DElt.length = 1;
486c03c5b1cSMartin Matuska for (i = 0; i < skipSize; i++)
487c03c5b1cSMartin Matuska DTable[i] = DElt;
488c03c5b1cSMartin Matuska }
489c03c5b1cSMartin Matuska
490c03c5b1cSMartin Matuska /* fill DTable */
491c03c5b1cSMartin Matuska { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
492c03c5b1cSMartin Matuska const U32 symbol = sortedSymbols[s].symbol;
493c03c5b1cSMartin Matuska const U32 weight = sortedSymbols[s].weight;
494c03c5b1cSMartin Matuska const U32 nbBits = nbBitsBaseline - weight;
495c03c5b1cSMartin Matuska const U32 length = 1 << (sizeLog-nbBits);
496c03c5b1cSMartin Matuska const U32 start = rankVal[weight];
497c03c5b1cSMartin Matuska U32 i = start;
498c03c5b1cSMartin Matuska const U32 end = start + length;
499c03c5b1cSMartin Matuska
500c03c5b1cSMartin Matuska MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
501c03c5b1cSMartin Matuska DElt.nbBits = (BYTE)(nbBits + consumed);
502c03c5b1cSMartin Matuska DElt.length = 2;
503c03c5b1cSMartin Matuska do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
504c03c5b1cSMartin Matuska
505c03c5b1cSMartin Matuska rankVal[weight] += length;
506c03c5b1cSMartin Matuska } }
507c03c5b1cSMartin Matuska }
508c03c5b1cSMartin Matuska
509c03c5b1cSMartin Matuska
/* HUF_fillDTableX2() :
 * Fills the first-level cells of a double-symbol decoding table.
 *
 * DTable         : destination table (1 << targetLog cells are addressed).
 * targetLog      : log2 of the table size.
 * sortedList     : symbols ordered by increasing weight, sortedListSize entries.
 * rankStart      : rankStart[w] is used as the position in sortedList from which
 *                  candidate second symbols of weight >= w are taken.
 * rankValOrigin  : per-"consumed bits" rows of starting cell positions, indexed by weight;
 *                  row 0 seeds the local cursor array below.
 * maxWeight      : largest weight present.
 * nbBitsBaseline : a symbol of weight w uses (nbBitsBaseline - w) bits.
 *
 * For each symbol, either the cells are filled with a single-symbol entry,
 * or, when enough bits remain for a second symbol, the sub-range is delegated
 * to HUF_fillDTableX2Level2(). */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                             const sortedSymbol_t* sortedList, const U32 sortedListSize,
                             const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                             const U32 nbBitsBaseline)
{
    const U32 minBits  = nbBitsBaseline - maxWeight;    /* bit length of the shortest code */
    const int scaleLog = nbBitsBaseline - targetLog;    /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    U32 nextCell[HUF_TABLELOG_MAX + 1];                 /* per-weight write cursor into DTable */
    U32 idx;

    memcpy(nextCell, rankValOrigin, sizeof(nextCell));

    for (idx = 0; idx < sortedListSize; idx++) {
        const U16 firstSymbol = sortedList[idx].symbol;
        const U32 w           = sortedList[idx].weight;
        const U32 codeBits    = nbBitsBaseline - w;
        const U32 spareBits   = targetLog - codeBits;   /* bits left in the cell index after this code */
        const U32 span        = 1 << spareBits;         /* number of cells owned by this symbol */
        const U32 first       = nextCell[w];

        if (spareBits < minBits) {
            /* no room for a second symbol : every cell decodes exactly one symbol */
            HUF_DEltX2 cell;
            U32 const last = first + span;
            U32 pos;
            MEM_writeLE16(&(cell.sequence), firstSymbol);
            cell.nbBits = (BYTE)(codeBits);
            cell.length = 1;
            for (pos = first; pos < last; pos++) DTable[pos] = cell;
        } else {
            /* enough room for a second symbol : fill the sub-table for this prefix */
            int lowestWeight = codeBits + scaleLog;
            U32 skip;
            if (lowestWeight < 1) lowestWeight = 1;
            skip = rankStart[lowestWeight];
            HUF_fillDTableX2Level2(DTable+first, spareBits, codeBits,
                                   rankValOrigin[codeBits], lowestWeight,
                                   sortedList+skip, sortedListSize-skip,
                                   nbBitsBaseline, firstSymbol);
        }

        nextCell[w] += span;
    }
}
551c03c5b1cSMartin Matuska
/* HUF_readDTableX2_wksp() :
 * Reads a Huffman table description from `src` and builds the matching
 * double-symbol (X2) decoding table inside `DTable`.
 *
 * DTable    : its first cell is a DTableDesc header (maxTableLog is read from it,
 *             tableLog/tableType are written back); decoding cells start at DTable+1.
 * src       : serialized table description, srcSize bytes available.
 * workSpace : scratch memory, accessed as U32 (caller must provide suitable alignment).
 * wkspSize  : size of workSpace in bytes.
 *
 * @return : the value returned by HUF_readStats() (callers advance their input
 *           pointer by this amount), or an error code testable with HUF_isError().
 *
 * Note : the workspace size is validated *before* any pointer into it is formed,
 *        so a NULL or undersized workspace is rejected without performing
 *        out-of-bounds pointer arithmetic. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                             const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    /* Workspace layout, as offsets counted in U32 units :
     *   rankVal | rankStats | rankStart0 | sortedSymbol | weightList */
    size_t const rankStatsOffset32    = (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    size_t const rankStart0Offset32   = rankStatsOffset32 + (HUF_TABLELOG_MAX + 1);
    size_t const sortedSymbolOffset32 = rankStart0Offset32 + (HUF_TABLELOG_MAX + 2);
    size_t const weightListOffset32   = sortedSymbolOffset32
                                      + (HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2);
    size_t const spaceUsed32          = weightListOffset32
                                      + (HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2);

    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    U32* rankStart;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;

    /* reject undersized workspace before carving it up */
    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankVal      = (rankValCol_t *)workSpace;
    rankStats    = (U32 *)workSpace + rankStatsOffset32;
    rankStart0   = (U32 *)workSpace + rankStart0Offset32;
    sortedSymbol = (sortedSymbol_t *)workSpace + (sortedSymbolOffset32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    weightList   = (BYTE *)((U32 *)workSpace + weightListOffset32);

    /* rankStart is rankStart0 shifted by one cell, so that after the sort below
     * rankStart0[w] holds the position where weight w begins */
    rankStart = rankStart0 + 1;
    /* rankStats and rankStart0 are contiguous : clear both in one call */
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* weightList is intentionally not cleared : HUF_readStats() fills the entries that are read below */

    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list */
        sizeOfSort = nextRankStart;     /* number of symbols with a non-zero weight */
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;   /* next free slot for this weight */
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = rankVal[0];
        /* row 0 : first table cell of each weight, at full table scale */
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        /* row `consumed` : same positions, scaled down by the bits already consumed */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                     sortedSymbol, sizeOfSort,
                     rankStart0, rankVal, maxW,
                     tableLog+1);

    /* publish the table description into the DTable header */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
651c03c5b1cSMartin Matuska
/* HUF_readDTableX2() :
 * Convenience wrapper around HUF_readDTableX2_wksp() that provides the
 * scratch buffer from the stack.
 * @return : whatever HUF_readDTableX2_wksp() returns. */
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_readDTableX2_wksp(DTable, src, srcSize,
                                                scratch, sizeof(scratch));
    return result;
}
658c03c5b1cSMartin Matuska
659c03c5b1cSMartin Matuska
660c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)661c03c5b1cSMartin Matuska HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
662c03c5b1cSMartin Matuska {
663c03c5b1cSMartin Matuska size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
664c03c5b1cSMartin Matuska memcpy(op, dt+val, 2);
665c03c5b1cSMartin Matuska BIT_skipBits(DStream, dt[val].nbBits);
666c03c5b1cSMartin Matuska return dt[val].length;
667c03c5b1cSMartin Matuska }
668c03c5b1cSMartin Matuska
669c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)670c03c5b1cSMartin Matuska HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
671c03c5b1cSMartin Matuska {
672c03c5b1cSMartin Matuska size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
673c03c5b1cSMartin Matuska memcpy(op, dt+val, 1);
674c03c5b1cSMartin Matuska if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
675c03c5b1cSMartin Matuska else {
676c03c5b1cSMartin Matuska if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
677c03c5b1cSMartin Matuska BIT_skipBits(DStream, dt[val].nbBits);
678c03c5b1cSMartin Matuska if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
679c03c5b1cSMartin Matuska /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
680c03c5b1cSMartin Matuska DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
681c03c5b1cSMartin Matuska } }
682c03c5b1cSMartin Matuska return 1;
683c03c5b1cSMartin Matuska }
684c03c5b1cSMartin Matuska
/* HUF_DECODE_SYMBOLX2_{0,1,2}() :
 * Decode one table cell into `ptr` and advance `ptr` by the decoded length.
 * They expect `dt` and `dtLog` to be in scope at the point of use.
 *   _0 : always decodes.
 *   _1 : decodes only when MEM_64bits() or HUF_TABLELOG_MAX <= 12.
 *   _2 : decodes only when MEM_64bits().
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement : a bare `if` expansion would capture a following `else`. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    do { \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
    } while (0)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    do { \
        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
    } while (0)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    do { \
        if (MEM_64bits()) \
            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
    } while (0)
695c03c5b1cSMartin Matuska
696c03c5b1cSMartin Matuska HINT_INLINE size_t
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)697c03c5b1cSMartin Matuska HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
698c03c5b1cSMartin Matuska const HUF_DEltX2* const dt, const U32 dtLog)
699c03c5b1cSMartin Matuska {
700c03c5b1cSMartin Matuska BYTE* const pStart = p;
701c03c5b1cSMartin Matuska
702c03c5b1cSMartin Matuska /* up to 8 symbols at a time */
703c03c5b1cSMartin Matuska while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
704c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
705c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
706c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
707c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
708c03c5b1cSMartin Matuska }
709c03c5b1cSMartin Matuska
710c03c5b1cSMartin Matuska /* closer to end : up to 2 symbols at a time */
711c03c5b1cSMartin Matuska while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
712c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
713c03c5b1cSMartin Matuska
714c03c5b1cSMartin Matuska while (p <= pEnd-2)
715c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
716c03c5b1cSMartin Matuska
717c03c5b1cSMartin Matuska if (p < pEnd)
718c03c5b1cSMartin Matuska p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
719c03c5b1cSMartin Matuska
720c03c5b1cSMartin Matuska return p-pStart;
721c03c5b1cSMartin Matuska }
722c03c5b1cSMartin Matuska
723c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)724c03c5b1cSMartin Matuska HUF_decompress1X2_usingDTable_internal_body(
725c03c5b1cSMartin Matuska void* dst, size_t dstSize,
726c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
727c03c5b1cSMartin Matuska const HUF_DTable* DTable)
728c03c5b1cSMartin Matuska {
729c03c5b1cSMartin Matuska BIT_DStream_t bitD;
730c03c5b1cSMartin Matuska
731c03c5b1cSMartin Matuska /* Init */
732c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
733c03c5b1cSMartin Matuska
734c03c5b1cSMartin Matuska /* decode */
735c03c5b1cSMartin Matuska { BYTE* const ostart = (BYTE*) dst;
736c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
737c03c5b1cSMartin Matuska const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
738c03c5b1cSMartin Matuska const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
739c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
740c03c5b1cSMartin Matuska HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
741c03c5b1cSMartin Matuska }
742c03c5b1cSMartin Matuska
743c03c5b1cSMartin Matuska /* check */
744c03c5b1cSMartin Matuska if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
745c03c5b1cSMartin Matuska
746c03c5b1cSMartin Matuska /* decoded size */
747c03c5b1cSMartin Matuska return dstSize;
748c03c5b1cSMartin Matuska }
749c03c5b1cSMartin Matuska
750c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)751c03c5b1cSMartin Matuska HUF_decompress4X2_usingDTable_internal_body(
752c03c5b1cSMartin Matuska void* dst, size_t dstSize,
753c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
754c03c5b1cSMartin Matuska const HUF_DTable* DTable)
755c03c5b1cSMartin Matuska {
756c03c5b1cSMartin Matuska if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
757c03c5b1cSMartin Matuska
758c03c5b1cSMartin Matuska { const BYTE* const istart = (const BYTE*) cSrc;
759c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*) dst;
760c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
761c03c5b1cSMartin Matuska BYTE* const olimit = oend - (sizeof(size_t)-1);
762c03c5b1cSMartin Matuska const void* const dtPtr = DTable+1;
763c03c5b1cSMartin Matuska const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
764c03c5b1cSMartin Matuska
765c03c5b1cSMartin Matuska /* Init */
766c03c5b1cSMartin Matuska BIT_DStream_t bitD1;
767c03c5b1cSMartin Matuska BIT_DStream_t bitD2;
768c03c5b1cSMartin Matuska BIT_DStream_t bitD3;
769c03c5b1cSMartin Matuska BIT_DStream_t bitD4;
770c03c5b1cSMartin Matuska size_t const length1 = MEM_readLE16(istart);
771c03c5b1cSMartin Matuska size_t const length2 = MEM_readLE16(istart+2);
772c03c5b1cSMartin Matuska size_t const length3 = MEM_readLE16(istart+4);
773c03c5b1cSMartin Matuska size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
774c03c5b1cSMartin Matuska const BYTE* const istart1 = istart + 6; /* jumpTable */
775c03c5b1cSMartin Matuska const BYTE* const istart2 = istart1 + length1;
776c03c5b1cSMartin Matuska const BYTE* const istart3 = istart2 + length2;
777c03c5b1cSMartin Matuska const BYTE* const istart4 = istart3 + length3;
778c03c5b1cSMartin Matuska size_t const segmentSize = (dstSize+3) / 4;
779c03c5b1cSMartin Matuska BYTE* const opStart2 = ostart + segmentSize;
780c03c5b1cSMartin Matuska BYTE* const opStart3 = opStart2 + segmentSize;
781c03c5b1cSMartin Matuska BYTE* const opStart4 = opStart3 + segmentSize;
782c03c5b1cSMartin Matuska BYTE* op1 = ostart;
783c03c5b1cSMartin Matuska BYTE* op2 = opStart2;
784c03c5b1cSMartin Matuska BYTE* op3 = opStart3;
785c03c5b1cSMartin Matuska BYTE* op4 = opStart4;
786c03c5b1cSMartin Matuska U32 endSignal = 1;
787c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
788c03c5b1cSMartin Matuska U32 const dtLog = dtd.tableLog;
789c03c5b1cSMartin Matuska
790c03c5b1cSMartin Matuska if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
791c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
792c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
793c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
794c03c5b1cSMartin Matuska CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
795c03c5b1cSMartin Matuska
796c03c5b1cSMartin Matuska /* 16-32 symbols per loop (4-8 symbols per stream) */
797c03c5b1cSMartin Matuska for ( ; (endSignal) & (op4 < olimit); ) {
798c03c5b1cSMartin Matuska #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
799c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
800c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
801c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
802c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
803c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
804c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
805c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
806c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
807c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
808c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
809c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
811c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
812c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
813c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
814c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
816c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
817c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
818c03c5b1cSMartin Matuska endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
819c03c5b1cSMartin Matuska #else
820c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
821c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
822c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
823c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
824c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
825c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
826c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
827c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
828c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
829c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
830c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
831c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
832c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
833c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
834c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
835c03c5b1cSMartin Matuska HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
836c03c5b1cSMartin Matuska endSignal = (U32)LIKELY(
837c03c5b1cSMartin Matuska (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
838c03c5b1cSMartin Matuska & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
839c03c5b1cSMartin Matuska & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
840c03c5b1cSMartin Matuska & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
841c03c5b1cSMartin Matuska #endif
842c03c5b1cSMartin Matuska }
843c03c5b1cSMartin Matuska
844c03c5b1cSMartin Matuska /* check corruption */
845c03c5b1cSMartin Matuska if (op1 > opStart2) return ERROR(corruption_detected);
846c03c5b1cSMartin Matuska if (op2 > opStart3) return ERROR(corruption_detected);
847c03c5b1cSMartin Matuska if (op3 > opStart4) return ERROR(corruption_detected);
848c03c5b1cSMartin Matuska /* note : op4 already verified within main loop */
849c03c5b1cSMartin Matuska
850c03c5b1cSMartin Matuska /* finish bitStreams one by one */
851c03c5b1cSMartin Matuska HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
852c03c5b1cSMartin Matuska HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
853c03c5b1cSMartin Matuska HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
854c03c5b1cSMartin Matuska HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
855c03c5b1cSMartin Matuska
856c03c5b1cSMartin Matuska /* check */
857c03c5b1cSMartin Matuska { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
858c03c5b1cSMartin Matuska if (!endCheck) return ERROR(corruption_detected); }
859c03c5b1cSMartin Matuska
860c03c5b1cSMartin Matuska /* decoded size */
861c03c5b1cSMartin Matuska return dstSize;
862c03c5b1cSMartin Matuska }
863c03c5b1cSMartin Matuska }
864c03c5b1cSMartin Matuska
865c03c5b1cSMartin Matuska HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)866c03c5b1cSMartin Matuska HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
867c03c5b1cSMartin Matuska
868c03c5b1cSMartin Matuska size_t HUF_decompress1X2_usingDTable(
869c03c5b1cSMartin Matuska void* dst, size_t dstSize,
870c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
871c03c5b1cSMartin Matuska const HUF_DTable* DTable)
872c03c5b1cSMartin Matuska {
873c03c5b1cSMartin Matuska DTableDesc dtd = HUF_getDTableDesc(DTable);
874c03c5b1cSMartin Matuska if (dtd.tableType != 1) return ERROR(GENERIC);
875c03c5b1cSMartin Matuska return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
876c03c5b1cSMartin Matuska }
877c03c5b1cSMartin Matuska
/* HUF_decompress1X2_DCtx_wksp() :
 * Reads the table description at the start of cSrc into DCtx (using the
 * caller-provided workspace), then decodes the remaining bytes as a single stream.
 * @return : an error from HUF_readDTableX2_wksp(),
 *           srcSize_wrong when the table description uses up the whole input,
 *           otherwise the result of HUF_decompress1X2_usingDTable_internal(). */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    size_t const headerSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                                    workSpace, wkspSize);
    if (HUF_isError(headerSize)) return headerSize;
    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
        size_t const payloadSize = cSrcSize - headerSize;
        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, payload, payloadSize, DCtx, /* bmi2 */ 0);
    }
}
892c03c5b1cSMartin Matuska
893c03c5b1cSMartin Matuska
/* HUF_decompress1X2_DCtx() :
 * Same as HUF_decompress1X2_DCtx_wksp(), with the workspace taken from the stack.
 * @return : whatever HUF_decompress1X2_DCtx_wksp() returns. */
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                                      scratch, sizeof(scratch));
    return result;
}
901c03c5b1cSMartin Matuska
HUF_decompress1X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)902c03c5b1cSMartin Matuska size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
903c03c5b1cSMartin Matuska {
904c03c5b1cSMartin Matuska HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
905c03c5b1cSMartin Matuska return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
906c03c5b1cSMartin Matuska }
907c03c5b1cSMartin Matuska
HUF_decompress4X2_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)908c03c5b1cSMartin Matuska size_t HUF_decompress4X2_usingDTable(
909c03c5b1cSMartin Matuska void* dst, size_t dstSize,
910c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
911c03c5b1cSMartin Matuska const HUF_DTable* DTable)
912c03c5b1cSMartin Matuska {
913c03c5b1cSMartin Matuska DTableDesc dtd = HUF_getDTableDesc(DTable);
914c03c5b1cSMartin Matuska if (dtd.tableType != 1) return ERROR(GENERIC);
915c03c5b1cSMartin Matuska return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
916c03c5b1cSMartin Matuska }
917c03c5b1cSMartin Matuska
/* HUF_decompress4X2_DCtx_wksp_bmi2() :
 * Reads the table description at the start of cSrc into dctx (using the
 * caller-provided workspace), then decodes the remaining bytes as 4 streams.
 * `bmi2` is forwarded untouched to HUF_decompress4X2_usingDTable_internal().
 * @return : an error from HUF_readDTableX2_wksp(),
 *           srcSize_wrong when the table description uses up the whole input,
 *           otherwise the result of HUF_decompress4X2_usingDTable_internal(). */
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    size_t const headerSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                                    workSpace, wkspSize);
    if (HUF_isError(headerSize)) return headerSize;
    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
        size_t const payloadSize = cSrcSize - headerSize;
        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
    }
}
932c03c5b1cSMartin Matuska
/* HUF_decompress4X2_DCtx_wksp() :
 * Public entry point for HUF_decompress4X2_DCtx_wksp_bmi2(), with bmi2 disabled.
 * @return : whatever HUF_decompress4X2_DCtx_wksp_bmi2() returns. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    int const noBmi2 = 0;
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize,
                                            workSpace, wkspSize, noBmi2);
}
939c03c5b1cSMartin Matuska
940c03c5b1cSMartin Matuska
/* HUF_decompress4X2_DCtx() :
 * Same as HUF_decompress4X2_DCtx_wksp(), with the workspace taken from the stack.
 * @return : whatever HUF_decompress4X2_DCtx_wksp() returns. */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                                      scratch, sizeof(scratch));
    return result;
}
948c03c5b1cSMartin Matuska
HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)949c03c5b1cSMartin Matuska size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
950c03c5b1cSMartin Matuska {
951c03c5b1cSMartin Matuska HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
952c03c5b1cSMartin Matuska return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
953c03c5b1cSMartin Matuska }
954c03c5b1cSMartin Matuska
955c03c5b1cSMartin Matuska #endif /* HUF_FORCE_DECOMPRESS_X1 */
956c03c5b1cSMartin Matuska
957c03c5b1cSMartin Matuska
958c03c5b1cSMartin Matuska /* ***********************************/
959c03c5b1cSMartin Matuska /* Universal decompression selectors */
960c03c5b1cSMartin Matuska /* ***********************************/
961c03c5b1cSMartin Matuska
HUF_decompress1X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)962c03c5b1cSMartin Matuska size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
963c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
964c03c5b1cSMartin Matuska const HUF_DTable* DTable)
965c03c5b1cSMartin Matuska {
966c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
967c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
968c03c5b1cSMartin Matuska (void)dtd;
969c03c5b1cSMartin Matuska assert(dtd.tableType == 0);
970c03c5b1cSMartin Matuska return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
971c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
972c03c5b1cSMartin Matuska (void)dtd;
973c03c5b1cSMartin Matuska assert(dtd.tableType == 1);
974c03c5b1cSMartin Matuska return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
975c03c5b1cSMartin Matuska #else
976c03c5b1cSMartin Matuska return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
977c03c5b1cSMartin Matuska HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
978c03c5b1cSMartin Matuska #endif
979c03c5b1cSMartin Matuska }
980c03c5b1cSMartin Matuska
HUF_decompress4X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)981c03c5b1cSMartin Matuska size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
982c03c5b1cSMartin Matuska const void* cSrc, size_t cSrcSize,
983c03c5b1cSMartin Matuska const HUF_DTable* DTable)
984c03c5b1cSMartin Matuska {
985c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
986c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
987c03c5b1cSMartin Matuska (void)dtd;
988c03c5b1cSMartin Matuska assert(dtd.tableType == 0);
989c03c5b1cSMartin Matuska return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
990c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
991c03c5b1cSMartin Matuska (void)dtd;
992c03c5b1cSMartin Matuska assert(dtd.tableType == 1);
993c03c5b1cSMartin Matuska return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
994c03c5b1cSMartin Matuska #else
995c03c5b1cSMartin Matuska return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
996c03c5b1cSMartin Matuska HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
997c03c5b1cSMartin Matuska #endif
998c03c5b1cSMartin Matuska }
999c03c5b1cSMartin Matuska
1000c03c5b1cSMartin Matuska
1001c03c5b1cSMartin Matuska #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1002c03c5b1cSMartin Matuska typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1003c03c5b1cSMartin Matuska static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1004c03c5b1cSMartin Matuska {
1005c03c5b1cSMartin Matuska /* single, double, quad */
1006c03c5b1cSMartin Matuska {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1007c03c5b1cSMartin Matuska {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1008c03c5b1cSMartin Matuska {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1009c03c5b1cSMartin Matuska {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1010c03c5b1cSMartin Matuska {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1011c03c5b1cSMartin Matuska {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1012c03c5b1cSMartin Matuska {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1013c03c5b1cSMartin Matuska {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1014c03c5b1cSMartin Matuska {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1015c03c5b1cSMartin Matuska {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1016c03c5b1cSMartin Matuska {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1017c03c5b1cSMartin Matuska {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1018c03c5b1cSMartin Matuska {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1019c03c5b1cSMartin Matuska {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1020c03c5b1cSMartin Matuska {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1021c03c5b1cSMartin Matuska {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1022c03c5b1cSMartin Matuska };
1023c03c5b1cSMartin Matuska #endif
1024c03c5b1cSMartin Matuska
1025c03c5b1cSMartin Matuska /** HUF_selectDecoder() :
1026c03c5b1cSMartin Matuska * Tells which decoder is likely to decode faster,
1027c03c5b1cSMartin Matuska * based on a set of pre-computed metrics.
1028c03c5b1cSMartin Matuska * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1029c03c5b1cSMartin Matuska * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1030c03c5b1cSMartin Matuska U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1031c03c5b1cSMartin Matuska {
1032c03c5b1cSMartin Matuska assert(dstSize > 0);
1033c03c5b1cSMartin Matuska assert(dstSize <= 128*1024);
1034c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
1035c03c5b1cSMartin Matuska (void)dstSize;
1036c03c5b1cSMartin Matuska (void)cSrcSize;
1037c03c5b1cSMartin Matuska return 0;
1038c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
1039c03c5b1cSMartin Matuska (void)dstSize;
1040c03c5b1cSMartin Matuska (void)cSrcSize;
1041c03c5b1cSMartin Matuska return 1;
1042c03c5b1cSMartin Matuska #else
1043c03c5b1cSMartin Matuska /* decoder timing evaluation */
1044c03c5b1cSMartin Matuska { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1045c03c5b1cSMartin Matuska U32 const D256 = (U32)(dstSize >> 8);
1046c03c5b1cSMartin Matuska U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1047c03c5b1cSMartin Matuska U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1048c03c5b1cSMartin Matuska DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1049c03c5b1cSMartin Matuska return DTime1 < DTime0;
1050c03c5b1cSMartin Matuska }
1051c03c5b1cSMartin Matuska #endif
1052c03c5b1cSMartin Matuska }
1053c03c5b1cSMartin Matuska
1054c03c5b1cSMartin Matuska
1055c03c5b1cSMartin Matuska typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1056c03c5b1cSMartin Matuska
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1057c03c5b1cSMartin Matuska size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1058c03c5b1cSMartin Matuska {
1059c03c5b1cSMartin Matuska #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1060c03c5b1cSMartin Matuska static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1061c03c5b1cSMartin Matuska #endif
1062c03c5b1cSMartin Matuska
1063c03c5b1cSMartin Matuska /* validation checks */
1064c03c5b1cSMartin Matuska if (dstSize == 0) return ERROR(dstSize_tooSmall);
1065c03c5b1cSMartin Matuska if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1066c03c5b1cSMartin Matuska if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1067c03c5b1cSMartin Matuska if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1068c03c5b1cSMartin Matuska
1069c03c5b1cSMartin Matuska { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1070c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
1071c03c5b1cSMartin Matuska (void)algoNb;
1072c03c5b1cSMartin Matuska assert(algoNb == 0);
1073c03c5b1cSMartin Matuska return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1074c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
1075c03c5b1cSMartin Matuska (void)algoNb;
1076c03c5b1cSMartin Matuska assert(algoNb == 1);
1077c03c5b1cSMartin Matuska return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1078c03c5b1cSMartin Matuska #else
1079c03c5b1cSMartin Matuska return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1080c03c5b1cSMartin Matuska #endif
1081c03c5b1cSMartin Matuska }
1082c03c5b1cSMartin Matuska }
1083c03c5b1cSMartin Matuska
/* HUF_decompress4X_DCtx() :
 * Same shortcut handling as HUF_decompress() (empty destination, oversized
 * input, uncompressed copy, single-byte RLE), then forwards to the
 * `_DCtx` flavor of the decoder chosen by HUF_selectDecoder(), passing `dctx`.
 * @return : dstSize for the copy / RLE shortcuts, an error code for the
 *           rejected cases, else the value returned by the chosen decoder. */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize > dstSize) {   /* invalid */
        return ERROR(corruption_detected);
    }
    if (cSrcSize == dstSize) {   /* not compressed */
        memcpy(dst, cSrc, dstSize);
        return dstSize;
    }
    if (cSrcSize == 1) {   /* RLE */
        memset(dst, *(const BYTE*)cSrc, dstSize);
        return dstSize;
    }

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
        if (algoNb) {
            return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
        }
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#endif
    }
}
1107c03c5b1cSMartin Matuska
/* HUF_decompress4X_hufOnly() :
 * Convenience wrapper around HUF_decompress4X_hufOnly_wksp() :
 * supplies a workspace of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 elements
 * declared as a local array, so the caller does not have to provide one.
 * @return : the value returned by HUF_decompress4X_hufOnly_wksp(). */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 localWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const localWkspSize = sizeof(localWksp);
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         localWksp, localWkspSize);
}
1114c03c5b1cSMartin Matuska
1115c03c5b1cSMartin Matuska
/* HUF_decompress4X_hufOnly_wksp() :
 * Rejects an empty destination (dstSize_tooSmall) and an empty source
 * (corruption_detected), then forwards to the `_DCtx_wksp` flavor of the
 * decoder chosen by HUF_selectDecoder().
 * Unlike HUF_decompress4X_DCtx(), no uncompressed-copy or RLE shortcut is
 * taken here.
 * `workSpace` / `wkspSize` are passed through untouched to the decoder.
 * @return : an error code for the rejected cases, else the value returned
 *           by the chosen decoder. */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize == 0) {
        return ERROR(corruption_detected);
    }

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        if (algoNb) {
            return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                               cSrcSize, workSpace, wkspSize);
        }
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#endif
    }
}
1141c03c5b1cSMartin Matuska
/* HUF_decompress1X_DCtx_wksp() :
 * `1X` counterpart of the 4X dispatchers, using a caller-provided workspace.
 * Shortcuts handled before any decoder runs :
 *  - dstSize == 0         : error dstSize_tooSmall
 *  - cSrcSize >  dstSize  : error corruption_detected
 *  - cSrcSize == dstSize  : input is copied as-is (not compressed)
 *  - cSrcSize == 1        : `dst` is filled with that single byte (RLE)
 * Otherwise HUF_selectDecoder() picks between the 1X1 and 1X2
 * `_DCtx_wksp` decoders.
 * @return : dstSize for the copy / RLE shortcuts, an error code for the
 *           rejected cases, else the value returned by the chosen decoder. */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize > dstSize) {   /* invalid */
        return ERROR(corruption_detected);
    }
    if (cSrcSize == dstSize) {   /* not compressed */
        memcpy(dst, cSrc, dstSize);
        return dstSize;
    }
    if (cSrcSize == 1) {   /* RLE */
        memset(dst, *(const BYTE*)cSrc, dstSize);
        return dstSize;
    }

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#else
        if (algoNb) {
            return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                               cSrcSize, workSpace, wkspSize);
        }
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#endif
    }
}
1171c03c5b1cSMartin Matuska
/* HUF_decompress1X_DCtx() :
 * Convenience wrapper around HUF_decompress1X_DCtx_wksp() :
 * supplies a workspace of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 elements
 * declared as a local array, so the caller does not have to provide one.
 * @return : the value returned by HUF_decompress1X_DCtx_wksp(). */
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 localWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const localWkspSize = sizeof(localWksp);
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      localWksp, localWkspSize);
}
1179c03c5b1cSMartin Matuska
1180c03c5b1cSMartin Matuska
HUF_decompress1X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1181c03c5b1cSMartin Matuska size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1182c03c5b1cSMartin Matuska {
1183c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
1184c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
1185c03c5b1cSMartin Matuska (void)dtd;
1186c03c5b1cSMartin Matuska assert(dtd.tableType == 0);
1187c03c5b1cSMartin Matuska return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1188c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
1189c03c5b1cSMartin Matuska (void)dtd;
1190c03c5b1cSMartin Matuska assert(dtd.tableType == 1);
1191c03c5b1cSMartin Matuska return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1192c03c5b1cSMartin Matuska #else
1193c03c5b1cSMartin Matuska return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1194c03c5b1cSMartin Matuska HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1195c03c5b1cSMartin Matuska #endif
1196c03c5b1cSMartin Matuska }
1197c03c5b1cSMartin Matuska
1198c03c5b1cSMartin Matuska #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1199c03c5b1cSMartin Matuska size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1200c03c5b1cSMartin Matuska {
1201c03c5b1cSMartin Matuska const BYTE* ip = (const BYTE*) cSrc;
1202c03c5b1cSMartin Matuska
1203c03c5b1cSMartin Matuska size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1204c03c5b1cSMartin Matuska if (HUF_isError(hSize)) return hSize;
1205c03c5b1cSMartin Matuska if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1206c03c5b1cSMartin Matuska ip += hSize; cSrcSize -= hSize;
1207c03c5b1cSMartin Matuska
1208c03c5b1cSMartin Matuska return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1209c03c5b1cSMartin Matuska }
1210c03c5b1cSMartin Matuska #endif
1211c03c5b1cSMartin Matuska
HUF_decompress4X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1212c03c5b1cSMartin Matuska size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1213c03c5b1cSMartin Matuska {
1214c03c5b1cSMartin Matuska DTableDesc const dtd = HUF_getDTableDesc(DTable);
1215c03c5b1cSMartin Matuska #if defined(HUF_FORCE_DECOMPRESS_X1)
1216c03c5b1cSMartin Matuska (void)dtd;
1217c03c5b1cSMartin Matuska assert(dtd.tableType == 0);
1218c03c5b1cSMartin Matuska return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1219c03c5b1cSMartin Matuska #elif defined(HUF_FORCE_DECOMPRESS_X2)
1220c03c5b1cSMartin Matuska (void)dtd;
1221c03c5b1cSMartin Matuska assert(dtd.tableType == 1);
1222c03c5b1cSMartin Matuska return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1223c03c5b1cSMartin Matuska #else
1224c03c5b1cSMartin Matuska return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1225c03c5b1cSMartin Matuska HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1226c03c5b1cSMartin Matuska #endif
1227c03c5b1cSMartin Matuska }
1228c03c5b1cSMartin Matuska
/* HUF_decompress4X_hufOnly_wksp_bmi2() :
 * Rejects an empty destination (dstSize_tooSmall) and an empty source
 * (corruption_detected), then forwards to the `_DCtx_wksp_bmi2` flavor of
 * the decoder chosen by HUF_selectDecoder().
 * No uncompressed-copy or RLE shortcut is taken here.
 * `workSpace`, `wkspSize` and `bmi2` are passed through untouched.
 * @return : an error code for the rejected cases, else the value returned
 *           by the chosen decoder. */
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize == 0) {
        return ERROR(corruption_detected);
    }

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        if (algoNb) {
            return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
        }
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}
1250