1 /* ****************************************************************** 2 huff0 huffman decoder, 3 part of Finite State Entropy library 4 Copyright (C) 2013-present, Yann Collet. 5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions are 10 met: 11 12 * Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 * Redistributions in binary form must reproduce the above 15 copyright notice, this list of conditions and the following disclaimer 16 in the documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
   You can contact the author at :
   - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include <string.h>     /* memcpy, memset */
#include "compiler.h"
#include "bitstream.h"  /* BIT_* */
#include "fse.h"        /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "error_private.h"

/* **************************************************************
*  Macros
****************************************************************/

/* These two optional macros force the use one way or another of the two
 * Huffman decompression implementations. You can't force in both directions
 * at the same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
/* CHECK_F() : evaluates f once; on error, propagates the error code
 * by returning from the enclosing function. */
#ifndef CHECK_F
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
#endif


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* Round x up to the next multiple of a (a must be a power of 2). */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))


/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
/* HUF_DGEN(fn) : generates the public entry point `fn` from `fn##_body`.
 * When DYNAMIC_BMI2 is enabled, it also generates two specializations
 * (default and BMI2-targeted) and dispatches at runtime on the `bmi2` flag;
 * otherwise `fn` simply forwards to `fn##_body` and ignores the flag. */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif


/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
/* Header descriptor stored in the first cell of a HUF_DTable :
 * maxTableLog = capacity of the table, tableType = 0 (X1) or 1 (X2),
 * tableLog = log2 of the table actually built. */
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

/* Read the descriptor out of the first DTable cell.
 * memcpy (rather than a cast) avoids strict-aliasing issues. */
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}


#ifndef HUF_FORCE_DECOMPRESS_X2

/*-***************************/
/*  single-symbol decoding   */
/*-***************************/
/* One decode-table cell : the decoded byte and the number of bits its code consumes. */
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */

/* HUF_readDTableX1_wksp() :
 * Builds a single-symbol decoding table from the Huffman weight header at `src`.
 * `workSpace` (of `wkspSize` bytes) provides scratch for rank counters and weights;
 * it must be U32-aligned (it is carved into U32-indexed sub-arrays below).
 * @return : size of the header read from `src`, or an error code. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* table cells start after the descriptor cell */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    /* Carve workSpace into sub-arrays; fail if it is too small. */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* Decode the weight header : fills huffWeight[] per symbol and rankVal[] per weight. */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank :
     * converts rankVal[] from per-weight counts into first table index per weight.
     * A weight-n symbol occupies 2^(n-1) consecutive cells. */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;
    }   }

    /* fill DTable : each symbol replicates its cell across its code range,
     * so any dtLog-bit lookup lands on the right entry. */
    {   U32 n;
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;   /* weight 0 => length 0 (symbol absent) */
            U32 u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
                dt[u] = D;
            rankVal[w] += length;
    }   }

    return iSize;
}

/* Same as HUF_readDTableX1_wksp(), using a stack-allocated workspace. */
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

/* Decode one symbol : peek dtLog bits, look up the cell, consume the
 * actual code length recorded in the cell. */
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}

/* _0 : always decode one symbol.
 * _1 / _2 : decode only when the bit container is wide enough to hold the
 * extra symbols between reloads (compile-time constant conditions). */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
/* HUF_decodeStreamX1() :
 * Decode one bitstream into [p, pEnd) using single-symbol table `dt`.
 * Main loop emits up to 4 symbols per reload (fewer on 32-bit, gated by
 * the compile-time conditions inside the _1/_2 macros), then drains the
 * remainder. @return : number of bytes written (pEnd - pStart). */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}

/* Single-stream decode using an X1 table. Writes exactly dstSize bytes or
 * reports corruption. Inlined via HUF_DGEN into default/BMI2 variants. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* skip the descriptor cell */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* bitstream must be fully consumed, exactly : anything else is corruption */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}

/* 4-stream decode using an X1 table. Input layout : a 6-byte jump table
 * (3 little-endian U16 segment lengths; the 4th length is implied), then
 * the 4 compressed streams. Each stream decodes one quarter of dst
 * (segmentSize = ceil(dstSize/4); the last segment takes the remainder). */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 is computed by subtraction : a too-large sum wraps around
         * (size_t is unsigned), which this check catches. */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode.
         * NOTE(review) : in-loop reload results are discarded, so endSignal
         * keeps its pre-loop value and the loop is bounded by op4 alone
         * (unlike the 4X2 variant below, which refreshes endSignal each
         * iteration). Presumably intentional for speed — a stream that ends
         * early is still caught by the BIT_endOfDStream() checks after the
         * tail loops; confirm against upstream before changing. */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}


/* Function-pointer type shared by all *_usingDTable entry points. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

/* Instantiate default/BMI2 dispatch wrappers around the _body functions above. */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)



/* Public single-stream X1 entry : validates the table type first. */
size_t HUF_decompress1X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Read the table header from cSrc, then decode the rest as one stream. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header alone consumed everything */
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


/* Same, with a stack-allocated workspace. */
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

/* Fully self-contained single-stream X1 decode (table lives on the stack). */
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}

/* Public 4-stream X1 entry : validates the table type first. */
size_t HUF_decompress4X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Read the table header, then 4-stream decode; bmi2 selects the dispatch path. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
                                                workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

/* Non-BMI2 public variant. */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}


/* Same, with a stack-allocated workspace. */
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

/* Fully self-contained 4-stream X1 decode (table lives on the stack). */
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}

#endif /* HUF_FORCE_DECOMPRESS_X2 */


#ifndef HUF_FORCE_DECOMPRESS_X1

/* *************************/
/* double-symbols decoding */
/* *************************/

/* One X2 cell : up to 2 decoded bytes packed little-endian in `sequence`,
 * `length` = how many of them are valid (1 or 2), `nbBits` = bits consumed. */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];


/* HUF_fillDTableX2Level2() :
 * Fills a sub-table covering the codes that share a first symbol (`baseSeq`,
 * whose code already consumed `consumed` bits), pairing it with each possible
 * second symbol. `rankValOrigin` must be a table of at least
 * (HUF_TABLELOG_MAX + 1) U32. */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : cells whose second code would be too long get a
     * length-1 entry carrying only the first symbol. */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits   = (BYTE)(consumed);
        DElt.length   = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            /* second symbol goes in the high byte of the little-endian pair */
            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}


/* HUF_fillDTableX2() :
 * Builds the full double-symbol table : for each first symbol, either fills
 * its range directly (length-1 cells) or delegates to HUF_fillDTableX2Level2()
 * when a second symbol still fits in the remaining targetLog-nbBits bits. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}

/* HUF_readDTableX2_wksp() :
 * Builds a double-symbol decoding table from the Huffman weight header at `src`.
 * `workSpace` is carved into rank tables, the weight-sorted symbol list and the
 * raw weight list. @return : header size read from `src`, or an error code. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight (counting sort via the rankStart cursors) */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal : rankVal[consumed][w] = first table index for weight-w
     * symbols in a sub-table whose prefix already consumed `consumed` bits. */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
        }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}

/* Same as HUF_readDTableX2_wksp(), using a stack-allocated workspace. */
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX2_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}


/* Decode one X2 cell : always copies 2 bytes (cheap unconditional copy; the
 * second byte is overwritten by the next symbol when length==1) and returns
 * how many of them were actually valid. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}

/* Decode when exactly 1 output byte remains : copies a single byte, and for a
 * 2-symbol cell clamps bitsConsumed so the final end-of-stream check still
 * passes on a valid input. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}

/* _0 : always decode one cell (advances ptr by 1 or 2).
 * _1 / _2 : decode only when the bit container is wide enough (compile-time). */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* HUF_decodeStreamX2() :
 * Decode one bitstream into [p, pEnd) using double-symbol table `dt`.
 * Main loop may emit up to 8 bytes per round (4 cells x up to 2 bytes), so it
 * keeps a full bit-container margin before pEnd; tail loops then decode 2, then
 * 1 byte at a time. @return : number of bytes written. */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}

/* Single-stream decode using an X2 table. Writes exactly dstSize bytes or
 * reports corruption. Inlined via HUF_DGEN into default/BMI2 variants. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}


/* 4-stream decode using an X2 table. Same input layout as the 4X1 variant
 * (6-byte jump table + 4 streams, one per output quarter). Unlike 4X1, the
 * main loop refreshes endSignal every iteration. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 computed by subtraction : unsigned wrap-around on a bogus
         * jump table is caught here. */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);

            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}

/* Instantiate default/BMI2 dispatch wrappers around the X2 _body functions. */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)

/* Public single-stream X2 entry : validates the table type first. */
size_t HUF_decompress1X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Read the X2 table header from cSrc, then decode the rest as one stream. */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header alone consumed everything */
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


/* Same, with a stack-allocated workspace. */
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

/* Fully self-contained single-stream X2 decode (table lives on the stack). */
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}

/* Public 4-stream X2 entry : validates the table type first. */
size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Read the X2 table header, then 4-stream decode; bmi2 selects the dispatch path. */
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

/* Non-BMI2 public variant. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}


/* Same, with a stack-allocated workspace. */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

/* Fully self-contained 4-stream X2 decode (table lives on the stack). */
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}

#endif /* HUF_FORCE_DECOMPRESS_X1 */


/* ***********************************/
/* Universal decompression selectors */
/* ***********************************/

/* Dispatch on the table's recorded type (X1 vs X2), unless one decoder was
 * forced out at compile time. (Definition continues past this view.) */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 960 #else 961 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 962 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 963 #endif 964 } 965 966 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 967 const void* cSrc, size_t cSrcSize, 968 const HUF_DTable* DTable) 969 { 970 DTableDesc const dtd = HUF_getDTableDesc(DTable); 971 #if defined(HUF_FORCE_DECOMPRESS_X1) 972 (void)dtd; 973 assert(dtd.tableType == 0); 974 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 975 #elif defined(HUF_FORCE_DECOMPRESS_X2) 976 (void)dtd; 977 assert(dtd.tableType == 1); 978 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 979 #else 980 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 981 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 982 #endif 983 } 984 985 986 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 987 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; 988 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = 989 { 990 /* single, double, quad */ 991 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ 992 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ 993 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ 994 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ 995 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ 996 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ 997 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ 998 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ 999 {{ 926,128}, {1613, 75}, {2730, 75}}, 
/* Q == 8 : 50-56% */ 1000 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ 1001 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ 1002 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ 1003 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ 1004 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ 1005 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ 1006 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ 1007 }; 1008 #endif 1009 1010 /** HUF_selectDecoder() : 1011 * Tells which decoder is likely to decode faster, 1012 * based on a set of pre-computed metrics. 1013 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . 1014 * Assumption : 0 < dstSize <= 128 KB */ 1015 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) 1016 { 1017 assert(dstSize > 0); 1018 assert(dstSize <= 128*1024); 1019 #if defined(HUF_FORCE_DECOMPRESS_X1) 1020 (void)dstSize; 1021 (void)cSrcSize; 1022 return 0; 1023 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1024 (void)dstSize; 1025 (void)cSrcSize; 1026 return 1; 1027 #else 1028 /* decoder timing evaluation */ 1029 { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ 1030 U32 const D256 = (U32)(dstSize >> 8); 1031 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); 1032 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); 1033 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ 1034 return DTime1 < DTime0; 1035 } 1036 #endif 1037 } 1038 1039 1040 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); 1041 1042 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1043 { 1044 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 1045 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; 1046 #endif 1047 1048 /* validation checks */ 1049 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1050 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1051 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1052 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1053 1054 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1055 #if defined(HUF_FORCE_DECOMPRESS_X1) 1056 (void)algoNb; 1057 assert(algoNb == 0); 1058 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); 1059 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1060 (void)algoNb; 1061 assert(algoNb == 1); 1062 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); 1063 #else 1064 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); 1065 #endif 1066 } 1067 } 1068 1069 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1070 { 1071 /* validation checks */ 1072 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1073 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1074 if (cSrcSize == dstSize) { memcpy(dst, cSrc, 
dstSize); return dstSize; } /* not compressed */ 1075 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1076 1077 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1078 #if defined(HUF_FORCE_DECOMPRESS_X1) 1079 (void)algoNb; 1080 assert(algoNb == 0); 1081 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); 1082 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1083 (void)algoNb; 1084 assert(algoNb == 1); 1085 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); 1086 #else 1087 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : 1088 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; 1089 #endif 1090 } 1091 } 1092 1093 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1094 { 1095 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1096 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1097 workSpace, sizeof(workSpace)); 1098 } 1099 1100 1101 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, 1102 size_t dstSize, const void* cSrc, 1103 size_t cSrcSize, void* workSpace, 1104 size_t wkspSize) 1105 { 1106 /* validation checks */ 1107 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1108 if (cSrcSize == 0) return ERROR(corruption_detected); 1109 1110 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1111 #if defined(HUF_FORCE_DECOMPRESS_X1) 1112 (void)algoNb; 1113 assert(algoNb == 0); 1114 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1115 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1116 (void)algoNb; 1117 assert(algoNb == 1); 1118 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1119 #else 1120 return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1121 cSrcSize, workSpace, wkspSize): 1122 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1123 #endif 1124 } 1125 } 1126 1127 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1128 const void* cSrc, size_t cSrcSize, 1129 void* workSpace, size_t wkspSize) 1130 { 1131 /* validation checks */ 1132 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1133 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1134 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1135 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1136 1137 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1138 #if defined(HUF_FORCE_DECOMPRESS_X1) 1139 (void)algoNb; 1140 assert(algoNb == 0); 1141 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1142 cSrcSize, workSpace, wkspSize); 1143 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1144 (void)algoNb; 1145 assert(algoNb == 1); 1146 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1147 cSrcSize, workSpace, wkspSize); 1148 #else 1149 return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1150 cSrcSize, workSpace, wkspSize): 1151 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1152 cSrcSize, workSpace, wkspSize); 1153 #endif 1154 } 1155 } 1156 1157 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 1158 const void* cSrc, size_t cSrcSize) 1159 { 1160 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1161 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1162 workSpace, sizeof(workSpace)); 1163 } 1164 1165 1166 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1167 { 1168 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1169 #if defined(HUF_FORCE_DECOMPRESS_X1) 1170 (void)dtd; 1171 assert(dtd.tableType == 0); 1172 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1173 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1174 (void)dtd; 1175 assert(dtd.tableType == 1); 1176 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1177 #else 1178 return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1179 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1180 #endif 1181 } 1182 1183 #ifndef HUF_FORCE_DECOMPRESS_X2 1184 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1185 { 1186 const BYTE* ip = (const BYTE*) cSrc; 1187 1188 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); 1189 if (HUF_isError(hSize)) return hSize; 1190 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1191 ip += hSize; cSrcSize -= hSize; 1192 1193 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1194 } 1195 #endif 1196 1197 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1198 { 1199 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1200 #if defined(HUF_FORCE_DECOMPRESS_X1) 1201 (void)dtd; 1202 assert(dtd.tableType == 0); 1203 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1204 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1205 (void)dtd; 1206 assert(dtd.tableType == 1); 1207 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1208 #else 1209 return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1210 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1211 #endif 1212 } 1213 1214 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1215 { 1216 /* validation checks */ 1217 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1218 if (cSrcSize == 0) return ERROR(corruption_detected); 1219 1220 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1221 #if defined(HUF_FORCE_DECOMPRESS_X1) 1222 (void)algoNb; 1223 assert(algoNb == 0); 1224 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1225 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1226 (void)algoNb; 1227 assert(algoNb == 1); 1228 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1229 #else 1230 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1231 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1232 #endif 1233 } 1234 } 1235