/* ******************************************************************
   Huffman decoder, part of New Generation Entropy library
   Copyright (C) 2013-2016, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
   - Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include <string.h>     /* memcpy, memset */
#include "bitstream.h"  /* BIT_* */
#include "compiler.h"
#include "fse.h"        /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))


/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}


/*-***************************/
/*  single-symbol decoding   */
/*-***************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
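
/* Layout note :
 * a HUF_DTable is a single allocation : its first cell stores the DTableDesc
 * header (the HUF_STATIC_ASSERT below guarantees both have the same size),
 * and the decoding cells start at DTable+1.
 * For single-symbol (X2) decoding, cell i holds the symbol and bit-count
 * associated with the tableLog-bit prefix i : a symbol of weight w occupies
 * (1<<w)>>1 consecutive cells and consumes (tableLog + 1 - w) bits. */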
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;

    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;
    }   }

    /* fill DTable */
    {   U32 n;
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;
            U32 u;
            HUF_DEltX2 D;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
                dt[u] = D;
            rankVal[w] += length;
    }   }

    return iSize;
}

size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX2_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */

FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}

#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
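
/* The three macro tiers above trade decode throughput against reload frequency :
 * _0 always decodes one symbol, _1 decodes only when the bit container is
 * guaranteed to still hold enough bits (64-bit targets, or 32-bit targets when
 * HUF_TABLELOG_MAX <= 12), and _2 decodes only on 64-bit targets.
 * This lets the hot loops decode several symbols between two
 * BIT_reloadDStream() calls without reading more bits than the container holds. */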

HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    return pEnd-pStart;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;
    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);

    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
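
/* Compressed-block layout for the 4-streams variants :
 * a 6-byte jump table of three little-endian 16-bit lengths (length1..length3)
 * precedes the four bitstreams; length4 is implied by the total cSrcSize.
 * The regenerated output is split into four segments of (dstSize+3)/4 bytes,
 * decoded in parallel, one segment per stream; the fourth stream simply fills
 * whatever remains up to oend. */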
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
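
/* Double-symbols (X4) decoding cells : each HUF_DEltX4 packs a little-endian
 * 2-byte 'sequence' (one or two symbols), the total number of bits the
 * sequence consumes, and 'length', the number of output bytes it produces.
 * HUF_decodeSymbolX4() always copies 2 bytes but advances the output pointer
 * by only dt[val].length, so a 1-symbol cell speculatively overwrites the next
 * position, which gets decoded again afterwards. Near the end of the output,
 * HUF_decodeLastSymbolX4() copies a single byte to avoid writing past pEnd. */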
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}

FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol.
                 * Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}

#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

HINT_INLINE size_t
HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX4* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    if (p < pEnd)
        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);

    return p-pStart;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X4_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}


FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X4_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);

            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}


typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);
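
/* When DYNAMIC_BMI2 is enabled, the X(fn) macro below instantiates each
 * *_usingDTable_internal_body template twice : once as a plain `fn##_default`,
 * and once as `fn##_bmi2` compiled with TARGET_ATTRIBUTE("bmi2"); `fn` then
 * selects between them at run time based on the `bmi2` flag.
 * Without DYNAMIC_BMI2, `fn` simply forwards to the single body and ignores
 * the flag. */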
#if DYNAMIC_BMI2

#define X(fn)                                                       \
                                                                    \
    static size_t fn##_default(                                     \
                  void* dst,  size_t dstSize,                       \
            const void* cSrc, size_t cSrcSize,                      \
            const HUF_DTable* DTable)                               \
    {                                                               \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);     \
    }                                                               \
                                                                    \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(               \
                  void* dst,  size_t dstSize,                       \
            const void* cSrc, size_t cSrcSize,                      \
            const HUF_DTable* DTable)                               \
    {                                                               \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);     \
    }                                                               \
                                                                    \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,   \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
    {                                                               \
        if (bmi2) {                                                 \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
        }                                                           \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);  \
    }

#else

#define X(fn)                                                       \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,   \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
    {                                                               \
        (void)bmi2;                                                 \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);     \
    }

#endif

X(HUF_decompress1X2_usingDTable_internal)
X(HUF_decompress4X2_usingDTable_internal)
X(HUF_decompress1X4_usingDTable_internal)
X(HUF_decompress4X4_usingDTable_internal)

#undef X


size_t HUF_decompress1X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}

size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
                                                workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}


size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}


/* *************************/
/* double-symbols decoding */
/* *************************/
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
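
/* The double-symbols table packs two symbols per cell whenever the first
 * symbol leaves enough remaining bits for a complete second one :
 * HUF_fillDTableX4() writes single-symbol cells when no second symbol can fit,
 * and otherwise calls HUF_fillDTableX4Level2() to fill the sub-range of cells
 * sharing that first symbol with two-symbol sequences (plus single-symbol
 * fillers where the remaining bits are still too few). */
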
/* HUF_fillDTableX4Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX4 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits   = (BYTE)(consumed);
        DElt.length   = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}

typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];

static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            HUF_DEltX4 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}
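
/* HUF_readDTableX4_wksp() proceeds in four steps :
 * 1. read the Huffman weights with HUF_readStats(),
 * 2. sort symbols by weight (rankStart[] holds the start index of each weight),
 * 3. build rankVal[][] : for each number of already-consumed bits, the starting
 *    table position of each weight,
 * 4. fill the decode table through HUF_fillDTableX4().
 * All intermediate buffers are carved out of the caller-provided workSpace. */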
size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
                             size_t srcSize, void* workSpace,
                             size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;

    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX4(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}

size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX4_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X4_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}

size_t HUF_decompress4X4_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}


size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}


/* ********************************/
/* Generic decompression selector */
/* ********************************/

size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}


typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
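
/* Each algoTime entry is an empirical cost model : tableTime is the fixed cost
 * of building the decoding table, decode256Time the cost of decoding 256 bytes.
 * Q quantizes the compression ratio (cSrcSize * 16 / dstSize) into one of 16
 * buckets; the third ("quad") column is not used by HUF_selectDecoder() below. */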

/** HUF_selectDecoder() :
 *  Tells which decoder is likely to decode faster,
 *  based on a set of pre-computed metrics.
 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
 *  Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
    assert(dstSize > 0);
    assert(dstSize <= 128 KB);
    /* decoder timing evaluation */
    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
        U32 const D256 = (U32)(dstSize >> 8);
        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
        DTime1 += DTime1 >> 3;   /* advantage to algorithm using less memory, to reduce cache eviction */
        return DTime1 < DTime0;
}   }


typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);

size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };

    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
    }
}
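
/* Illustrative usage sketch (not compiled) : decompressing a block with the
 * one-call API above. `compressedBuf`, `compressedSize` and `originalSize` are
 * hypothetical caller-provided values; the regenerated size must be known and
 * supplied by the caller, since HUF_decompress() does not store it. */
#if 0
static size_t example_decompress(void* dst, size_t originalSize,
                                 const void* compressedBuf, size_t compressedSize)
{
    size_t const dSize = HUF_decompress(dst, originalSize, compressedBuf, compressedSize);
    if (HUF_isError(dSize)) return dSize;   /* pass the error code through */
    return dSize;                           /* == originalSize on success */
}
#endif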

size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
    }
}

size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
}


size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
                        HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
    }
}

size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize):
                        HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
    }
}

size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      workSpace, sizeof(workSpace));
}


size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}

size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}

size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
                        HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    }
}