/* ******************************************************************
 * huff0 huffman decoder,
 * part of Finite State Entropy library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"  /* BIT_* */
#include "../common/fse.h"        /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"

/* **************************************************************
*  Macros
****************************************************************/

/* These two optional macros force the use one way or another of the two
 * Huffman decompression implementations. You can't force in both directions
 * at the same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* HUF_ALIGN(x, a) : round x up to the next multiple of a.
 * `a` must be a power of 2 (the mask trick below relies on it). */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))


/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
/* HUF_DGEN(fn) : generates the entry point `fn` from a pre-existing
 * FORCE_INLINE_TEMPLATE implementation named fn##_body.
 * - When DYNAMIC_BMI2 is enabled, the body is instantiated twice :
 *   once as a plain build (fn##_default) and once compiled with the
 *   "bmi2" target attribute (fn##_bmi2) ; `fn` selects between them
 *   at runtime from its trailing `bmi2` argument.
 * - Otherwise a single wrapper is emitted and `bmi2` is ignored. */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif


/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
/* Descriptor stored in the first cell of every HUF_DTable :
 * maxTableLog : log2 of the table's allocated capacity ;
 * tableType   : 0 = single-symbol (X1), 1 = double-symbol (X2) ;
 * tableLog    : log2 size actually used by the current table. */
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

/* Reads the descriptor out of a HUF_DTable's header cell.
 * The copy goes through ZSTD_memcpy to avoid strict-aliasing issues. */
static DTableDesc
HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    ZSTD_memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}


#ifndef HUF_FORCE_DECOMPRESS_X2

/*-***************************/
/*  single-symbol decoding   */
/*-***************************/
/* One table cell : the decoded byte and the number of bits it consumes. */
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */

/**
 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
 * a time.
 */
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
    U64 D4;
    if (MEM_isLittleEndian()) {
        D4 = symbol + (nbBits << 8);
    } else {
        D4 = (symbol << 8) + nbBits;
    }
    /* broadcast the 16-bit cell into all four 16-bit lanes of the U64 */
    D4 *= 0x0001000100010001ULL;
    return D4;
}

/* Scratch area layout for HUF_readDTableX1_wksp_bmi2().
 * Callers pass an opaque buffer ; it must be at least sizeof this struct. */
typedef struct {
        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
} HUF_ReadDTableX1_Workspace;


/* Convenience wrapper : builds the single-symbol DTable without BMI2. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}

/* Builds the single-symbol (X1) decoding table from the serialized Huffman
 * weights found at `src`.
 * @return : size of the Huffman header read from `src`, or an error code.
 * `workSpace` must be able to hold a HUF_ReadDTableX1_Workspace. */
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* table cells start after the descriptor */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;

    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Compute symbols and rankStart given rankVal:
     *
     * rankVal already contains the number of values of each weight.
     *
     * symbols contains the symbols ordered by weight. First are the rankVal[0]
     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
     * symbols[0] is filled (but unused) to avoid a branch.
     *
     * rankStart contains the offset where each rank belongs in the DTable.
     * rankStart[0] is not filled because there are no entries in the table for
     * weight 0.
     */
    {
        int n;
        int nextRankStart = 0;
        int const unroll = 4;
        int const nLimit = (int)nbSymbols - unroll + 1;
        /* prefix sums of weight counts -> start index per weight */
        for (n=0; n<(int)tableLog+1; n++) {
            U32 const curr = nextRankStart;
            nextRankStart += wksp->rankVal[n];
            wksp->rankStart[n] = curr;
        }
        /* counting sort of symbols by weight, manually unrolled by 4 */
        for (n=0; n < nLimit; n += unroll) {
            int u;
            for (u=0; u < unroll; ++u) {
                size_t const w = wksp->huffWeight[n+u];
                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
            }
        }
        /* remainder of the unrolled loop */
        for (; n < (int)nbSymbols; ++n) {
            size_t const w = wksp->huffWeight[n];
            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
        }
    }

    /* fill DTable
     * We fill all entries of each weight in order.
     * That way length is a constant for each iteration of the outer loop.
     * We can switch based on the length to a different inner loop which is
     * optimized for that particular case.
     */
    {
        U32 w;
        int symbol=wksp->rankVal[0];   /* skip past the weight-0 symbols */
        int rankStart=0;
        for (w=1; w<tableLog+1; ++w) {
            int const symbolCount = wksp->rankVal[w];
            int const length = (1 << w) >> 1;   /* nb of table cells per symbol of this weight */
            int uStart = rankStart;
            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
            int s;
            int u;
            switch (length) {
            case 1:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart] = D;
                    uStart += 1;
                }
                break;
            case 2:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart+0] = D;
                    dt[uStart+1] = D;
                    uStart += 2;
                }
                break;
            case 4:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    uStart += 4;
                }
                break;
            case 8:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    MEM_write64(dt + uStart + 4, D4);
                    uStart += 8;
                }
                break;
            default:
                /* length is a power of 2 >= 16 here, so 16-wide strides cover it exactly */
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    for (u=0; u < length; u += 16) {
                        MEM_write64(dt + uStart + u + 0, D4);
                        MEM_write64(dt + uStart + u + 4, D4);
                        MEM_write64(dt + uStart + u + 8, D4);
                        MEM_write64(dt + uStart + u + 12, D4);
                    }
                    assert(u == length);
                    uStart += length;
                }
                break;
            }
            symbol += symbolCount;
            rankStart += symbolCount * length;
        }
    }
    return iSize;
}

/* Decodes one symbol from the bitstream using the X1 table ;
 * returns the byte and advances the stream by the symbol's bit length. */
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}

/* _0 : always decode ; _1 / _2 : decode only when the bit container is wide
 * enough to hold that many symbols between reloads (see usage below). */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* Decodes one bitstream into [p, pEnd) ; returns the number of bytes decoded. */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    /* note : bitwise `&` (not `&&`) keeps the test branch-free */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}

/* Single-stream X1 decompression body ; instantiated via HUF_DGEN below. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* the stream must be fully consumed, exactly */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}

/* Four-stream X1 decompression body : the source starts with a 6-byte jump
 * table (3 little-endian 16-bit stream sizes ; the 4th is implied), and the
 * destination is split into 4 equal segments decoded in parallel. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable*
 DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - 3;   /* main loop needs room for 4 symbols in stream 4 */
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th size is implied */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;
        U32 endSignal = 1;

        /* length4 underflowed if the three explicit sizes exceed cSrcSize - 6 */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        /* note : the 4 streams are interleaved to hide decode latency ;
         * bitwise `&` on endSignal avoids branches inside the hot loop */
        for ( ; (endSignal) & (op4 < olimit) ; ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}


/* Function-pointer type matching the fn##_default / fn##_bmi2 variants. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)



/* Public single-stream X1 entry ; rejects a DTable built for the X2 decoder. */
size_t HUF_decompress1X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Reads the table header from cSrc, builds the X1 DTable in DCtx,
 * then decompresses the remaining payload as a single stream. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


/* Public 4-stream X1 entry ; rejects a DTable built for the X2 decoder. */
size_t HUF_decompress4X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* Header read + table build + 4-stream decode, with optional BMI2 dispatch. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}


#endif /* HUF_FORCE_DECOMPRESS_X2 */


#ifndef HUF_FORCE_DECOMPRESS_X1

/* *************************/
/* double-symbols decoding */
/* *************************/

/* One X2 cell : up to 2 decoded bytes (`sequence`, little-endian order),
 * total bits consumed, and how many bytes (1 or 2) the cell emits. */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];


/* HUF_fillDTableX2Level2() :
 * Fills a sub-table for the second symbol of a 2-symbol sequence, after
 * `consumed` bits have already been used by the first symbol `baseSeq`.
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : cells whose second symbol would be too long
     * get a 1-byte entry repeating the first symbol only */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits = (BYTE)(consumed);
        DElt.length = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
        const U32 symbol = sortedSymbols[s].symbol;
        const U32 weight = sortedSymbols[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 length = 1 << (sizeLog-nbBits);
        const U32 start = rankVal[weight];
        U32 i = start;
        const U32 end = start + length;

        /* sequence = baseSeq in low byte, second symbol in high byte (LE) */
        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
        DElt.nbBits = (BYTE)(nbBits + consumed);
        DElt.length = 2;
        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

        rankVal[weight] += length;
    }   }
}


/* Fills the top-level X2 table : symbols short enough to leave room for a
 * second symbol get a level-2 sub-table ; the rest get plain 1-byte cells. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;
    U32 s;

    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}

/* Builds the double-symbol (X2) decoding table from the serialized weights
 * at `src`. `workSpace` is carved into sub-arrays below ; its required size
 * is checked before use.
 * @return : size of the Huffman header read from `src`, or an error code. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    /* workspace is carved manually ; spaceUsed32 counts U32 cells consumed */
    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;

    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 curr = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = curr;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 curr = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = curr;
        }   }
        /* derive per-`consumed` columns by shifting the base column */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}


/* Decodes a 1- or 2-byte sequence from the X2 table ;
 * returns the number of bytes written (the cell's `length`). */
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}

/* Variant for the very last output byte : writes only 1 byte even if the
 * cell holds a 2-byte sequence. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}

/* _0 : always decode ; _1 / _2 : only when the bit container can hold that
 * many symbols between reloads (mirrors the X1 macros above). */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decodes one bitstream into [p, pEnd) using the X2 (double-symbol) table ;
 * returns the number of bytes decoded. */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    /* at most one byte left : must not overwrite past pEnd */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}

/* Single-stream X2 decompression body ; instantiated via HUF_DGEN below. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}

/* Four-stream X2 decompression body : same 6-byte jump-table layout as the
 * X1 4-stream variant, decoding 4 destination segments in parallel. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - (sizeof(size_t)-1);   /* X2 may emit up to sizeof(size_t) bytes per reload */
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th size is implied */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        for ( ; (endSignal) & (op4 < olimit); ) {
            /* clang on x86 schedules better with streams paired than fully interleaved */
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal = (U32)LIKELY(
                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}

HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal) 919 920 size_t HUF_decompress1X2_usingDTable( 921 void* dst, size_t dstSize, 922 const void* cSrc, size_t cSrcSize, 923 const HUF_DTable* DTable) 924 { 925 DTableDesc dtd = HUF_getDTableDesc(DTable); 926 if (dtd.tableType != 1) return ERROR(GENERIC); 927 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 928 } 929 930 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 931 const void* cSrc, size_t cSrcSize, 932 void* workSpace, size_t wkspSize) 933 { 934 const BYTE* ip = (const BYTE*) cSrc; 935 936 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, 937 workSpace, wkspSize); 938 if (HUF_isError(hSize)) return hSize; 939 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 940 ip += hSize; cSrcSize -= hSize; 941 942 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 943 } 944 945 946 size_t HUF_decompress4X2_usingDTable( 947 void* dst, size_t dstSize, 948 const void* cSrc, size_t cSrcSize, 949 const HUF_DTable* DTable) 950 { 951 DTableDesc dtd = HUF_getDTableDesc(DTable); 952 if (dtd.tableType != 1) return ERROR(GENERIC); 953 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 954 } 955 956 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 957 const void* cSrc, size_t cSrcSize, 958 void* workSpace, size_t wkspSize, int bmi2) 959 { 960 const BYTE* ip = (const BYTE*) cSrc; 961 962 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, 963 workSpace, wkspSize); 964 if (HUF_isError(hSize)) return hSize; 965 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 966 ip += hSize; cSrcSize -= hSize; 967 968 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 969 } 970 971 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 972 const void* cSrc, 
size_t cSrcSize, 973 void* workSpace, size_t wkspSize) 974 { 975 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 976 } 977 978 979 #endif /* HUF_FORCE_DECOMPRESS_X1 */ 980 981 982 /* ***********************************/ 983 /* Universal decompression selectors */ 984 /* ***********************************/ 985 986 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, 987 const void* cSrc, size_t cSrcSize, 988 const HUF_DTable* DTable) 989 { 990 DTableDesc const dtd = HUF_getDTableDesc(DTable); 991 #if defined(HUF_FORCE_DECOMPRESS_X1) 992 (void)dtd; 993 assert(dtd.tableType == 0); 994 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 995 #elif defined(HUF_FORCE_DECOMPRESS_X2) 996 (void)dtd; 997 assert(dtd.tableType == 1); 998 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 999 #else 1000 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1001 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1002 #endif 1003 } 1004 1005 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 1006 const void* cSrc, size_t cSrcSize, 1007 const HUF_DTable* DTable) 1008 { 1009 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1010 #if defined(HUF_FORCE_DECOMPRESS_X1) 1011 (void)dtd; 1012 assert(dtd.tableType == 0); 1013 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1014 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1015 (void)dtd; 1016 assert(dtd.tableType == 1); 1017 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1018 #else 1019 return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1020 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1021 #endif 1022 } 1023 1024 1025 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 1026 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; 1027 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = 1028 { 1029 /* single, double, quad */ 1030 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ 1031 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ 1032 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ 1033 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ 1034 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ 1035 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ 1036 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ 1037 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ 1038 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ 1039 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ 1040 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ 1041 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ 1042 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ 1043 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ 1044 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ 1045 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ 1046 }; 1047 #endif 1048 1049 /** HUF_selectDecoder() : 1050 * Tells which decoder is likely to decode faster, 1051 * based on a set of pre-computed metrics. 1052 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . 
1053 * Assumption : 0 < dstSize <= 128 KB */ 1054 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) 1055 { 1056 assert(dstSize > 0); 1057 assert(dstSize <= 128*1024); 1058 #if defined(HUF_FORCE_DECOMPRESS_X1) 1059 (void)dstSize; 1060 (void)cSrcSize; 1061 return 0; 1062 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1063 (void)dstSize; 1064 (void)cSrcSize; 1065 return 1; 1066 #else 1067 /* decoder timing evaluation */ 1068 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ 1069 U32 const D256 = (U32)(dstSize >> 8); 1070 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); 1071 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); 1072 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ 1073 return DTime1 < DTime0; 1074 } 1075 #endif 1076 } 1077 1078 1079 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, 1080 size_t dstSize, const void* cSrc, 1081 size_t cSrcSize, void* workSpace, 1082 size_t wkspSize) 1083 { 1084 /* validation checks */ 1085 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1086 if (cSrcSize == 0) return ERROR(corruption_detected); 1087 1088 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1089 #if defined(HUF_FORCE_DECOMPRESS_X1) 1090 (void)algoNb; 1091 assert(algoNb == 0); 1092 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1093 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1094 (void)algoNb; 1095 assert(algoNb == 1); 1096 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1097 #else 1098 return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1099 cSrcSize, workSpace, wkspSize): 1100 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1101 #endif 1102 } 1103 } 1104 1105 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1106 const void* cSrc, size_t cSrcSize, 1107 void* workSpace, size_t wkspSize) 1108 { 1109 /* validation checks */ 1110 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1111 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1112 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1113 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1114 1115 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1116 #if defined(HUF_FORCE_DECOMPRESS_X1) 1117 (void)algoNb; 1118 assert(algoNb == 0); 1119 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1120 cSrcSize, workSpace, wkspSize); 1121 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1122 (void)algoNb; 1123 assert(algoNb == 1); 1124 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1125 cSrcSize, workSpace, wkspSize); 1126 #else 1127 return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1128 cSrcSize, workSpace, wkspSize): 1129 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1130 cSrcSize, workSpace, wkspSize); 1131 #endif 1132 } 1133 } 1134 1135 1136 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1137 { 1138 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1139 #if defined(HUF_FORCE_DECOMPRESS_X1) 1140 (void)dtd; 1141 assert(dtd.tableType == 0); 1142 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1143 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1144 (void)dtd; 1145 assert(dtd.tableType == 1); 1146 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1147 #else 1148 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1149 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1150 #endif 1151 } 1152 1153 #ifndef HUF_FORCE_DECOMPRESS_X2 1154 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1155 { 1156 const BYTE* ip = (const BYTE*) cSrc; 1157 1158 size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1159 if (HUF_isError(hSize)) return hSize; 1160 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1161 ip += hSize; cSrcSize -= hSize; 1162 1163 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1164 } 1165 #endif 1166 1167 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1168 { 1169 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1170 #if defined(HUF_FORCE_DECOMPRESS_X1) 1171 (void)dtd; 1172 assert(dtd.tableType == 0); 1173 return 
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1174 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1175 (void)dtd; 1176 assert(dtd.tableType == 1); 1177 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1178 #else 1179 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1180 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1181 #endif 1182 } 1183 1184 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1185 { 1186 /* validation checks */ 1187 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1188 if (cSrcSize == 0) return ERROR(corruption_detected); 1189 1190 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1191 #if defined(HUF_FORCE_DECOMPRESS_X1) 1192 (void)algoNb; 1193 assert(algoNb == 0); 1194 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1195 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1196 (void)algoNb; 1197 assert(algoNb == 1); 1198 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1199 #else 1200 return algoNb ? 
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1201 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1202 #endif 1203 } 1204 } 1205 1206 #ifndef ZSTD_NO_UNUSED_FUNCTIONS 1207 #ifndef HUF_FORCE_DECOMPRESS_X2 1208 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) 1209 { 1210 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1211 return HUF_readDTableX1_wksp(DTable, src, srcSize, 1212 workSpace, sizeof(workSpace)); 1213 } 1214 1215 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, 1216 const void* cSrc, size_t cSrcSize) 1217 { 1218 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1219 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, 1220 workSpace, sizeof(workSpace)); 1221 } 1222 1223 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1224 { 1225 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); 1226 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); 1227 } 1228 #endif 1229 1230 #ifndef HUF_FORCE_DECOMPRESS_X1 1231 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) 1232 { 1233 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1234 return HUF_readDTableX2_wksp(DTable, src, srcSize, 1235 workSpace, sizeof(workSpace)); 1236 } 1237 1238 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, 1239 const void* cSrc, size_t cSrcSize) 1240 { 1241 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1242 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, 1243 workSpace, sizeof(workSpace)); 1244 } 1245 1246 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1247 { 1248 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); 1249 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 1250 } 1251 #endif 1252 1253 #ifndef 
HUF_FORCE_DECOMPRESS_X2 1254 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1255 { 1256 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1257 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1258 workSpace, sizeof(workSpace)); 1259 } 1260 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1261 { 1262 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); 1263 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 1264 } 1265 #endif 1266 1267 #ifndef HUF_FORCE_DECOMPRESS_X1 1268 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 1269 const void* cSrc, size_t cSrcSize) 1270 { 1271 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1272 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1273 workSpace, sizeof(workSpace)); 1274 } 1275 1276 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1277 { 1278 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); 1279 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 1280 } 1281 #endif 1282 1283 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); 1284 1285 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1286 { 1287 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 1288 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; 1289 #endif 1290 1291 /* validation checks */ 1292 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1293 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1294 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1295 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1296 1297 { U32 const algoNb = HUF_selectDecoder(dstSize, 
cSrcSize); 1298 #if defined(HUF_FORCE_DECOMPRESS_X1) 1299 (void)algoNb; 1300 assert(algoNb == 0); 1301 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); 1302 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1303 (void)algoNb; 1304 assert(algoNb == 1); 1305 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); 1306 #else 1307 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); 1308 #endif 1309 } 1310 } 1311 1312 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1313 { 1314 /* validation checks */ 1315 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1316 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1317 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1318 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1319 1320 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1321 #if defined(HUF_FORCE_DECOMPRESS_X1) 1322 (void)algoNb; 1323 assert(algoNb == 0); 1324 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); 1325 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1326 (void)algoNb; 1327 assert(algoNb == 1); 1328 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); 1329 #else 1330 return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : 1331 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; 1332 #endif 1333 } 1334 } 1335 1336 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1337 { 1338 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1339 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1340 workSpace, sizeof(workSpace)); 1341 } 1342 1343 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 1344 const void* cSrc, size_t cSrcSize) 1345 { 1346 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 1347 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 1348 workSpace, sizeof(workSpace)); 1349 } 1350 #endif 1351