/* ******************************************************************
 * huff0 huffman decoder,
 * part of Finite State Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"  /* BIT_* */
#include "../common/fse.h"        /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/zstd_internal.h"

/* **************************************************************
*  Constants
****************************************************************/

#define HUF_DECODER_FAST_TABLELOG 11

/* **************************************************************
*  Macros
****************************************************************/

/* These two optional macros force the use of one or the other of the two
 * Huffman decompression implementations. You can't force in both directions
 * at the same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif
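/* For example, building with -DHUF_FORCE_DECOMPRESS_X1 compiles only the
 * single-symbol (X1) decoder paths below; the universal selectors at the end
 * of this file then route every call to X1. */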
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
#else
# define HUF_ASM_X86_64_BMI2_ATTRS
#endif

#ifdef __cplusplus
# define HUF_EXTERN_C extern "C"
#else
# define HUF_EXTERN_C
#endif
#define HUF_ASM_DECL HUF_EXTERN_C

#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_BMI2_FUNCTION 1
#else
# define HUF_NEED_BMI2_FUNCTION 0
#endif

#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_DEFAULT_FUNCTION 1
#else
# define HUF_NEED_DEFAULT_FUNCTION 0
#endif

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))


/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif


/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    ZSTD_memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}

#if ZSTD_ENABLE_ASM_X86_64_BMI2

static size_t HUF_initDStream(BYTE const* ip) {
    BYTE const lastByte = ip[7];
    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
    size_t const value = MEM_readLEST(ip) | 1;
    assert(bitsConsumed <= 8);
    return value << bitsConsumed;
}
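/* Illustrative example (value hypothetical): if the last stream byte ip[7]
 * is 0x05 (0b00000101), BIT_highbit32(0x05) == 2, so bitsConsumed == 6:
 * the five padding zeros plus the end-of-stream marker bit. Shifting the
 * little-endian container left by 6 discards exactly those bits, leaving the
 * first valid bit at the MSB. The `| 1` plants a sentinel below the data so
 * that CountTrailingZeros() can later report how many bits were consumed. */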
typedef struct {
    BYTE const* ip[4];
    BYTE* op[4];
    U64 bits[4];
    void const* dt;
    BYTE const* ilimit;
    BYTE* oend;
    BYTE const* iend[4];
} HUF_DecompressAsmArgs;

/**
 * Initializes args for the asm decoding loop.
 * @returns 0 on success
 *          1 if the fallback implementation should be used.
 *          Or an error code on failure.
 */
static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
{
    void const* dt = DTable + 1;
    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;

    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;

    BYTE* const oend = (BYTE*)dst + dstSize;

    /* The following condition is false on the x32 platform,
     * but HUF_asm is not compatible with this ABI anyway. */
    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;

    /* strict minimum : jump table + 1 byte per stream */
    if (srcSize < 10)
        return ERROR(corruption_detected);

    /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
     * If the table log is not correct at this point, fall back to the old decoder.
     * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
     */
    if (dtLog != HUF_DECODER_FAST_TABLELOG)
        return 1;

    /* Read the jump table. */
    {
        const BYTE* const istart = (const BYTE*)src;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
        args->iend[0] = istart + 6;  /* jumpTable */
        args->iend[1] = args->iend[0] + length1;
        args->iend[2] = args->iend[1] + length2;
        args->iend[3] = args->iend[2] + length3;

        /* HUF_initDStream() requires this, and an input this small
         * won't benefit from the ASM loop anyway.
         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
         * starts.
         */
        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
            return 1;
        if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
    }
    /* ip[] contains the position that is currently loaded into bits[]. */
    args->ip[0] = args->iend[1] - sizeof(U64);
    args->ip[1] = args->iend[2] - sizeof(U64);
    args->ip[2] = args->iend[3] - sizeof(U64);
    args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);

    /* op[] contains the output pointers. */
    args->op[0] = (BYTE*)dst;
    args->op[1] = args->op[0] + (dstSize+3)/4;
    args->op[2] = args->op[1] + (dstSize+3)/4;
    args->op[3] = args->op[2] + (dstSize+3)/4;

    /* No point to call the ASM loop for tiny outputs. */
    if (args->op[3] >= oend)
        return 1;

    /* bits[] is the bit container.
     * It is read from the MSB down to the LSB.
     * It is shifted left as it is read, and zeros are
     * shifted in. After the lowest valid bit a 1 is
     * set, so that CountTrailingZeros(bits[]) can be used
     * to count how many bits we've consumed.
     */
    args->bits[0] = HUF_initDStream(args->ip[0]);
    args->bits[1] = HUF_initDStream(args->ip[1]);
    args->bits[2] = HUF_initDStream(args->ip[2]);
    args->bits[3] = HUF_initDStream(args->ip[3]);

    /* If ip[] >= ilimit, it is guaranteed to be safe to
     * reload bits[]. It may be beyond its section, but is
     * guaranteed to be valid (>= istart).
     */
    args->ilimit = ilimit;

    args->oend = oend;
    args->dt = dt;

    return 0;
}
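/* Worked example of the jump-table layout above (sizes hypothetical): with
 * srcSize == 100 and encoded lengths length1 == 30, length2 == 25,
 * length3 == 20, the fourth stream gets length4 == 100 - (30+25+20+6) == 19.
 * The streams then occupy [src+6, src+36), [src+36, src+61), [src+61, src+81)
 * and [src+81, src+100), and each ip[i] starts at the last 8 bytes of its
 * stream, since the bitstreams are read backwards. */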
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
{
    /* Validate that we haven't overwritten. */
    if (args->op[stream] > segmentEnd)
        return ERROR(corruption_detected);
    /* Validate that we haven't read beyond iend[].
     * Note that ip[] may be < iend[] because the MSB is
     * the next bit to read, and we may have consumed 100%
     * of the stream, so down to iend[i] - 8 is valid.
     */
    if (args->ip[stream] < args->iend[stream] - 8)
        return ERROR(corruption_detected);

    /* Construct the BIT_DStream_t. */
    bit->bitContainer = MEM_readLE64(args->ip[stream]);
    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
    bit->start = (const char*)args->iend[0];
    bit->limitPtr = bit->start + sizeof(size_t);
    bit->ptr = (const char*)args->ip[stream];

    return 0;
}
#endif


#ifndef HUF_FORCE_DECOMPRESS_X2

/*-***************************/
/*  single-symbol decoding   */
/*-***************************/
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decoding */

/**
 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
 * a time.
 */
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
    U64 D4;
    if (MEM_isLittleEndian()) {
        D4 = (symbol << 8) + nbBits;
    } else {
        D4 = symbol + (nbBits << 8);
    }
    D4 *= 0x0001000100010001ULL;
    return D4;
}

/**
 * Increases tableLog to targetTableLog and rescales the stats.
 * If tableLog > targetTableLog this is a no-op.
 * @returns New tableLog
 */
static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
{
    if (tableLog > targetTableLog)
        return tableLog;
    if (tableLog < targetTableLog) {
        U32 const scale = targetTableLog - tableLog;
        U32 s;
        /* Increase the weight for all non-zero probability symbols by scale. */
        for (s = 0; s < nbSymbols; ++s) {
            huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
        }
        /* Update rankVal to reflect the new weights.
         * All weights except 0 get moved to weight + scale.
         * Weights [1, scale] are empty.
         */
        for (s = targetTableLog; s > scale; --s) {
            rankVal[s] = rankVal[s - scale];
        }
        for (s = scale; s > 0; --s) {
            rankVal[s] = 0;
        }
    }
    return targetTableLog;
}
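/* Worked example (numbers hypothetical): rescaling tableLog 8 to
 * targetTableLog 11 gives scale == 3, so every non-zero weight w becomes
 * w + 3 and the counters shift accordingly: rankVal[11] = rankVal[8], ...,
 * rankVal[4] = rankVal[1], then rankVal[1..3] = 0. Each symbol's code length
 * nbBits == tableLog + 1 - weight is unchanged; the symbol simply fills
 * 2^3 times as many consecutive entries of the now-larger table. */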
typedef struct {
        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
} HUF_ReadDTableX1_Workspace;


size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}

size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;

    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzers complain ... */

    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;


    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        U32 const maxTableLog = dtd.maxTableLog + 1;
        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
        tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Compute symbols and rankStart given rankVal:
     *
     * rankVal already contains the number of values of each weight.
     *
     * symbols contains the symbols ordered by weight. First are the rankVal[0]
     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
     * symbols[0] is filled (but unused) to avoid a branch.
     *
     * rankStart contains the offset where each rank belongs in the DTable.
     * rankStart[0] is not filled because there are no entries in the table for
     * weight 0.
     */
    {
        int n;
        int nextRankStart = 0;
        int const unroll = 4;
        int const nLimit = (int)nbSymbols - unroll + 1;
        for (n=0; n<(int)tableLog+1; n++) {
            U32 const curr = nextRankStart;
            nextRankStart += wksp->rankVal[n];
            wksp->rankStart[n] = curr;
        }
        for (n=0; n < nLimit; n += unroll) {
            int u;
            for (u=0; u < unroll; ++u) {
                size_t const w = wksp->huffWeight[n+u];
                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
            }
        }
        for (; n < (int)nbSymbols; ++n) {
            size_t const w = wksp->huffWeight[n];
            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
        }
    }
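    /* Illustrative trace (weights hypothetical): with nbSymbols == 4 and
     * huffWeight == {2, 0, 1, 1}, the counts are rankVal == {1, 2, 1} and the
     * prefix sums give rankStart == {0, 1, 3}. The bucket-sort pass then
     * yields symbols == {1, 2, 3, 0}: the weight-0 symbol first, then the
     * weight-1 symbols, then the weight-2 symbol, while each rankStart[w]
     * advances to the end of its bucket. */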
    /* fill DTable
     * We fill all entries of each weight in order.
     * That way length is a constant for each iteration of the outer loop.
     * We can switch based on the length to a different inner loop which is
     * optimized for that particular case.
     */
    {
        U32 w;
        int symbol = wksp->rankVal[0];
        int rankStart = 0;
        for (w=1; w<tableLog+1; ++w) {
            int const symbolCount = wksp->rankVal[w];
            int const length = (1 << w) >> 1;
            int uStart = rankStart;
            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
            int s;
            int u;
            switch (length) {
            case 1:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart] = D;
                    uStart += 1;
                }
                break;
            case 2:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart+0] = D;
                    dt[uStart+1] = D;
                    uStart += 2;
                }
                break;
            case 4:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    uStart += 4;
                }
                break;
            case 8:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    MEM_write64(dt + uStart + 4, D4);
                    uStart += 8;
                }
                break;
            default:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    for (u=0; u < length; u += 16) {
                        MEM_write64(dt + uStart + u + 0, D4);
                        MEM_write64(dt + uStart + u + 4, D4);
                        MEM_write64(dt + uStart + u + 8, D4);
                        MEM_write64(dt + uStart + u + 12, D4);
                    }
                    assert(u == length);
                    uStart += length;
                }
                break;
            }
            symbol += symbolCount;
            rankStart += symbolCount * length;
        }
    }
    return iSize;
}

FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog);    /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}
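/* Illustrative decode step (values hypothetical): with dtLog == 11, the
 * decoder peeks the next 11 bits, say 0b01011010001 == 0x2D1, and reads
 * dt[0x2D1]. A symbol of weight w fills 2^(dtLog - nbBits) consecutive
 * entries, so every 11-bit index whose top nbBits match the symbol's code
 * resolves to the same {byte, nbBits} pair; only nbBits bits are then
 * actually consumed from the stream. */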
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    if ((pEnd - p) > 3) {
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
        }
    } else {
        BIT_reloadDStream(bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - 3;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;
        U32 endSignal = 1;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        if ((size_t)(oend - op4) >= sizeof(size_t)) {
            for ( ; (endSignal) & (op4 < olimit) ; ) {
                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
                HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
            }
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
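/* Segment arithmetic, for illustration (sizes hypothetical): with
 * dstSize == 10, segmentSize == (10+3)/4 == 3, so streams 1-3 each decode 3
 * bytes starting at offsets 0, 3 and 6, and stream 4 decodes the remaining
 * 10 - 9 == 1 byte up to oend. Only the last segment may be shorter. */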
#if HUF_NEED_BMI2_FUNCTION
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif

#if HUF_NEED_DEFAULT_FUNCTION
static
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2

HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

static HUF_ASM_X86_64_BMI2_ATTRS
size_t
HUF_decompress4X1_usingDTable_internal_bmi2_asm(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    void const* dt = DTable + 1;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;
    {
        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(ret, "Failed to init asm args");
        if (ret != 0)
            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);

    /* Our loop guarantees that ip[] >= ilimit and that we haven't
     * overwritten any op[].
     */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;

    /* finish bit streams one by one. */
    {
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segmentEnd = (BYTE*)dst;
        int i;
        for (i = 0; i < 4; ++i) {
            BIT_DStream_t bit;
            if (segmentSize <= (size_t)(oend - segmentEnd))
                segmentEnd += segmentSize;
            else
                segmentEnd = oend;
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
            /* Decompress and validate that we've produced exactly the expected length. */
            args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
            if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

HUF_DGEN(HUF_decompress1X1_usingDTable_internal)

static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
# if ZSTD_ENABLE_ASM_X86_64_BMI2
        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
# else
        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
# endif
    }
#else
    (void)bmi2;
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
#else
    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
#endif
}


size_t HUF_decompress1X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


size_t HUF_decompress4X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}


#endif /* HUF_FORCE_DECOMPRESS_X2 */
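#if 0
/* Minimal usage sketch of the X1 workspace API above (never compiled; names
 * and sizes are illustrative). HUF_CREATE_STATIC_DTABLEX1 and
 * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 come from huf.h (exposed via
 * HUF_STATIC_LINKING_ONLY). Note that dstSize must be the exact regenerated
 * size : HUF decoders produce exactly dstSize bytes or fail. */
static size_t example_decompress_x1(void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(dtable, HUF_TABLELOG_MAX);
    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];  /* large : heap-allocate in real code */
    /* reads the table description from cSrc, then decodes the 4-stream payload */
    return HUF_decompress4X1_DCtx_wksp(dtable, dst, dstSize,
                                       cSrc, cSrcSize,
                                       workspace, sizeof(workspace));
}
#endif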
#ifndef HUF_FORCE_DECOMPRESS_X1

/* *************************/
/* double-symbols decoding */
/* *************************/

typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; } sortedSymbol_t;
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];

/**
 * Constructs a HUF_DEltX2 in a U32.
 */
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
{
    U32 seq;
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
    if (MEM_isLittleEndian()) {
        seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
        return seq + (nbBits << 16) + ((U32)level << 24);
    } else {
        seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
        return (seq << 16) + (nbBits << 8) + (U32)level;
    }
}

/**
 * Constructs a HUF_DEltX2.
 */
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
{
    HUF_DEltX2 DElt;
    U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
    DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
    ZSTD_memcpy(&DElt, &val, sizeof(val));
    return DElt;
}

/**
 * Constructs 2 HUF_DEltX2s and packs them into a U64.
 */
static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
{
    U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
    return (U64)DElt + ((U64)DElt << 32);
}
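/* Packing example (values hypothetical, little-endian): for a level-2 entry
 * with baseSeq == 0x42, symbol == 0x41, nbBits == 5, the U32 is
 * 0x42 + (0x41 << 8) + (5 << 16) + (2 << 24) == 0x02054142. Stored in memory
 * that is the byte sequence {0x42, 0x41, 0x05, 0x02}: sequence emits baseSeq
 * then symbol, nbBits == 5 is the combined code length, and length == 2
 * symbols are produced by this single table entry. */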
/**
 * Fills the DTable rank with all the symbols from [begin, end) that are each
 * nbBits long.
 *
 * @param DTableRank The start of the rank in the DTable.
 * @param begin The first symbol to fill (inclusive).
 * @param end The last symbol to fill (exclusive).
 * @param nbBits Each symbol is nbBits long.
 * @param tableLog The table log.
 * @param baseSeq If level == 1 { 0 } else { the first level symbol }
 * @param level The level in the table. Must be 1 or 2.
 */
static void HUF_fillDTableX2ForWeight(
    HUF_DEltX2* DTableRank,
    sortedSymbol_t const* begin, sortedSymbol_t const* end,
    U32 nbBits, U32 tableLog,
    U16 baseSeq, int const level)
{
    U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
    const sortedSymbol_t* ptr;
    assert(level >= 1 && level <= 2);
    switch (length) {
    case 1:
        for (ptr = begin; ptr != end; ++ptr) {
            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
            *DTableRank++ = DElt;
        }
        break;
    case 2:
        for (ptr = begin; ptr != end; ++ptr) {
            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
            DTableRank[0] = DElt;
            DTableRank[1] = DElt;
            DTableRank += 2;
        }
        break;
    case 4:
        for (ptr = begin; ptr != end; ++ptr) {
            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
            DTableRank += 4;
        }
        break;
    case 8:
        for (ptr = begin; ptr != end; ++ptr) {
            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
            DTableRank += 8;
        }
        break;
    default:
        for (ptr = begin; ptr != end; ++ptr) {
            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
            HUF_DEltX2* const DTableRankEnd = DTableRank + length;
            for (; DTableRank != DTableRankEnd; DTableRank += 8) {
                ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
                ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
                ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
                ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
            }
        }
        break;
    }
}
/* HUF_fillDTableX2Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
                           const U32* rankVal, const int minWeight, const int maxWeight1,
                           const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    /* Fill skipped values (all positions up to rankVal[minWeight]).
     * These positions only get a single symbol because the combined weight
     * is too large.
     */
    if (minWeight > 1) {
        U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
        U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
        int const skipSize = rankVal[minWeight];
        assert(length > 1);
        assert((U32)skipSize < length);
        switch (length) {
        case 2:
            assert(skipSize == 1);
            ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
            break;
        case 4:
            assert(skipSize <= 4);
            ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
            ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
            break;
        default:
            {
                int i;
                for (i = 0; i < skipSize; i += 8) {
                    ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
                    ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
                }
            }
        }
    }

    /* Fill each of the second level symbols by weight. */
    {
        int w;
        for (w = minWeight; w < maxWeight1; ++w) {
            int const begin = rankStart[w];
            int const end = rankStart[w+1];
            U32 const nbBits = nbBitsBaseline - w;
            U32 const totalBits = nbBits + consumedBits;
            HUF_fillDTableX2ForWeight(
                DTable + rankVal[w],
                sortedSymbols + begin, sortedSymbols + end,
                totalBits, targetLog,
                baseSeq, /* level */ 2);
        }
    }
}
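/* Illustrative sizing (values hypothetical): with targetLog == 11 and
 * consumedBits == 8, this sub-range spans length == 1 << (11-8) == 8 entries.
 * If minWeight == 2 and rankVal[2] == 2, the first 2 entries hold the
 * level-1 fallback (baseSeq alone, nbBits == consumedBits), because pairing
 * with a rarer second symbol would need more bits than the 3 that remain. */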
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32* const rankVal = rankValOrigin[0];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;
    int w;
    int const wEnd = (int)maxWeight + 1;

    /* Fill DTable in order of weight. */
    for (w = 1; w < wEnd; ++w) {
        int const begin = (int)rankStart[w];
        int const end = (int)rankStart[w+1];
        U32 const nbBits = nbBitsBaseline - w;

        if (targetLog-nbBits >= minBits) {
            /* Enough room for a second symbol. */
            int start = rankVal[w];
            U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
            int minWeight = nbBits + scaleLog;
            int s;
            if (minWeight < 1) minWeight = 1;
            /* Fill the DTable for every symbol of weight w.
             * These symbols get at least 1 second symbol.
             */
            for (s = begin; s != end; ++s) {
                HUF_fillDTableX2Level2(
                    DTable + start, targetLog, nbBits,
                    rankValOrigin[nbBits], minWeight, wEnd,
                    sortedList, rankStart,
                    nbBitsBaseline, sortedList[s].symbol);
                start += length;
            }
        } else {
            /* Only a single symbol. */
            HUF_fillDTableX2ForWeight(
                DTable + rankVal[w],
                sortedList + begin, sortedList + end,
                nbBits, targetLog,
                /* baseSeq */ 0, /* level */ 1);
        }
    }
}

typedef struct {
    rankValCol_t rankVal[HUF_TABLELOG_MAX];
    U32 rankStats[HUF_TABLELOG_MAX + 1];
    U32 rankStart0[HUF_TABLELOG_MAX + 3];
    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
} HUF_ReadDTableX2_Workspace;

size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog, maxW, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;

    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);

    rankStart = wksp->rankStart0 + 1;
    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzers complain ... */

    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;

    /* find maxWeight */
    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 curr = nextRankStart;
            nextRankStart += wksp->rankStats[w];
            rankStart[w] = curr;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list */
        rankStart[maxW+1] = nextRankStart;
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = wksp->weightList[s];
            U32 const r = rankStart[w]++;
            wksp->sortedSymbol[r].symbol = (BYTE)s;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = wksp->rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 curr = nextRankVal;
                nextRankVal += wksp->rankStats[w] << (w+rescale);
                rankVal0[w] = curr;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = wksp->rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
        }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   wksp->sortedSymbol,
                   wksp->rankStart0, wksp->rankVal, maxW,
                   tableLog+1);

    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}


FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, &dt[val].sequence, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}
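/* Note on the copy above (for illustration): 2 bytes are always written, but
 * only dt[val].length (1 or 2) is returned and added to the output pointer.
 * For a level-1 entry the second byte is scratch that the next write
 * overwrites; callers therefore keep 2 bytes of output headroom, which is
 * why HUF_decodeLastSymbolX2 below copies a single byte instead. */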
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, &dt[val].sequence, 1);
    if (dt[val].length==1) {
        BIT_skipBits(DStream, dt[val].nbBits);
    } else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
        }
    }
    return 1;
}

#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
        if (dtLog <= 11 && MEM_64bits()) {
            /* up to 10 symbols at a time */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        } else {
            /* up to 8 symbols at a time */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        }
    } else {
        BIT_reloadDStream(bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    if ((size_t)(pEnd - p) >= 2) {
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

        while (p <= pEnd-2)
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
    }

    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
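/* Why 5 lookups per reload in the dtLog <= 11 path above (a sketch, assuming
 * the usual zstd bitstream invariant that BIT_reloadDStream leaves at most 7
 * bits consumed, i.e. at least 57 valid bits in a 64-bit container): five
 * consecutive lookups consume at most 5 * 11 == 55 bits, which fits, and
 * each lookup can emit up to 2 symbols, hence "up to 10 symbols at a time". */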
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - (sizeof(size_t)-1);
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        if ((size_t)(oend - op4) >= sizeof(size_t)) {
            for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal = (U32)LIKELY((U32)
                            (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                          & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                          & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                          & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
            }
        }
        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}

#if HUF_NEED_BMI2_FUNCTION
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif

#if HUF_NEED_DEFAULT_FUNCTION
static
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable) {
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2

HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

static HUF_ASM_X86_64_BMI2_ATTRS size_t
HUF_decompress4X2_usingDTable_internal_bmi2_asm(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable) {
    void const* dt = DTable + 1;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;
    {
        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(ret, "Failed to init asm args");
        if (ret != 0)
            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);

    /* note : op4 already verified within main loop */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;

    /* finish bitStreams one by one */
    {
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segmentEnd = (BYTE*)dst;
        int i;
        for (i = 0; i < 4; ++i) {
            BIT_DStream_t bit;
            if (segmentSize <= (size_t)(oend - segmentEnd))
                segmentEnd += segmentSize;
            else
                segmentEnd = oend;
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
            args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
            if (args.op[i] != segmentEnd)
                return ERROR(corruption_detected);
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
# if ZSTD_ENABLE_ASM_X86_64_BMI2
        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
# else
        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
# endif
    }
#else
    (void)bmi2;
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
#else
    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
#endif
}

HUF_DGEN(HUF_decompress1X2_usingDTable_internal)

size_t HUF_decompress1X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}


#endif /* HUF_FORCE_DECOMPRESS_X1 */


/* ***********************************/
/* Universal decompression selectors */
/* ***********************************/

size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}


#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}},             /* Q==0 : impossible */
    {{0,0}, {1,1}},             /* Q==1 : impossible */
    {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
    {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
    {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
    {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
    {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
    {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
    {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
    {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
    {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
    {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
    {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
    {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
    {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
    {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
};
#endif
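/* Worked example of the selection metric implemented below (inputs
 * hypothetical): with dstSize == 64 KB and cSrcSize == 32 KB,
 * Q == 32768*16/65536 == 8 and D256 == 65536>>8 == 256. From the Q==8 row:
 *   DTime0 = 359  + 188*256 = 48487                      (single-symbol X1)
 *   DTime1 = 1167 + 109*256 = 29071, +(29071>>5) = 29979 (double-symbol X2)
 * so the double-symbol decoder is predicted faster and 1 is returned. */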

size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize) :
                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#endif
    }
}

size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize) :
                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#endif
    }
}


size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}
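
/* Usage sketch (illustrative only) : one-call decoding with an explicit
 * workspace, letting HUF_selectDecoder() pick the X1 or X2 path at runtime.
 * The DTable must be large enough for either table type, hence the X2
 * creator, which is the larger of the two. Variable names are hypothetical.
 *
 *     HUF_CREATE_STATIC_DTABLEX2(dTable, HUF_TABLELOG_MAX);
 *     U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
 *     size_t const dSize = HUF_decompress1X_DCtx_wksp(dTable, dst, originalSize,
 *                                 cSrc, cSrcSize, wksp, sizeof(wksp));
 *     if (HUF_isError(dSize)) { handle error }
 */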

#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
#endif

size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}

size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}
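
/* Usage sketch (illustrative only) : the `bmi2` flag is a runtime CPU-feature
 * toggle, expected to be computed once by the caller. A plausible way to
 * derive it (assumption; zstd's own callers cache a similar flag in their
 * context) is the cpuid helpers from "../common/cpu.h" :
 *
 *     int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
 *     size_t const dSize = HUF_decompress4X_hufOnly_wksp_bmi2(dTable,
 *                                 dst, originalSize, cSrc, cSrcSize,
 *                                 wksp, sizeof(wksp), bmi2);
 *
 * When DYNAMIC_BMI2 is 0, the flag is ignored and the statically selected
 * implementation is used. */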

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif

#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX2_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif

#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif

#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
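
/* Note : the convenience wrappers in this ZSTD_NO_UNUSED_FUNCTIONS section
 * allocate both the DTable and the workspace on the stack, which costs
 * several KB of stack space per call. A minimal sketch of the simplest
 * possible invocation (variable names hypothetical) :
 *
 *     size_t const dSize = HUF_decompress4X1(dst, originalSize, cSrc, cSrcSize);
 *     if (HUF_isError(dSize)) { handle error }
 *
 * Stack-constrained environments should prefer the _wksp variants above,
 * which take a caller-provided workspace instead. */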

typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);

size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
#endif

    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
#else
        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
#endif
    }
}

size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#endif
    }
}

size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
}

size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      workSpace, sizeof(workSpace));
}
#endif
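
/* Usage sketch (illustrative only) : HUF_decompress() is the top-level
 * one-shot entry point. `dstSize` must be the exact regenerated size, since
 * huff0 streams do not store it; the caller is expected to transport it out
 * of band. The special cases above follow from that convention :
 * cSrcSize == dstSize means the input was stored uncompressed, and
 * cSrcSize == 1 encodes an RLE run of a single byte.
 *
 *     size_t const dSize = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
 *     if (HUF_isError(dSize)) { handle error }
 *     assert(dSize == originalSize);    on success
 */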