1 /* ****************************************************************** 2 FSE : Finite State Entropy encoder 3 Copyright (C) 2013-present, Yann Collet. 4 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 * Redistributions of source code must retain the above copyright 12 notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above 14 copyright notice, this list of conditions and the following disclaimer 15 in the documentation and/or other materials provided with the 16 distribution. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 You can contact the author at : 31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 32 - Public forum : https://groups.google.com/forum/#!forum/lz4c 33 ****************************************************************** */ 34 35 /* ************************************************************** 36 * Includes 37 ****************************************************************/ 38 #include <stdlib.h> /* malloc, free, qsort */ 39 #include <string.h> /* memcpy, memset */ 40 #include "compiler.h" 41 #include "mem.h" /* U32, U16, etc. */ 42 #include "debug.h" /* assert, DEBUGLOG */ 43 #include "hist.h" /* HIST_count_wksp */ 44 #include "bitstream.h" 45 #define FSE_STATIC_LINKING_ONLY 46 #include "fse.h" 47 #include "error_private.h" 48 49 50 /* ************************************************************** 51 * Error Management 52 ****************************************************************/ 53 #define FSE_isError ERR_isError 54 55 56 /* ************************************************************** 57 * Templates 58 ****************************************************************/ 59 /* 60 designed to be included 61 for type-specific functions (template emulation in C) 62 Objective is to write these functions only once, for improved maintenance 63 */ 64 65 /* safety checks */ 66 #ifndef FSE_FUNCTION_EXTENSION 67 # error "FSE_FUNCTION_EXTENSION must be defined" 68 #endif 69 #ifndef FSE_FUNCTION_TYPE 70 # error "FSE_FUNCTION_TYPE must be defined" 71 #endif 72 73 /* Function names */ 74 #define FSE_CAT(X,Y) X##Y 75 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 76 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 77 78 79 /* Function templates */ 80 81 /* FSE_buildCTable_wksp() : 82 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` 84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements 85 */ 86 size_t FSE_buildCTable_wksp(FSE_CTable* ct, 87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 88 void* workSpace, size_t wkspSize) 89 { 90 U32 const tableSize = 1 << tableLog; 91 U32 const tableMask = tableSize - 1; 92 void* const ptr = ct; 93 U16* const tableU16 = ( (U16*) ptr) + 2; 94 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; 95 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 96 U32 const step = FSE_TABLESTEP(tableSize); 97 U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; 98 99 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; 100 U32 highThreshold = tableSize-1; 101 102 /* CTable header */ 103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); 104 tableU16[-2] = (U16) tableLog; 105 tableU16[-1] = (U16) maxSymbolValue; 106 assert(tableLog < 16); /* required for threshold strategy to work */ 107 108 /* For explanations on how to distribute symbol values over the table : 109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 110 111 #ifdef __clang_analyzer__ 112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ 113 #endif 114 115 /* symbol start positions */ 116 { U32 u; 117 cumul[0] = 0; 118 for (u=1; u<=maxSymbolValue+1; u++) { 119 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ 120 cumul[u] = cumul[u-1] + 1; 121 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); 122 } else { 123 cumul[u] = cumul[u-1] + normalizedCounter[u-1]; 124 } } 125 cumul[maxSymbolValue+1] = tableSize+1; 126 } 127 128 /* Spread symbols */ 129 { U32 position = 0; 130 U32 symbol; 131 for (symbol=0; symbol<=maxSymbolValue; symbol++) { 132 int nbOccurences; 133 int const freq = normalizedCounter[symbol]; 134 for (nbOccurences=0; nbOccurences<freq; nbOccurences++) { 135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; 136 position = (position + step) & tableMask; 137 while (position > highThreshold) 138 position = (position + step) & tableMask; /* Low proba area */ 139 } } 140 141 assert(position==0); /* Must have initialized all positions */ 142 } 143 144 /* Build table */ 145 { U32 u; for (u=0; u<tableSize; u++) { 146 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ 147 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ 148 } } 149 150 /* Build Symbol Transformation Table */ 151 { unsigned total = 0; 152 unsigned s; 153 for (s=0; s<=maxSymbolValue; s++) { 154 switch (normalizedCounter[s]) 155 { 156 case 0: 157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */ 158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog); 159 break; 160 161 case -1: 162 case 1: 163 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); 164 symbolTT[s].deltaFindState = total - 1; 165 total ++; 166 break; 167 default : 168 { 169 U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); 170 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; 171 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 172 symbolTT[s].deltaFindState = total - normalizedCounter[s]; 173 total += normalizedCounter[s]; 174 } } } } 175 176 #if 0 /* debug : symbol costs */ 177 DEBUGLOG(5, "\n --- table statistics : "); 178 { U32 symbol; 179 for (symbol=0; symbol<=maxSymbolValue; symbol++) { 180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", 181 symbol, normalizedCounter[symbol], 182 FSE_getMaxNbBits(symbolTT, symbol), 183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); 184 } 185 } 186 #endif 187 188 return 0; 189 } 190 191 192 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 193 { 194 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ 195 return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); 196 } 197 198 199 200 #ifndef FSE_COMMONDEFS_ONLY 201 202 203 /*-************************************************************** 204 * FSE NCount encoding 205 ****************************************************************/ 206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) 207 { 208 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; 209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 210 } 211 212 static size_t 213 FSE_writeNCount_generic (void* header, size_t headerBufferSize, 214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 215 unsigned writeIsSafe) 216 { 217 BYTE* const ostart = (BYTE*) header; 218 BYTE* out = ostart; 219 BYTE* const oend = ostart + headerBufferSize; 220 int nbBits; 221 const int tableSize = 1 << tableLog; 222 int remaining; 223 int threshold; 224 U32 bitStream = 0; 225 int bitCount = 0; 226 unsigned symbol = 0; 227 unsigned const alphabetSize = maxSymbolValue + 1; 228 int previousIs0 = 0; 229 230 /* Table Size */ 231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; 232 bitCount += 4; 233 234 /* Init */ 235 remaining = tableSize+1; /* +1 for extra accuracy */ 236 threshold = tableSize; 237 nbBits = tableLog+1; 238 239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ 240 if (previousIs0) { 241 unsigned start = symbol; 242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; 243 if (symbol == alphabetSize) break; /* incorrect distribution */ 244 while (symbol >= start+24) { 245 start+=24; 246 bitStream += 0xFFFFU << bitCount; 247 if ((!writeIsSafe) && (out > oend-2)) 248 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 249 out[0] = (BYTE) bitStream; 250 out[1] = (BYTE)(bitStream>>8); 251 out+=2; 252 bitStream>>=16; 253 } 254 while (symbol >= start+3) { 255 start+=3; 256 bitStream += 3 << bitCount; 257 bitCount += 2; 258 } 259 bitStream += (symbol-start) << bitCount; 260 bitCount += 2; 261 if (bitCount>16) { 262 if ((!writeIsSafe) && (out > oend - 2)) 263 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 264 out[0] = (BYTE)bitStream; 265 out[1] = (BYTE)(bitStream>>8); 266 out += 2; 267 bitStream >>= 16; 268 bitCount -= 16; 269 } } 270 { int count = normalizedCounter[symbol++]; 271 int const max = (2*threshold-1) - remaining; 272 remaining -= count < 0 ? -count : count; 273 count++; /* +1 for extra accuracy */ 274 if (count>=threshold) 275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 276 bitStream += count << bitCount; 277 bitCount += nbBits; 278 bitCount -= (count<max); 279 previousIs0 = (count==1); 280 if (remaining<1) return ERROR(GENERIC); 281 while (remaining<threshold) { nbBits--; threshold>>=1; } 282 } 283 if (bitCount>16) { 284 if ((!writeIsSafe) && (out > oend - 2)) 285 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 286 out[0] = (BYTE)bitStream; 287 out[1] = (BYTE)(bitStream>>8); 288 out += 2; 289 bitStream >>= 16; 290 bitCount -= 16; 291 } } 292 293 if (remaining != 1) 294 return ERROR(GENERIC); /* incorrect normalized distribution */ 295 assert(symbol <= alphabetSize); 296 297 /* flush remaining bitStream */ 298 if ((!writeIsSafe) && (out > oend - 2)) 299 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 300 out[0] = (BYTE)bitStream; 301 out[1] = (BYTE)(bitStream>>8); 302 out+= (bitCount+7) /8; 303 304 return (out-ostart); 305 } 306 307 308 size_t FSE_writeNCount (void* buffer, size_t bufferSize, 309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 310 { 311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ 312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ 313 314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) 315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); 316 317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); 318 } 319 320 321 /*-************************************************************** 322 * FSE Compression Code 323 ****************************************************************/ 324 325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) 326 { 327 size_t size; 328 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; 329 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); 330 return (FSE_CTable*)malloc(size); 331 } 332 333 void FSE_freeCTable (FSE_CTable* ct) { free(ct); } 334 335 /* provides the minimum logSize to safely represent a distribution */ 336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 337 { 338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; 339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; 340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 341 assert(srcSize > 1); /* Not supported, RLE should be used instead */ 342 return minBits; 343 } 344 345 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) 346 { 347 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; 348 U32 tableLog = maxTableLog; 349 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); 350 assert(srcSize > 1); /* Not supported, RLE should be used instead */ 351 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 352 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ 353 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ 354 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; 355 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; 356 return tableLog; 357 } 358 359 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 360 { 361 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); 362 } 363 364 365 /* Secondary normalization method. 366 To be used when primary method fails. */ 367 368 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) 369 { 370 short const NOT_YET_ASSIGNED = -2; 371 U32 s; 372 U32 distributed = 0; 373 U32 ToDistribute; 374 375 /* Init */ 376 U32 const lowThreshold = (U32)(total >> tableLog); 377 U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); 378 379 for (s=0; s<=maxSymbolValue; s++) { 380 if (count[s] == 0) { 381 norm[s]=0; 382 continue; 383 } 384 if (count[s] <= lowThreshold) { 385 norm[s] = -1; 386 distributed++; 387 total -= count[s]; 388 continue; 389 } 390 if (count[s] <= lowOne) { 391 norm[s] = 1; 392 distributed++; 393 total -= count[s]; 394 continue; 395 } 396 397 norm[s]=NOT_YET_ASSIGNED; 398 } 399 ToDistribute = (1 << tableLog) - distributed; 400 401 if (ToDistribute == 0) 402 return 0; 403 404 if ((total / ToDistribute) > lowOne) { 405 /* risk of rounding to zero */ 406 lowOne = (U32)((total * 3) / (ToDistribute * 2)); 407 for (s=0; s<=maxSymbolValue; s++) { 408 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { 409 norm[s] = 1; 410 distributed++; 411 total -= count[s]; 412 continue; 413 } } 414 ToDistribute = (1 << tableLog) - distributed; 415 } 416 417 if (distributed == maxSymbolValue+1) { 418 /* all values are pretty poor; 419 probably incompressible data (should have already been detected); 420 find max, then give all remaining points to max */ 421 U32 maxV = 0, maxC = 0; 422 for (s=0; s<=maxSymbolValue; s++) 423 if (count[s] > maxC) { maxV=s; maxC=count[s]; } 424 norm[maxV] += (short)ToDistribute; 425 return 0; 426 } 427 428 if (total == 0) { 429 /* all of the symbols were low enough for the lowOne or lowThreshold */ 430 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) 431 if (norm[s] > 0) { ToDistribute--; norm[s]++; } 432 return 0; 433 } 434 435 { U64 const vStepLog = 62 - tableLog; 436 U64 const mid = (1ULL << (vStepLog-1)) - 1; 437 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ 438 U64 tmpTotal = mid; 439 for (s=0; s<=maxSymbolValue; s++) { 440 if (norm[s]==NOT_YET_ASSIGNED) { 441 U64 const end = tmpTotal + (count[s] * rStep); 442 U32 const sStart = (U32)(tmpTotal >> vStepLog); 443 U32 const sEnd = (U32)(end >> vStepLog); 444 U32 const weight = sEnd - sStart; 445 if (weight < 1) 446 return ERROR(GENERIC); 447 norm[s] = (short)weight; 448 tmpTotal = end; 449 } } } 450 451 return 0; 452 } 453 454 455 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, 456 const unsigned* count, size_t total, 457 unsigned maxSymbolValue) 458 { 459 /* Sanity checks */ 460 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 461 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ 462 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ 463 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ 464 465 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; 466 U64 const scale = 62 - tableLog; 467 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ 468 U64 const vStep = 1ULL<<(scale-20); 469 int stillToDistribute = 1<<tableLog; 470 unsigned s; 471 unsigned largest=0; 472 short largestP=0; 473 U32 lowThreshold = (U32)(total >> tableLog); 474 475 for (s=0; s<=maxSymbolValue; s++) { 476 if (count[s] == total) return 0; /* rle special case */ 477 if (count[s] == 0) { normalizedCounter[s]=0; continue; } 478 if (count[s] <= lowThreshold) { 479 normalizedCounter[s] = -1; 480 stillToDistribute--; 481 } else { 482 short proba = (short)((count[s]*step) >> scale); 483 if (proba<8) { 484 U64 restToBeat = vStep * rtbTable[proba]; 485 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; 486 } 487 if (proba > largestP) { largestP=proba; largest=s; } 488 normalizedCounter[s] = proba; 489 stillToDistribute -= proba; 490 } } 491 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { 492 /* corner case, need another normalization method */ 493 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); 494 if (FSE_isError(errorCode)) return errorCode; 495 } 496 else normalizedCounter[largest] += (short)stillToDistribute; 497 } 498 499 #if 0 500 { /* Print Table (debug) */ 501 U32 s; 502 U32 nTotal = 0; 503 for (s=0; s<=maxSymbolValue; s++) 504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); 505 for (s=0; s<=maxSymbolValue; s++) 506 nTotal += abs(normalizedCounter[s]); 507 if (nTotal != (1U<<tableLog)) 508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); 509 getchar(); 510 } 511 #endif 512 513 return tableLog; 514 } 515 516 517 /* fake FSE_CTable, for raw (uncompressed) input */ 518 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) 519 { 520 const unsigned tableSize = 1 << nbBits; 521 const unsigned tableMask = tableSize - 1; 522 const unsigned maxSymbolValue = tableMask; 523 void* const ptr = ct; 524 U16* const tableU16 = ( (U16*) ptr) + 2; 525 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ 526 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 527 unsigned s; 528 529 /* Sanity checks */ 530 if (nbBits < 1) return ERROR(GENERIC); /* min size */ 531 532 /* header */ 533 tableU16[-2] = (U16) nbBits; 534 tableU16[-1] = (U16) maxSymbolValue; 535 536 /* Build table */ 537 for (s=0; s<tableSize; s++) 538 tableU16[s] = (U16)(tableSize + s); 539 540 /* Build Symbol Transformation Table */ 541 { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); 542 for (s=0; s<=maxSymbolValue; s++) { 543 symbolTT[s].deltaNbBits = deltaNbBits; 544 symbolTT[s].deltaFindState = s-1; 545 } } 546 547 return 0; 548 } 549 550 /* fake FSE_CTable, for rle input (always same symbol) */ 551 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) 552 { 553 void* ptr = ct; 554 U16* tableU16 = ( (U16*) ptr) + 2; 555 void* FSCTptr = (U32*)ptr + 2; 556 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; 557 558 /* header */ 559 tableU16[-2] = (U16) 0; 560 tableU16[-1] = (U16) symbolValue; 561 562 /* Build table */ 563 tableU16[0] = 0; 564 tableU16[1] = 0; /* just in case */ 565 566 /* Build Symbol Transformation Table */ 567 symbolTT[symbolValue].deltaNbBits = 0; 568 symbolTT[symbolValue].deltaFindState = 0; 569 570 return 0; 571 } 572 573 574 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, 575 const void* src, size_t srcSize, 576 const FSE_CTable* ct, const unsigned fast) 577 { 578 const BYTE* const istart = (const BYTE*) src; 579 const BYTE* const iend = istart + srcSize; 580 const BYTE* ip=iend; 581 582 BIT_CStream_t bitC; 583 FSE_CState_t CState1, CState2; 584 585 /* init */ 586 if (srcSize <= 2) return 0; 587 { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); 588 if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } 589 590 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) 591 592 if (srcSize & 1) { 593 FSE_initCState2(&CState1, ct, *--ip); 594 FSE_initCState2(&CState2, ct, *--ip); 595 FSE_encodeSymbol(&bitC, &CState1, *--ip); 596 FSE_FLUSHBITS(&bitC); 597 } else { 598 FSE_initCState2(&CState2, ct, *--ip); 599 FSE_initCState2(&CState1, ct, *--ip); 600 } 601 602 /* join to mod 4 */ 603 srcSize -= 2; 604 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ 605 FSE_encodeSymbol(&bitC, &CState2, *--ip); 606 FSE_encodeSymbol(&bitC, &CState1, *--ip); 607 FSE_FLUSHBITS(&bitC); 608 } 609 610 /* 2 or 4 encoding per loop */ 611 while ( ip>istart ) { 612 613 FSE_encodeSymbol(&bitC, &CState2, *--ip); 614 615 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ 616 FSE_FLUSHBITS(&bitC); 617 618 FSE_encodeSymbol(&bitC, &CState1, *--ip); 619 620 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ 621 FSE_encodeSymbol(&bitC, &CState2, *--ip); 622 FSE_encodeSymbol(&bitC, &CState1, *--ip); 623 } 624 625 FSE_FLUSHBITS(&bitC); 626 } 627 628 FSE_flushCState(&bitC, &CState2); 629 FSE_flushCState(&bitC, &CState1); 630 return BIT_closeCStream(&bitC); 631 } 632 633 size_t FSE_compress_usingCTable (void* dst, size_t dstSize, 634 const void* src, size_t srcSize, 635 const FSE_CTable* ct) 636 { 637 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); 638 639 if (fast) 640 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); 641 else 642 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); 643 } 644 645 646 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } 647 648 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 649 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 650 651 /* FSE_compress_wksp() : 652 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 653 * `wkspSize` size must be `(1<<tableLog)`. 654 */ 655 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) 656 { 657 BYTE* const ostart = (BYTE*) dst; 658 BYTE* op = ostart; 659 BYTE* const oend = ostart + dstSize; 660 661 U32 count[FSE_MAX_SYMBOL_VALUE+1]; 662 S16 norm[FSE_MAX_SYMBOL_VALUE+1]; 663 FSE_CTable* CTable = (FSE_CTable*)workSpace; 664 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); 665 void* scratchBuffer = (void*)(CTable + CTableSize); 666 size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); 667 668 /* init conditions */ 669 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); 670 if (srcSize <= 1) return 0; /* Not compressible */ 671 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; 673 674 /* Scan input and build symbol stats */ 675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); 676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ 677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ 679 } 680 681 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); 682 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); 683 684 /* Write table description header */ 685 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); 686 op += nc_err; 687 } 688 689 /* Compress */ 690 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); 691 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); 692 if (cSize == 0) return 0; /* not enough space for compressed data */ 693 op += cSize; 694 } 695 696 /* check compressibility */ 697 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; 698 699 return op-ostart; 700 } 701 702 typedef struct { 703 FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; 704 BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; 705 } fseWkspMax_t; 706 707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) 708 { 709 fseWkspMax_t scratchBuffer; 710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ 711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); 712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); 713 } 714 715 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 716 { 717 return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); 718 } 719 720 721 #endif /* FSE_COMMONDEFS_ONLY */ 722