xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c (revision e64fe029e9d3ce476e77a478318e0c3cd201ff08)
1 /* ******************************************************************
2  * FSE : Finite State Entropy encoder
3  * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4  *
5  *  You can contact the author at :
6  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
8  *
9  * This source code is licensed under both the BSD-style license (found in the
10  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11  * in the COPYING file in the root directory of this source tree).
12  * You may select, at your option, one of the above-listed licenses.
13 ****************************************************************** */
14 
15 /* **************************************************************
16 *  Includes
17 ****************************************************************/
18 #include <stdlib.h>     /* malloc, free, qsort */
19 #include <string.h>     /* memcpy, memset */
20 #include "../common/compiler.h"
21 #include "../common/mem.h"        /* U32, U16, etc. */
22 #include "../common/debug.h"      /* assert, DEBUGLOG */
23 #include "hist.h"       /* HIST_count_wksp */
24 #include "../common/bitstream.h"
25 #define FSE_STATIC_LINKING_ONLY
26 #include "../common/fse.h"
27 #include "../common/error_private.h"
28 
29 
30 /* **************************************************************
31 *  Error Management
32 ****************************************************************/
33 #define FSE_isError ERR_isError
34 
35 
36 /* **************************************************************
37 *  Templates
38 ****************************************************************/
39 /*
40   designed to be included
41   for type-specific functions (template emulation in C)
42   Objective is to write these functions only once, for improved maintenance
43 */
44 
45 /* safety checks */
46 #ifndef FSE_FUNCTION_EXTENSION
47 #  error "FSE_FUNCTION_EXTENSION must be defined"
48 #endif
49 #ifndef FSE_FUNCTION_TYPE
50 #  error "FSE_FUNCTION_TYPE must be defined"
51 #endif
52 
53 /* Function names */
54 #define FSE_CAT(X,Y) X##Y
55 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
56 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
57 
58 
59 /* Function templates */
60 
61 /* FSE_buildCTable_wksp() :
62  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
63  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
64  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
65  */
66 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
67                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
68                             void* workSpace, size_t wkspSize)
69 {
70     U32 const tableSize = 1 << tableLog;
71     U32 const tableMask = tableSize - 1;
72     void* const ptr = ct;
73     U16* const tableU16 = ( (U16*) ptr) + 2;
74     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
75     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
76     U32 const step = FSE_TABLESTEP(tableSize);
77     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
78 
79     FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
80     U32 highThreshold = tableSize-1;
81 
82     /* CTable header */
83     if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
84     tableU16[-2] = (U16) tableLog;
85     tableU16[-1] = (U16) maxSymbolValue;
86     assert(tableLog < 16);   /* required for threshold strategy to work */
87 
88     /* For explanations on how to distribute symbol values over the table :
89      * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
90 
91      #ifdef __clang_analyzer__
92      memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
93      #endif
94 
95     /* symbol start positions */
96     {   U32 u;
97         cumul[0] = 0;
98         for (u=1; u <= maxSymbolValue+1; u++) {
99             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
100                 cumul[u] = cumul[u-1] + 1;
101                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
102             } else {
103                 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
104         }   }
105         cumul[maxSymbolValue+1] = tableSize+1;
106     }
107 
108     /* Spread symbols */
109     {   U32 position = 0;
110         U32 symbol;
111         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
112             int nbOccurrences;
113             int const freq = normalizedCounter[symbol];
114             for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
115                 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
116                 position = (position + step) & tableMask;
117                 while (position > highThreshold)
118                     position = (position + step) & tableMask;   /* Low proba area */
119         }   }
120 
121         assert(position==0);  /* Must have initialized all positions */
122     }
123 
124     /* Build table */
125     {   U32 u; for (u=0; u<tableSize; u++) {
126         FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
127         tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
128     }   }
129 
130     /* Build Symbol Transformation Table */
131     {   unsigned total = 0;
132         unsigned s;
133         for (s=0; s<=maxSymbolValue; s++) {
134             switch (normalizedCounter[s])
135             {
136             case  0:
137                 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
138                 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
139                 break;
140 
141             case -1:
142             case  1:
143                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
144                 symbolTT[s].deltaFindState = total - 1;
145                 total ++;
146                 break;
147             default :
148                 {
149                     U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
150                     U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
151                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
152                     symbolTT[s].deltaFindState = total - normalizedCounter[s];
153                     total +=  normalizedCounter[s];
154     }   }   }   }
155 
156 #if 0  /* debug : symbol costs */
157     DEBUGLOG(5, "\n --- table statistics : ");
158     {   U32 symbol;
159         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
160             DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
161                 symbol, normalizedCounter[symbol],
162                 FSE_getMaxNbBits(symbolTT, symbol),
163                 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
164         }
165     }
166 #endif
167 
168     return 0;
169 }
170 
171 
172 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
173 {
174     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
175     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
176 }
177 
178 
179 
180 #ifndef FSE_COMMONDEFS_ONLY
181 
182 
183 /*-**************************************************************
184 *  FSE NCount encoding
185 ****************************************************************/
186 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
187 {
188     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
189     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
190 }
191 
192 static size_t
193 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
194                    const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
195                          unsigned writeIsSafe)
196 {
197     BYTE* const ostart = (BYTE*) header;
198     BYTE* out = ostart;
199     BYTE* const oend = ostart + headerBufferSize;
200     int nbBits;
201     const int tableSize = 1 << tableLog;
202     int remaining;
203     int threshold;
204     U32 bitStream = 0;
205     int bitCount = 0;
206     unsigned symbol = 0;
207     unsigned const alphabetSize = maxSymbolValue + 1;
208     int previousIs0 = 0;
209 
210     /* Table Size */
211     bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
212     bitCount  += 4;
213 
214     /* Init */
215     remaining = tableSize+1;   /* +1 for extra accuracy */
216     threshold = tableSize;
217     nbBits = tableLog+1;
218 
219     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
220         if (previousIs0) {
221             unsigned start = symbol;
222             while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
223             if (symbol == alphabetSize) break;   /* incorrect distribution */
224             while (symbol >= start+24) {
225                 start+=24;
226                 bitStream += 0xFFFFU << bitCount;
227                 if ((!writeIsSafe) && (out > oend-2))
228                     return ERROR(dstSize_tooSmall);   /* Buffer overflow */
229                 out[0] = (BYTE) bitStream;
230                 out[1] = (BYTE)(bitStream>>8);
231                 out+=2;
232                 bitStream>>=16;
233             }
234             while (symbol >= start+3) {
235                 start+=3;
236                 bitStream += 3 << bitCount;
237                 bitCount += 2;
238             }
239             bitStream += (symbol-start) << bitCount;
240             bitCount += 2;
241             if (bitCount>16) {
242                 if ((!writeIsSafe) && (out > oend - 2))
243                     return ERROR(dstSize_tooSmall);   /* Buffer overflow */
244                 out[0] = (BYTE)bitStream;
245                 out[1] = (BYTE)(bitStream>>8);
246                 out += 2;
247                 bitStream >>= 16;
248                 bitCount -= 16;
249         }   }
250         {   int count = normalizedCounter[symbol++];
251             int const max = (2*threshold-1) - remaining;
252             remaining -= count < 0 ? -count : count;
253             count++;   /* +1 for extra accuracy */
254             if (count>=threshold)
255                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
256             bitStream += count << bitCount;
257             bitCount  += nbBits;
258             bitCount  -= (count<max);
259             previousIs0  = (count==1);
260             if (remaining<1) return ERROR(GENERIC);
261             while (remaining<threshold) { nbBits--; threshold>>=1; }
262         }
263         if (bitCount>16) {
264             if ((!writeIsSafe) && (out > oend - 2))
265                 return ERROR(dstSize_tooSmall);   /* Buffer overflow */
266             out[0] = (BYTE)bitStream;
267             out[1] = (BYTE)(bitStream>>8);
268             out += 2;
269             bitStream >>= 16;
270             bitCount -= 16;
271     }   }
272 
273     if (remaining != 1)
274         return ERROR(GENERIC);  /* incorrect normalized distribution */
275     assert(symbol <= alphabetSize);
276 
277     /* flush remaining bitStream */
278     if ((!writeIsSafe) && (out > oend - 2))
279         return ERROR(dstSize_tooSmall);   /* Buffer overflow */
280     out[0] = (BYTE)bitStream;
281     out[1] = (BYTE)(bitStream>>8);
282     out+= (bitCount+7) /8;
283 
284     return (out-ostart);
285 }
286 
287 
288 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
289                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
290 {
291     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
292     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
293 
294     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
295         return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
296 
297     return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
298 }
299 
300 
301 /*-**************************************************************
302 *  FSE Compression Code
303 ****************************************************************/
304 
305 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
306 {
307     size_t size __attribute__ ((unused));
308     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
309     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
310     return (FSE_CTable*)malloc(size);
311 }
312 
313 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
314 
315 /* provides the minimum logSize to safely represent a distribution */
316 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
317 {
318     U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
319     U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
320     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
321     assert(srcSize > 1); /* Not supported, RLE should be used instead */
322     return minBits;
323 }
324 
325 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
326 {
327     U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
328     U32 tableLog = maxTableLog;
329     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
330     assert(srcSize > 1); /* Not supported, RLE should be used instead */
331     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
332     if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
333     if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
334     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
335     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
336     return tableLog;
337 }
338 
339 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
340 {
341     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
342 }
343 
344 
345 /* Secondary normalization method.
346    To be used when primary method fails. */
347 
348 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
349 {
350     short const NOT_YET_ASSIGNED = -2;
351     U32 s;
352     U32 distributed = 0;
353     U32 ToDistribute;
354 
355     /* Init */
356     U32 const lowThreshold = (U32)(total >> tableLog);
357     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
358 
359     for (s=0; s<=maxSymbolValue; s++) {
360         if (count[s] == 0) {
361             norm[s]=0;
362             continue;
363         }
364         if (count[s] <= lowThreshold) {
365             norm[s] = -1;
366             distributed++;
367             total -= count[s];
368             continue;
369         }
370         if (count[s] <= lowOne) {
371             norm[s] = 1;
372             distributed++;
373             total -= count[s];
374             continue;
375         }
376 
377         norm[s]=NOT_YET_ASSIGNED;
378     }
379     ToDistribute = (1 << tableLog) - distributed;
380 
381     if (ToDistribute == 0)
382         return 0;
383 
384     if ((total / ToDistribute) > lowOne) {
385         /* risk of rounding to zero */
386         lowOne = (U32)((total * 3) / (ToDistribute * 2));
387         for (s=0; s<=maxSymbolValue; s++) {
388             if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
389                 norm[s] = 1;
390                 distributed++;
391                 total -= count[s];
392                 continue;
393         }   }
394         ToDistribute = (1 << tableLog) - distributed;
395     }
396 
397     if (distributed == maxSymbolValue+1) {
398         /* all values are pretty poor;
399            probably incompressible data (should have already been detected);
400            find max, then give all remaining points to max */
401         U32 maxV = 0, maxC = 0;
402         for (s=0; s<=maxSymbolValue; s++)
403             if (count[s] > maxC) { maxV=s; maxC=count[s]; }
404         norm[maxV] += (short)ToDistribute;
405         return 0;
406     }
407 
408     if (total == 0) {
409         /* all of the symbols were low enough for the lowOne or lowThreshold */
410         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
411             if (norm[s] > 0) { ToDistribute--; norm[s]++; }
412         return 0;
413     }
414 
415     {   U64 const vStepLog = 62 - tableLog;
416         U64 const mid = (1ULL << (vStepLog-1)) - 1;
417         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
418         U64 tmpTotal = mid;
419         for (s=0; s<=maxSymbolValue; s++) {
420             if (norm[s]==NOT_YET_ASSIGNED) {
421                 U64 const end = tmpTotal + (count[s] * rStep);
422                 U32 const sStart = (U32)(tmpTotal >> vStepLog);
423                 U32 const sEnd = (U32)(end >> vStepLog);
424                 U32 const weight = sEnd - sStart;
425                 if (weight < 1)
426                     return ERROR(GENERIC);
427                 norm[s] = (short)weight;
428                 tmpTotal = end;
429     }   }   }
430 
431     return 0;
432 }
433 
434 
435 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
436                            const unsigned* count, size_t total,
437                            unsigned maxSymbolValue)
438 {
439     /* Sanity checks */
440     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
441     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
442     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
443     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
444 
445     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
446         U64 const scale = 62 - tableLog;
447         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
448         U64 const vStep = 1ULL<<(scale-20);
449         int stillToDistribute = 1<<tableLog;
450         unsigned s;
451         unsigned largest=0;
452         short largestP=0;
453         U32 lowThreshold = (U32)(total >> tableLog);
454 
455         for (s=0; s<=maxSymbolValue; s++) {
456             if (count[s] == total) return 0;   /* rle special case */
457             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
458             if (count[s] <= lowThreshold) {
459                 normalizedCounter[s] = -1;
460                 stillToDistribute--;
461             } else {
462                 short proba = (short)((count[s]*step) >> scale);
463                 if (proba<8) {
464                     U64 restToBeat = vStep * rtbTable[proba];
465                     proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
466                 }
467                 if (proba > largestP) { largestP=proba; largest=s; }
468                 normalizedCounter[s] = proba;
469                 stillToDistribute -= proba;
470         }   }
471         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
472             /* corner case, need another normalization method */
473             size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
474             if (FSE_isError(errorCode)) return errorCode;
475         }
476         else normalizedCounter[largest] += (short)stillToDistribute;
477     }
478 
479 #if 0
480     {   /* Print Table (debug) */
481         U32 s;
482         U32 nTotal = 0;
483         for (s=0; s<=maxSymbolValue; s++)
484             RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
485         for (s=0; s<=maxSymbolValue; s++)
486             nTotal += abs(normalizedCounter[s]);
487         if (nTotal != (1U<<tableLog))
488             RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
489         getchar();
490     }
491 #endif
492 
493     return tableLog;
494 }
495 
496 
497 /* fake FSE_CTable, for raw (uncompressed) input */
498 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
499 {
500     const unsigned tableSize = 1 << nbBits;
501     const unsigned tableMask = tableSize - 1;
502     const unsigned maxSymbolValue = tableMask;
503     void* const ptr = ct;
504     U16* const tableU16 = ( (U16*) ptr) + 2;
505     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
506     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
507     unsigned s;
508 
509     /* Sanity checks */
510     if (nbBits < 1) return ERROR(GENERIC);             /* min size */
511 
512     /* header */
513     tableU16[-2] = (U16) nbBits;
514     tableU16[-1] = (U16) maxSymbolValue;
515 
516     /* Build table */
517     for (s=0; s<tableSize; s++)
518         tableU16[s] = (U16)(tableSize + s);
519 
520     /* Build Symbol Transformation Table */
521     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
522         for (s=0; s<=maxSymbolValue; s++) {
523             symbolTT[s].deltaNbBits = deltaNbBits;
524             symbolTT[s].deltaFindState = s-1;
525     }   }
526 
527     return 0;
528 }
529 
530 /* fake FSE_CTable, for rle input (always same symbol) */
531 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
532 {
533     void* ptr = ct;
534     U16* tableU16 = ( (U16*) ptr) + 2;
535     void* FSCTptr = (U32*)ptr + 2;
536     FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
537 
538     /* header */
539     tableU16[-2] = (U16) 0;
540     tableU16[-1] = (U16) symbolValue;
541 
542     /* Build table */
543     tableU16[0] = 0;
544     tableU16[1] = 0;   /* just in case */
545 
546     /* Build Symbol Transformation Table */
547     symbolTT[symbolValue].deltaNbBits = 0;
548     symbolTT[symbolValue].deltaFindState = 0;
549 
550     return 0;
551 }
552 
553 
554 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
555                            const void* src, size_t srcSize,
556                            const FSE_CTable* ct, const unsigned fast)
557 {
558     const BYTE* const istart = (const BYTE*) src;
559     const BYTE* const iend = istart + srcSize;
560     const BYTE* ip=iend;
561 
562     BIT_CStream_t bitC;
563     FSE_CState_t CState1, CState2;
564 
565     /* init */
566     if (srcSize <= 2) return 0;
567     { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
568       if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
569 
570 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
571 
572     if (srcSize & 1) {
573         FSE_initCState2(&CState1, ct, *--ip);
574         FSE_initCState2(&CState2, ct, *--ip);
575         FSE_encodeSymbol(&bitC, &CState1, *--ip);
576         FSE_FLUSHBITS(&bitC);
577     } else {
578         FSE_initCState2(&CState2, ct, *--ip);
579         FSE_initCState2(&CState1, ct, *--ip);
580     }
581 
582     /* join to mod 4 */
583     srcSize -= 2;
584     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
585         FSE_encodeSymbol(&bitC, &CState2, *--ip);
586         FSE_encodeSymbol(&bitC, &CState1, *--ip);
587         FSE_FLUSHBITS(&bitC);
588     }
589 
590     /* 2 or 4 encoding per loop */
591     while ( ip>istart ) {
592 
593         FSE_encodeSymbol(&bitC, &CState2, *--ip);
594 
595         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
596             FSE_FLUSHBITS(&bitC);
597 
598         FSE_encodeSymbol(&bitC, &CState1, *--ip);
599 
600         if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
601             FSE_encodeSymbol(&bitC, &CState2, *--ip);
602             FSE_encodeSymbol(&bitC, &CState1, *--ip);
603         }
604 
605         FSE_FLUSHBITS(&bitC);
606     }
607 
608     FSE_flushCState(&bitC, &CState2);
609     FSE_flushCState(&bitC, &CState1);
610     return BIT_closeCStream(&bitC);
611 }
612 
613 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
614                            const void* src, size_t srcSize,
615                            const FSE_CTable* ct)
616 {
617     unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
618 
619     if (fast)
620         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
621     else
622         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
623 }
624 
625 
626 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
627 
628 /* FSE_compress_wksp() :
629  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
630  * `wkspSize` size must be `(1<<tableLog)`.
631  */
632 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
633 {
634     BYTE* const ostart = (BYTE*) dst;
635     BYTE* op = ostart;
636     BYTE* const oend = ostart + dstSize;
637 
638     unsigned count[FSE_MAX_SYMBOL_VALUE+1];
639     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
640     FSE_CTable* CTable = (FSE_CTable*)workSpace;
641     size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
642     void* scratchBuffer = (void*)(CTable + CTableSize);
643     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
644 
645     /* init conditions */
646     if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
647     if (srcSize <= 1) return 0;  /* Not compressible */
648     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
649     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
650 
651     /* Scan input and build symbol stats */
652     {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
653         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
654         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
655         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
656     }
657 
658     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
659     CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
660 
661     /* Write table description header */
662     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
663         op += nc_err;
664     }
665 
666     /* Compress */
667     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
668     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
669         if (cSize == 0) return 0;   /* not enough space for compressed data */
670         op += cSize;
671     }
672 
673     /* check compressibility */
674     if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
675 
676     return op-ostart;
677 }
678 
679 typedef struct {
680     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
681     BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
682 } fseWkspMax_t;
683 
684 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
685 {
686     fseWkspMax_t scratchBuffer;
687     DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
688     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
689     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
690 }
691 
692 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
693 {
694     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
695 }
696 
697 
698 #endif   /* FSE_COMMONDEFS_ONLY */
699