xref: /freebsd/sys/contrib/zstd/lib/compress/fse_compress.c (revision bdafb02fcb88389fd1ab684cfe734cb429d35618)
1 /* ******************************************************************
2    FSE : Finite State Entropy encoder
3    Copyright (C) 2013-2015, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Includes
37 ****************************************************************/
38 #include <stdlib.h>     /* malloc, free, qsort */
39 #include <string.h>     /* memcpy, memset */
40 #include <stdio.h>      /* printf (debug) */
41 #include "bitstream.h"
42 #include "compiler.h"
43 #define FSE_STATIC_LINKING_ONLY
44 #include "fse.h"
45 #include "error_private.h"
46 
47 
48 /* **************************************************************
49 *  Error Management
50 ****************************************************************/
51 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
53 
54 
55 /* **************************************************************
56 *  Templates
57 ****************************************************************/
58 /*
59   designed to be included
60   for type-specific functions (template emulation in C)
61   Objective is to write these functions only once, for improved maintenance
62 */
63 
64 /* safety checks */
65 #ifndef FSE_FUNCTION_EXTENSION
66 #  error "FSE_FUNCTION_EXTENSION must be defined"
67 #endif
68 #ifndef FSE_FUNCTION_TYPE
69 #  error "FSE_FUNCTION_TYPE must be defined"
70 #endif
71 
72 /* Function names */
73 #define FSE_CAT(X,Y) X##Y
74 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
75 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
76 
77 
78 /* Function templates */
79 
80 /* FSE_buildCTable_wksp() :
81  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
82  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84  */
85 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
86 {
87     U32 const tableSize = 1 << tableLog;
88     U32 const tableMask = tableSize - 1;
89     void* const ptr = ct;
90     U16* const tableU16 = ( (U16*) ptr) + 2;
91     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
92     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
93     U32 const step = FSE_TABLESTEP(tableSize);
94     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
95 
96     FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
97     U32 highThreshold = tableSize-1;
98 
99     /* CTable header */
100     if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101     tableU16[-2] = (U16) tableLog;
102     tableU16[-1] = (U16) maxSymbolValue;
103 
104     /* For explanations on how to distribute symbol values over the table :
105     *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
106 
107     /* symbol start positions */
108     {   U32 u;
109         cumul[0] = 0;
110         for (u=1; u<=maxSymbolValue+1; u++) {
111             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
112                 cumul[u] = cumul[u-1] + 1;
113                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
114             } else {
115                 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
116         }   }
117         cumul[maxSymbolValue+1] = tableSize+1;
118     }
119 
120     /* Spread symbols */
121     {   U32 position = 0;
122         U32 symbol;
123         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
124             int nbOccurences;
125             for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
126                 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
127                 position = (position + step) & tableMask;
128                 while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
129         }   }
130 
131         if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
132     }
133 
134     /* Build table */
135     {   U32 u; for (u=0; u<tableSize; u++) {
136         FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
137         tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
138     }   }
139 
140     /* Build Symbol Transformation Table */
141     {   unsigned total = 0;
142         unsigned s;
143         for (s=0; s<=maxSymbolValue; s++) {
144             switch (normalizedCounter[s])
145             {
146             case  0: break;
147 
148             case -1:
149             case  1:
150                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
151                 symbolTT[s].deltaFindState = total - 1;
152                 total ++;
153                 break;
154             default :
155                 {
156                     U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
157                     U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
158                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
159                     symbolTT[s].deltaFindState = total - normalizedCounter[s];
160                     total +=  normalizedCounter[s];
161     }   }   }   }
162 
163     return 0;
164 }
165 
166 
167 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
168 {
169     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
170     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
171 }
172 
173 
174 
175 #ifndef FSE_COMMONDEFS_ONLY
176 
177 /*-**************************************************************
178 *  FSE NCount encoding-decoding
179 ****************************************************************/
180 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181 {
182     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
183     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
184 }
185 
186 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
187                                        const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
188                                        unsigned writeIsSafe)
189 {
190     BYTE* const ostart = (BYTE*) header;
191     BYTE* out = ostart;
192     BYTE* const oend = ostart + headerBufferSize;
193     int nbBits;
194     const int tableSize = 1 << tableLog;
195     int remaining;
196     int threshold;
197     U32 bitStream;
198     int bitCount;
199     unsigned charnum = 0;
200     int previous0 = 0;
201 
202     bitStream = 0;
203     bitCount  = 0;
204     /* Table Size */
205     bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
206     bitCount  += 4;
207 
208     /* Init */
209     remaining = tableSize+1;   /* +1 for extra accuracy */
210     threshold = tableSize;
211     nbBits = tableLog+1;
212 
213     while (remaining>1) {  /* stops at 1 */
214         if (previous0) {
215             unsigned start = charnum;
216             while (!normalizedCounter[charnum]) charnum++;
217             while (charnum >= start+24) {
218                 start+=24;
219                 bitStream += 0xFFFFU << bitCount;
220                 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
221                 out[0] = (BYTE) bitStream;
222                 out[1] = (BYTE)(bitStream>>8);
223                 out+=2;
224                 bitStream>>=16;
225             }
226             while (charnum >= start+3) {
227                 start+=3;
228                 bitStream += 3 << bitCount;
229                 bitCount += 2;
230             }
231             bitStream += (charnum-start) << bitCount;
232             bitCount += 2;
233             if (bitCount>16) {
234                 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
235                 out[0] = (BYTE)bitStream;
236                 out[1] = (BYTE)(bitStream>>8);
237                 out += 2;
238                 bitStream >>= 16;
239                 bitCount -= 16;
240         }   }
241         {   int count = normalizedCounter[charnum++];
242             int const max = (2*threshold-1)-remaining;
243             remaining -= count < 0 ? -count : count;
244             count++;   /* +1 for extra accuracy */
245             if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
246             bitStream += count << bitCount;
247             bitCount  += nbBits;
248             bitCount  -= (count<max);
249             previous0  = (count==1);
250             if (remaining<1) return ERROR(GENERIC);
251             while (remaining<threshold) { nbBits--; threshold>>=1; }
252         }
253         if (bitCount>16) {
254             if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
255             out[0] = (BYTE)bitStream;
256             out[1] = (BYTE)(bitStream>>8);
257             out += 2;
258             bitStream >>= 16;
259             bitCount -= 16;
260     }   }
261 
262     /* flush remaining bitStream */
263     if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
264     out[0] = (BYTE)bitStream;
265     out[1] = (BYTE)(bitStream>>8);
266     out+= (bitCount+7) /8;
267 
268     if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
269 
270     return (out-ostart);
271 }
272 
273 
274 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
275 {
276     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
277     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
278 
279     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
280         return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
281 
282     return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
283 }
284 
285 
286 
287 /*-**************************************************************
288 *  Counting histogram
289 ****************************************************************/
290 /*! FSE_count_simple
291     This function counts byte values within `src`, and store the histogram into table `count`.
292     It doesn't use any additional memory.
293     But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294     For this reason, prefer using a table `count` with 256 elements.
295     @return : count of most numerous element.
296 */
297 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298                         const void* src, size_t srcSize)
299 {
300     const BYTE* ip = (const BYTE*)src;
301     const BYTE* const end = ip + srcSize;
302     unsigned maxSymbolValue = *maxSymbolValuePtr;
303     unsigned max=0;
304 
305     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307 
308     while (ip<end) {
309         assert(*ip <= maxSymbolValue);
310         count[*ip++]++;
311     }
312 
313     while (!count[maxSymbolValue]) maxSymbolValue--;
314     *maxSymbolValuePtr = maxSymbolValue;
315 
316     { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
317 
318     return (size_t)max;
319 }
320 
321 
322 /* FSE_count_parallel_wksp() :
323  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324  * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325  * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
326 static size_t FSE_count_parallel_wksp(
327                                 unsigned* count, unsigned* maxSymbolValuePtr,
328                                 const void* source, size_t sourceSize,
329                                 unsigned checkMax, unsigned* const workSpace)
330 {
331     const BYTE* ip = (const BYTE*)source;
332     const BYTE* const iend = ip+sourceSize;
333     unsigned maxSymbolValue = *maxSymbolValuePtr;
334     unsigned max=0;
335     U32* const Counting1 = workSpace;
336     U32* const Counting2 = Counting1 + 256;
337     U32* const Counting3 = Counting2 + 256;
338     U32* const Counting4 = Counting3 + 256;
339 
340     memset(workSpace, 0, 4*256*sizeof(unsigned));
341 
342     /* safety checks */
343     if (!sourceSize) {
344         memset(count, 0, maxSymbolValue + 1);
345         *maxSymbolValuePtr = 0;
346         return 0;
347     }
348     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
349 
350     /* by stripes of 16 bytes */
351     {   U32 cached = MEM_read32(ip); ip += 4;
352         while (ip < iend-15) {
353             U32 c = cached; cached = MEM_read32(ip); ip += 4;
354             Counting1[(BYTE) c     ]++;
355             Counting2[(BYTE)(c>>8) ]++;
356             Counting3[(BYTE)(c>>16)]++;
357             Counting4[       c>>24 ]++;
358             c = cached; cached = MEM_read32(ip); ip += 4;
359             Counting1[(BYTE) c     ]++;
360             Counting2[(BYTE)(c>>8) ]++;
361             Counting3[(BYTE)(c>>16)]++;
362             Counting4[       c>>24 ]++;
363             c = cached; cached = MEM_read32(ip); ip += 4;
364             Counting1[(BYTE) c     ]++;
365             Counting2[(BYTE)(c>>8) ]++;
366             Counting3[(BYTE)(c>>16)]++;
367             Counting4[       c>>24 ]++;
368             c = cached; cached = MEM_read32(ip); ip += 4;
369             Counting1[(BYTE) c     ]++;
370             Counting2[(BYTE)(c>>8) ]++;
371             Counting3[(BYTE)(c>>16)]++;
372             Counting4[       c>>24 ]++;
373         }
374         ip-=4;
375     }
376 
377     /* finish last symbols */
378     while (ip<iend) Counting1[*ip++]++;
379 
380     if (checkMax) {   /* verify stats will fit into destination table */
381         U32 s; for (s=255; s>maxSymbolValue; s--) {
382             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
383             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
384     }   }
385 
386     {   U32 s;
387         if (maxSymbolValue > 255) maxSymbolValue = 255;
388         for (s=0; s<=maxSymbolValue; s++) {
389             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390             if (count[s] > max) max = count[s];
391     }   }
392 
393     while (!count[maxSymbolValue]) maxSymbolValue--;
394     *maxSymbolValuePtr = maxSymbolValue;
395     return (size_t)max;
396 }
397 
398 /* FSE_countFast_wksp() :
399  * Same as FSE_countFast(), but using an externally provided scratch buffer.
400  * `workSpace` size must be table of >= `1024` unsigned */
401 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402                           const void* source, size_t sourceSize,
403                           unsigned* workSpace)
404 {
405     if (sourceSize < 1500) /* heuristic threshold */
406         return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
407     return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
408 }
409 
410 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
411 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
412                      const void* source, size_t sourceSize)
413 {
414     unsigned tmpCounters[1024];
415     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
416 }
417 
418 /* FSE_count_wksp() :
419  * Same as FSE_count(), but using an externally provided scratch buffer.
420  * `workSpace` size must be table of >= `1024` unsigned */
421 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
422                  const void* source, size_t sourceSize, unsigned* workSpace)
423 {
424     if (*maxSymbolValuePtr < 255)
425         return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
426     *maxSymbolValuePtr = 255;
427     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
428 }
429 
430 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
431                  const void* src, size_t srcSize)
432 {
433     unsigned tmpCounters[1024];
434     return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
435 }
436 
437 
438 
439 /*-**************************************************************
440 *  FSE Compression Code
441 ****************************************************************/
442 /*! FSE_sizeof_CTable() :
443     FSE_CTable is a variable size structure which contains :
444     `U16 tableLog;`
445     `U16 maxSymbolValue;`
446     `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
447     `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
448 Allocation is manual (C standard does not support variable-size structures).
449 */
450 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
451 {
452     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
453     return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
454 }
455 
456 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
457 {
458     size_t size;
459     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
460     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
461     return (FSE_CTable*)malloc(size);
462 }
463 
464 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
465 
466 /* provides the minimum logSize to safely represent a distribution */
467 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
468 {
469     U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
470     U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
471     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
472     assert(srcSize > 1); /* Not supported, RLE should be used instead */
473     return minBits;
474 }
475 
476 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
477 {
478     U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
479     U32 tableLog = maxTableLog;
480     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
481     assert(srcSize > 1); /* Not supported, RLE should be used instead */
482     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
483     if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
484     if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
485     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
486     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
487     return tableLog;
488 }
489 
490 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
491 {
492     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
493 }
494 
495 
496 /* Secondary normalization method.
497    To be used when primary method fails. */
498 
499 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
500 {
501     short const NOT_YET_ASSIGNED = -2;
502     U32 s;
503     U32 distributed = 0;
504     U32 ToDistribute;
505 
506     /* Init */
507     U32 const lowThreshold = (U32)(total >> tableLog);
508     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
509 
510     for (s=0; s<=maxSymbolValue; s++) {
511         if (count[s] == 0) {
512             norm[s]=0;
513             continue;
514         }
515         if (count[s] <= lowThreshold) {
516             norm[s] = -1;
517             distributed++;
518             total -= count[s];
519             continue;
520         }
521         if (count[s] <= lowOne) {
522             norm[s] = 1;
523             distributed++;
524             total -= count[s];
525             continue;
526         }
527 
528         norm[s]=NOT_YET_ASSIGNED;
529     }
530     ToDistribute = (1 << tableLog) - distributed;
531 
532     if ((total / ToDistribute) > lowOne) {
533         /* risk of rounding to zero */
534         lowOne = (U32)((total * 3) / (ToDistribute * 2));
535         for (s=0; s<=maxSymbolValue; s++) {
536             if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
537                 norm[s] = 1;
538                 distributed++;
539                 total -= count[s];
540                 continue;
541         }   }
542         ToDistribute = (1 << tableLog) - distributed;
543     }
544 
545     if (distributed == maxSymbolValue+1) {
546         /* all values are pretty poor;
547            probably incompressible data (should have already been detected);
548            find max, then give all remaining points to max */
549         U32 maxV = 0, maxC = 0;
550         for (s=0; s<=maxSymbolValue; s++)
551             if (count[s] > maxC) { maxV=s; maxC=count[s]; }
552         norm[maxV] += (short)ToDistribute;
553         return 0;
554     }
555 
556     if (total == 0) {
557         /* all of the symbols were low enough for the lowOne or lowThreshold */
558         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
559             if (norm[s] > 0) { ToDistribute--; norm[s]++; }
560         return 0;
561     }
562 
563     {   U64 const vStepLog = 62 - tableLog;
564         U64 const mid = (1ULL << (vStepLog-1)) - 1;
565         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
566         U64 tmpTotal = mid;
567         for (s=0; s<=maxSymbolValue; s++) {
568             if (norm[s]==NOT_YET_ASSIGNED) {
569                 U64 const end = tmpTotal + (count[s] * rStep);
570                 U32 const sStart = (U32)(tmpTotal >> vStepLog);
571                 U32 const sEnd = (U32)(end >> vStepLog);
572                 U32 const weight = sEnd - sStart;
573                 if (weight < 1)
574                     return ERROR(GENERIC);
575                 norm[s] = (short)weight;
576                 tmpTotal = end;
577     }   }   }
578 
579     return 0;
580 }
581 
582 
583 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
584                            const unsigned* count, size_t total,
585                            unsigned maxSymbolValue)
586 {
587     /* Sanity checks */
588     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
589     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
590     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
591     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
592 
593     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
594         U64 const scale = 62 - tableLog;
595         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
596         U64 const vStep = 1ULL<<(scale-20);
597         int stillToDistribute = 1<<tableLog;
598         unsigned s;
599         unsigned largest=0;
600         short largestP=0;
601         U32 lowThreshold = (U32)(total >> tableLog);
602 
603         for (s=0; s<=maxSymbolValue; s++) {
604             if (count[s] == total) return 0;   /* rle special case */
605             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
606             if (count[s] <= lowThreshold) {
607                 normalizedCounter[s] = -1;
608                 stillToDistribute--;
609             } else {
610                 short proba = (short)((count[s]*step) >> scale);
611                 if (proba<8) {
612                     U64 restToBeat = vStep * rtbTable[proba];
613                     proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
614                 }
615                 if (proba > largestP) { largestP=proba; largest=s; }
616                 normalizedCounter[s] = proba;
617                 stillToDistribute -= proba;
618         }   }
619         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
620             /* corner case, need another normalization method */
621             size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
622             if (FSE_isError(errorCode)) return errorCode;
623         }
624         else normalizedCounter[largest] += (short)stillToDistribute;
625     }
626 
627 #if 0
628     {   /* Print Table (debug) */
629         U32 s;
630         U32 nTotal = 0;
631         for (s=0; s<=maxSymbolValue; s++)
632             printf("%3i: %4i \n", s, normalizedCounter[s]);
633         for (s=0; s<=maxSymbolValue; s++)
634             nTotal += abs(normalizedCounter[s]);
635         if (nTotal != (1U<<tableLog))
636             printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
637         getchar();
638     }
639 #endif
640 
641     return tableLog;
642 }
643 
644 
645 /* fake FSE_CTable, for raw (uncompressed) input */
646 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
647 {
648     const unsigned tableSize = 1 << nbBits;
649     const unsigned tableMask = tableSize - 1;
650     const unsigned maxSymbolValue = tableMask;
651     void* const ptr = ct;
652     U16* const tableU16 = ( (U16*) ptr) + 2;
653     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
654     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
655     unsigned s;
656 
657     /* Sanity checks */
658     if (nbBits < 1) return ERROR(GENERIC);             /* min size */
659 
660     /* header */
661     tableU16[-2] = (U16) nbBits;
662     tableU16[-1] = (U16) maxSymbolValue;
663 
664     /* Build table */
665     for (s=0; s<tableSize; s++)
666         tableU16[s] = (U16)(tableSize + s);
667 
668     /* Build Symbol Transformation Table */
669     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
670         for (s=0; s<=maxSymbolValue; s++) {
671             symbolTT[s].deltaNbBits = deltaNbBits;
672             symbolTT[s].deltaFindState = s-1;
673     }   }
674 
675     return 0;
676 }
677 
678 /* fake FSE_CTable, for rle input (always same symbol) */
679 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
680 {
681     void* ptr = ct;
682     U16* tableU16 = ( (U16*) ptr) + 2;
683     void* FSCTptr = (U32*)ptr + 2;
684     FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
685 
686     /* header */
687     tableU16[-2] = (U16) 0;
688     tableU16[-1] = (U16) symbolValue;
689 
690     /* Build table */
691     tableU16[0] = 0;
692     tableU16[1] = 0;   /* just in case */
693 
694     /* Build Symbol Transformation Table */
695     symbolTT[symbolValue].deltaNbBits = 0;
696     symbolTT[symbolValue].deltaFindState = 0;
697 
698     return 0;
699 }
700 
701 
702 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
703                            const void* src, size_t srcSize,
704                            const FSE_CTable* ct, const unsigned fast)
705 {
706     const BYTE* const istart = (const BYTE*) src;
707     const BYTE* const iend = istart + srcSize;
708     const BYTE* ip=iend;
709 
710     BIT_CStream_t bitC;
711     FSE_CState_t CState1, CState2;
712 
713     /* init */
714     if (srcSize <= 2) return 0;
715     { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
716       if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
717 
718 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
719 
720     if (srcSize & 1) {
721         FSE_initCState2(&CState1, ct, *--ip);
722         FSE_initCState2(&CState2, ct, *--ip);
723         FSE_encodeSymbol(&bitC, &CState1, *--ip);
724         FSE_FLUSHBITS(&bitC);
725     } else {
726         FSE_initCState2(&CState2, ct, *--ip);
727         FSE_initCState2(&CState1, ct, *--ip);
728     }
729 
730     /* join to mod 4 */
731     srcSize -= 2;
732     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
733         FSE_encodeSymbol(&bitC, &CState2, *--ip);
734         FSE_encodeSymbol(&bitC, &CState1, *--ip);
735         FSE_FLUSHBITS(&bitC);
736     }
737 
738     /* 2 or 4 encoding per loop */
739     while ( ip>istart ) {
740 
741         FSE_encodeSymbol(&bitC, &CState2, *--ip);
742 
743         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
744             FSE_FLUSHBITS(&bitC);
745 
746         FSE_encodeSymbol(&bitC, &CState1, *--ip);
747 
748         if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
749             FSE_encodeSymbol(&bitC, &CState2, *--ip);
750             FSE_encodeSymbol(&bitC, &CState1, *--ip);
751         }
752 
753         FSE_FLUSHBITS(&bitC);
754     }
755 
756     FSE_flushCState(&bitC, &CState2);
757     FSE_flushCState(&bitC, &CState1);
758     return BIT_closeCStream(&bitC);
759 }
760 
761 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
762                            const void* src, size_t srcSize,
763                            const FSE_CTable* ct)
764 {
765     unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
766 
767     if (fast)
768         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
769     else
770         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
771 }
772 
773 
774 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
775 
776 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
777 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
778 
779 /* FSE_compress_wksp() :
780  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
781  * `wkspSize` size must be `(1<<tableLog)`.
782  */
783 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
784 {
785     BYTE* const ostart = (BYTE*) dst;
786     BYTE* op = ostart;
787     BYTE* const oend = ostart + dstSize;
788 
789     U32   count[FSE_MAX_SYMBOL_VALUE+1];
790     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
791     FSE_CTable* CTable = (FSE_CTable*)workSpace;
792     size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
793     void* scratchBuffer = (void*)(CTable + CTableSize);
794     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
795 
796     /* init conditions */
797     if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
798     if (srcSize <= 1) return 0;  /* Not compressible */
799     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
800     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
801 
802     /* Scan input and build symbol stats */
803     {   CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
804         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
805         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
806         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
807     }
808 
809     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
810     CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
811 
812     /* Write table description header */
813     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
814         op += nc_err;
815     }
816 
817     /* Compress */
818     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
819     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
820         if (cSize == 0) return 0;   /* not enough space for compressed data */
821         op += cSize;
822     }
823 
824     /* check compressibility */
825     if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
826 
827     return op-ostart;
828 }
829 
830 typedef struct {
831     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
832     BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
833 } fseWkspMax_t;
834 
835 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
836 {
837     fseWkspMax_t scratchBuffer;
838     FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
839     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
840     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
841 }
842 
843 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
844 {
845     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
846 }
847 
848 
849 #endif   /* FSE_COMMONDEFS_ONLY */
850