xref: /freebsd/sys/contrib/zstd/lib/compress/hist.c (revision 31d62a73c2e6ac0ff413a7a17700ffc7dce254ef)
1 /* ******************************************************************
2    hist : Histogram functions
3    part of Finite State Entropy project
4    Copyright (C) 2013-present, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31     You can contact the author at :
32     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33     - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35 
36 /* --- dependencies --- */
37 #include "mem.h"             /* U32, BYTE, etc. */
38 #include "debug.h"           /* assert, DEBUGLOG */
39 #include "error_private.h"   /* ERROR */
40 #include "hist.h"
41 
42 
43 /* --- Error management --- */
44 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
45 
46 /*-**************************************************************
47  *  Histogram functions
48  ****************************************************************/
49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50                            const void* src, size_t srcSize)
51 {
52     const BYTE* ip = (const BYTE*)src;
53     const BYTE* const end = ip + srcSize;
54     unsigned maxSymbolValue = *maxSymbolValuePtr;
55     unsigned largestCount=0;
56 
57     memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59 
60     while (ip<end) {
61         assert(*ip <= maxSymbolValue);
62         count[*ip++]++;
63     }
64 
65     while (!count[maxSymbolValue]) maxSymbolValue--;
66     *maxSymbolValuePtr = maxSymbolValue;
67 
68     {   U32 s;
69         for (s=0; s<=maxSymbolValue; s++)
70             if (count[s] > largestCount) largestCount = count[s];
71     }
72 
73     return largestCount;
74 }
75 
76 
77 /* HIST_count_parallel_wksp() :
78  * store histogram into 4 intermediate tables, recombined at the end.
79  * this design makes better use of OoO cpus,
80  * and is noticeably faster when some values are heavily repeated.
81  * But it needs some additional workspace for intermediate tables.
82  * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
83  * @return : largest histogram frequency,
84  *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
85 static size_t HIST_count_parallel_wksp(
86                                 unsigned* count, unsigned* maxSymbolValuePtr,
87                                 const void* source, size_t sourceSize,
88                                 unsigned checkMax,
89                                 unsigned* const workSpace)
90 {
91     const BYTE* ip = (const BYTE*)source;
92     const BYTE* const iend = ip+sourceSize;
93     unsigned maxSymbolValue = *maxSymbolValuePtr;
94     unsigned max=0;
95     U32* const Counting1 = workSpace;
96     U32* const Counting2 = Counting1 + 256;
97     U32* const Counting3 = Counting2 + 256;
98     U32* const Counting4 = Counting3 + 256;
99 
100     memset(workSpace, 0, 4*256*sizeof(unsigned));
101 
102     /* safety checks */
103     if (!sourceSize) {
104         memset(count, 0, maxSymbolValue + 1);
105         *maxSymbolValuePtr = 0;
106         return 0;
107     }
108     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
109 
110     /* by stripes of 16 bytes */
111     {   U32 cached = MEM_read32(ip); ip += 4;
112         while (ip < iend-15) {
113             U32 c = cached; cached = MEM_read32(ip); ip += 4;
114             Counting1[(BYTE) c     ]++;
115             Counting2[(BYTE)(c>>8) ]++;
116             Counting3[(BYTE)(c>>16)]++;
117             Counting4[       c>>24 ]++;
118             c = cached; cached = MEM_read32(ip); ip += 4;
119             Counting1[(BYTE) c     ]++;
120             Counting2[(BYTE)(c>>8) ]++;
121             Counting3[(BYTE)(c>>16)]++;
122             Counting4[       c>>24 ]++;
123             c = cached; cached = MEM_read32(ip); ip += 4;
124             Counting1[(BYTE) c     ]++;
125             Counting2[(BYTE)(c>>8) ]++;
126             Counting3[(BYTE)(c>>16)]++;
127             Counting4[       c>>24 ]++;
128             c = cached; cached = MEM_read32(ip); ip += 4;
129             Counting1[(BYTE) c     ]++;
130             Counting2[(BYTE)(c>>8) ]++;
131             Counting3[(BYTE)(c>>16)]++;
132             Counting4[       c>>24 ]++;
133         }
134         ip-=4;
135     }
136 
137     /* finish last symbols */
138     while (ip<iend) Counting1[*ip++]++;
139 
140     if (checkMax) {   /* verify stats will fit into destination table */
141         U32 s; for (s=255; s>maxSymbolValue; s--) {
142             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
144     }   }
145 
146     {   U32 s;
147         if (maxSymbolValue > 255) maxSymbolValue = 255;
148         for (s=0; s<=maxSymbolValue; s++) {
149             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
150             if (count[s] > max) max = count[s];
151     }   }
152 
153     while (!count[maxSymbolValue]) maxSymbolValue--;
154     *maxSymbolValuePtr = maxSymbolValue;
155     return (size_t)max;
156 }
157 
158 /* HIST_countFast_wksp() :
159  * Same as HIST_countFast(), but using an externally provided scratch buffer.
160  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
161 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
162                           const void* source, size_t sourceSize,
163                           unsigned* workSpace)
164 {
165     if (sourceSize < 1500) /* heuristic threshold */
166         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
167     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
168 }
169 
170 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
171 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
172                      const void* source, size_t sourceSize)
173 {
174     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
175     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
176 }
177 
178 /* HIST_count_wksp() :
179  * Same as HIST_count(), but using an externally provided scratch buffer.
180  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
181 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
182                  const void* source, size_t sourceSize, unsigned* workSpace)
183 {
184     if (*maxSymbolValuePtr < 255)
185         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
186     *maxSymbolValuePtr = 255;
187     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
188 }
189 
190 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
191                  const void* src, size_t srcSize)
192 {
193     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
194     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
195 }
196