xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/compress/hist.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1  // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2  /* ******************************************************************
3   * hist : Histogram functions
4   * part of Finite State Entropy project
5   * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
6   *
7   *  You can contact the author at :
8   *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
9   *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
10   *
11   * This source code is licensed under both the BSD-style license (found in the
12   * LICENSE file in the root directory of this source tree) and the GPLv2 (found
13   * in the COPYING file in the root directory of this source tree).
14   * You may select, at your option, one of the above-listed licenses.
15  ****************************************************************** */
16  
17  /* --- dependencies --- */
18  #include "../common/mem.h"             /* U32, BYTE, etc. */
19  #include "../common/debug.h"           /* assert, DEBUGLOG */
20  #include "../common/error_private.h"   /* ERROR */
21  #include "hist.h"
22  
23  
24  /* --- Error management --- */
HIST_isError(size_t code)25  unsigned HIST_isError(size_t code) { return ERR_isError(code); }
26  
27  /*-**************************************************************
28   *  Histogram functions
29   ****************************************************************/
HIST_count_simple(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)30  unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
31                             const void* src, size_t srcSize)
32  {
33      const BYTE* ip = (const BYTE*)src;
34      const BYTE* const end = ip + srcSize;
35      unsigned maxSymbolValue = *maxSymbolValuePtr;
36      unsigned largestCount=0;
37  
38      memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
39      if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
40  
41      while (ip<end) {
42          assert(*ip <= maxSymbolValue);
43          count[*ip++]++;
44      }
45  
46      while (!count[maxSymbolValue]) maxSymbolValue--;
47      *maxSymbolValuePtr = maxSymbolValue;
48  
49      {   U32 s;
50          for (s=0; s<=maxSymbolValue; s++)
51              if (count[s] > largestCount) largestCount = count[s];
52      }
53  
54      return largestCount;
55  }
56  
57  typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
58  
59  /* HIST_count_parallel_wksp() :
60   * store histogram into 4 intermediate tables, recombined at the end.
61   * this design makes better use of OoO cpus,
62   * and is noticeably faster when some values are heavily repeated.
63   * But it needs some additional workspace for intermediate tables.
64   * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
65   * @return : largest histogram frequency,
66   *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
HIST_count_parallel_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,HIST_checkInput_e check,U32 * const workSpace)67  static size_t HIST_count_parallel_wksp(
68                                  unsigned* count, unsigned* maxSymbolValuePtr,
69                                  const void* source, size_t sourceSize,
70                                  HIST_checkInput_e check,
71                                  U32* const workSpace)
72  {
73      const BYTE* ip = (const BYTE*)source;
74      const BYTE* const iend = ip+sourceSize;
75      unsigned maxSymbolValue = *maxSymbolValuePtr;
76      unsigned max=0;
77      U32* const Counting1 = workSpace;
78      U32* const Counting2 = Counting1 + 256;
79      U32* const Counting3 = Counting2 + 256;
80      U32* const Counting4 = Counting3 + 256;
81  
82      memset(workSpace, 0, 4*256*sizeof(unsigned));
83  
84      /* safety checks */
85      if (!sourceSize) {
86          memset(count, 0, maxSymbolValue + 1);
87          *maxSymbolValuePtr = 0;
88          return 0;
89      }
90      if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
91  
92      /* by stripes of 16 bytes */
93      {   U32 cached = MEM_read32(ip); ip += 4;
94          while (ip < iend-15) {
95              U32 c = cached; cached = MEM_read32(ip); ip += 4;
96              Counting1[(BYTE) c     ]++;
97              Counting2[(BYTE)(c>>8) ]++;
98              Counting3[(BYTE)(c>>16)]++;
99              Counting4[       c>>24 ]++;
100              c = cached; cached = MEM_read32(ip); ip += 4;
101              Counting1[(BYTE) c     ]++;
102              Counting2[(BYTE)(c>>8) ]++;
103              Counting3[(BYTE)(c>>16)]++;
104              Counting4[       c>>24 ]++;
105              c = cached; cached = MEM_read32(ip); ip += 4;
106              Counting1[(BYTE) c     ]++;
107              Counting2[(BYTE)(c>>8) ]++;
108              Counting3[(BYTE)(c>>16)]++;
109              Counting4[       c>>24 ]++;
110              c = cached; cached = MEM_read32(ip); ip += 4;
111              Counting1[(BYTE) c     ]++;
112              Counting2[(BYTE)(c>>8) ]++;
113              Counting3[(BYTE)(c>>16)]++;
114              Counting4[       c>>24 ]++;
115          }
116          ip-=4;
117      }
118  
119      /* finish last symbols */
120      while (ip<iend) Counting1[*ip++]++;
121  
122      if (check) {   /* verify stats will fit into destination table */
123          U32 s; for (s=255; s>maxSymbolValue; s--) {
124              Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
125              if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
126      }   }
127  
128      {   U32 s;
129          if (maxSymbolValue > 255) maxSymbolValue = 255;
130          for (s=0; s<=maxSymbolValue; s++) {
131              count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
132              if (count[s] > max) max = count[s];
133      }   }
134  
135      while (!count[maxSymbolValue]) maxSymbolValue--;
136      *maxSymbolValuePtr = maxSymbolValue;
137      return (size_t)max;
138  }
139  
140  /* HIST_countFast_wksp() :
141   * Same as HIST_countFast(), but using an externally provided scratch buffer.
142   * `workSpace` is a writable buffer which must be 4-bytes aligned,
143   * `workSpaceSize` must be >= HIST_WKSP_SIZE
144   */
HIST_countFast_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)145  size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
146                            const void* source, size_t sourceSize,
147                            void* workSpace, size_t workSpaceSize)
148  {
149      if (sourceSize < 1500) /* heuristic threshold */
150          return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
151      if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
152      if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
153      return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
154  }
155  
156  /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
HIST_countFast(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize)157  size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
158                       const void* source, size_t sourceSize)
159  {
160      unsigned tmpCounters[HIST_WKSP_SIZE_U32];
161      return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
162  }
163  
164  /* HIST_count_wksp() :
165   * Same as HIST_count(), but using an externally provided scratch buffer.
166   * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
HIST_count_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)167  size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
168                         const void* source, size_t sourceSize,
169                         void* workSpace, size_t workSpaceSize)
170  {
171      if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
172      if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
173      if (*maxSymbolValuePtr < 255)
174          return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
175      *maxSymbolValuePtr = 255;
176      return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
177  }
178  
HIST_count(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)179  size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
180                   const void* src, size_t srcSize)
181  {
182      unsigned tmpCounters[HIST_WKSP_SIZE_U32];
183      return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
184  }
185