1 /* ****************************************************************** 2 hist : Histogram functions 3 part of Finite State Entropy project 4 Copyright (C) 2013-present, Yann Collet. 5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions are 10 met: 11 12 * Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 * Redistributions in binary form must reproduce the above 15 copyright notice, this list of conditions and the following disclaimer 16 in the documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 You can contact the author at : 32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 33 - Public forum : https://groups.google.com/forum/#!forum/lz4c 34 ****************************************************************** */ 35 36 /* --- dependencies --- */ 37 #include "mem.h" /* U32, BYTE, etc. */ 38 #include "debug.h" /* assert, DEBUGLOG */ 39 #include "error_private.h" /* ERROR */ 40 #include "hist.h" 41 42 43 /* --- Error management --- */ 44 unsigned HIST_isError(size_t code) { return ERR_isError(code); } 45 46 /*-************************************************************** 47 * Histogram functions 48 ****************************************************************/ 49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 50 const void* src, size_t srcSize) 51 { 52 const BYTE* ip = (const BYTE*)src; 53 const BYTE* const end = ip + srcSize; 54 unsigned maxSymbolValue = *maxSymbolValuePtr; 55 unsigned largestCount=0; 56 57 memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 58 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 59 60 while (ip<end) { 61 assert(*ip <= maxSymbolValue); 62 count[*ip++]++; 63 } 64 65 while (!count[maxSymbolValue]) maxSymbolValue--; 66 *maxSymbolValuePtr = maxSymbolValue; 67 68 { U32 s; 69 for (s=0; s<=maxSymbolValue; s++) 70 if (count[s] > largestCount) largestCount = count[s]; 71 } 72 73 return largestCount; 74 } 75 76 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; 77 78 /* HIST_count_parallel_wksp() : 79 * store histogram into 4 intermediate tables, recombined at the end. 80 * this design makes better use of OoO cpus, 81 * and is noticeably faster when some values are heavily repeated. 82 * But it needs some additional workspace for intermediate tables. 83 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. 84 * @return : largest histogram frequency, 85 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ 86 static size_t HIST_count_parallel_wksp( 87 unsigned* count, unsigned* maxSymbolValuePtr, 88 const void* source, size_t sourceSize, 89 HIST_checkInput_e check, 90 U32* const workSpace) 91 { 92 const BYTE* ip = (const BYTE*)source; 93 const BYTE* const iend = ip+sourceSize; 94 unsigned maxSymbolValue = *maxSymbolValuePtr; 95 unsigned max=0; 96 U32* const Counting1 = workSpace; 97 U32* const Counting2 = Counting1 + 256; 98 U32* const Counting3 = Counting2 + 256; 99 U32* const Counting4 = Counting3 + 256; 100 101 memset(workSpace, 0, 4*256*sizeof(unsigned)); 102 103 /* safety checks */ 104 if (!sourceSize) { 105 memset(count, 0, maxSymbolValue + 1); 106 *maxSymbolValuePtr = 0; 107 return 0; 108 } 109 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ 110 111 /* by stripes of 16 bytes */ 112 { U32 cached = MEM_read32(ip); ip += 4; 113 while (ip < iend-15) { 114 U32 c = cached; cached = MEM_read32(ip); ip += 4; 115 Counting1[(BYTE) c ]++; 116 Counting2[(BYTE)(c>>8) ]++; 117 Counting3[(BYTE)(c>>16)]++; 118 Counting4[ c>>24 ]++; 119 c = cached; cached = MEM_read32(ip); ip += 4; 120 Counting1[(BYTE) c ]++; 121 Counting2[(BYTE)(c>>8) ]++; 122 Counting3[(BYTE)(c>>16)]++; 123 Counting4[ c>>24 ]++; 124 c = cached; cached = MEM_read32(ip); ip += 4; 125 Counting1[(BYTE) c ]++; 126 Counting2[(BYTE)(c>>8) ]++; 127 Counting3[(BYTE)(c>>16)]++; 128 Counting4[ c>>24 ]++; 129 c = cached; cached = MEM_read32(ip); ip += 4; 130 Counting1[(BYTE) c ]++; 131 Counting2[(BYTE)(c>>8) ]++; 132 Counting3[(BYTE)(c>>16)]++; 133 Counting4[ c>>24 ]++; 134 } 135 ip-=4; 136 } 137 138 /* finish last symbols */ 139 while (ip<iend) Counting1[*ip++]++; 140 141 if (check) { /* verify stats will fit into destination table */ 142 U32 s; for (s=255; s>maxSymbolValue; s--) { 143 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 144 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); 145 } } 146 147 { U32 s; 148 if (maxSymbolValue > 255) maxSymbolValue = 255; 149 for (s=0; s<=maxSymbolValue; s++) { 150 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; 151 if (count[s] > max) max = count[s]; 152 } } 153 154 while (!count[maxSymbolValue]) maxSymbolValue--; 155 *maxSymbolValuePtr = maxSymbolValue; 156 return (size_t)max; 157 } 158 159 /* HIST_countFast_wksp() : 160 * Same as HIST_countFast(), but using an externally provided scratch buffer. 161 * `workSpace` is a writable buffer which must be 4-bytes aligned, 162 * `workSpaceSize` must be >= HIST_WKSP_SIZE 163 */ 164 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 165 const void* source, size_t sourceSize, 166 void* workSpace, size_t workSpaceSize) 167 { 168 if (sourceSize < 1500) /* heuristic threshold */ 169 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 170 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 171 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 172 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); 173 } 174 175 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 176 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 177 const void* source, size_t sourceSize) 178 { 179 unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 180 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); 181 } 182 183 /* HIST_count_wksp() : 184 * Same as HIST_count(), but using an externally provided scratch buffer. 185 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 186 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 187 const void* source, size_t sourceSize, 188 void* workSpace, size_t workSpaceSize) 189 { 190 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 191 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 192 if (*maxSymbolValuePtr < 255) 193 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); 194 *maxSymbolValuePtr = 255; 195 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); 196 } 197 198 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 199 const void* src, size_t srcSize) 200 { 201 unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 202 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); 203 } 204