10f743729SConrad Meyer /* ****************************************************************** 237f1f268SConrad Meyer * hist : Histogram functions 337f1f268SConrad Meyer * part of Finite State Entropy project 437f1f268SConrad Meyer * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 537f1f268SConrad Meyer * 637f1f268SConrad Meyer * You can contact the author at : 737f1f268SConrad Meyer * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 837f1f268SConrad Meyer * - Public forum : https://groups.google.com/forum/#!forum/lz4c 937f1f268SConrad Meyer * 1037f1f268SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 1137f1f268SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 1237f1f268SConrad Meyer * in the COPYING file in the root directory of this source tree). 1337f1f268SConrad Meyer * You may select, at your option, one of the above-listed licenses. 140f743729SConrad Meyer ****************************************************************** */ 150f743729SConrad Meyer 160f743729SConrad Meyer /* --- dependencies --- */ 1737f1f268SConrad Meyer #include "../common/mem.h" /* U32, BYTE, etc. */ 1837f1f268SConrad Meyer #include "../common/debug.h" /* assert, DEBUGLOG */ 1937f1f268SConrad Meyer #include "../common/error_private.h" /* ERROR */ 200f743729SConrad Meyer #include "hist.h" 210f743729SConrad Meyer 220f743729SConrad Meyer 230f743729SConrad Meyer /* --- Error management --- */ 240f743729SConrad Meyer unsigned HIST_isError(size_t code) { return ERR_isError(code); } 250f743729SConrad Meyer 260f743729SConrad Meyer /*-************************************************************** 270f743729SConrad Meyer * Histogram functions 280f743729SConrad Meyer ****************************************************************/ 290f743729SConrad Meyer unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 300f743729SConrad Meyer const void* src, size_t srcSize) 310f743729SConrad Meyer { 320f743729SConrad Meyer const BYTE* ip = (const BYTE*)src; 330f743729SConrad Meyer const BYTE* const end = ip + srcSize; 340f743729SConrad Meyer unsigned maxSymbolValue = *maxSymbolValuePtr; 350f743729SConrad Meyer unsigned largestCount=0; 360f743729SConrad Meyer 37*f7cd7fe5SConrad Meyer ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 380f743729SConrad Meyer if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 390f743729SConrad Meyer 400f743729SConrad Meyer while (ip<end) { 410f743729SConrad Meyer assert(*ip <= maxSymbolValue); 420f743729SConrad Meyer count[*ip++]++; 430f743729SConrad Meyer } 440f743729SConrad Meyer 450f743729SConrad Meyer while (!count[maxSymbolValue]) maxSymbolValue--; 460f743729SConrad Meyer *maxSymbolValuePtr = maxSymbolValue; 470f743729SConrad Meyer 480f743729SConrad Meyer { U32 s; 490f743729SConrad Meyer for (s=0; s<=maxSymbolValue; s++) 500f743729SConrad Meyer if (count[s] > largestCount) largestCount = count[s]; 510f743729SConrad Meyer } 520f743729SConrad Meyer 530f743729SConrad Meyer return largestCount; 540f743729SConrad Meyer } 550f743729SConrad Meyer 56a0483764SConrad Meyer typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; 570f743729SConrad Meyer 580f743729SConrad Meyer /* HIST_count_parallel_wksp() : 590f743729SConrad Meyer * store histogram into 4 intermediate tables, recombined at the end. 600f743729SConrad Meyer * this design makes better use of OoO cpus, 610f743729SConrad Meyer * and is noticeably faster when some values are heavily repeated. 620f743729SConrad Meyer * But it needs some additional workspace for intermediate tables. 63*f7cd7fe5SConrad Meyer * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. 640f743729SConrad Meyer * @return : largest histogram frequency, 65*f7cd7fe5SConrad Meyer * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ 660f743729SConrad Meyer static size_t HIST_count_parallel_wksp( 670f743729SConrad Meyer unsigned* count, unsigned* maxSymbolValuePtr, 680f743729SConrad Meyer const void* source, size_t sourceSize, 69a0483764SConrad Meyer HIST_checkInput_e check, 70a0483764SConrad Meyer U32* const workSpace) 710f743729SConrad Meyer { 720f743729SConrad Meyer const BYTE* ip = (const BYTE*)source; 730f743729SConrad Meyer const BYTE* const iend = ip+sourceSize; 74*f7cd7fe5SConrad Meyer size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); 750f743729SConrad Meyer unsigned max=0; 760f743729SConrad Meyer U32* const Counting1 = workSpace; 770f743729SConrad Meyer U32* const Counting2 = Counting1 + 256; 780f743729SConrad Meyer U32* const Counting3 = Counting2 + 256; 790f743729SConrad Meyer U32* const Counting4 = Counting3 + 256; 800f743729SConrad Meyer 810f743729SConrad Meyer /* safety checks */ 82*f7cd7fe5SConrad Meyer assert(*maxSymbolValuePtr <= 255); 830f743729SConrad Meyer if (!sourceSize) { 84*f7cd7fe5SConrad Meyer ZSTD_memset(count, 0, countSize); 850f743729SConrad Meyer *maxSymbolValuePtr = 0; 860f743729SConrad Meyer return 0; 870f743729SConrad Meyer } 88*f7cd7fe5SConrad Meyer ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); 890f743729SConrad Meyer 900f743729SConrad Meyer /* by stripes of 16 bytes */ 910f743729SConrad Meyer { U32 cached = MEM_read32(ip); ip += 4; 920f743729SConrad Meyer while (ip < iend-15) { 930f743729SConrad Meyer U32 c = cached; cached = MEM_read32(ip); ip += 4; 940f743729SConrad Meyer Counting1[(BYTE) c ]++; 950f743729SConrad Meyer Counting2[(BYTE)(c>>8) ]++; 960f743729SConrad Meyer Counting3[(BYTE)(c>>16)]++; 970f743729SConrad Meyer Counting4[ c>>24 ]++; 980f743729SConrad Meyer c = cached; cached = MEM_read32(ip); ip += 4; 990f743729SConrad Meyer Counting1[(BYTE) c ]++; 1000f743729SConrad Meyer Counting2[(BYTE)(c>>8) ]++; 1010f743729SConrad Meyer Counting3[(BYTE)(c>>16)]++; 1020f743729SConrad Meyer Counting4[ c>>24 ]++; 1030f743729SConrad Meyer c = cached; cached = MEM_read32(ip); ip += 4; 1040f743729SConrad Meyer Counting1[(BYTE) c ]++; 1050f743729SConrad Meyer Counting2[(BYTE)(c>>8) ]++; 1060f743729SConrad Meyer Counting3[(BYTE)(c>>16)]++; 1070f743729SConrad Meyer Counting4[ c>>24 ]++; 1080f743729SConrad Meyer c = cached; cached = MEM_read32(ip); ip += 4; 1090f743729SConrad Meyer Counting1[(BYTE) c ]++; 1100f743729SConrad Meyer Counting2[(BYTE)(c>>8) ]++; 1110f743729SConrad Meyer Counting3[(BYTE)(c>>16)]++; 1120f743729SConrad Meyer Counting4[ c>>24 ]++; 1130f743729SConrad Meyer } 1140f743729SConrad Meyer ip-=4; 1150f743729SConrad Meyer } 1160f743729SConrad Meyer 1170f743729SConrad Meyer /* finish last symbols */ 1180f743729SConrad Meyer while (ip<iend) Counting1[*ip++]++; 1190f743729SConrad Meyer 1200f743729SConrad Meyer { U32 s; 121*f7cd7fe5SConrad Meyer for (s=0; s<256; s++) { 122*f7cd7fe5SConrad Meyer Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 123*f7cd7fe5SConrad Meyer if (Counting1[s] > max) max = Counting1[s]; 1240f743729SConrad Meyer } } 1250f743729SConrad Meyer 126*f7cd7fe5SConrad Meyer { unsigned maxSymbolValue = 255; 127*f7cd7fe5SConrad Meyer while (!Counting1[maxSymbolValue]) maxSymbolValue--; 128*f7cd7fe5SConrad Meyer if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); 1290f743729SConrad Meyer *maxSymbolValuePtr = maxSymbolValue; 130*f7cd7fe5SConrad Meyer ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ 131*f7cd7fe5SConrad Meyer } 1320f743729SConrad Meyer return (size_t)max; 1330f743729SConrad Meyer } 1340f743729SConrad Meyer 1350f743729SConrad Meyer /* HIST_countFast_wksp() : 1360f743729SConrad Meyer * Same as HIST_countFast(), but using an externally provided scratch buffer. 137a0483764SConrad Meyer * `workSpace` is a writable buffer which must be 4-bytes aligned, 138a0483764SConrad Meyer * `workSpaceSize` must be >= HIST_WKSP_SIZE 139a0483764SConrad Meyer */ 1400f743729SConrad Meyer size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 1410f743729SConrad Meyer const void* source, size_t sourceSize, 142a0483764SConrad Meyer void* workSpace, size_t workSpaceSize) 1430f743729SConrad Meyer { 1440f743729SConrad Meyer if (sourceSize < 1500) /* heuristic threshold */ 1450f743729SConrad Meyer return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 146a0483764SConrad Meyer if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 147a0483764SConrad Meyer if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 148a0483764SConrad Meyer return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); 1490f743729SConrad Meyer } 1500f743729SConrad Meyer 1510f743729SConrad Meyer /* HIST_count_wksp() : 1520f743729SConrad Meyer * Same as HIST_count(), but using an externally provided scratch buffer. 1530f743729SConrad Meyer * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 1540f743729SConrad Meyer size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 155a0483764SConrad Meyer const void* source, size_t sourceSize, 156a0483764SConrad Meyer void* workSpace, size_t workSpaceSize) 1570f743729SConrad Meyer { 158a0483764SConrad Meyer if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 159a0483764SConrad Meyer if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 1600f743729SConrad Meyer if (*maxSymbolValuePtr < 255) 161a0483764SConrad Meyer return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); 1620f743729SConrad Meyer *maxSymbolValuePtr = 255; 163a0483764SConrad Meyer return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); 1640f743729SConrad Meyer } 1650f743729SConrad Meyer 166*f7cd7fe5SConrad Meyer #ifndef ZSTD_NO_UNUSED_FUNCTIONS 167*f7cd7fe5SConrad Meyer /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 168*f7cd7fe5SConrad Meyer size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 169*f7cd7fe5SConrad Meyer const void* source, size_t sourceSize) 170*f7cd7fe5SConrad Meyer { 171*f7cd7fe5SConrad Meyer unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 172*f7cd7fe5SConrad Meyer return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); 173*f7cd7fe5SConrad Meyer } 174*f7cd7fe5SConrad Meyer 1750f743729SConrad Meyer size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 1760f743729SConrad Meyer const void* src, size_t srcSize) 1770f743729SConrad Meyer { 1780f743729SConrad Meyer unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 179a0483764SConrad Meyer return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); 1800f743729SConrad Meyer } 181*f7cd7fe5SConrad Meyer #endif 182