1*e0c1b49fSNick Terrell /* ****************************************************************** 2*e0c1b49fSNick Terrell * hist : Histogram functions 3*e0c1b49fSNick Terrell * part of Finite State Entropy project 4*e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc. 5*e0c1b49fSNick Terrell * 6*e0c1b49fSNick Terrell * You can contact the author at : 7*e0c1b49fSNick Terrell * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 8*e0c1b49fSNick Terrell * - Public forum : https://groups.google.com/forum/#!forum/lz4c 9*e0c1b49fSNick Terrell * 10*e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the 11*e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found 12*e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree). 13*e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses. 14*e0c1b49fSNick Terrell ****************************************************************** */ 15*e0c1b49fSNick Terrell 16*e0c1b49fSNick Terrell /* --- dependencies --- */ 17*e0c1b49fSNick Terrell #include "../common/zstd_deps.h" /* size_t */ 18*e0c1b49fSNick Terrell 19*e0c1b49fSNick Terrell 20*e0c1b49fSNick Terrell /* --- simple histogram functions --- */ 21*e0c1b49fSNick Terrell 22*e0c1b49fSNick Terrell /*! HIST_count(): 23*e0c1b49fSNick Terrell * Provides the precise count of each byte within a table 'count'. 24*e0c1b49fSNick Terrell * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). 25*e0c1b49fSNick Terrell * Updates *maxSymbolValuePtr with actual largest symbol value detected. 26*e0c1b49fSNick Terrell * @return : count of the most frequent symbol (which isn't identified). 27*e0c1b49fSNick Terrell * or an error code, which can be tested using HIST_isError(). 28*e0c1b49fSNick Terrell * note : if return == srcSize, there is only one symbol. 29*e0c1b49fSNick Terrell */ 30*e0c1b49fSNick Terrell size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 31*e0c1b49fSNick Terrell const void* src, size_t srcSize); 32*e0c1b49fSNick Terrell 33*e0c1b49fSNick Terrell unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */ 34*e0c1b49fSNick Terrell 35*e0c1b49fSNick Terrell 36*e0c1b49fSNick Terrell /* --- advanced histogram functions --- */ 37*e0c1b49fSNick Terrell 38*e0c1b49fSNick Terrell #define HIST_WKSP_SIZE_U32 1024 39*e0c1b49fSNick Terrell #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) 40*e0c1b49fSNick Terrell /* HIST_count_wksp() : 41*e0c1b49fSNick Terrell * Same as HIST_count(), but using an externally provided scratch buffer. 42*e0c1b49fSNick Terrell * Benefit is this function will use very little stack space. 43*e0c1b49fSNick Terrell * `workSpace` is a writable buffer which must be 4-bytes aligned, 44*e0c1b49fSNick Terrell * `workSpaceSize` must be >= HIST_WKSP_SIZE 45*e0c1b49fSNick Terrell */ 46*e0c1b49fSNick Terrell size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 47*e0c1b49fSNick Terrell const void* src, size_t srcSize, 48*e0c1b49fSNick Terrell void* workSpace, size_t workSpaceSize); 49*e0c1b49fSNick Terrell 50*e0c1b49fSNick Terrell /* HIST_countFast() : 51*e0c1b49fSNick Terrell * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. 52*e0c1b49fSNick Terrell * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` 53*e0c1b49fSNick Terrell */ 54*e0c1b49fSNick Terrell size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 55*e0c1b49fSNick Terrell const void* src, size_t srcSize); 56*e0c1b49fSNick Terrell 57*e0c1b49fSNick Terrell /* HIST_countFast_wksp() : 58*e0c1b49fSNick Terrell * Same as HIST_countFast(), but using an externally provided scratch buffer. 59*e0c1b49fSNick Terrell * `workSpace` is a writable buffer which must be 4-bytes aligned, 60*e0c1b49fSNick Terrell * `workSpaceSize` must be >= HIST_WKSP_SIZE 61*e0c1b49fSNick Terrell */ 62*e0c1b49fSNick Terrell size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 63*e0c1b49fSNick Terrell const void* src, size_t srcSize, 64*e0c1b49fSNick Terrell void* workSpace, size_t workSpaceSize); 65*e0c1b49fSNick Terrell 66*e0c1b49fSNick Terrell /*! HIST_count_simple() : 67*e0c1b49fSNick Terrell * Same as HIST_countFast(), this function is unsafe, 68*e0c1b49fSNick Terrell * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. 69*e0c1b49fSNick Terrell * It is also a bit slower for large inputs. 70*e0c1b49fSNick Terrell * However, it does not need any additional memory (not even on stack). 71*e0c1b49fSNick Terrell * @return : count of the most frequent symbol. 72*e0c1b49fSNick Terrell * Note this function doesn't produce any error (i.e. it must succeed). 73*e0c1b49fSNick Terrell */ 74*e0c1b49fSNick Terrell unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 75*e0c1b49fSNick Terrell const void* src, size_t srcSize); 76