Home | History | Annotate | Line # | Download | only in compress
      1 /* ******************************************************************
      2  * hist : Histogram functions
      3  * part of Finite State Entropy project
      4  * Copyright (c) Meta Platforms, Inc. and affiliates.
      5  *
      6  *  You can contact the author at :
      7  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
      8  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
      9  *
     10  * This source code is licensed under both the BSD-style license (found in the
     11  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
     12  * in the COPYING file in the root directory of this source tree).
     13  * You may select, at your option, one of the above-listed licenses.
     14 ****************************************************************** */
     15 
     16 /* --- dependencies --- */
     17 #include "../common/zstd_deps.h"   /* size_t */
     18 
     19 
     20 /* --- simple histogram functions --- */
     21 
     22 /*! HIST_count():
     23  *  Provides the precise count of each byte within a table 'count'.
     24  * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
     25  *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
     26  * @return : count of the most frequent symbol (which isn't identified).
     27  *           or an error code, which can be tested using HIST_isError().
     28  *           note : if return == srcSize, there is only one symbol.
     29  */
     30 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
     31                   const void* src, size_t srcSize);
     32 
     33 unsigned HIST_isError(size_t code);  /**< tells if a return value is an error code */
     34 
     35 
     36 /* --- advanced histogram functions --- */
     37 
     38 #define HIST_WKSP_SIZE_U32 1024
     39 #define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
     40 /** HIST_count_wksp() :
     41  *  Same as HIST_count(), but using an externally provided scratch buffer.
     42  *  Benefit is this function will use very little stack space.
     43  * `workSpace` is a writable buffer which must be 4-bytes aligned,
     44  * `workSpaceSize` must be >= HIST_WKSP_SIZE
     45  */
     46 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     47                        const void* src, size_t srcSize,
     48                        void* workSpace, size_t workSpaceSize);
     49 
     50 /** HIST_countFast() :
     51  *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
     52  *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
     53  */
     54 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
     55                       const void* src, size_t srcSize);
     56 
     57 /** HIST_countFast_wksp() :
     58  *  Same as HIST_countFast(), but using an externally provided scratch buffer.
     59  * `workSpace` is a writable buffer which must be 4-bytes aligned,
     60  * `workSpaceSize` must be >= HIST_WKSP_SIZE
     61  */
     62 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     63                            const void* src, size_t srcSize,
     64                            void* workSpace, size_t workSpaceSize);
     65 
     66 /*! HIST_count_simple() :
     67  *  Same as HIST_countFast(), this function is unsafe,
     68  *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
     69  *  It is also a bit slower for large inputs.
     70  *  However, it does not need any additional memory (not even on stack).
     71  * @return : count of the most frequent symbol.
     72  *  Note this function doesn't produce any error (i.e. it must succeed).
     73  */
     74 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     75                            const void* src, size_t srcSize);
     76 
     77 /*! HIST_add() :
     78  *  Lowest level: just add nb of occurrences of characters from @src into @count.
     79  *  @count is not reset. @count array is presumed large enough (i.e. 1 KB).
     80  @  This function does not need any additional stack memory.
     81  */
     82 void HIST_add(unsigned* count, const void* src, size_t srcSize);
     83