xref: /freebsd/sys/contrib/zstd/lib/common/bitstream.h (revision 052d3c129019c2f03488f7cb7399580091f9a713)
10c16b537SWarner Losh /* ******************************************************************
20c16b537SWarner Losh    bitstream
30c16b537SWarner Losh    Part of FSE library
40c16b537SWarner Losh    header file (to include)
50c16b537SWarner Losh    Copyright (C) 2013-2017, Yann Collet.
60c16b537SWarner Losh 
70c16b537SWarner Losh    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
80c16b537SWarner Losh 
90c16b537SWarner Losh    Redistribution and use in source and binary forms, with or without
100c16b537SWarner Losh    modification, are permitted provided that the following conditions are
110c16b537SWarner Losh    met:
120c16b537SWarner Losh 
130c16b537SWarner Losh        * Redistributions of source code must retain the above copyright
140c16b537SWarner Losh    notice, this list of conditions and the following disclaimer.
150c16b537SWarner Losh        * Redistributions in binary form must reproduce the above
160c16b537SWarner Losh    copyright notice, this list of conditions and the following disclaimer
170c16b537SWarner Losh    in the documentation and/or other materials provided with the
180c16b537SWarner Losh    distribution.
190c16b537SWarner Losh 
200c16b537SWarner Losh    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
210c16b537SWarner Losh    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
220c16b537SWarner Losh    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
230c16b537SWarner Losh    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
240c16b537SWarner Losh    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
250c16b537SWarner Losh    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
260c16b537SWarner Losh    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
270c16b537SWarner Losh    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
280c16b537SWarner Losh    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
290c16b537SWarner Losh    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
300c16b537SWarner Losh    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
310c16b537SWarner Losh 
320c16b537SWarner Losh    You can contact the author at :
330c16b537SWarner Losh    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
340c16b537SWarner Losh ****************************************************************** */
350c16b537SWarner Losh #ifndef BITSTREAM_H_MODULE
360c16b537SWarner Losh #define BITSTREAM_H_MODULE
370c16b537SWarner Losh 
380c16b537SWarner Losh #if defined (__cplusplus)
390c16b537SWarner Losh extern "C" {
400c16b537SWarner Losh #endif
410c16b537SWarner Losh 
420c16b537SWarner Losh /*
430c16b537SWarner Losh *  This API consists of small unitary functions, which must be inlined for best performance.
440c16b537SWarner Losh *  Since link-time-optimization is not available for all compilers,
450c16b537SWarner Losh *  these functions are defined into a .h to be included.
460c16b537SWarner Losh */
470c16b537SWarner Losh 
480c16b537SWarner Losh /*-****************************************
490c16b537SWarner Losh *  Dependencies
500c16b537SWarner Losh ******************************************/
510c16b537SWarner Losh #include "mem.h"            /* unaligned access routines */
520c16b537SWarner Losh #include "error_private.h"  /* error codes and messages */
530c16b537SWarner Losh 
540c16b537SWarner Losh 
550c16b537SWarner Losh /*-*************************************
560c16b537SWarner Losh *  Debug
570c16b537SWarner Losh ***************************************/
580c16b537SWarner Losh #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
590c16b537SWarner Losh #  include <assert.h>
600c16b537SWarner Losh #else
610c16b537SWarner Losh #  ifndef assert
620c16b537SWarner Losh #    define assert(condition) ((void)0)
630c16b537SWarner Losh #  endif
640c16b537SWarner Losh #endif
650c16b537SWarner Losh 
660c16b537SWarner Losh 
670c16b537SWarner Losh /*=========================================
680c16b537SWarner Losh *  Target specific
690c16b537SWarner Losh =========================================*/
700c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__)
710c16b537SWarner Losh #  include <immintrin.h>   /* support for bextr (experimental) */
720c16b537SWarner Losh #endif
730c16b537SWarner Losh 
740c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_32  25
750c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_64  57
760c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
770c16b537SWarner Losh 
780c16b537SWarner Losh 
790c16b537SWarner Losh /*-******************************************
800c16b537SWarner Losh *  bitStream encoding API (write forward)
810c16b537SWarner Losh ********************************************/
820c16b537SWarner Losh /* bitStream can mix input from multiple sources.
830c16b537SWarner Losh  * A critical property of these streams is that they encode and decode in **reverse** direction.
840c16b537SWarner Losh  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
850c16b537SWarner Losh  */
860c16b537SWarner Losh typedef struct
870c16b537SWarner Losh {
880c16b537SWarner Losh     size_t bitContainer;
890c16b537SWarner Losh     unsigned bitPos;
900c16b537SWarner Losh     char*  startPtr;
910c16b537SWarner Losh     char*  ptr;
920c16b537SWarner Losh     char*  endPtr;
930c16b537SWarner Losh } BIT_CStream_t;
940c16b537SWarner Losh 
950c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
960c16b537SWarner Losh MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
970c16b537SWarner Losh MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
980c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
990c16b537SWarner Losh 
1000c16b537SWarner Losh /* Start with initCStream, providing the size of buffer to write into.
1010c16b537SWarner Losh *  bitStream will never write outside of this buffer.
1020c16b537SWarner Losh *  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
1030c16b537SWarner Losh *
1040c16b537SWarner Losh *  bits are first added to a local register.
1050c16b537SWarner Losh *  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
1060c16b537SWarner Losh *  Writing data into memory is an explicit operation, performed by the flushBits function.
1070c16b537SWarner Losh *  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
1080c16b537SWarner Losh *  After a flushBits, a maximum of 7 bits might still be stored into local register.
1090c16b537SWarner Losh *
1100c16b537SWarner Losh *  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
1110c16b537SWarner Losh *
1120c16b537SWarner Losh *  Last operation is to close the bitStream.
1130c16b537SWarner Losh *  The function returns the final size of CStream in bytes.
1140c16b537SWarner Losh *  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
1150c16b537SWarner Losh */
1160c16b537SWarner Losh 
1170c16b537SWarner Losh 
1180c16b537SWarner Losh /*-********************************************
1190c16b537SWarner Losh *  bitStream decoding API (read backward)
1200c16b537SWarner Losh **********************************************/
1210c16b537SWarner Losh typedef struct
1220c16b537SWarner Losh {
1230c16b537SWarner Losh     size_t   bitContainer;
1240c16b537SWarner Losh     unsigned bitsConsumed;
1250c16b537SWarner Losh     const char* ptr;
1260c16b537SWarner Losh     const char* start;
1270c16b537SWarner Losh     const char* limitPtr;
1280c16b537SWarner Losh } BIT_DStream_t;
1290c16b537SWarner Losh 
1300c16b537SWarner Losh typedef enum { BIT_DStream_unfinished = 0,
1310c16b537SWarner Losh                BIT_DStream_endOfBuffer = 1,
1320c16b537SWarner Losh                BIT_DStream_completed = 2,
1330c16b537SWarner Losh                BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
1340c16b537SWarner Losh                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
1350c16b537SWarner Losh 
1360c16b537SWarner Losh MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
1370c16b537SWarner Losh MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
1380c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
1390c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
1400c16b537SWarner Losh 
1410c16b537SWarner Losh 
1420c16b537SWarner Losh /* Start by invoking BIT_initDStream().
1430c16b537SWarner Losh *  A chunk of the bitStream is then stored into a local register.
1440c16b537SWarner Losh *  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
1450c16b537SWarner Losh *  You can then retrieve bitFields stored into the local register, **in reverse order**.
1460c16b537SWarner Losh *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
1470c16b537SWarner Losh *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
1480c16b537SWarner Losh *  Otherwise, it can be less than that, so proceed accordingly.
1490c16b537SWarner Losh *  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
1500c16b537SWarner Losh */
1510c16b537SWarner Losh 
1520c16b537SWarner Losh 
1530c16b537SWarner Losh /*-****************************************
1540c16b537SWarner Losh *  unsafe API
1550c16b537SWarner Losh ******************************************/
1560c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
1570c16b537SWarner Losh /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
1580c16b537SWarner Losh 
1590c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
1600c16b537SWarner Losh /* unsafe version; does not check buffer overflow */
1610c16b537SWarner Losh 
1620c16b537SWarner Losh MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
1630c16b537SWarner Losh /* faster, but works only if nbBits >= 1 */
1640c16b537SWarner Losh 
1650c16b537SWarner Losh 
1660c16b537SWarner Losh 
1670c16b537SWarner Losh /*-**************************************************************
1680c16b537SWarner Losh *  Internal functions
1690c16b537SWarner Losh ****************************************************************/
170*052d3c12SConrad Meyer MEM_STATIC unsigned BIT_highbit32 (U32 val)
1710c16b537SWarner Losh {
1720c16b537SWarner Losh     assert(val != 0);
1730c16b537SWarner Losh     {
1740c16b537SWarner Losh #   if defined(_MSC_VER)   /* Visual */
1750c16b537SWarner Losh         unsigned long r=0;
1760c16b537SWarner Losh         _BitScanReverse ( &r, val );
1770c16b537SWarner Losh         return (unsigned) r;
17890edb2acSAllan Jude #   elif defined(__GNUC__) && (__GNUC__ >= 3) && __has_builtin(__builtin_clz)   /* Use GCC Intrinsic */
1790c16b537SWarner Losh         return 31 - __builtin_clz (val);
1800c16b537SWarner Losh #   else   /* Software version */
1810c16b537SWarner Losh         static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
1820c16b537SWarner Losh                                                  11, 14, 16, 18, 22, 25,  3, 30,
1830c16b537SWarner Losh                                                   8, 12, 20, 28, 15, 17, 24,  7,
1840c16b537SWarner Losh                                                  19, 27, 23,  6, 26,  5,  4, 31 };
1850c16b537SWarner Losh         U32 v = val;
1860c16b537SWarner Losh         v |= v >> 1;
1870c16b537SWarner Losh         v |= v >> 2;
1880c16b537SWarner Losh         v |= v >> 4;
1890c16b537SWarner Losh         v |= v >> 8;
1900c16b537SWarner Losh         v |= v >> 16;
1910c16b537SWarner Losh         return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
1920c16b537SWarner Losh #   endif
1930c16b537SWarner Losh     }
1940c16b537SWarner Losh }
1950c16b537SWarner Losh 
1960c16b537SWarner Losh /*=====    Local Constants   =====*/
1970c16b537SWarner Losh static const unsigned BIT_mask[] = {
1980c16b537SWarner Losh     0,          1,         3,         7,         0xF,       0x1F,
1990c16b537SWarner Losh     0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
2000c16b537SWarner Losh     0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
2010c16b537SWarner Losh     0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
2020c16b537SWarner Losh     0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
2030c16b537SWarner Losh     0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
2040c16b537SWarner Losh #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
2050c16b537SWarner Losh 
2060c16b537SWarner Losh /*-**************************************************************
2070c16b537SWarner Losh *  bitStream encoding
2080c16b537SWarner Losh ****************************************************************/
2090c16b537SWarner Losh /*! BIT_initCStream() :
2100c16b537SWarner Losh  *  `dstCapacity` must be > sizeof(size_t)
2110c16b537SWarner Losh  *  @return : 0 if success,
2120c16b537SWarner Losh  *            otherwise an error code (can be tested using ERR_isError()) */
2130c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
2140c16b537SWarner Losh                                   void* startPtr, size_t dstCapacity)
2150c16b537SWarner Losh {
2160c16b537SWarner Losh     bitC->bitContainer = 0;
2170c16b537SWarner Losh     bitC->bitPos = 0;
2180c16b537SWarner Losh     bitC->startPtr = (char*)startPtr;
2190c16b537SWarner Losh     bitC->ptr = bitC->startPtr;
2200c16b537SWarner Losh     bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
2210c16b537SWarner Losh     if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
2220c16b537SWarner Losh     return 0;
2230c16b537SWarner Losh }
2240c16b537SWarner Losh 
2250c16b537SWarner Losh /*! BIT_addBits() :
2260c16b537SWarner Losh  *  can add up to 31 bits into `bitC`.
2270c16b537SWarner Losh  *  Note : does not check for register overflow ! */
2280c16b537SWarner Losh MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
2290c16b537SWarner Losh                             size_t value, unsigned nbBits)
2300c16b537SWarner Losh {
2310c16b537SWarner Losh     MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
2320c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
2330c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2340c16b537SWarner Losh     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
2350c16b537SWarner Losh     bitC->bitPos += nbBits;
2360c16b537SWarner Losh }
2370c16b537SWarner Losh 
2380c16b537SWarner Losh /*! BIT_addBitsFast() :
2390c16b537SWarner Losh  *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
2400c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
2410c16b537SWarner Losh                                 size_t value, unsigned nbBits)
2420c16b537SWarner Losh {
2430c16b537SWarner Losh     assert((value>>nbBits) == 0);
2440c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2450c16b537SWarner Losh     bitC->bitContainer |= value << bitC->bitPos;
2460c16b537SWarner Losh     bitC->bitPos += nbBits;
2470c16b537SWarner Losh }
2480c16b537SWarner Losh 
2490c16b537SWarner Losh /*! BIT_flushBitsFast() :
2500c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
2510c16b537SWarner Losh  *  unsafe version; does not check buffer overflow */
2520c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
2530c16b537SWarner Losh {
2540c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
2550c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2560c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2570c16b537SWarner Losh     bitC->ptr += nbBytes;
2580c16b537SWarner Losh     assert(bitC->ptr <= bitC->endPtr);
2590c16b537SWarner Losh     bitC->bitPos &= 7;
2600c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
2610c16b537SWarner Losh }
2620c16b537SWarner Losh 
2630c16b537SWarner Losh /*! BIT_flushBits() :
2640c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
2650c16b537SWarner Losh  *  safe version; check for buffer overflow, and prevents it.
2660c16b537SWarner Losh  *  note : does not signal buffer overflow.
2670c16b537SWarner Losh  *  overflow will be revealed later on using BIT_closeCStream() */
2680c16b537SWarner Losh MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
2690c16b537SWarner Losh {
2700c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
2710c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2720c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2730c16b537SWarner Losh     bitC->ptr += nbBytes;
2740c16b537SWarner Losh     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
2750c16b537SWarner Losh     bitC->bitPos &= 7;
2760c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
2770c16b537SWarner Losh }
2780c16b537SWarner Losh 
2790c16b537SWarner Losh /*! BIT_closeCStream() :
2800c16b537SWarner Losh  *  @return : size of CStream, in bytes,
2810c16b537SWarner Losh  *            or 0 if it could not fit into dstBuffer */
2820c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
2830c16b537SWarner Losh {
2840c16b537SWarner Losh     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
2850c16b537SWarner Losh     BIT_flushBits(bitC);
2860c16b537SWarner Losh     if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
2870c16b537SWarner Losh     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
2880c16b537SWarner Losh }
2890c16b537SWarner Losh 
2900c16b537SWarner Losh 
2910c16b537SWarner Losh /*-********************************************************
2920c16b537SWarner Losh *  bitStream decoding
2930c16b537SWarner Losh **********************************************************/
2940c16b537SWarner Losh /*! BIT_initDStream() :
2950c16b537SWarner Losh  *  Initialize a BIT_DStream_t.
2960c16b537SWarner Losh  * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
2970c16b537SWarner Losh  * `srcSize` must be the *exact* size of the bitStream, in bytes.
2980c16b537SWarner Losh  * @return : size of stream (== srcSize), or an errorCode if a problem is detected
2990c16b537SWarner Losh  */
3000c16b537SWarner Losh MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
3010c16b537SWarner Losh {
3020c16b537SWarner Losh     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
3030c16b537SWarner Losh 
3040c16b537SWarner Losh     bitD->start = (const char*)srcBuffer;
3050c16b537SWarner Losh     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
3060c16b537SWarner Losh 
3070c16b537SWarner Losh     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
3080c16b537SWarner Losh         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
3090c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);
3100c16b537SWarner Losh         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
3110c16b537SWarner Losh           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
3120c16b537SWarner Losh           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
3130c16b537SWarner Losh     } else {
3140c16b537SWarner Losh         bitD->ptr   = bitD->start;
3150c16b537SWarner Losh         bitD->bitContainer = *(const BYTE*)(bitD->start);
3160c16b537SWarner Losh         switch(srcSize)
3170c16b537SWarner Losh         {
3180c16b537SWarner Losh         case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
3190c16b537SWarner Losh                 /* fall-through */
3200c16b537SWarner Losh 
3210c16b537SWarner Losh         case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
3220c16b537SWarner Losh                 /* fall-through */
3230c16b537SWarner Losh 
3240c16b537SWarner Losh         case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
3250c16b537SWarner Losh                 /* fall-through */
3260c16b537SWarner Losh 
3270c16b537SWarner Losh         case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
3280c16b537SWarner Losh                 /* fall-through */
3290c16b537SWarner Losh 
3300c16b537SWarner Losh         case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
3310c16b537SWarner Losh                 /* fall-through */
3320c16b537SWarner Losh 
3330c16b537SWarner Losh         case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
3340c16b537SWarner Losh                 /* fall-through */
3350c16b537SWarner Losh 
3360c16b537SWarner Losh         default: break;
3370c16b537SWarner Losh         }
3380c16b537SWarner Losh         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
3390c16b537SWarner Losh             bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
3400c16b537SWarner Losh             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
3410c16b537SWarner Losh         }
3420c16b537SWarner Losh         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
3430c16b537SWarner Losh     }
3440c16b537SWarner Losh 
3450c16b537SWarner Losh     return srcSize;
3460c16b537SWarner Losh }
3470c16b537SWarner Losh 
3480c16b537SWarner Losh MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
3490c16b537SWarner Losh {
3500c16b537SWarner Losh     return bitContainer >> start;
3510c16b537SWarner Losh }
3520c16b537SWarner Losh 
3530c16b537SWarner Losh MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
3540c16b537SWarner Losh {
3550c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008  /* experimental */
3560c16b537SWarner Losh #  if defined(__x86_64__)
3570c16b537SWarner Losh     if (sizeof(bitContainer)==8)
3580c16b537SWarner Losh         return _bextr_u64(bitContainer, start, nbBits);
3590c16b537SWarner Losh     else
3600c16b537SWarner Losh #  endif
3610c16b537SWarner Losh         return _bextr_u32(bitContainer, start, nbBits);
3620c16b537SWarner Losh #else
3630c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
3640c16b537SWarner Losh     return (bitContainer >> start) & BIT_mask[nbBits];
3650c16b537SWarner Losh #endif
3660c16b537SWarner Losh }
3670c16b537SWarner Losh 
3680c16b537SWarner Losh MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
3690c16b537SWarner Losh {
3700c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
3710c16b537SWarner Losh     return bitContainer & BIT_mask[nbBits];
3720c16b537SWarner Losh }
3730c16b537SWarner Losh 
3740c16b537SWarner Losh /*! BIT_lookBits() :
3750c16b537SWarner Losh  *  Provides next n bits from local register.
3760c16b537SWarner Losh  *  local register is not modified.
3770c16b537SWarner Losh  *  On 32-bits, maxNbBits==24.
3780c16b537SWarner Losh  *  On 64-bits, maxNbBits==56.
3790c16b537SWarner Losh  * @return : value extracted */
3800c16b537SWarner Losh MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
3810c16b537SWarner Losh {
3820c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
3830c16b537SWarner Losh     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
3840c16b537SWarner Losh #else
3850c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
3860c16b537SWarner Losh     return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
3870c16b537SWarner Losh #endif
3880c16b537SWarner Losh }
3890c16b537SWarner Losh 
3900c16b537SWarner Losh /*! BIT_lookBitsFast() :
3910c16b537SWarner Losh  *  unsafe version; only works if nbBits >= 1 */
3920c16b537SWarner Losh MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
3930c16b537SWarner Losh {
3940c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
3950c16b537SWarner Losh     assert(nbBits >= 1);
3960c16b537SWarner Losh     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
3970c16b537SWarner Losh }
3980c16b537SWarner Losh 
3990c16b537SWarner Losh MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
4000c16b537SWarner Losh {
4010c16b537SWarner Losh     bitD->bitsConsumed += nbBits;
4020c16b537SWarner Losh }
4030c16b537SWarner Losh 
4040c16b537SWarner Losh /*! BIT_readBits() :
4050c16b537SWarner Losh  *  Read (consume) next n bits from local register and update.
4060c16b537SWarner Losh  *  Pay attention to not read more than nbBits contained into local register.
4070c16b537SWarner Losh  * @return : extracted value. */
4080c16b537SWarner Losh MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
4090c16b537SWarner Losh {
4100c16b537SWarner Losh     size_t const value = BIT_lookBits(bitD, nbBits);
4110c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
4120c16b537SWarner Losh     return value;
4130c16b537SWarner Losh }
4140c16b537SWarner Losh 
4150c16b537SWarner Losh /*! BIT_readBitsFast() :
4160c16b537SWarner Losh  *  unsafe version; only works only if nbBits >= 1 */
4170c16b537SWarner Losh MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
4180c16b537SWarner Losh {
4190c16b537SWarner Losh     size_t const value = BIT_lookBitsFast(bitD, nbBits);
4200c16b537SWarner Losh     assert(nbBits >= 1);
4210c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
4220c16b537SWarner Losh     return value;
4230c16b537SWarner Losh }
4240c16b537SWarner Losh 
4250c16b537SWarner Losh /*! BIT_reloadDStream() :
4260c16b537SWarner Losh  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
4270c16b537SWarner Losh  *  This function is safe, it guarantees it will not read beyond src buffer.
4280c16b537SWarner Losh  * @return : status of `BIT_DStream_t` internal register.
4290c16b537SWarner Losh  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
4300c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
4310c16b537SWarner Losh {
4320c16b537SWarner Losh     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
4330c16b537SWarner Losh         return BIT_DStream_overflow;
4340c16b537SWarner Losh 
4350c16b537SWarner Losh     if (bitD->ptr >= bitD->limitPtr) {
4360c16b537SWarner Losh         bitD->ptr -= bitD->bitsConsumed >> 3;
4370c16b537SWarner Losh         bitD->bitsConsumed &= 7;
4380c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);
4390c16b537SWarner Losh         return BIT_DStream_unfinished;
4400c16b537SWarner Losh     }
4410c16b537SWarner Losh     if (bitD->ptr == bitD->start) {
4420c16b537SWarner Losh         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
4430c16b537SWarner Losh         return BIT_DStream_completed;
4440c16b537SWarner Losh     }
4450c16b537SWarner Losh     /* start < ptr < limitPtr */
4460c16b537SWarner Losh     {   U32 nbBytes = bitD->bitsConsumed >> 3;
4470c16b537SWarner Losh         BIT_DStream_status result = BIT_DStream_unfinished;
4480c16b537SWarner Losh         if (bitD->ptr - nbBytes < bitD->start) {
4490c16b537SWarner Losh             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
4500c16b537SWarner Losh             result = BIT_DStream_endOfBuffer;
4510c16b537SWarner Losh         }
4520c16b537SWarner Losh         bitD->ptr -= nbBytes;
4530c16b537SWarner Losh         bitD->bitsConsumed -= nbBytes*8;
4540c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
4550c16b537SWarner Losh         return result;
4560c16b537SWarner Losh     }
4570c16b537SWarner Losh }
4580c16b537SWarner Losh 
4590c16b537SWarner Losh /*! BIT_endOfDStream() :
4600c16b537SWarner Losh  * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
4610c16b537SWarner Losh  */
4620c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
4630c16b537SWarner Losh {
4640c16b537SWarner Losh     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
4650c16b537SWarner Losh }
4660c16b537SWarner Losh 
4670c16b537SWarner Losh #if defined (__cplusplus)
4680c16b537SWarner Losh }
4690c16b537SWarner Losh #endif
4700c16b537SWarner Losh 
4710c16b537SWarner Losh #endif /* BITSTREAM_H_MODULE */
472