xref: /freebsd/sys/contrib/zstd/lib/common/bitstream.h (revision 0c16b53773565120a8f80a31a0af2ef56ccd368e)
1*0c16b537SWarner Losh /* ******************************************************************
2*0c16b537SWarner Losh    bitstream
3*0c16b537SWarner Losh    Part of FSE library
4*0c16b537SWarner Losh    header file (to include)
5*0c16b537SWarner Losh    Copyright (C) 2013-2017, Yann Collet.
6*0c16b537SWarner Losh 
7*0c16b537SWarner Losh    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8*0c16b537SWarner Losh 
9*0c16b537SWarner Losh    Redistribution and use in source and binary forms, with or without
10*0c16b537SWarner Losh    modification, are permitted provided that the following conditions are
11*0c16b537SWarner Losh    met:
12*0c16b537SWarner Losh 
13*0c16b537SWarner Losh        * Redistributions of source code must retain the above copyright
14*0c16b537SWarner Losh    notice, this list of conditions and the following disclaimer.
15*0c16b537SWarner Losh        * Redistributions in binary form must reproduce the above
16*0c16b537SWarner Losh    copyright notice, this list of conditions and the following disclaimer
17*0c16b537SWarner Losh    in the documentation and/or other materials provided with the
18*0c16b537SWarner Losh    distribution.
19*0c16b537SWarner Losh 
20*0c16b537SWarner Losh    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21*0c16b537SWarner Losh    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22*0c16b537SWarner Losh    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23*0c16b537SWarner Losh    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24*0c16b537SWarner Losh    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25*0c16b537SWarner Losh    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26*0c16b537SWarner Losh    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27*0c16b537SWarner Losh    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28*0c16b537SWarner Losh    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29*0c16b537SWarner Losh    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30*0c16b537SWarner Losh    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*0c16b537SWarner Losh 
32*0c16b537SWarner Losh    You can contact the author at :
33*0c16b537SWarner Losh    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
34*0c16b537SWarner Losh ****************************************************************** */
35*0c16b537SWarner Losh #ifndef BITSTREAM_H_MODULE
36*0c16b537SWarner Losh #define BITSTREAM_H_MODULE
37*0c16b537SWarner Losh 
38*0c16b537SWarner Losh #if defined (__cplusplus)
39*0c16b537SWarner Losh extern "C" {
40*0c16b537SWarner Losh #endif
41*0c16b537SWarner Losh 
42*0c16b537SWarner Losh /*
43*0c16b537SWarner Losh *  This API consists of small unitary functions, which must be inlined for best performance.
44*0c16b537SWarner Losh *  Since link-time-optimization is not available for all compilers,
45*0c16b537SWarner Losh *  these functions are defined into a .h to be included.
46*0c16b537SWarner Losh */
47*0c16b537SWarner Losh 
48*0c16b537SWarner Losh /*-****************************************
49*0c16b537SWarner Losh *  Dependencies
50*0c16b537SWarner Losh ******************************************/
51*0c16b537SWarner Losh #include "mem.h"            /* unaligned access routines */
52*0c16b537SWarner Losh #include "error_private.h"  /* error codes and messages */
53*0c16b537SWarner Losh 
54*0c16b537SWarner Losh 
55*0c16b537SWarner Losh /*-*************************************
56*0c16b537SWarner Losh *  Debug
57*0c16b537SWarner Losh ***************************************/
58*0c16b537SWarner Losh #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59*0c16b537SWarner Losh #  include <assert.h>
60*0c16b537SWarner Losh #else
61*0c16b537SWarner Losh #  ifndef assert
62*0c16b537SWarner Losh #    define assert(condition) ((void)0)
63*0c16b537SWarner Losh #  endif
64*0c16b537SWarner Losh #endif
65*0c16b537SWarner Losh 
66*0c16b537SWarner Losh 
67*0c16b537SWarner Losh /*=========================================
68*0c16b537SWarner Losh *  Target specific
69*0c16b537SWarner Losh =========================================*/
70*0c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__)
71*0c16b537SWarner Losh #  include <immintrin.h>   /* support for bextr (experimental) */
72*0c16b537SWarner Losh #endif
73*0c16b537SWarner Losh 
74*0c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_32  25
75*0c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_64  57
76*0c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
77*0c16b537SWarner Losh 
78*0c16b537SWarner Losh 
79*0c16b537SWarner Losh /*-******************************************
80*0c16b537SWarner Losh *  bitStream encoding API (write forward)
81*0c16b537SWarner Losh ********************************************/
82*0c16b537SWarner Losh /* bitStream can mix input from multiple sources.
83*0c16b537SWarner Losh  * A critical property of these streams is that they encode and decode in **reverse** direction.
84*0c16b537SWarner Losh  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85*0c16b537SWarner Losh  */
86*0c16b537SWarner Losh typedef struct
87*0c16b537SWarner Losh {
88*0c16b537SWarner Losh     size_t bitContainer;
89*0c16b537SWarner Losh     unsigned bitPos;
90*0c16b537SWarner Losh     char*  startPtr;
91*0c16b537SWarner Losh     char*  ptr;
92*0c16b537SWarner Losh     char*  endPtr;
93*0c16b537SWarner Losh } BIT_CStream_t;
94*0c16b537SWarner Losh 
95*0c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
96*0c16b537SWarner Losh MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
97*0c16b537SWarner Losh MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
98*0c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
99*0c16b537SWarner Losh 
100*0c16b537SWarner Losh /* Start with initCStream, providing the size of buffer to write into.
101*0c16b537SWarner Losh *  bitStream will never write outside of this buffer.
102*0c16b537SWarner Losh *  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
103*0c16b537SWarner Losh *
104*0c16b537SWarner Losh *  bits are first added to a local register.
105*0c16b537SWarner Losh *  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
106*0c16b537SWarner Losh *  Writing data into memory is an explicit operation, performed by the flushBits function.
107*0c16b537SWarner Losh *  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
108*0c16b537SWarner Losh *  After a flushBits, a maximum of 7 bits might still be stored into local register.
109*0c16b537SWarner Losh *
110*0c16b537SWarner Losh *  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
111*0c16b537SWarner Losh *
112*0c16b537SWarner Losh *  Last operation is to close the bitStream.
113*0c16b537SWarner Losh *  The function returns the final size of CStream in bytes.
114*0c16b537SWarner Losh *  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
115*0c16b537SWarner Losh */
116*0c16b537SWarner Losh 
117*0c16b537SWarner Losh 
118*0c16b537SWarner Losh /*-********************************************
119*0c16b537SWarner Losh *  bitStream decoding API (read backward)
120*0c16b537SWarner Losh **********************************************/
121*0c16b537SWarner Losh typedef struct
122*0c16b537SWarner Losh {
123*0c16b537SWarner Losh     size_t   bitContainer;
124*0c16b537SWarner Losh     unsigned bitsConsumed;
125*0c16b537SWarner Losh     const char* ptr;
126*0c16b537SWarner Losh     const char* start;
127*0c16b537SWarner Losh     const char* limitPtr;
128*0c16b537SWarner Losh } BIT_DStream_t;
129*0c16b537SWarner Losh 
130*0c16b537SWarner Losh typedef enum { BIT_DStream_unfinished = 0,
131*0c16b537SWarner Losh                BIT_DStream_endOfBuffer = 1,
132*0c16b537SWarner Losh                BIT_DStream_completed = 2,
133*0c16b537SWarner Losh                BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
134*0c16b537SWarner Losh                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
135*0c16b537SWarner Losh 
136*0c16b537SWarner Losh MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
137*0c16b537SWarner Losh MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
138*0c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
139*0c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
140*0c16b537SWarner Losh 
141*0c16b537SWarner Losh 
142*0c16b537SWarner Losh /* Start by invoking BIT_initDStream().
143*0c16b537SWarner Losh *  A chunk of the bitStream is then stored into a local register.
144*0c16b537SWarner Losh *  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
145*0c16b537SWarner Losh *  You can then retrieve bitFields stored into the local register, **in reverse order**.
146*0c16b537SWarner Losh *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
147*0c16b537SWarner Losh *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
148*0c16b537SWarner Losh *  Otherwise, it can be less than that, so proceed accordingly.
149*0c16b537SWarner Losh *  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
150*0c16b537SWarner Losh */
151*0c16b537SWarner Losh 
152*0c16b537SWarner Losh 
153*0c16b537SWarner Losh /*-****************************************
154*0c16b537SWarner Losh *  unsafe API
155*0c16b537SWarner Losh ******************************************/
156*0c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
157*0c16b537SWarner Losh /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
158*0c16b537SWarner Losh 
159*0c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
160*0c16b537SWarner Losh /* unsafe version; does not check buffer overflow */
161*0c16b537SWarner Losh 
162*0c16b537SWarner Losh MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
163*0c16b537SWarner Losh /* faster, but works only if nbBits >= 1 */
164*0c16b537SWarner Losh 
165*0c16b537SWarner Losh 
166*0c16b537SWarner Losh 
167*0c16b537SWarner Losh /*-**************************************************************
168*0c16b537SWarner Losh *  Internal functions
169*0c16b537SWarner Losh ****************************************************************/
170*0c16b537SWarner Losh MEM_STATIC unsigned BIT_highbit32 (register U32 val)
171*0c16b537SWarner Losh {
172*0c16b537SWarner Losh     assert(val != 0);
173*0c16b537SWarner Losh     {
174*0c16b537SWarner Losh #   if defined(_MSC_VER)   /* Visual */
175*0c16b537SWarner Losh         unsigned long r=0;
176*0c16b537SWarner Losh         _BitScanReverse ( &r, val );
177*0c16b537SWarner Losh         return (unsigned) r;
178*0c16b537SWarner Losh #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
179*0c16b537SWarner Losh         return 31 - __builtin_clz (val);
180*0c16b537SWarner Losh #   else   /* Software version */
181*0c16b537SWarner Losh         static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
182*0c16b537SWarner Losh                                                  11, 14, 16, 18, 22, 25,  3, 30,
183*0c16b537SWarner Losh                                                   8, 12, 20, 28, 15, 17, 24,  7,
184*0c16b537SWarner Losh                                                  19, 27, 23,  6, 26,  5,  4, 31 };
185*0c16b537SWarner Losh         U32 v = val;
186*0c16b537SWarner Losh         v |= v >> 1;
187*0c16b537SWarner Losh         v |= v >> 2;
188*0c16b537SWarner Losh         v |= v >> 4;
189*0c16b537SWarner Losh         v |= v >> 8;
190*0c16b537SWarner Losh         v |= v >> 16;
191*0c16b537SWarner Losh         return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
192*0c16b537SWarner Losh #   endif
193*0c16b537SWarner Losh     }
194*0c16b537SWarner Losh }
195*0c16b537SWarner Losh 
196*0c16b537SWarner Losh /*=====    Local Constants   =====*/
197*0c16b537SWarner Losh static const unsigned BIT_mask[] = {
198*0c16b537SWarner Losh     0,          1,         3,         7,         0xF,       0x1F,
199*0c16b537SWarner Losh     0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
200*0c16b537SWarner Losh     0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
201*0c16b537SWarner Losh     0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
202*0c16b537SWarner Losh     0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
203*0c16b537SWarner Losh     0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
204*0c16b537SWarner Losh #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
205*0c16b537SWarner Losh 
206*0c16b537SWarner Losh /*-**************************************************************
207*0c16b537SWarner Losh *  bitStream encoding
208*0c16b537SWarner Losh ****************************************************************/
209*0c16b537SWarner Losh /*! BIT_initCStream() :
210*0c16b537SWarner Losh  *  `dstCapacity` must be > sizeof(size_t)
211*0c16b537SWarner Losh  *  @return : 0 if success,
212*0c16b537SWarner Losh  *            otherwise an error code (can be tested using ERR_isError()) */
213*0c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214*0c16b537SWarner Losh                                   void* startPtr, size_t dstCapacity)
215*0c16b537SWarner Losh {
216*0c16b537SWarner Losh     bitC->bitContainer = 0;
217*0c16b537SWarner Losh     bitC->bitPos = 0;
218*0c16b537SWarner Losh     bitC->startPtr = (char*)startPtr;
219*0c16b537SWarner Losh     bitC->ptr = bitC->startPtr;
220*0c16b537SWarner Losh     bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
221*0c16b537SWarner Losh     if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
222*0c16b537SWarner Losh     return 0;
223*0c16b537SWarner Losh }
224*0c16b537SWarner Losh 
225*0c16b537SWarner Losh /*! BIT_addBits() :
226*0c16b537SWarner Losh  *  can add up to 31 bits into `bitC`.
227*0c16b537SWarner Losh  *  Note : does not check for register overflow ! */
228*0c16b537SWarner Losh MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
229*0c16b537SWarner Losh                             size_t value, unsigned nbBits)
230*0c16b537SWarner Losh {
231*0c16b537SWarner Losh     MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
232*0c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
233*0c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
234*0c16b537SWarner Losh     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
235*0c16b537SWarner Losh     bitC->bitPos += nbBits;
236*0c16b537SWarner Losh }
237*0c16b537SWarner Losh 
238*0c16b537SWarner Losh /*! BIT_addBitsFast() :
239*0c16b537SWarner Losh  *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
240*0c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241*0c16b537SWarner Losh                                 size_t value, unsigned nbBits)
242*0c16b537SWarner Losh {
243*0c16b537SWarner Losh     assert((value>>nbBits) == 0);
244*0c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
245*0c16b537SWarner Losh     bitC->bitContainer |= value << bitC->bitPos;
246*0c16b537SWarner Losh     bitC->bitPos += nbBits;
247*0c16b537SWarner Losh }
248*0c16b537SWarner Losh 
249*0c16b537SWarner Losh /*! BIT_flushBitsFast() :
250*0c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
251*0c16b537SWarner Losh  *  unsafe version; does not check buffer overflow */
252*0c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
253*0c16b537SWarner Losh {
254*0c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
255*0c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
256*0c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
257*0c16b537SWarner Losh     bitC->ptr += nbBytes;
258*0c16b537SWarner Losh     assert(bitC->ptr <= bitC->endPtr);
259*0c16b537SWarner Losh     bitC->bitPos &= 7;
260*0c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
261*0c16b537SWarner Losh }
262*0c16b537SWarner Losh 
263*0c16b537SWarner Losh /*! BIT_flushBits() :
264*0c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
265*0c16b537SWarner Losh  *  safe version; check for buffer overflow, and prevents it.
266*0c16b537SWarner Losh  *  note : does not signal buffer overflow.
267*0c16b537SWarner Losh  *  overflow will be revealed later on using BIT_closeCStream() */
268*0c16b537SWarner Losh MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
269*0c16b537SWarner Losh {
270*0c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
271*0c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
272*0c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
273*0c16b537SWarner Losh     bitC->ptr += nbBytes;
274*0c16b537SWarner Losh     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
275*0c16b537SWarner Losh     bitC->bitPos &= 7;
276*0c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
277*0c16b537SWarner Losh }
278*0c16b537SWarner Losh 
279*0c16b537SWarner Losh /*! BIT_closeCStream() :
280*0c16b537SWarner Losh  *  @return : size of CStream, in bytes,
281*0c16b537SWarner Losh  *            or 0 if it could not fit into dstBuffer */
282*0c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
283*0c16b537SWarner Losh {
284*0c16b537SWarner Losh     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
285*0c16b537SWarner Losh     BIT_flushBits(bitC);
286*0c16b537SWarner Losh     if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
287*0c16b537SWarner Losh     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
288*0c16b537SWarner Losh }
289*0c16b537SWarner Losh 
290*0c16b537SWarner Losh 
291*0c16b537SWarner Losh /*-********************************************************
292*0c16b537SWarner Losh *  bitStream decoding
293*0c16b537SWarner Losh **********************************************************/
294*0c16b537SWarner Losh /*! BIT_initDStream() :
295*0c16b537SWarner Losh  *  Initialize a BIT_DStream_t.
296*0c16b537SWarner Losh  * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
297*0c16b537SWarner Losh  * `srcSize` must be the *exact* size of the bitStream, in bytes.
298*0c16b537SWarner Losh  * @return : size of stream (== srcSize), or an errorCode if a problem is detected
299*0c16b537SWarner Losh  */
300*0c16b537SWarner Losh MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
301*0c16b537SWarner Losh {
302*0c16b537SWarner Losh     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
303*0c16b537SWarner Losh 
304*0c16b537SWarner Losh     bitD->start = (const char*)srcBuffer;
305*0c16b537SWarner Losh     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
306*0c16b537SWarner Losh 
307*0c16b537SWarner Losh     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
308*0c16b537SWarner Losh         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
309*0c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);
310*0c16b537SWarner Losh         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
311*0c16b537SWarner Losh           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
312*0c16b537SWarner Losh           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
313*0c16b537SWarner Losh     } else {
314*0c16b537SWarner Losh         bitD->ptr   = bitD->start;
315*0c16b537SWarner Losh         bitD->bitContainer = *(const BYTE*)(bitD->start);
316*0c16b537SWarner Losh         switch(srcSize)
317*0c16b537SWarner Losh         {
318*0c16b537SWarner Losh         case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
319*0c16b537SWarner Losh                 /* fall-through */
320*0c16b537SWarner Losh 
321*0c16b537SWarner Losh         case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
322*0c16b537SWarner Losh                 /* fall-through */
323*0c16b537SWarner Losh 
324*0c16b537SWarner Losh         case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
325*0c16b537SWarner Losh                 /* fall-through */
326*0c16b537SWarner Losh 
327*0c16b537SWarner Losh         case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
328*0c16b537SWarner Losh                 /* fall-through */
329*0c16b537SWarner Losh 
330*0c16b537SWarner Losh         case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
331*0c16b537SWarner Losh                 /* fall-through */
332*0c16b537SWarner Losh 
333*0c16b537SWarner Losh         case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
334*0c16b537SWarner Losh                 /* fall-through */
335*0c16b537SWarner Losh 
336*0c16b537SWarner Losh         default: break;
337*0c16b537SWarner Losh         }
338*0c16b537SWarner Losh         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
339*0c16b537SWarner Losh             bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
340*0c16b537SWarner Losh             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
341*0c16b537SWarner Losh         }
342*0c16b537SWarner Losh         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
343*0c16b537SWarner Losh     }
344*0c16b537SWarner Losh 
345*0c16b537SWarner Losh     return srcSize;
346*0c16b537SWarner Losh }
347*0c16b537SWarner Losh 
348*0c16b537SWarner Losh MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
349*0c16b537SWarner Losh {
350*0c16b537SWarner Losh     return bitContainer >> start;
351*0c16b537SWarner Losh }
352*0c16b537SWarner Losh 
353*0c16b537SWarner Losh MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
354*0c16b537SWarner Losh {
355*0c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008  /* experimental */
356*0c16b537SWarner Losh #  if defined(__x86_64__)
357*0c16b537SWarner Losh     if (sizeof(bitContainer)==8)
358*0c16b537SWarner Losh         return _bextr_u64(bitContainer, start, nbBits);
359*0c16b537SWarner Losh     else
360*0c16b537SWarner Losh #  endif
361*0c16b537SWarner Losh         return _bextr_u32(bitContainer, start, nbBits);
362*0c16b537SWarner Losh #else
363*0c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
364*0c16b537SWarner Losh     return (bitContainer >> start) & BIT_mask[nbBits];
365*0c16b537SWarner Losh #endif
366*0c16b537SWarner Losh }
367*0c16b537SWarner Losh 
368*0c16b537SWarner Losh MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
369*0c16b537SWarner Losh {
370*0c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
371*0c16b537SWarner Losh     return bitContainer & BIT_mask[nbBits];
372*0c16b537SWarner Losh }
373*0c16b537SWarner Losh 
374*0c16b537SWarner Losh /*! BIT_lookBits() :
375*0c16b537SWarner Losh  *  Provides next n bits from local register.
376*0c16b537SWarner Losh  *  local register is not modified.
377*0c16b537SWarner Losh  *  On 32-bits, maxNbBits==24.
378*0c16b537SWarner Losh  *  On 64-bits, maxNbBits==56.
379*0c16b537SWarner Losh  * @return : value extracted */
380*0c16b537SWarner Losh MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
381*0c16b537SWarner Losh {
382*0c16b537SWarner Losh #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
383*0c16b537SWarner Losh     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
384*0c16b537SWarner Losh #else
385*0c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
386*0c16b537SWarner Losh     return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
387*0c16b537SWarner Losh #endif
388*0c16b537SWarner Losh }
389*0c16b537SWarner Losh 
390*0c16b537SWarner Losh /*! BIT_lookBitsFast() :
391*0c16b537SWarner Losh  *  unsafe version; only works if nbBits >= 1 */
392*0c16b537SWarner Losh MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
393*0c16b537SWarner Losh {
394*0c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
395*0c16b537SWarner Losh     assert(nbBits >= 1);
396*0c16b537SWarner Losh     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
397*0c16b537SWarner Losh }
398*0c16b537SWarner Losh 
399*0c16b537SWarner Losh MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
400*0c16b537SWarner Losh {
401*0c16b537SWarner Losh     bitD->bitsConsumed += nbBits;
402*0c16b537SWarner Losh }
403*0c16b537SWarner Losh 
404*0c16b537SWarner Losh /*! BIT_readBits() :
405*0c16b537SWarner Losh  *  Read (consume) next n bits from local register and update.
406*0c16b537SWarner Losh  *  Pay attention to not read more than nbBits contained into local register.
407*0c16b537SWarner Losh  * @return : extracted value. */
408*0c16b537SWarner Losh MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
409*0c16b537SWarner Losh {
410*0c16b537SWarner Losh     size_t const value = BIT_lookBits(bitD, nbBits);
411*0c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
412*0c16b537SWarner Losh     return value;
413*0c16b537SWarner Losh }
414*0c16b537SWarner Losh 
415*0c16b537SWarner Losh /*! BIT_readBitsFast() :
416*0c16b537SWarner Losh  *  unsafe version; only works only if nbBits >= 1 */
417*0c16b537SWarner Losh MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
418*0c16b537SWarner Losh {
419*0c16b537SWarner Losh     size_t const value = BIT_lookBitsFast(bitD, nbBits);
420*0c16b537SWarner Losh     assert(nbBits >= 1);
421*0c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
422*0c16b537SWarner Losh     return value;
423*0c16b537SWarner Losh }
424*0c16b537SWarner Losh 
425*0c16b537SWarner Losh /*! BIT_reloadDStream() :
426*0c16b537SWarner Losh  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
427*0c16b537SWarner Losh  *  This function is safe, it guarantees it will not read beyond src buffer.
428*0c16b537SWarner Losh  * @return : status of `BIT_DStream_t` internal register.
429*0c16b537SWarner Losh  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
430*0c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
431*0c16b537SWarner Losh {
432*0c16b537SWarner Losh     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
433*0c16b537SWarner Losh         return BIT_DStream_overflow;
434*0c16b537SWarner Losh 
435*0c16b537SWarner Losh     if (bitD->ptr >= bitD->limitPtr) {
436*0c16b537SWarner Losh         bitD->ptr -= bitD->bitsConsumed >> 3;
437*0c16b537SWarner Losh         bitD->bitsConsumed &= 7;
438*0c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);
439*0c16b537SWarner Losh         return BIT_DStream_unfinished;
440*0c16b537SWarner Losh     }
441*0c16b537SWarner Losh     if (bitD->ptr == bitD->start) {
442*0c16b537SWarner Losh         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
443*0c16b537SWarner Losh         return BIT_DStream_completed;
444*0c16b537SWarner Losh     }
445*0c16b537SWarner Losh     /* start < ptr < limitPtr */
446*0c16b537SWarner Losh     {   U32 nbBytes = bitD->bitsConsumed >> 3;
447*0c16b537SWarner Losh         BIT_DStream_status result = BIT_DStream_unfinished;
448*0c16b537SWarner Losh         if (bitD->ptr - nbBytes < bitD->start) {
449*0c16b537SWarner Losh             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
450*0c16b537SWarner Losh             result = BIT_DStream_endOfBuffer;
451*0c16b537SWarner Losh         }
452*0c16b537SWarner Losh         bitD->ptr -= nbBytes;
453*0c16b537SWarner Losh         bitD->bitsConsumed -= nbBytes*8;
454*0c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
455*0c16b537SWarner Losh         return result;
456*0c16b537SWarner Losh     }
457*0c16b537SWarner Losh }
458*0c16b537SWarner Losh 
459*0c16b537SWarner Losh /*! BIT_endOfDStream() :
460*0c16b537SWarner Losh  * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
461*0c16b537SWarner Losh  */
462*0c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
463*0c16b537SWarner Losh {
464*0c16b537SWarner Losh     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
465*0c16b537SWarner Losh }
466*0c16b537SWarner Losh 
467*0c16b537SWarner Losh #if defined (__cplusplus)
468*0c16b537SWarner Losh }
469*0c16b537SWarner Losh #endif
470*0c16b537SWarner Losh 
471*0c16b537SWarner Losh #endif /* BITSTREAM_H_MODULE */
472