xref: /freebsd/sys/contrib/zstd/lib/common/bitstream.h (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
10c16b537SWarner Losh /* ******************************************************************
237f1f268SConrad Meyer  * bitstream
337f1f268SConrad Meyer  * Part of FSE library
4*5ff13fbcSAllan Jude  * Copyright (c) Yann Collet, Facebook, Inc.
537f1f268SConrad Meyer  *
637f1f268SConrad Meyer  * You can contact the author at :
737f1f268SConrad Meyer  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
837f1f268SConrad Meyer  *
937f1f268SConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
1037f1f268SConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1137f1f268SConrad Meyer  * in the COPYING file in the root directory of this source tree).
1237f1f268SConrad Meyer  * You may select, at your option, one of the above-listed licenses.
130c16b537SWarner Losh ****************************************************************** */
140c16b537SWarner Losh #ifndef BITSTREAM_H_MODULE
150c16b537SWarner Losh #define BITSTREAM_H_MODULE
160c16b537SWarner Losh 
170c16b537SWarner Losh #if defined (__cplusplus)
180c16b537SWarner Losh extern "C" {
190c16b537SWarner Losh #endif
200c16b537SWarner Losh /*
210c16b537SWarner Losh *  This API consists of small unitary functions, which must be inlined for best performance.
220c16b537SWarner Losh *  Since link-time-optimization is not available for all compilers,
230c16b537SWarner Losh *  these functions are defined into a .h to be included.
240c16b537SWarner Losh */
250c16b537SWarner Losh 
260c16b537SWarner Losh /*-****************************************
270c16b537SWarner Losh *  Dependencies
280c16b537SWarner Losh ******************************************/
290c16b537SWarner Losh #include "mem.h"            /* unaligned access routines */
3037f1f268SConrad Meyer #include "compiler.h"       /* UNLIKELY() */
310f743729SConrad Meyer #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
320c16b537SWarner Losh #include "error_private.h"  /* error codes and messages */
330c16b537SWarner Losh 
340c16b537SWarner Losh 
350c16b537SWarner Losh /*=========================================
360c16b537SWarner Losh *  Target specific
370c16b537SWarner Losh =========================================*/
38f7cd7fe5SConrad Meyer #ifndef ZSTD_NO_INTRINSICS
390c16b537SWarner Losh #  if defined(__BMI__) && defined(__GNUC__)
400c16b537SWarner Losh #    include <immintrin.h>   /* support for bextr (experimental) */
419cbefe25SConrad Meyer #  elif defined(__ICCARM__)
429cbefe25SConrad Meyer #    include <intrinsics.h>
430c16b537SWarner Losh #  endif
44f7cd7fe5SConrad Meyer #endif
450c16b537SWarner Losh 
460c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_32  25
470c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN_64  57
480c16b537SWarner Losh #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
490c16b537SWarner Losh 
500c16b537SWarner Losh 
510c16b537SWarner Losh /*-******************************************
520c16b537SWarner Losh *  bitStream encoding API (write forward)
530c16b537SWarner Losh ********************************************/
540c16b537SWarner Losh /* bitStream can mix input from multiple sources.
550c16b537SWarner Losh  * A critical property of these streams is that they encode and decode in **reverse** direction.
560c16b537SWarner Losh  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
570c16b537SWarner Losh  */
580f743729SConrad Meyer typedef struct {
590c16b537SWarner Losh     size_t bitContainer;
600c16b537SWarner Losh     unsigned bitPos;
610c16b537SWarner Losh     char*  startPtr;
620c16b537SWarner Losh     char*  ptr;
630c16b537SWarner Losh     char*  endPtr;
640c16b537SWarner Losh } BIT_CStream_t;
650c16b537SWarner Losh 
660c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
670c16b537SWarner Losh MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
680c16b537SWarner Losh MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
690c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
700c16b537SWarner Losh 
710c16b537SWarner Losh /* Start with initCStream, providing the size of buffer to write into.
720c16b537SWarner Losh *  bitStream will never write outside of this buffer.
730c16b537SWarner Losh *  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
740c16b537SWarner Losh *
750c16b537SWarner Losh *  bits are first added to a local register.
760c16b537SWarner Losh *  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
770c16b537SWarner Losh *  Writing data into memory is an explicit operation, performed by the flushBits function.
780c16b537SWarner Losh *  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
790c16b537SWarner Losh *  After a flushBits, a maximum of 7 bits might still be stored into local register.
800c16b537SWarner Losh *
810c16b537SWarner Losh *  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
820c16b537SWarner Losh *
830c16b537SWarner Losh *  Last operation is to close the bitStream.
840c16b537SWarner Losh *  The function returns the final size of CStream in bytes.
850c16b537SWarner Losh *  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
860c16b537SWarner Losh */
870c16b537SWarner Losh 
880c16b537SWarner Losh 
890c16b537SWarner Losh /*-********************************************
900c16b537SWarner Losh *  bitStream decoding API (read backward)
910c16b537SWarner Losh **********************************************/
920f743729SConrad Meyer typedef struct {
930c16b537SWarner Losh     size_t   bitContainer;
940c16b537SWarner Losh     unsigned bitsConsumed;
950c16b537SWarner Losh     const char* ptr;
960c16b537SWarner Losh     const char* start;
970c16b537SWarner Losh     const char* limitPtr;
980c16b537SWarner Losh } BIT_DStream_t;
990c16b537SWarner Losh 
1000c16b537SWarner Losh typedef enum { BIT_DStream_unfinished = 0,
1010c16b537SWarner Losh                BIT_DStream_endOfBuffer = 1,
1020c16b537SWarner Losh                BIT_DStream_completed = 2,
1030c16b537SWarner Losh                BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
1040c16b537SWarner Losh                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
1050c16b537SWarner Losh 
1060c16b537SWarner Losh MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
1070c16b537SWarner Losh MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
1080c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
1090c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
1100c16b537SWarner Losh 
1110c16b537SWarner Losh 
1120c16b537SWarner Losh /* Start by invoking BIT_initDStream().
1130c16b537SWarner Losh *  A chunk of the bitStream is then stored into a local register.
1140c16b537SWarner Losh *  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
1150c16b537SWarner Losh *  You can then retrieve bitFields stored into the local register, **in reverse order**.
1160c16b537SWarner Losh *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
1170c16b537SWarner Losh *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
1180c16b537SWarner Losh *  Otherwise, it can be less than that, so proceed accordingly.
1190c16b537SWarner Losh *  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
1200c16b537SWarner Losh */
1210c16b537SWarner Losh 
1220c16b537SWarner Losh 
1230c16b537SWarner Losh /*-****************************************
1240c16b537SWarner Losh *  unsafe API
1250c16b537SWarner Losh ******************************************/
1260c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
1270c16b537SWarner Losh /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
1280c16b537SWarner Losh 
1290c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
1300c16b537SWarner Losh /* unsafe version; does not check buffer overflow */
1310c16b537SWarner Losh 
1320c16b537SWarner Losh MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
1330c16b537SWarner Losh /* faster, but works only if nbBits >= 1 */
1340c16b537SWarner Losh 
1350c16b537SWarner Losh 
1360c16b537SWarner Losh 
1370c16b537SWarner Losh /*-**************************************************************
1380c16b537SWarner Losh *  Internal functions
1390c16b537SWarner Losh ****************************************************************/
BIT_highbit32(U32 val)140052d3c12SConrad Meyer MEM_STATIC unsigned BIT_highbit32 (U32 val)
1410c16b537SWarner Losh {
1420c16b537SWarner Losh     assert(val != 0);
1430c16b537SWarner Losh     {
1440c16b537SWarner Losh #   if defined(_MSC_VER)   /* Visual */
145f7cd7fe5SConrad Meyer #       if STATIC_BMI2 == 1
146f7cd7fe5SConrad Meyer             return _lzcnt_u32(val) ^ 31;
147f7cd7fe5SConrad Meyer #       else
148*5ff13fbcSAllan Jude             if (val != 0) {
149*5ff13fbcSAllan Jude                 unsigned long r;
150*5ff13fbcSAllan Jude                 _BitScanReverse(&r, val);
151*5ff13fbcSAllan Jude                 return (unsigned)r;
152*5ff13fbcSAllan Jude             } else {
153*5ff13fbcSAllan Jude                 /* Should not reach this code path */
154*5ff13fbcSAllan Jude                 __assume(0);
155*5ff13fbcSAllan Jude             }
156f7cd7fe5SConrad Meyer #       endif
1570f743729SConrad Meyer #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
1589cbefe25SConrad Meyer         return __builtin_clz (val) ^ 31;
1599cbefe25SConrad Meyer #   elif defined(__ICCARM__)    /* IAR Intrinsic */
1609cbefe25SConrad Meyer         return 31 - __CLZ(val);
1610c16b537SWarner Losh #   else   /* Software version */
1620c16b537SWarner Losh         static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
1630c16b537SWarner Losh                                                  11, 14, 16, 18, 22, 25,  3, 30,
1640c16b537SWarner Losh                                                   8, 12, 20, 28, 15, 17, 24,  7,
1650c16b537SWarner Losh                                                  19, 27, 23,  6, 26,  5,  4, 31 };
1660c16b537SWarner Losh         U32 v = val;
1670c16b537SWarner Losh         v |= v >> 1;
1680c16b537SWarner Losh         v |= v >> 2;
1690c16b537SWarner Losh         v |= v >> 4;
1700c16b537SWarner Losh         v |= v >> 8;
1710c16b537SWarner Losh         v |= v >> 16;
1720c16b537SWarner Losh         return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
1730c16b537SWarner Losh #   endif
1740c16b537SWarner Losh     }
1750c16b537SWarner Losh }
1760c16b537SWarner Losh 
1770c16b537SWarner Losh /*=====    Local Constants   =====*/
1780c16b537SWarner Losh static const unsigned BIT_mask[] = {
1790c16b537SWarner Losh     0,          1,         3,         7,         0xF,       0x1F,
1800c16b537SWarner Losh     0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
1810c16b537SWarner Losh     0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
1820c16b537SWarner Losh     0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
1830c16b537SWarner Losh     0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
1840c16b537SWarner Losh     0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
1850c16b537SWarner Losh #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
1860c16b537SWarner Losh 
1870c16b537SWarner Losh /*-**************************************************************
1880c16b537SWarner Losh *  bitStream encoding
1890c16b537SWarner Losh ****************************************************************/
1900c16b537SWarner Losh /*! BIT_initCStream() :
1910c16b537SWarner Losh  *  `dstCapacity` must be > sizeof(size_t)
1920c16b537SWarner Losh  *  @return : 0 if success,
1930c16b537SWarner Losh  *            otherwise an error code (can be tested using ERR_isError()) */
BIT_initCStream(BIT_CStream_t * bitC,void * startPtr,size_t dstCapacity)1940c16b537SWarner Losh MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
1950c16b537SWarner Losh                                   void* startPtr, size_t dstCapacity)
1960c16b537SWarner Losh {
1970c16b537SWarner Losh     bitC->bitContainer = 0;
1980c16b537SWarner Losh     bitC->bitPos = 0;
1990c16b537SWarner Losh     bitC->startPtr = (char*)startPtr;
2000c16b537SWarner Losh     bitC->ptr = bitC->startPtr;
2010c16b537SWarner Losh     bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
2020c16b537SWarner Losh     if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
2030c16b537SWarner Losh     return 0;
2040c16b537SWarner Losh }
2050c16b537SWarner Losh 
2060c16b537SWarner Losh /*! BIT_addBits() :
2070c16b537SWarner Losh  *  can add up to 31 bits into `bitC`.
2080c16b537SWarner Losh  *  Note : does not check for register overflow ! */
BIT_addBits(BIT_CStream_t * bitC,size_t value,unsigned nbBits)2090c16b537SWarner Losh MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
2100c16b537SWarner Losh                             size_t value, unsigned nbBits)
2110c16b537SWarner Losh {
212f7cd7fe5SConrad Meyer     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
2130c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
2140c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2150c16b537SWarner Losh     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
2160c16b537SWarner Losh     bitC->bitPos += nbBits;
2170c16b537SWarner Losh }
2180c16b537SWarner Losh 
2190c16b537SWarner Losh /*! BIT_addBitsFast() :
2200f743729SConrad Meyer  *  works only if `value` is _clean_,
2210f743729SConrad Meyer  *  meaning all high bits above nbBits are 0 */
BIT_addBitsFast(BIT_CStream_t * bitC,size_t value,unsigned nbBits)2220c16b537SWarner Losh MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
2230c16b537SWarner Losh                                 size_t value, unsigned nbBits)
2240c16b537SWarner Losh {
2250c16b537SWarner Losh     assert((value>>nbBits) == 0);
2260c16b537SWarner Losh     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2270c16b537SWarner Losh     bitC->bitContainer |= value << bitC->bitPos;
2280c16b537SWarner Losh     bitC->bitPos += nbBits;
2290c16b537SWarner Losh }
2300c16b537SWarner Losh 
2310c16b537SWarner Losh /*! BIT_flushBitsFast() :
2320c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
2330c16b537SWarner Losh  *  unsafe version; does not check buffer overflow */
BIT_flushBitsFast(BIT_CStream_t * bitC)2340c16b537SWarner Losh MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
2350c16b537SWarner Losh {
2360c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
2370c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2389cbefe25SConrad Meyer     assert(bitC->ptr <= bitC->endPtr);
2390c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2400c16b537SWarner Losh     bitC->ptr += nbBytes;
2410c16b537SWarner Losh     bitC->bitPos &= 7;
2420c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
2430c16b537SWarner Losh }
2440c16b537SWarner Losh 
2450c16b537SWarner Losh /*! BIT_flushBits() :
2460c16b537SWarner Losh  *  assumption : bitContainer has not overflowed
2470c16b537SWarner Losh  *  safe version; check for buffer overflow, and prevents it.
2480c16b537SWarner Losh  *  note : does not signal buffer overflow.
2490c16b537SWarner Losh  *  overflow will be revealed later on using BIT_closeCStream() */
BIT_flushBits(BIT_CStream_t * bitC)2500c16b537SWarner Losh MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
2510c16b537SWarner Losh {
2520c16b537SWarner Losh     size_t const nbBytes = bitC->bitPos >> 3;
2530c16b537SWarner Losh     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2549cbefe25SConrad Meyer     assert(bitC->ptr <= bitC->endPtr);
2550c16b537SWarner Losh     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2560c16b537SWarner Losh     bitC->ptr += nbBytes;
2570c16b537SWarner Losh     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
2580c16b537SWarner Losh     bitC->bitPos &= 7;
2590c16b537SWarner Losh     bitC->bitContainer >>= nbBytes*8;
2600c16b537SWarner Losh }
2610c16b537SWarner Losh 
2620c16b537SWarner Losh /*! BIT_closeCStream() :
2630c16b537SWarner Losh  *  @return : size of CStream, in bytes,
2640c16b537SWarner Losh  *            or 0 if it could not fit into dstBuffer */
BIT_closeCStream(BIT_CStream_t * bitC)2650c16b537SWarner Losh MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
2660c16b537SWarner Losh {
2670c16b537SWarner Losh     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
2680c16b537SWarner Losh     BIT_flushBits(bitC);
2690c16b537SWarner Losh     if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
2700c16b537SWarner Losh     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
2710c16b537SWarner Losh }
2720c16b537SWarner Losh 
2730c16b537SWarner Losh 
2740c16b537SWarner Losh /*-********************************************************
2750c16b537SWarner Losh *  bitStream decoding
2760c16b537SWarner Losh **********************************************************/
2770c16b537SWarner Losh /*! BIT_initDStream() :
2780c16b537SWarner Losh  *  Initialize a BIT_DStream_t.
2790c16b537SWarner Losh  * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
2800c16b537SWarner Losh  * `srcSize` must be the *exact* size of the bitStream, in bytes.
2810c16b537SWarner Losh  * @return : size of stream (== srcSize), or an errorCode if a problem is detected
2820c16b537SWarner Losh  */
BIT_initDStream(BIT_DStream_t * bitD,const void * srcBuffer,size_t srcSize)2830c16b537SWarner Losh MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
2840c16b537SWarner Losh {
285f7cd7fe5SConrad Meyer     if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
2860c16b537SWarner Losh 
2870c16b537SWarner Losh     bitD->start = (const char*)srcBuffer;
2880c16b537SWarner Losh     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
2890c16b537SWarner Losh 
2900c16b537SWarner Losh     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
2910c16b537SWarner Losh         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
2920c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);
2930c16b537SWarner Losh         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
2940c16b537SWarner Losh           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
2950c16b537SWarner Losh           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
2960c16b537SWarner Losh     } else {
2970c16b537SWarner Losh         bitD->ptr   = bitD->start;
2980c16b537SWarner Losh         bitD->bitContainer = *(const BYTE*)(bitD->start);
2990c16b537SWarner Losh         switch(srcSize)
3000c16b537SWarner Losh         {
3010c16b537SWarner Losh         case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
302*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3030c16b537SWarner Losh 
3040c16b537SWarner Losh         case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
305*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3060c16b537SWarner Losh 
3070c16b537SWarner Losh         case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
308*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3090c16b537SWarner Losh 
3100c16b537SWarner Losh         case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
311*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3120c16b537SWarner Losh 
3130c16b537SWarner Losh         case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
314*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3150c16b537SWarner Losh 
3160c16b537SWarner Losh         case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
317*5ff13fbcSAllan Jude                 ZSTD_FALLTHROUGH;
3180c16b537SWarner Losh 
3190c16b537SWarner Losh         default: break;
3200c16b537SWarner Losh         }
3210c16b537SWarner Losh         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
3220c16b537SWarner Losh             bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
3230c16b537SWarner Losh             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
3240c16b537SWarner Losh         }
3250c16b537SWarner Losh         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
3260c16b537SWarner Losh     }
3270c16b537SWarner Losh 
3280c16b537SWarner Losh     return srcSize;
3290c16b537SWarner Losh }
3300c16b537SWarner Losh 
BIT_getUpperBits(size_t bitContainer,U32 const start)331f7cd7fe5SConrad Meyer MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
3320c16b537SWarner Losh {
3330c16b537SWarner Losh     return bitContainer >> start;
3340c16b537SWarner Losh }
3350c16b537SWarner Losh 
BIT_getMiddleBits(size_t bitContainer,U32 const start,U32 const nbBits)336f7cd7fe5SConrad Meyer MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
3370c16b537SWarner Losh {
3380f743729SConrad Meyer     U32 const regMask = sizeof(bitContainer)*8 - 1;
3390f743729SConrad Meyer     /* if start > regMask, bitstream is corrupted, and result is undefined */
3400c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
341*5ff13fbcSAllan Jude     /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
342*5ff13fbcSAllan Jude      * than accessing memory. When bmi2 instruction is not present, we consider
343*5ff13fbcSAllan Jude      * such cpus old (pre-Haswell, 2013) and their performance is not of that
344*5ff13fbcSAllan Jude      * importance.
345*5ff13fbcSAllan Jude      */
346*5ff13fbcSAllan Jude #if defined(__x86_64__) || defined(_M_X86)
347*5ff13fbcSAllan Jude     return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
348*5ff13fbcSAllan Jude #else
3490f743729SConrad Meyer     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
350*5ff13fbcSAllan Jude #endif
3510c16b537SWarner Losh }
3520c16b537SWarner Losh 
BIT_getLowerBits(size_t bitContainer,U32 const nbBits)353f7cd7fe5SConrad Meyer MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
3540c16b537SWarner Losh {
355f7cd7fe5SConrad Meyer #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
356f7cd7fe5SConrad Meyer 	return  _bzhi_u64(bitContainer, nbBits);
357f7cd7fe5SConrad Meyer #else
3580c16b537SWarner Losh     assert(nbBits < BIT_MASK_SIZE);
3590c16b537SWarner Losh     return bitContainer & BIT_mask[nbBits];
360f7cd7fe5SConrad Meyer #endif
3610c16b537SWarner Losh }
3620c16b537SWarner Losh 
3630c16b537SWarner Losh /*! BIT_lookBits() :
3640c16b537SWarner Losh  *  Provides next n bits from local register.
3650c16b537SWarner Losh  *  local register is not modified.
3660c16b537SWarner Losh  *  On 32-bits, maxNbBits==24.
3670c16b537SWarner Losh  *  On 64-bits, maxNbBits==56.
3680c16b537SWarner Losh  * @return : value extracted */
BIT_lookBits(const BIT_DStream_t * bitD,U32 nbBits)369f7cd7fe5SConrad Meyer MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
3700c16b537SWarner Losh {
3710f743729SConrad Meyer     /* arbitrate between double-shift and shift+mask */
3720f743729SConrad Meyer #if 1
3730f743729SConrad Meyer     /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
3740f743729SConrad Meyer      * bitstream is likely corrupted, and result is undefined */
3750c16b537SWarner Losh     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
3760c16b537SWarner Losh #else
3770f743729SConrad Meyer     /* this code path is slower on my os-x laptop */
3780c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
3790c16b537SWarner Losh     return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
3800c16b537SWarner Losh #endif
3810c16b537SWarner Losh }
3820c16b537SWarner Losh 
3830c16b537SWarner Losh /*! BIT_lookBitsFast() :
3840c16b537SWarner Losh  *  unsafe version; only works if nbBits >= 1 */
BIT_lookBitsFast(const BIT_DStream_t * bitD,U32 nbBits)3850c16b537SWarner Losh MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
3860c16b537SWarner Losh {
3870c16b537SWarner Losh     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
3880c16b537SWarner Losh     assert(nbBits >= 1);
3890c16b537SWarner Losh     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
3900c16b537SWarner Losh }
3910c16b537SWarner Losh 
BIT_skipBits(BIT_DStream_t * bitD,U32 nbBits)392f7cd7fe5SConrad Meyer MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
3930c16b537SWarner Losh {
3940c16b537SWarner Losh     bitD->bitsConsumed += nbBits;
3950c16b537SWarner Losh }
3960c16b537SWarner Losh 
3970c16b537SWarner Losh /*! BIT_readBits() :
3980c16b537SWarner Losh  *  Read (consume) next n bits from local register and update.
3990c16b537SWarner Losh  *  Pay attention to not read more than nbBits contained into local register.
4000c16b537SWarner Losh  * @return : extracted value. */
BIT_readBits(BIT_DStream_t * bitD,unsigned nbBits)401f7cd7fe5SConrad Meyer MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
4020c16b537SWarner Losh {
4030c16b537SWarner Losh     size_t const value = BIT_lookBits(bitD, nbBits);
4040c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
4050c16b537SWarner Losh     return value;
4060c16b537SWarner Losh }
4070c16b537SWarner Losh 
4080c16b537SWarner Losh /*! BIT_readBitsFast() :
4090c16b537SWarner Losh  *  unsafe version; only works only if nbBits >= 1 */
BIT_readBitsFast(BIT_DStream_t * bitD,unsigned nbBits)410a0483764SConrad Meyer MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
4110c16b537SWarner Losh {
4120c16b537SWarner Losh     size_t const value = BIT_lookBitsFast(bitD, nbBits);
4130c16b537SWarner Losh     assert(nbBits >= 1);
4140c16b537SWarner Losh     BIT_skipBits(bitD, nbBits);
4150c16b537SWarner Losh     return value;
4160c16b537SWarner Losh }
4170c16b537SWarner Losh 
41837f1f268SConrad Meyer /*! BIT_reloadDStreamFast() :
41937f1f268SConrad Meyer  *  Similar to BIT_reloadDStream(), but with two differences:
42037f1f268SConrad Meyer  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
42137f1f268SConrad Meyer  *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
42237f1f268SConrad Meyer  *     point you must use BIT_reloadDStream() to reload.
42337f1f268SConrad Meyer  */
BIT_reloadDStreamFast(BIT_DStream_t * bitD)42437f1f268SConrad Meyer MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
42537f1f268SConrad Meyer {
42637f1f268SConrad Meyer     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
42737f1f268SConrad Meyer         return BIT_DStream_overflow;
42837f1f268SConrad Meyer     assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
42937f1f268SConrad Meyer     bitD->ptr -= bitD->bitsConsumed >> 3;
43037f1f268SConrad Meyer     bitD->bitsConsumed &= 7;
43137f1f268SConrad Meyer     bitD->bitContainer = MEM_readLEST(bitD->ptr);
43237f1f268SConrad Meyer     return BIT_DStream_unfinished;
43337f1f268SConrad Meyer }
43437f1f268SConrad Meyer 
4350c16b537SWarner Losh /*! BIT_reloadDStream() :
4360c16b537SWarner Losh  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
4370c16b537SWarner Losh  *  This function is safe, it guarantees it will not read beyond src buffer.
4380c16b537SWarner Losh  * @return : status of `BIT_DStream_t` internal register.
43919fcbaf1SConrad Meyer  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
BIT_reloadDStream(BIT_DStream_t * bitD)4400c16b537SWarner Losh MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
4410c16b537SWarner Losh {
4420c16b537SWarner Losh     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
4430c16b537SWarner Losh         return BIT_DStream_overflow;
4440c16b537SWarner Losh 
4450c16b537SWarner Losh     if (bitD->ptr >= bitD->limitPtr) {
44637f1f268SConrad Meyer         return BIT_reloadDStreamFast(bitD);
4470c16b537SWarner Losh     }
4480c16b537SWarner Losh     if (bitD->ptr == bitD->start) {
4490c16b537SWarner Losh         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
4500c16b537SWarner Losh         return BIT_DStream_completed;
4510c16b537SWarner Losh     }
4520c16b537SWarner Losh     /* start < ptr < limitPtr */
4530c16b537SWarner Losh     {   U32 nbBytes = bitD->bitsConsumed >> 3;
4540c16b537SWarner Losh         BIT_DStream_status result = BIT_DStream_unfinished;
4550c16b537SWarner Losh         if (bitD->ptr - nbBytes < bitD->start) {
4560c16b537SWarner Losh             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
4570c16b537SWarner Losh             result = BIT_DStream_endOfBuffer;
4580c16b537SWarner Losh         }
4590c16b537SWarner Losh         bitD->ptr -= nbBytes;
4600c16b537SWarner Losh         bitD->bitsConsumed -= nbBytes*8;
4610c16b537SWarner Losh         bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
4620c16b537SWarner Losh         return result;
4630c16b537SWarner Losh     }
4640c16b537SWarner Losh }
4650c16b537SWarner Losh 
4660c16b537SWarner Losh /*! BIT_endOfDStream() :
4670c16b537SWarner Losh  * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
4680c16b537SWarner Losh  */
BIT_endOfDStream(const BIT_DStream_t * DStream)4690c16b537SWarner Losh MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
4700c16b537SWarner Losh {
4710c16b537SWarner Losh     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
4720c16b537SWarner Losh }
4730c16b537SWarner Losh 
4740c16b537SWarner Losh #if defined (__cplusplus)
4750c16b537SWarner Losh }
4760c16b537SWarner Losh #endif
4770c16b537SWarner Losh 
4780c16b537SWarner Losh #endif /* BITSTREAM_H_MODULE */
479