1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file range_encoder.h 681ad8388SMartin Matuska /// \brief Range Encoder 781ad8388SMartin Matuska /// 881ad8388SMartin Matuska // Authors: Igor Pavlov 981ad8388SMartin Matuska // Lasse Collin 1081ad8388SMartin Matuska // 1181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1281ad8388SMartin Matuska 1381ad8388SMartin Matuska #ifndef LZMA_RANGE_ENCODER_H 1481ad8388SMartin Matuska #define LZMA_RANGE_ENCODER_H 1581ad8388SMartin Matuska 1681ad8388SMartin Matuska #include "range_common.h" 1781ad8388SMartin Matuska #include "price.h" 1881ad8388SMartin Matuska 1981ad8388SMartin Matuska 2081ad8388SMartin Matuska /// Maximum number of symbols that can be put pending into lzma_range_encoder 2173ed8e77SXin LI /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough 2281ad8388SMartin Matuska /// (match with big distance and length followed by range encoder flush). 2373ed8e77SXin LI #define RC_SYMBOLS_MAX 53 2481ad8388SMartin Matuska 2581ad8388SMartin Matuska 2681ad8388SMartin Matuska typedef struct { 2781ad8388SMartin Matuska uint64_t low; 2881ad8388SMartin Matuska uint64_t cache_size; 2981ad8388SMartin Matuska uint32_t range; 3081ad8388SMartin Matuska uint8_t cache; 3181ad8388SMartin Matuska 3273ed8e77SXin LI /// Number of bytes written out by rc_encode() -> rc_shift_low() 3373ed8e77SXin LI uint64_t out_total; 3473ed8e77SXin LI 3581ad8388SMartin Matuska /// Number of symbols in the tables 3681ad8388SMartin Matuska size_t count; 3781ad8388SMartin Matuska 3881ad8388SMartin Matuska /// rc_encode()'s position in the tables 3981ad8388SMartin Matuska size_t pos; 4081ad8388SMartin Matuska 4181ad8388SMartin Matuska /// Symbols to encode 4281ad8388SMartin Matuska enum { 4381ad8388SMartin Matuska RC_BIT_0, 4481ad8388SMartin Matuska RC_BIT_1, 4581ad8388SMartin Matuska RC_DIRECT_0, 4681ad8388SMartin Matuska RC_DIRECT_1, 4781ad8388SMartin Matuska RC_FLUSH, 4881ad8388SMartin Matuska } symbols[RC_SYMBOLS_MAX]; 4981ad8388SMartin Matuska 5081ad8388SMartin Matuska /// Probabilities associated with RC_BIT_0 or RC_BIT_1 5181ad8388SMartin Matuska probability *probs[RC_SYMBOLS_MAX]; 5281ad8388SMartin Matuska 5381ad8388SMartin Matuska } lzma_range_encoder; 5481ad8388SMartin Matuska 5581ad8388SMartin Matuska 5681ad8388SMartin Matuska static inline void 5781ad8388SMartin Matuska rc_reset(lzma_range_encoder *rc) 5881ad8388SMartin Matuska { 5981ad8388SMartin Matuska rc->low = 0; 6081ad8388SMartin Matuska rc->cache_size = 1; 6181ad8388SMartin Matuska rc->range = UINT32_MAX; 6281ad8388SMartin Matuska rc->cache = 0; 6373ed8e77SXin LI rc->out_total = 0; 6481ad8388SMartin Matuska rc->count = 0; 6581ad8388SMartin Matuska rc->pos = 0; 6681ad8388SMartin Matuska } 6781ad8388SMartin Matuska 6881ad8388SMartin Matuska 6981ad8388SMartin Matuska static inline void 7073ed8e77SXin LI rc_forget(lzma_range_encoder *rc) 7173ed8e77SXin LI { 7273ed8e77SXin LI // This must not be called when rc_encode() is partially done. 7373ed8e77SXin LI assert(rc->pos == 0); 7473ed8e77SXin LI rc->count = 0; 7573ed8e77SXin LI } 7673ed8e77SXin LI 7773ed8e77SXin LI 7873ed8e77SXin LI static inline void 7981ad8388SMartin Matuska rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) 8081ad8388SMartin Matuska { 8181ad8388SMartin Matuska rc->symbols[rc->count] = bit; 8281ad8388SMartin Matuska rc->probs[rc->count] = prob; 8381ad8388SMartin Matuska ++rc->count; 8481ad8388SMartin Matuska } 8581ad8388SMartin Matuska 8681ad8388SMartin Matuska 8781ad8388SMartin Matuska static inline void 8881ad8388SMartin Matuska rc_bittree(lzma_range_encoder *rc, probability *probs, 8981ad8388SMartin Matuska uint32_t bit_count, uint32_t symbol) 9081ad8388SMartin Matuska { 9181ad8388SMartin Matuska uint32_t model_index = 1; 9281ad8388SMartin Matuska 9381ad8388SMartin Matuska do { 9481ad8388SMartin Matuska const uint32_t bit = (symbol >> --bit_count) & 1; 9581ad8388SMartin Matuska rc_bit(rc, &probs[model_index], bit); 9681ad8388SMartin Matuska model_index = (model_index << 1) + bit; 9781ad8388SMartin Matuska } while (bit_count != 0); 9881ad8388SMartin Matuska } 9981ad8388SMartin Matuska 10081ad8388SMartin Matuska 10181ad8388SMartin Matuska static inline void 10281ad8388SMartin Matuska rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, 10381ad8388SMartin Matuska uint32_t bit_count, uint32_t symbol) 10481ad8388SMartin Matuska { 10581ad8388SMartin Matuska uint32_t model_index = 1; 10681ad8388SMartin Matuska 10781ad8388SMartin Matuska do { 10881ad8388SMartin Matuska const uint32_t bit = symbol & 1; 10981ad8388SMartin Matuska symbol >>= 1; 11081ad8388SMartin Matuska rc_bit(rc, &probs[model_index], bit); 11181ad8388SMartin Matuska model_index = (model_index << 1) + bit; 11281ad8388SMartin Matuska } while (--bit_count != 0); 11381ad8388SMartin Matuska } 11481ad8388SMartin Matuska 11581ad8388SMartin Matuska 11681ad8388SMartin Matuska static inline void 11781ad8388SMartin Matuska rc_direct(lzma_range_encoder *rc, 11881ad8388SMartin Matuska uint32_t value, uint32_t bit_count) 11981ad8388SMartin Matuska { 12081ad8388SMartin Matuska do { 12181ad8388SMartin Matuska rc->symbols[rc->count++] 12281ad8388SMartin Matuska = RC_DIRECT_0 + ((value >> --bit_count) & 1); 12381ad8388SMartin Matuska } while (bit_count != 0); 12481ad8388SMartin Matuska } 12581ad8388SMartin Matuska 12681ad8388SMartin Matuska 12781ad8388SMartin Matuska static inline void 12881ad8388SMartin Matuska rc_flush(lzma_range_encoder *rc) 12981ad8388SMartin Matuska { 13081ad8388SMartin Matuska for (size_t i = 0; i < 5; ++i) 13181ad8388SMartin Matuska rc->symbols[rc->count++] = RC_FLUSH; 13281ad8388SMartin Matuska } 13381ad8388SMartin Matuska 13481ad8388SMartin Matuska 13581ad8388SMartin Matuska static inline bool 13681ad8388SMartin Matuska rc_shift_low(lzma_range_encoder *rc, 13781ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 13881ad8388SMartin Matuska { 13981ad8388SMartin Matuska if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) 14081ad8388SMartin Matuska || (uint32_t)(rc->low >> 32) != 0) { 14181ad8388SMartin Matuska do { 14281ad8388SMartin Matuska if (*out_pos == out_size) 14381ad8388SMartin Matuska return true; 14481ad8388SMartin Matuska 14581ad8388SMartin Matuska out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); 14681ad8388SMartin Matuska ++*out_pos; 14773ed8e77SXin LI ++rc->out_total; 14881ad8388SMartin Matuska rc->cache = 0xFF; 14981ad8388SMartin Matuska 15081ad8388SMartin Matuska } while (--rc->cache_size != 0); 15181ad8388SMartin Matuska 15281ad8388SMartin Matuska rc->cache = (rc->low >> 24) & 0xFF; 15381ad8388SMartin Matuska } 15481ad8388SMartin Matuska 15581ad8388SMartin Matuska ++rc->cache_size; 15681ad8388SMartin Matuska rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; 15781ad8388SMartin Matuska 15881ad8388SMartin Matuska return false; 15981ad8388SMartin Matuska } 16081ad8388SMartin Matuska 16181ad8388SMartin Matuska 16273ed8e77SXin LI // NOTE: The last two arguments are uint64_t instead of size_t because in 16373ed8e77SXin LI // the dummy version these refer to the size of the whole range-encoded 16473ed8e77SXin LI // output stream, not just to the currently available output buffer space. 16573ed8e77SXin LI static inline bool 16673ed8e77SXin LI rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache, 16773ed8e77SXin LI uint64_t *out_pos, uint64_t out_size) 16873ed8e77SXin LI { 16973ed8e77SXin LI if ((uint32_t)(*low) < (uint32_t)(0xFF000000) 17073ed8e77SXin LI || (uint32_t)(*low >> 32) != 0) { 17173ed8e77SXin LI do { 17273ed8e77SXin LI if (*out_pos == out_size) 17373ed8e77SXin LI return true; 17473ed8e77SXin LI 17573ed8e77SXin LI ++*out_pos; 17673ed8e77SXin LI *cache = 0xFF; 17773ed8e77SXin LI 17873ed8e77SXin LI } while (--*cache_size != 0); 17973ed8e77SXin LI 18073ed8e77SXin LI *cache = (*low >> 24) & 0xFF; 18173ed8e77SXin LI } 18273ed8e77SXin LI 18373ed8e77SXin LI ++*cache_size; 18473ed8e77SXin LI *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS; 18573ed8e77SXin LI 18673ed8e77SXin LI return false; 18773ed8e77SXin LI } 18873ed8e77SXin LI 18973ed8e77SXin LI 19081ad8388SMartin Matuska static inline bool 19181ad8388SMartin Matuska rc_encode(lzma_range_encoder *rc, 19281ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 19381ad8388SMartin Matuska { 19481ad8388SMartin Matuska assert(rc->count <= RC_SYMBOLS_MAX); 19581ad8388SMartin Matuska 19681ad8388SMartin Matuska while (rc->pos < rc->count) { 19781ad8388SMartin Matuska // Normalize 19881ad8388SMartin Matuska if (rc->range < RC_TOP_VALUE) { 19981ad8388SMartin Matuska if (rc_shift_low(rc, out, out_pos, out_size)) 20081ad8388SMartin Matuska return true; 20181ad8388SMartin Matuska 20281ad8388SMartin Matuska rc->range <<= RC_SHIFT_BITS; 20381ad8388SMartin Matuska } 20481ad8388SMartin Matuska 20581ad8388SMartin Matuska // Encode a bit 20681ad8388SMartin Matuska switch (rc->symbols[rc->pos]) { 20781ad8388SMartin Matuska case RC_BIT_0: { 20881ad8388SMartin Matuska probability prob = *rc->probs[rc->pos]; 20981ad8388SMartin Matuska rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) 21081ad8388SMartin Matuska * prob; 21181ad8388SMartin Matuska prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; 21281ad8388SMartin Matuska *rc->probs[rc->pos] = prob; 21381ad8388SMartin Matuska break; 21481ad8388SMartin Matuska } 21581ad8388SMartin Matuska 21681ad8388SMartin Matuska case RC_BIT_1: { 21781ad8388SMartin Matuska probability prob = *rc->probs[rc->pos]; 21881ad8388SMartin Matuska const uint32_t bound = prob * (rc->range 21981ad8388SMartin Matuska >> RC_BIT_MODEL_TOTAL_BITS); 22081ad8388SMartin Matuska rc->low += bound; 22181ad8388SMartin Matuska rc->range -= bound; 22281ad8388SMartin Matuska prob -= prob >> RC_MOVE_BITS; 22381ad8388SMartin Matuska *rc->probs[rc->pos] = prob; 22481ad8388SMartin Matuska break; 22581ad8388SMartin Matuska } 22681ad8388SMartin Matuska 22781ad8388SMartin Matuska case RC_DIRECT_0: 22881ad8388SMartin Matuska rc->range >>= 1; 22981ad8388SMartin Matuska break; 23081ad8388SMartin Matuska 23181ad8388SMartin Matuska case RC_DIRECT_1: 23281ad8388SMartin Matuska rc->range >>= 1; 23381ad8388SMartin Matuska rc->low += rc->range; 23481ad8388SMartin Matuska break; 23581ad8388SMartin Matuska 23681ad8388SMartin Matuska case RC_FLUSH: 23781ad8388SMartin Matuska // Prevent further normalizations. 23881ad8388SMartin Matuska rc->range = UINT32_MAX; 23981ad8388SMartin Matuska 24081ad8388SMartin Matuska // Flush the last five bytes (see rc_flush()). 24181ad8388SMartin Matuska do { 24281ad8388SMartin Matuska if (rc_shift_low(rc, out, out_pos, out_size)) 24381ad8388SMartin Matuska return true; 24481ad8388SMartin Matuska } while (++rc->pos < rc->count); 24581ad8388SMartin Matuska 24681ad8388SMartin Matuska // Reset the range encoder so we are ready to continue 24781ad8388SMartin Matuska // encoding if we weren't finishing the stream. 24881ad8388SMartin Matuska rc_reset(rc); 24981ad8388SMartin Matuska return false; 25081ad8388SMartin Matuska 25181ad8388SMartin Matuska default: 25281ad8388SMartin Matuska assert(0); 25381ad8388SMartin Matuska break; 25481ad8388SMartin Matuska } 25581ad8388SMartin Matuska 25681ad8388SMartin Matuska ++rc->pos; 25781ad8388SMartin Matuska } 25881ad8388SMartin Matuska 25981ad8388SMartin Matuska rc->count = 0; 26081ad8388SMartin Matuska rc->pos = 0; 26181ad8388SMartin Matuska 26281ad8388SMartin Matuska return false; 26381ad8388SMartin Matuska } 26481ad8388SMartin Matuska 26581ad8388SMartin Matuska 26673ed8e77SXin LI static inline bool 26773ed8e77SXin LI rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit) 26873ed8e77SXin LI { 26973ed8e77SXin LI assert(rc->count <= RC_SYMBOLS_MAX); 27073ed8e77SXin LI 27173ed8e77SXin LI uint64_t low = rc->low; 27273ed8e77SXin LI uint64_t cache_size = rc->cache_size; 27373ed8e77SXin LI uint32_t range = rc->range; 27473ed8e77SXin LI uint8_t cache = rc->cache; 27573ed8e77SXin LI uint64_t out_pos = rc->out_total; 27673ed8e77SXin LI 27773ed8e77SXin LI size_t pos = rc->pos; 27873ed8e77SXin LI 27973ed8e77SXin LI while (true) { 28073ed8e77SXin LI // Normalize 28173ed8e77SXin LI if (range < RC_TOP_VALUE) { 28273ed8e77SXin LI if (rc_shift_low_dummy(&low, &cache_size, &cache, 28373ed8e77SXin LI &out_pos, out_limit)) 28473ed8e77SXin LI return true; 28573ed8e77SXin LI 28673ed8e77SXin LI range <<= RC_SHIFT_BITS; 28773ed8e77SXin LI } 28873ed8e77SXin LI 28973ed8e77SXin LI // This check is here because the normalization above must 29073ed8e77SXin LI // be done before flushing the last bytes. 29173ed8e77SXin LI if (pos == rc->count) 29273ed8e77SXin LI break; 29373ed8e77SXin LI 29473ed8e77SXin LI // Encode a bit 29573ed8e77SXin LI switch (rc->symbols[pos]) { 29673ed8e77SXin LI case RC_BIT_0: { 29773ed8e77SXin LI probability prob = *rc->probs[pos]; 29873ed8e77SXin LI range = (range >> RC_BIT_MODEL_TOTAL_BITS) 29973ed8e77SXin LI * prob; 30073ed8e77SXin LI break; 30173ed8e77SXin LI } 30273ed8e77SXin LI 30373ed8e77SXin LI case RC_BIT_1: { 30473ed8e77SXin LI probability prob = *rc->probs[pos]; 30573ed8e77SXin LI const uint32_t bound = prob * (range 30673ed8e77SXin LI >> RC_BIT_MODEL_TOTAL_BITS); 30773ed8e77SXin LI low += bound; 30873ed8e77SXin LI range -= bound; 30973ed8e77SXin LI break; 31073ed8e77SXin LI } 31173ed8e77SXin LI 31273ed8e77SXin LI case RC_DIRECT_0: 31373ed8e77SXin LI range >>= 1; 31473ed8e77SXin LI break; 31573ed8e77SXin LI 31673ed8e77SXin LI case RC_DIRECT_1: 31773ed8e77SXin LI range >>= 1; 31873ed8e77SXin LI low += range; 31973ed8e77SXin LI break; 32073ed8e77SXin LI 32173ed8e77SXin LI case RC_FLUSH: 32273ed8e77SXin LI default: 32373ed8e77SXin LI assert(0); 32473ed8e77SXin LI break; 32573ed8e77SXin LI } 32673ed8e77SXin LI 32773ed8e77SXin LI ++pos; 32873ed8e77SXin LI } 32973ed8e77SXin LI 33073ed8e77SXin LI // Flush the last bytes. This isn't in rc->symbols[] so we do 33173ed8e77SXin LI // it after the above loop to take into account the size of 33273ed8e77SXin LI // the flushing that will be done at the end of the stream. 33373ed8e77SXin LI for (pos = 0; pos < 5; ++pos) { 33473ed8e77SXin LI if (rc_shift_low_dummy(&low, &cache_size, 33573ed8e77SXin LI &cache, &out_pos, out_limit)) 33673ed8e77SXin LI return true; 33773ed8e77SXin LI } 33873ed8e77SXin LI 33973ed8e77SXin LI return false; 34073ed8e77SXin LI } 34173ed8e77SXin LI 34273ed8e77SXin LI 34381ad8388SMartin Matuska static inline uint64_t 34481ad8388SMartin Matuska rc_pending(const lzma_range_encoder *rc) 34581ad8388SMartin Matuska { 34681ad8388SMartin Matuska return rc->cache_size + 5 - 1; 34781ad8388SMartin Matuska } 34881ad8388SMartin Matuska 34981ad8388SMartin Matuska #endif 350