1*81ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 2*81ad8388SMartin Matuska // 3*81ad8388SMartin Matuska /// \file range_encoder.h 4*81ad8388SMartin Matuska /// \brief Range Encoder 5*81ad8388SMartin Matuska /// 6*81ad8388SMartin Matuska // Authors: Igor Pavlov 7*81ad8388SMartin Matuska // Lasse Collin 8*81ad8388SMartin Matuska // 9*81ad8388SMartin Matuska // This file has been put into the public domain. 10*81ad8388SMartin Matuska // You can do whatever you want with this file. 11*81ad8388SMartin Matuska // 12*81ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 13*81ad8388SMartin Matuska 14*81ad8388SMartin Matuska #ifndef LZMA_RANGE_ENCODER_H 15*81ad8388SMartin Matuska #define LZMA_RANGE_ENCODER_H 16*81ad8388SMartin Matuska 17*81ad8388SMartin Matuska #include "range_common.h" 18*81ad8388SMartin Matuska #include "price.h" 19*81ad8388SMartin Matuska 20*81ad8388SMartin Matuska 21*81ad8388SMartin Matuska /// Maximum number of symbols that can be put pending into lzma_range_encoder 22*81ad8388SMartin Matuska /// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough 23*81ad8388SMartin Matuska /// (match with big distance and length followed by range encoder flush). 24*81ad8388SMartin Matuska #define RC_SYMBOLS_MAX 58 25*81ad8388SMartin Matuska 26*81ad8388SMartin Matuska 27*81ad8388SMartin Matuska typedef struct { 28*81ad8388SMartin Matuska uint64_t low; 29*81ad8388SMartin Matuska uint64_t cache_size; 30*81ad8388SMartin Matuska uint32_t range; 31*81ad8388SMartin Matuska uint8_t cache; 32*81ad8388SMartin Matuska 33*81ad8388SMartin Matuska /// Number of symbols in the tables 34*81ad8388SMartin Matuska size_t count; 35*81ad8388SMartin Matuska 36*81ad8388SMartin Matuska /// rc_encode()'s position in the tables 37*81ad8388SMartin Matuska size_t pos; 38*81ad8388SMartin Matuska 39*81ad8388SMartin Matuska /// Symbols to encode 40*81ad8388SMartin Matuska enum { 41*81ad8388SMartin Matuska RC_BIT_0, 42*81ad8388SMartin Matuska RC_BIT_1, 43*81ad8388SMartin Matuska RC_DIRECT_0, 44*81ad8388SMartin Matuska RC_DIRECT_1, 45*81ad8388SMartin Matuska RC_FLUSH, 46*81ad8388SMartin Matuska } symbols[RC_SYMBOLS_MAX]; 47*81ad8388SMartin Matuska 48*81ad8388SMartin Matuska /// Probabilities associated with RC_BIT_0 or RC_BIT_1 49*81ad8388SMartin Matuska probability *probs[RC_SYMBOLS_MAX]; 50*81ad8388SMartin Matuska 51*81ad8388SMartin Matuska } lzma_range_encoder; 52*81ad8388SMartin Matuska 53*81ad8388SMartin Matuska 54*81ad8388SMartin Matuska static inline void 55*81ad8388SMartin Matuska rc_reset(lzma_range_encoder *rc) 56*81ad8388SMartin Matuska { 57*81ad8388SMartin Matuska rc->low = 0; 58*81ad8388SMartin Matuska rc->cache_size = 1; 59*81ad8388SMartin Matuska rc->range = UINT32_MAX; 60*81ad8388SMartin Matuska rc->cache = 0; 61*81ad8388SMartin Matuska rc->count = 0; 62*81ad8388SMartin Matuska rc->pos = 0; 63*81ad8388SMartin Matuska } 64*81ad8388SMartin Matuska 65*81ad8388SMartin Matuska 66*81ad8388SMartin Matuska static inline void 67*81ad8388SMartin Matuska rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) 68*81ad8388SMartin Matuska { 69*81ad8388SMartin Matuska rc->symbols[rc->count] = bit; 70*81ad8388SMartin Matuska rc->probs[rc->count] = prob; 71*81ad8388SMartin Matuska ++rc->count; 72*81ad8388SMartin Matuska } 73*81ad8388SMartin Matuska 74*81ad8388SMartin Matuska 75*81ad8388SMartin Matuska static inline void 76*81ad8388SMartin Matuska rc_bittree(lzma_range_encoder *rc, probability *probs, 77*81ad8388SMartin Matuska uint32_t bit_count, uint32_t symbol) 78*81ad8388SMartin Matuska { 79*81ad8388SMartin Matuska uint32_t model_index = 1; 80*81ad8388SMartin Matuska 81*81ad8388SMartin Matuska do { 82*81ad8388SMartin Matuska const uint32_t bit = (symbol >> --bit_count) & 1; 83*81ad8388SMartin Matuska rc_bit(rc, &probs[model_index], bit); 84*81ad8388SMartin Matuska model_index = (model_index << 1) + bit; 85*81ad8388SMartin Matuska } while (bit_count != 0); 86*81ad8388SMartin Matuska } 87*81ad8388SMartin Matuska 88*81ad8388SMartin Matuska 89*81ad8388SMartin Matuska static inline void 90*81ad8388SMartin Matuska rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, 91*81ad8388SMartin Matuska uint32_t bit_count, uint32_t symbol) 92*81ad8388SMartin Matuska { 93*81ad8388SMartin Matuska uint32_t model_index = 1; 94*81ad8388SMartin Matuska 95*81ad8388SMartin Matuska do { 96*81ad8388SMartin Matuska const uint32_t bit = symbol & 1; 97*81ad8388SMartin Matuska symbol >>= 1; 98*81ad8388SMartin Matuska rc_bit(rc, &probs[model_index], bit); 99*81ad8388SMartin Matuska model_index = (model_index << 1) + bit; 100*81ad8388SMartin Matuska } while (--bit_count != 0); 101*81ad8388SMartin Matuska } 102*81ad8388SMartin Matuska 103*81ad8388SMartin Matuska 104*81ad8388SMartin Matuska static inline void 105*81ad8388SMartin Matuska rc_direct(lzma_range_encoder *rc, 106*81ad8388SMartin Matuska uint32_t value, uint32_t bit_count) 107*81ad8388SMartin Matuska { 108*81ad8388SMartin Matuska do { 109*81ad8388SMartin Matuska rc->symbols[rc->count++] 110*81ad8388SMartin Matuska = RC_DIRECT_0 + ((value >> --bit_count) & 1); 111*81ad8388SMartin Matuska } while (bit_count != 0); 112*81ad8388SMartin Matuska } 113*81ad8388SMartin Matuska 114*81ad8388SMartin Matuska 115*81ad8388SMartin Matuska static inline void 116*81ad8388SMartin Matuska rc_flush(lzma_range_encoder *rc) 117*81ad8388SMartin Matuska { 118*81ad8388SMartin Matuska for (size_t i = 0; i < 5; ++i) 119*81ad8388SMartin Matuska rc->symbols[rc->count++] = RC_FLUSH; 120*81ad8388SMartin Matuska } 121*81ad8388SMartin Matuska 122*81ad8388SMartin Matuska 123*81ad8388SMartin Matuska static inline bool 124*81ad8388SMartin Matuska rc_shift_low(lzma_range_encoder *rc, 125*81ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 126*81ad8388SMartin Matuska { 127*81ad8388SMartin Matuska if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) 128*81ad8388SMartin Matuska || (uint32_t)(rc->low >> 32) != 0) { 129*81ad8388SMartin Matuska do { 130*81ad8388SMartin Matuska if (*out_pos == out_size) 131*81ad8388SMartin Matuska return true; 132*81ad8388SMartin Matuska 133*81ad8388SMartin Matuska out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); 134*81ad8388SMartin Matuska ++*out_pos; 135*81ad8388SMartin Matuska rc->cache = 0xFF; 136*81ad8388SMartin Matuska 137*81ad8388SMartin Matuska } while (--rc->cache_size != 0); 138*81ad8388SMartin Matuska 139*81ad8388SMartin Matuska rc->cache = (rc->low >> 24) & 0xFF; 140*81ad8388SMartin Matuska } 141*81ad8388SMartin Matuska 142*81ad8388SMartin Matuska ++rc->cache_size; 143*81ad8388SMartin Matuska rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; 144*81ad8388SMartin Matuska 145*81ad8388SMartin Matuska return false; 146*81ad8388SMartin Matuska } 147*81ad8388SMartin Matuska 148*81ad8388SMartin Matuska 149*81ad8388SMartin Matuska static inline bool 150*81ad8388SMartin Matuska rc_encode(lzma_range_encoder *rc, 151*81ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 152*81ad8388SMartin Matuska { 153*81ad8388SMartin Matuska assert(rc->count <= RC_SYMBOLS_MAX); 154*81ad8388SMartin Matuska 155*81ad8388SMartin Matuska while (rc->pos < rc->count) { 156*81ad8388SMartin Matuska // Normalize 157*81ad8388SMartin Matuska if (rc->range < RC_TOP_VALUE) { 158*81ad8388SMartin Matuska if (rc_shift_low(rc, out, out_pos, out_size)) 159*81ad8388SMartin Matuska return true; 160*81ad8388SMartin Matuska 161*81ad8388SMartin Matuska rc->range <<= RC_SHIFT_BITS; 162*81ad8388SMartin Matuska } 163*81ad8388SMartin Matuska 164*81ad8388SMartin Matuska // Encode a bit 165*81ad8388SMartin Matuska switch (rc->symbols[rc->pos]) { 166*81ad8388SMartin Matuska case RC_BIT_0: { 167*81ad8388SMartin Matuska probability prob = *rc->probs[rc->pos]; 168*81ad8388SMartin Matuska rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) 169*81ad8388SMartin Matuska * prob; 170*81ad8388SMartin Matuska prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; 171*81ad8388SMartin Matuska *rc->probs[rc->pos] = prob; 172*81ad8388SMartin Matuska break; 173*81ad8388SMartin Matuska } 174*81ad8388SMartin Matuska 175*81ad8388SMartin Matuska case RC_BIT_1: { 176*81ad8388SMartin Matuska probability prob = *rc->probs[rc->pos]; 177*81ad8388SMartin Matuska const uint32_t bound = prob * (rc->range 178*81ad8388SMartin Matuska >> RC_BIT_MODEL_TOTAL_BITS); 179*81ad8388SMartin Matuska rc->low += bound; 180*81ad8388SMartin Matuska rc->range -= bound; 181*81ad8388SMartin Matuska prob -= prob >> RC_MOVE_BITS; 182*81ad8388SMartin Matuska *rc->probs[rc->pos] = prob; 183*81ad8388SMartin Matuska break; 184*81ad8388SMartin Matuska } 185*81ad8388SMartin Matuska 186*81ad8388SMartin Matuska case RC_DIRECT_0: 187*81ad8388SMartin Matuska rc->range >>= 1; 188*81ad8388SMartin Matuska break; 189*81ad8388SMartin Matuska 190*81ad8388SMartin Matuska case RC_DIRECT_1: 191*81ad8388SMartin Matuska rc->range >>= 1; 192*81ad8388SMartin Matuska rc->low += rc->range; 193*81ad8388SMartin Matuska break; 194*81ad8388SMartin Matuska 195*81ad8388SMartin Matuska case RC_FLUSH: 196*81ad8388SMartin Matuska // Prevent further normalizations. 197*81ad8388SMartin Matuska rc->range = UINT32_MAX; 198*81ad8388SMartin Matuska 199*81ad8388SMartin Matuska // Flush the last five bytes (see rc_flush()). 200*81ad8388SMartin Matuska do { 201*81ad8388SMartin Matuska if (rc_shift_low(rc, out, out_pos, out_size)) 202*81ad8388SMartin Matuska return true; 203*81ad8388SMartin Matuska } while (++rc->pos < rc->count); 204*81ad8388SMartin Matuska 205*81ad8388SMartin Matuska // Reset the range encoder so we are ready to continue 206*81ad8388SMartin Matuska // encoding if we weren't finishing the stream. 207*81ad8388SMartin Matuska rc_reset(rc); 208*81ad8388SMartin Matuska return false; 209*81ad8388SMartin Matuska 210*81ad8388SMartin Matuska default: 211*81ad8388SMartin Matuska assert(0); 212*81ad8388SMartin Matuska break; 213*81ad8388SMartin Matuska } 214*81ad8388SMartin Matuska 215*81ad8388SMartin Matuska ++rc->pos; 216*81ad8388SMartin Matuska } 217*81ad8388SMartin Matuska 218*81ad8388SMartin Matuska rc->count = 0; 219*81ad8388SMartin Matuska rc->pos = 0; 220*81ad8388SMartin Matuska 221*81ad8388SMartin Matuska return false; 222*81ad8388SMartin Matuska } 223*81ad8388SMartin Matuska 224*81ad8388SMartin Matuska 225*81ad8388SMartin Matuska static inline uint64_t 226*81ad8388SMartin Matuska rc_pending(const lzma_range_encoder *rc) 227*81ad8388SMartin Matuska { 228*81ad8388SMartin Matuska return rc->cache_size + 5 - 1; 229*81ad8388SMartin Matuska } 230*81ad8388SMartin Matuska 231*81ad8388SMartin Matuska #endif 232