///////////////////////////////////////////////////////////////////////////////
//
/// \file       range_encoder.h
/// \brief      Range Encoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_RANGE_ENCODER_H
#define LZMA_RANGE_ENCODER_H

#include "range_common.h"
#include "price.h"


/// Maximum number of symbols that can be pending in the lzma_range_encoder
/// structure between calls to rc_encode(). For LZMA, 48+5 is enough
/// (match with big distance and length followed by range encoder flush).
#define RC_SYMBOLS_MAX 53


typedef struct {
	uint64_t low;
	uint64_t cache_size;
	uint32_t range;
	uint8_t cache;

	/// Number of bytes written out by rc_encode() -> rc_shift_low()
	uint64_t out_total;

	/// Number of symbols in the tables
	size_t count;

	/// rc_encode()'s position in the tables
	size_t pos;

	/// Symbols to encode
	enum {
		RC_BIT_0,
		RC_BIT_1,
		RC_DIRECT_0,
		RC_DIRECT_1,
		RC_FLUSH,
	} symbols[RC_SYMBOLS_MAX];

	/// Probabilities associated with RC_BIT_0 or RC_BIT_1
	probability *probs[RC_SYMBOLS_MAX];

} lzma_range_encoder;


static inline void
rc_reset(lzma_range_encoder *rc)
{
	rc->low = 0;
	rc->cache_size = 1;
	rc->range = UINT32_MAX;
	rc->cache = 0;
	rc->out_total = 0;
	rc->count = 0;
	rc->pos = 0;
}


static inline void
rc_forget(lzma_range_encoder *rc)
{
	// This must not be called when rc_encode() is partially done.
	assert(rc->pos == 0);
	rc->count = 0;
}


static inline void
rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
{
	rc->symbols[rc->count] = bit;
	rc->probs[rc->count] = prob;
	++rc->count;
}


static inline void
rc_bittree(lzma_range_encoder *rc, probability *probs,
		uint32_t bit_count, uint32_t symbol)
{
	uint32_t model_index = 1;

	do {
		const uint32_t bit = (symbol >> --bit_count) & 1;
		rc_bit(rc, &probs[model_index], bit);
		model_index = (model_index << 1) + bit;
	} while (bit_count != 0);
}


static inline void
rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
		uint32_t bit_count, uint32_t symbol)
{
	uint32_t model_index = 1;

	do {
		const uint32_t bit = symbol & 1;
		symbol >>= 1;
		rc_bit(rc, &probs[model_index], bit);
		model_index = (model_index << 1) + bit;
	} while (--bit_count != 0);
}


static inline void
rc_direct(lzma_range_encoder *rc,
		uint32_t value, uint32_t bit_count)
{
	do {
		rc->symbols[rc->count++]
				= RC_DIRECT_0 + ((value >> --bit_count) & 1);
	} while (bit_count != 0);
}


static inline void
rc_flush(lzma_range_encoder *rc)
{
	for (size_t i = 0; i < 5; ++i)
		rc->symbols[rc->count++] = RC_FLUSH;
}


static inline bool
rc_shift_low(lzma_range_encoder *rc,
		uint8_t *out, size_t *out_pos, size_t out_size)
{
	if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
			|| (uint32_t)(rc->low >> 32) != 0) {
		do {
			if (*out_pos == out_size)
				return true;

			out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
			++*out_pos;
			++rc->out_total;
			rc->cache = 0xFF;

		} while (--rc->cache_size != 0);

		rc->cache = (rc->low >> 24) & 0xFF;
	}

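	// One more byte is now pending: either the byte just placed in the
	// cache, or a 0xFF byte whose final value still depends on a
	// possible future carry.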
	++rc->cache_size;
	rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;

	return false;
}


// NOTE: The last two arguments are uint64_t instead of size_t because in
// the dummy version these refer to the size of the whole range-encoded
// output stream, not just to the currently available output buffer space.
static inline bool
rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
		uint64_t *out_pos, uint64_t out_size)
{
	if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
			|| (uint32_t)(*low >> 32) != 0) {
		do {
			if (*out_pos == out_size)
				return true;

			++*out_pos;
			*cache = 0xFF;

		} while (--*cache_size != 0);

		*cache = (*low >> 24) & 0xFF;
	}

	++*cache_size;
	*low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;

	return false;
}


static inline bool
rc_encode(lzma_range_encoder *rc,
		uint8_t *out, size_t *out_pos, size_t out_size)
{
	assert(rc->count <= RC_SYMBOLS_MAX);

	while (rc->pos < rc->count) {
		// Normalize
		if (rc->range < RC_TOP_VALUE) {
			if (rc_shift_low(rc, out, out_pos, out_size))
				return true;

			rc->range <<= RC_SHIFT_BITS;
		}

		// Encode a bit
		switch (rc->symbols[rc->pos]) {
		case RC_BIT_0: {
			probability prob = *rc->probs[rc->pos];
			rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
					* prob;
			prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
			*rc->probs[rc->pos] = prob;
			break;
		}

		case RC_BIT_1: {
			probability prob = *rc->probs[rc->pos];
			const uint32_t bound = prob * (rc->range
					>> RC_BIT_MODEL_TOTAL_BITS);
			rc->low += bound;
			rc->range -= bound;
			prob -= prob >> RC_MOVE_BITS;
			*rc->probs[rc->pos] = prob;
			break;
		}

		case RC_DIRECT_0:
			rc->range >>= 1;
			break;

		case RC_DIRECT_1:
			rc->range >>= 1;
			rc->low += rc->range;
			break;

		case RC_FLUSH:
			// Prevent further normalizations.
			rc->range = UINT32_MAX;

			// Flush the last five bytes (see rc_flush()).
			do {
				if (rc_shift_low(rc, out, out_pos, out_size))
					return true;
			} while (++rc->pos < rc->count);

			// Reset the range encoder so we are ready to continue
			// encoding if we weren't finishing the stream.
			rc_reset(rc);
			return false;

		default:
			assert(0);
			break;
		}

		++rc->pos;
	}

	rc->count = 0;
	rc->pos = 0;

	return false;
}


static inline bool
rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit)
{
	assert(rc->count <= RC_SYMBOLS_MAX);

	uint64_t low = rc->low;
	uint64_t cache_size = rc->cache_size;
	uint32_t range = rc->range;
	uint8_t cache = rc->cache;
	uint64_t out_pos = rc->out_total;

	size_t pos = rc->pos;

	while (true) {
		// Normalize
		if (range < RC_TOP_VALUE) {
			if (rc_shift_low_dummy(&low, &cache_size, &cache,
					&out_pos, out_limit))
				return true;

			range <<= RC_SHIFT_BITS;
		}

		// This check is here because the normalization above must
		// be done before flushing the last bytes.
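		// (rc->symbols[] is not expected to contain RC_FLUSH here;
		// the five flush bytes are simulated after this loop instead.)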
		if (pos == rc->count)
			break;

		// Encode a bit
		switch (rc->symbols[pos]) {
		case RC_BIT_0: {
			probability prob = *rc->probs[pos];
			range = (range >> RC_BIT_MODEL_TOTAL_BITS)
					* prob;
			break;
		}

		case RC_BIT_1: {
			probability prob = *rc->probs[pos];
			const uint32_t bound = prob * (range
					>> RC_BIT_MODEL_TOTAL_BITS);
			low += bound;
			range -= bound;
			break;
		}

		case RC_DIRECT_0:
			range >>= 1;
			break;

		case RC_DIRECT_1:
			range >>= 1;
			low += range;
			break;

		case RC_FLUSH:
		default:
			assert(0);
			break;
		}

		++pos;
	}

	// Flush the last bytes. These aren't in rc->symbols[], so we do
	// this after the loop above to take into account the size of
	// the flushing that will be done at the end of the stream.
	for (pos = 0; pos < 5; ++pos) {
		if (rc_shift_low_dummy(&low, &cache_size,
				&cache, &out_pos, out_limit))
			return true;
	}

	return false;
}


static inline uint64_t
rc_pending(const lzma_range_encoder *rc)
{
	return rc->cache_size + 5 - 1;
}

#endif
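
// A minimal usage sketch (not part of the original file) showing how the
// queueing helpers, rc_flush() and rc_encode() might fit together for one
// bit-tree coded symbol. The function, buffer and probability names below
// are illustrative only; probs[] is assumed to be already initialized and
// symbol is assumed to be less than 8.
//
//     static bool
//     encode_one_symbol(uint8_t *buf, size_t *buf_pos, size_t buf_size,
//             probability probs[8], uint32_t symbol)
//     {
//             lzma_range_encoder rc;
//             rc_reset(&rc);
//
//             // Queue the three bits of the symbol through the bit-tree
//             // model, then queue the five-byte flush.
//             rc_bittree(&rc, probs, 3, symbol);
//             rc_flush(&rc);
//
//             // Serialize the queued symbols into buf. The return value is
//             // true if buf ran out of space; rc remembers its position so
//             // the call can be repeated once more space is available.
//             return rc_encode(&rc, buf, buf_pos, buf_size);
//     }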