1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file range_encoder.h 6 /// \brief Range Encoder 7 /// 8 // Authors: Igor Pavlov 9 // Lasse Collin 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #ifndef LZMA_RANGE_ENCODER_H 14 #define LZMA_RANGE_ENCODER_H 15 16 #include "range_common.h" 17 #include "price.h" 18 19 20 /// Maximum number of symbols that can be put pending into lzma_range_encoder 21 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough 22 /// (match with big distance and length followed by range encoder flush). 23 #define RC_SYMBOLS_MAX 53 24 25 26 typedef struct { 27 uint64_t low; 28 uint64_t cache_size; 29 uint32_t range; 30 uint8_t cache; 31 32 /// Number of bytes written out by rc_encode() -> rc_shift_low() 33 uint64_t out_total; 34 35 /// Number of symbols in the tables 36 size_t count; 37 38 /// rc_encode()'s position in the tables 39 size_t pos; 40 41 /// Symbols to encode 42 enum { 43 RC_BIT_0, 44 RC_BIT_1, 45 RC_DIRECT_0, 46 RC_DIRECT_1, 47 RC_FLUSH, 48 } symbols[RC_SYMBOLS_MAX]; 49 50 /// Probabilities associated with RC_BIT_0 or RC_BIT_1 51 probability *probs[RC_SYMBOLS_MAX]; 52 53 } lzma_range_encoder; 54 55 56 static inline void 57 rc_reset(lzma_range_encoder *rc) 58 { 59 rc->low = 0; 60 rc->cache_size = 1; 61 rc->range = UINT32_MAX; 62 rc->cache = 0; 63 rc->out_total = 0; 64 rc->count = 0; 65 rc->pos = 0; 66 } 67 68 69 static inline void 70 rc_forget(lzma_range_encoder *rc) 71 { 72 // This must not be called when rc_encode() is partially done. 73 assert(rc->pos == 0); 74 rc->count = 0; 75 } 76 77 78 static inline void 79 rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) 80 { 81 rc->symbols[rc->count] = bit; 82 rc->probs[rc->count] = prob; 83 ++rc->count; 84 } 85 86 87 static inline void 88 rc_bittree(lzma_range_encoder *rc, probability *probs, 89 uint32_t bit_count, uint32_t symbol) 90 { 91 uint32_t model_index = 1; 92 93 do { 94 const uint32_t bit = (symbol >> --bit_count) & 1; 95 rc_bit(rc, &probs[model_index], bit); 96 model_index = (model_index << 1) + bit; 97 } while (bit_count != 0); 98 } 99 100 101 static inline void 102 rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, 103 uint32_t bit_count, uint32_t symbol) 104 { 105 uint32_t model_index = 1; 106 107 do { 108 const uint32_t bit = symbol & 1; 109 symbol >>= 1; 110 rc_bit(rc, &probs[model_index], bit); 111 model_index = (model_index << 1) + bit; 112 } while (--bit_count != 0); 113 } 114 115 116 static inline void 117 rc_direct(lzma_range_encoder *rc, 118 uint32_t value, uint32_t bit_count) 119 { 120 do { 121 rc->symbols[rc->count++] 122 = RC_DIRECT_0 + ((value >> --bit_count) & 1); 123 } while (bit_count != 0); 124 } 125 126 127 static inline void 128 rc_flush(lzma_range_encoder *rc) 129 { 130 for (size_t i = 0; i < 5; ++i) 131 rc->symbols[rc->count++] = RC_FLUSH; 132 } 133 134 135 static inline bool 136 rc_shift_low(lzma_range_encoder *rc, 137 uint8_t *out, size_t *out_pos, size_t out_size) 138 { 139 if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) 140 || (uint32_t)(rc->low >> 32) != 0) { 141 do { 142 if (*out_pos == out_size) 143 return true; 144 145 out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); 146 ++*out_pos; 147 ++rc->out_total; 148 rc->cache = 0xFF; 149 150 } while (--rc->cache_size != 0); 151 152 rc->cache = (rc->low >> 24) & 0xFF; 153 } 154 155 ++rc->cache_size; 156 rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; 157 158 return false; 159 } 160 161 162 // NOTE: The last two arguments are uint64_t instead of size_t because in 163 // the dummy version these refer to the size of the whole range-encoded 164 // output stream, not just to the currently available output buffer space. 165 static inline bool 166 rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache, 167 uint64_t *out_pos, uint64_t out_size) 168 { 169 if ((uint32_t)(*low) < (uint32_t)(0xFF000000) 170 || (uint32_t)(*low >> 32) != 0) { 171 do { 172 if (*out_pos == out_size) 173 return true; 174 175 ++*out_pos; 176 *cache = 0xFF; 177 178 } while (--*cache_size != 0); 179 180 *cache = (*low >> 24) & 0xFF; 181 } 182 183 ++*cache_size; 184 *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS; 185 186 return false; 187 } 188 189 190 static inline bool 191 rc_encode(lzma_range_encoder *rc, 192 uint8_t *out, size_t *out_pos, size_t out_size) 193 { 194 assert(rc->count <= RC_SYMBOLS_MAX); 195 196 while (rc->pos < rc->count) { 197 // Normalize 198 if (rc->range < RC_TOP_VALUE) { 199 if (rc_shift_low(rc, out, out_pos, out_size)) 200 return true; 201 202 rc->range <<= RC_SHIFT_BITS; 203 } 204 205 // Encode a bit 206 switch (rc->symbols[rc->pos]) { 207 case RC_BIT_0: { 208 probability prob = *rc->probs[rc->pos]; 209 rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) 210 * prob; 211 prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; 212 *rc->probs[rc->pos] = prob; 213 break; 214 } 215 216 case RC_BIT_1: { 217 probability prob = *rc->probs[rc->pos]; 218 const uint32_t bound = prob * (rc->range 219 >> RC_BIT_MODEL_TOTAL_BITS); 220 rc->low += bound; 221 rc->range -= bound; 222 prob -= prob >> RC_MOVE_BITS; 223 *rc->probs[rc->pos] = prob; 224 break; 225 } 226 227 case RC_DIRECT_0: 228 rc->range >>= 1; 229 break; 230 231 case RC_DIRECT_1: 232 rc->range >>= 1; 233 rc->low += rc->range; 234 break; 235 236 case RC_FLUSH: 237 // Prevent further normalizations. 238 rc->range = UINT32_MAX; 239 240 // Flush the last five bytes (see rc_flush()). 241 do { 242 if (rc_shift_low(rc, out, out_pos, out_size)) 243 return true; 244 } while (++rc->pos < rc->count); 245 246 // Reset the range encoder so we are ready to continue 247 // encoding if we weren't finishing the stream. 248 rc_reset(rc); 249 return false; 250 251 default: 252 assert(0); 253 break; 254 } 255 256 ++rc->pos; 257 } 258 259 rc->count = 0; 260 rc->pos = 0; 261 262 return false; 263 } 264 265 266 static inline bool 267 rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit) 268 { 269 assert(rc->count <= RC_SYMBOLS_MAX); 270 271 uint64_t low = rc->low; 272 uint64_t cache_size = rc->cache_size; 273 uint32_t range = rc->range; 274 uint8_t cache = rc->cache; 275 uint64_t out_pos = rc->out_total; 276 277 size_t pos = rc->pos; 278 279 while (true) { 280 // Normalize 281 if (range < RC_TOP_VALUE) { 282 if (rc_shift_low_dummy(&low, &cache_size, &cache, 283 &out_pos, out_limit)) 284 return true; 285 286 range <<= RC_SHIFT_BITS; 287 } 288 289 // This check is here because the normalization above must 290 // be done before flushing the last bytes. 291 if (pos == rc->count) 292 break; 293 294 // Encode a bit 295 switch (rc->symbols[pos]) { 296 case RC_BIT_0: { 297 probability prob = *rc->probs[pos]; 298 range = (range >> RC_BIT_MODEL_TOTAL_BITS) 299 * prob; 300 break; 301 } 302 303 case RC_BIT_1: { 304 probability prob = *rc->probs[pos]; 305 const uint32_t bound = prob * (range 306 >> RC_BIT_MODEL_TOTAL_BITS); 307 low += bound; 308 range -= bound; 309 break; 310 } 311 312 case RC_DIRECT_0: 313 range >>= 1; 314 break; 315 316 case RC_DIRECT_1: 317 range >>= 1; 318 low += range; 319 break; 320 321 case RC_FLUSH: 322 default: 323 assert(0); 324 break; 325 } 326 327 ++pos; 328 } 329 330 // Flush the last bytes. This isn't in rc->symbols[] so we do 331 // it after the above loop to take into account the size of 332 // the flushing that will be done at the end of the stream. 333 for (pos = 0; pos < 5; ++pos) { 334 if (rc_shift_low_dummy(&low, &cache_size, 335 &cache, &out_pos, out_limit)) 336 return true; 337 } 338 339 return false; 340 } 341 342 343 static inline uint64_t 344 rc_pending(const lzma_range_encoder *rc) 345 { 346 return rc->cache_size + 5 - 1; 347 } 348 349 #endif 350