xref: /freebsd/contrib/xz/src/liblzma/rangecoder/range_encoder.h (revision 81ad83880dcc267b198c781929dd9a009f98c5f7)
1*81ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
2*81ad8388SMartin Matuska //
3*81ad8388SMartin Matuska /// \file       range_encoder.h
4*81ad8388SMartin Matuska /// \brief      Range Encoder
5*81ad8388SMartin Matuska ///
6*81ad8388SMartin Matuska //  Authors:    Igor Pavlov
7*81ad8388SMartin Matuska //              Lasse Collin
8*81ad8388SMartin Matuska //
9*81ad8388SMartin Matuska //  This file has been put into the public domain.
10*81ad8388SMartin Matuska //  You can do whatever you want with this file.
11*81ad8388SMartin Matuska //
12*81ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
13*81ad8388SMartin Matuska 
14*81ad8388SMartin Matuska #ifndef LZMA_RANGE_ENCODER_H
15*81ad8388SMartin Matuska #define LZMA_RANGE_ENCODER_H
16*81ad8388SMartin Matuska 
17*81ad8388SMartin Matuska #include "range_common.h"
18*81ad8388SMartin Matuska #include "price.h"
19*81ad8388SMartin Matuska 
20*81ad8388SMartin Matuska 
21*81ad8388SMartin Matuska /// Maximum number of symbols that can be put pending into lzma_range_encoder
22*81ad8388SMartin Matuska /// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough
23*81ad8388SMartin Matuska /// (match with big distance and length followed by range encoder flush).
24*81ad8388SMartin Matuska #define RC_SYMBOLS_MAX 58
25*81ad8388SMartin Matuska 
26*81ad8388SMartin Matuska 
27*81ad8388SMartin Matuska typedef struct {
28*81ad8388SMartin Matuska 	uint64_t low;
29*81ad8388SMartin Matuska 	uint64_t cache_size;
30*81ad8388SMartin Matuska 	uint32_t range;
31*81ad8388SMartin Matuska 	uint8_t cache;
32*81ad8388SMartin Matuska 
33*81ad8388SMartin Matuska 	/// Number of symbols in the tables
34*81ad8388SMartin Matuska 	size_t count;
35*81ad8388SMartin Matuska 
36*81ad8388SMartin Matuska 	/// rc_encode()'s position in the tables
37*81ad8388SMartin Matuska 	size_t pos;
38*81ad8388SMartin Matuska 
39*81ad8388SMartin Matuska 	/// Symbols to encode
40*81ad8388SMartin Matuska 	enum {
41*81ad8388SMartin Matuska 		RC_BIT_0,
42*81ad8388SMartin Matuska 		RC_BIT_1,
43*81ad8388SMartin Matuska 		RC_DIRECT_0,
44*81ad8388SMartin Matuska 		RC_DIRECT_1,
45*81ad8388SMartin Matuska 		RC_FLUSH,
46*81ad8388SMartin Matuska 	} symbols[RC_SYMBOLS_MAX];
47*81ad8388SMartin Matuska 
48*81ad8388SMartin Matuska 	/// Probabilities associated with RC_BIT_0 or RC_BIT_1
49*81ad8388SMartin Matuska 	probability *probs[RC_SYMBOLS_MAX];
50*81ad8388SMartin Matuska 
51*81ad8388SMartin Matuska } lzma_range_encoder;
52*81ad8388SMartin Matuska 
53*81ad8388SMartin Matuska 
54*81ad8388SMartin Matuska static inline void
55*81ad8388SMartin Matuska rc_reset(lzma_range_encoder *rc)
56*81ad8388SMartin Matuska {
57*81ad8388SMartin Matuska 	rc->low = 0;
58*81ad8388SMartin Matuska 	rc->cache_size = 1;
59*81ad8388SMartin Matuska 	rc->range = UINT32_MAX;
60*81ad8388SMartin Matuska 	rc->cache = 0;
61*81ad8388SMartin Matuska 	rc->count = 0;
62*81ad8388SMartin Matuska 	rc->pos = 0;
63*81ad8388SMartin Matuska }
64*81ad8388SMartin Matuska 
65*81ad8388SMartin Matuska 
66*81ad8388SMartin Matuska static inline void
67*81ad8388SMartin Matuska rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
68*81ad8388SMartin Matuska {
69*81ad8388SMartin Matuska 	rc->symbols[rc->count] = bit;
70*81ad8388SMartin Matuska 	rc->probs[rc->count] = prob;
71*81ad8388SMartin Matuska 	++rc->count;
72*81ad8388SMartin Matuska }
73*81ad8388SMartin Matuska 
74*81ad8388SMartin Matuska 
75*81ad8388SMartin Matuska static inline void
76*81ad8388SMartin Matuska rc_bittree(lzma_range_encoder *rc, probability *probs,
77*81ad8388SMartin Matuska 		uint32_t bit_count, uint32_t symbol)
78*81ad8388SMartin Matuska {
79*81ad8388SMartin Matuska 	uint32_t model_index = 1;
80*81ad8388SMartin Matuska 
81*81ad8388SMartin Matuska 	do {
82*81ad8388SMartin Matuska 		const uint32_t bit = (symbol >> --bit_count) & 1;
83*81ad8388SMartin Matuska 		rc_bit(rc, &probs[model_index], bit);
84*81ad8388SMartin Matuska 		model_index = (model_index << 1) + bit;
85*81ad8388SMartin Matuska 	} while (bit_count != 0);
86*81ad8388SMartin Matuska }
87*81ad8388SMartin Matuska 
88*81ad8388SMartin Matuska 
89*81ad8388SMartin Matuska static inline void
90*81ad8388SMartin Matuska rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
91*81ad8388SMartin Matuska 		uint32_t bit_count, uint32_t symbol)
92*81ad8388SMartin Matuska {
93*81ad8388SMartin Matuska 	uint32_t model_index = 1;
94*81ad8388SMartin Matuska 
95*81ad8388SMartin Matuska 	do {
96*81ad8388SMartin Matuska 		const uint32_t bit = symbol & 1;
97*81ad8388SMartin Matuska 		symbol >>= 1;
98*81ad8388SMartin Matuska 		rc_bit(rc, &probs[model_index], bit);
99*81ad8388SMartin Matuska 		model_index = (model_index << 1) + bit;
100*81ad8388SMartin Matuska 	} while (--bit_count != 0);
101*81ad8388SMartin Matuska }
102*81ad8388SMartin Matuska 
103*81ad8388SMartin Matuska 
104*81ad8388SMartin Matuska static inline void
105*81ad8388SMartin Matuska rc_direct(lzma_range_encoder *rc,
106*81ad8388SMartin Matuska 		uint32_t value, uint32_t bit_count)
107*81ad8388SMartin Matuska {
108*81ad8388SMartin Matuska 	do {
109*81ad8388SMartin Matuska 		rc->symbols[rc->count++]
110*81ad8388SMartin Matuska 				= RC_DIRECT_0 + ((value >> --bit_count) & 1);
111*81ad8388SMartin Matuska 	} while (bit_count != 0);
112*81ad8388SMartin Matuska }
113*81ad8388SMartin Matuska 
114*81ad8388SMartin Matuska 
115*81ad8388SMartin Matuska static inline void
116*81ad8388SMartin Matuska rc_flush(lzma_range_encoder *rc)
117*81ad8388SMartin Matuska {
118*81ad8388SMartin Matuska 	for (size_t i = 0; i < 5; ++i)
119*81ad8388SMartin Matuska 		rc->symbols[rc->count++] = RC_FLUSH;
120*81ad8388SMartin Matuska }
121*81ad8388SMartin Matuska 
122*81ad8388SMartin Matuska 
123*81ad8388SMartin Matuska static inline bool
124*81ad8388SMartin Matuska rc_shift_low(lzma_range_encoder *rc,
125*81ad8388SMartin Matuska 		uint8_t *out, size_t *out_pos, size_t out_size)
126*81ad8388SMartin Matuska {
127*81ad8388SMartin Matuska 	if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
128*81ad8388SMartin Matuska 			|| (uint32_t)(rc->low >> 32) != 0) {
129*81ad8388SMartin Matuska 		do {
130*81ad8388SMartin Matuska 			if (*out_pos == out_size)
131*81ad8388SMartin Matuska 				return true;
132*81ad8388SMartin Matuska 
133*81ad8388SMartin Matuska 			out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
134*81ad8388SMartin Matuska 			++*out_pos;
135*81ad8388SMartin Matuska 			rc->cache = 0xFF;
136*81ad8388SMartin Matuska 
137*81ad8388SMartin Matuska 		} while (--rc->cache_size != 0);
138*81ad8388SMartin Matuska 
139*81ad8388SMartin Matuska 		rc->cache = (rc->low >> 24) & 0xFF;
140*81ad8388SMartin Matuska 	}
141*81ad8388SMartin Matuska 
142*81ad8388SMartin Matuska 	++rc->cache_size;
143*81ad8388SMartin Matuska 	rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
144*81ad8388SMartin Matuska 
145*81ad8388SMartin Matuska 	return false;
146*81ad8388SMartin Matuska }
147*81ad8388SMartin Matuska 
148*81ad8388SMartin Matuska 
149*81ad8388SMartin Matuska static inline bool
150*81ad8388SMartin Matuska rc_encode(lzma_range_encoder *rc,
151*81ad8388SMartin Matuska 		uint8_t *out, size_t *out_pos, size_t out_size)
152*81ad8388SMartin Matuska {
153*81ad8388SMartin Matuska 	assert(rc->count <= RC_SYMBOLS_MAX);
154*81ad8388SMartin Matuska 
155*81ad8388SMartin Matuska 	while (rc->pos < rc->count) {
156*81ad8388SMartin Matuska 		// Normalize
157*81ad8388SMartin Matuska 		if (rc->range < RC_TOP_VALUE) {
158*81ad8388SMartin Matuska 			if (rc_shift_low(rc, out, out_pos, out_size))
159*81ad8388SMartin Matuska 				return true;
160*81ad8388SMartin Matuska 
161*81ad8388SMartin Matuska 			rc->range <<= RC_SHIFT_BITS;
162*81ad8388SMartin Matuska 		}
163*81ad8388SMartin Matuska 
164*81ad8388SMartin Matuska 		// Encode a bit
165*81ad8388SMartin Matuska 		switch (rc->symbols[rc->pos]) {
166*81ad8388SMartin Matuska 		case RC_BIT_0: {
167*81ad8388SMartin Matuska 			probability prob = *rc->probs[rc->pos];
168*81ad8388SMartin Matuska 			rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
169*81ad8388SMartin Matuska 					* prob;
170*81ad8388SMartin Matuska 			prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
171*81ad8388SMartin Matuska 			*rc->probs[rc->pos] = prob;
172*81ad8388SMartin Matuska 			break;
173*81ad8388SMartin Matuska 		}
174*81ad8388SMartin Matuska 
175*81ad8388SMartin Matuska 		case RC_BIT_1: {
176*81ad8388SMartin Matuska 			probability prob = *rc->probs[rc->pos];
177*81ad8388SMartin Matuska 			const uint32_t bound = prob * (rc->range
178*81ad8388SMartin Matuska 					>> RC_BIT_MODEL_TOTAL_BITS);
179*81ad8388SMartin Matuska 			rc->low += bound;
180*81ad8388SMartin Matuska 			rc->range -= bound;
181*81ad8388SMartin Matuska 			prob -= prob >> RC_MOVE_BITS;
182*81ad8388SMartin Matuska 			*rc->probs[rc->pos] = prob;
183*81ad8388SMartin Matuska 			break;
184*81ad8388SMartin Matuska 		}
185*81ad8388SMartin Matuska 
186*81ad8388SMartin Matuska 		case RC_DIRECT_0:
187*81ad8388SMartin Matuska 			rc->range >>= 1;
188*81ad8388SMartin Matuska 			break;
189*81ad8388SMartin Matuska 
190*81ad8388SMartin Matuska 		case RC_DIRECT_1:
191*81ad8388SMartin Matuska 			rc->range >>= 1;
192*81ad8388SMartin Matuska 			rc->low += rc->range;
193*81ad8388SMartin Matuska 			break;
194*81ad8388SMartin Matuska 
195*81ad8388SMartin Matuska 		case RC_FLUSH:
196*81ad8388SMartin Matuska 			// Prevent further normalizations.
197*81ad8388SMartin Matuska 			rc->range = UINT32_MAX;
198*81ad8388SMartin Matuska 
199*81ad8388SMartin Matuska 			// Flush the last five bytes (see rc_flush()).
200*81ad8388SMartin Matuska 			do {
201*81ad8388SMartin Matuska 				if (rc_shift_low(rc, out, out_pos, out_size))
202*81ad8388SMartin Matuska 					return true;
203*81ad8388SMartin Matuska 			} while (++rc->pos < rc->count);
204*81ad8388SMartin Matuska 
205*81ad8388SMartin Matuska 			// Reset the range encoder so we are ready to continue
206*81ad8388SMartin Matuska 			// encoding if we weren't finishing the stream.
207*81ad8388SMartin Matuska 			rc_reset(rc);
208*81ad8388SMartin Matuska 			return false;
209*81ad8388SMartin Matuska 
210*81ad8388SMartin Matuska 		default:
211*81ad8388SMartin Matuska 			assert(0);
212*81ad8388SMartin Matuska 			break;
213*81ad8388SMartin Matuska 		}
214*81ad8388SMartin Matuska 
215*81ad8388SMartin Matuska 		++rc->pos;
216*81ad8388SMartin Matuska 	}
217*81ad8388SMartin Matuska 
218*81ad8388SMartin Matuska 	rc->count = 0;
219*81ad8388SMartin Matuska 	rc->pos = 0;
220*81ad8388SMartin Matuska 
221*81ad8388SMartin Matuska 	return false;
222*81ad8388SMartin Matuska }
223*81ad8388SMartin Matuska 
224*81ad8388SMartin Matuska 
225*81ad8388SMartin Matuska static inline uint64_t
226*81ad8388SMartin Matuska rc_pending(const lzma_range_encoder *rc)
227*81ad8388SMartin Matuska {
228*81ad8388SMartin Matuska 	return rc->cache_size + 5 - 1;
229*81ad8388SMartin Matuska }
230*81ad8388SMartin Matuska 
231*81ad8388SMartin Matuska #endif
232