xref: /freebsd/contrib/xz/src/liblzma/lzma/fastpos.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       fastpos.h
6 /// \brief      Kind of two-bit version of bit scan reverse
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #ifndef LZMA_FASTPOS_H
14 #define LZMA_FASTPOS_H
15 
16 // LZMA encodes match distances by storing the highest two bits using
17 // a six-bit value [0, 63], and then the missing lower bits.
18 // Dictionary size is also stored using this encoding in the .xz
19 // file format header.
20 //
21 // fastpos.h provides a way to quickly find out the correct six-bit
22 // values. The following table gives some examples of this encoding:
23 //
24 //     dist   return
25 //       0       0
26 //       1       1
27 //       2       2
28 //       3       3
29 //       4       4
30 //       5       4
31 //       6       5
32 //       7       5
33 //       8       6
34 //      11       6
35 //      12       7
36 //     ...      ...
37 //      15       7
38 //      16       8
39 //      17       8
40 //     ...      ...
41 //      23       8
42 //      24       9
43 //      25       9
44 //     ...      ...
45 //
46 //
47 // Provided functions or macros
48 // ----------------------------
49 //
50 // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
51 // assumes that dist >= FULL_DISTANCES, thus the result is at least
52 // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
53 // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
54 // should be tiny bit faster due to the assumption being made.
55 //
56 //
57 // Size vs. speed
58 // --------------
59 //
60 // With some CPUs that have fast BSR (bit scan reverse) instruction, the
61 // size optimized version is slightly faster than the bigger table based
62 // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
63 // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
64 // would still have speed roughly comparable to the table version. Older
65 // x86 CPUs like the original Pentium have very slow BSR; on those systems
66 // the table version is a lot faster.
67 //
68 // On some CPUs, the table version is a lot faster when using position
69 // dependent code, but with position independent code the size optimized
70 // version is slightly faster. This occurs at least on 32-bit SPARC (no
71 // ASM optimizations).
72 //
73 // I'm making the table version the default, because that has good speed
74 // on all systems I have tried. The size optimized version is sometimes
75 // slightly faster, but sometimes it is a lot slower.
76 
77 #ifdef HAVE_SMALL
78 #	define get_dist_slot(dist) \
79 		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
80 
81 static inline uint32_t
82 get_dist_slot_2(uint32_t dist)
83 {
84 	const uint32_t i = bsr32(dist);
85 	return (i + i) + ((dist >> (i - 1)) & 1);
86 }
87 
88 
89 #else
90 
91 #define FASTPOS_BITS 13
92 
93 lzma_attr_visibility_hidden
94 extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
95 
96 
97 #define fastpos_shift(extra, n) \
98 	((extra) + (n) * (FASTPOS_BITS - 1))
99 
100 #define fastpos_limit(extra, n) \
101 	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
102 
103 #define fastpos_result(dist, extra, n) \
104 	(uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
105 			+ 2 * fastpos_shift(extra, n)
106 
107 
108 static inline uint32_t
109 get_dist_slot(uint32_t dist)
110 {
111 	// If it is small enough, we can pick the result directly from
112 	// the precalculated table.
113 	if (dist < fastpos_limit(0, 0))
114 		return lzma_fastpos[dist];
115 
116 	if (dist < fastpos_limit(0, 1))
117 		return fastpos_result(dist, 0, 1);
118 
119 	return fastpos_result(dist, 0, 2);
120 }
121 
122 
123 #ifdef FULL_DISTANCES_BITS
124 static inline uint32_t
125 get_dist_slot_2(uint32_t dist)
126 {
127 	assert(dist >= FULL_DISTANCES);
128 
129 	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
130 		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
131 
132 	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
133 		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
134 
135 	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
136 }
137 #endif
138 
139 #endif
140 
141 #endif
142