/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H

#include "mem.h"

ZSTD_countTrailingZeros32_fallback(U32 val)16*c0d9a071SXin LI MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17*c0d9a071SXin LI {
18*c0d9a071SXin LI assert(val != 0);
19*c0d9a071SXin LI {
20*c0d9a071SXin LI static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21*c0d9a071SXin LI 30, 22, 20, 15, 25, 17, 4, 8,
22*c0d9a071SXin LI 31, 27, 13, 23, 21, 19, 16, 7,
23*c0d9a071SXin LI 26, 12, 18, 6, 11, 5, 10, 9};
24*c0d9a071SXin LI return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25*c0d9a071SXin LI }
26*c0d9a071SXin LI }
27*c0d9a071SXin LI
/* ZSTD_countTrailingZeros32() :
 * Returns the number of consecutive zero bits at the bottom of `val`,
 * i.e. the index of the lowest set bit.
 * `val` must be non-zero (asserted; the non-BMI2 MSVC path additionally
 * marks the zero case unreachable with __assume(0)).
 * Dispatches to a compiler intrinsic when one is available, otherwise to
 * the portable De Bruijn fallback.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
    /* BMI2 guaranteed at compile time : tzcnt is a single instruction */
    return (unsigned)_tzcnt_u32(val);
# else
    if (val != 0) {
        unsigned long r;
        _BitScanForward(&r, val);
        return (unsigned)r;
    } else {
        __assume(0); /* Should not reach this code path */
    }
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_ctz(val);
#elif defined(__ICCARM__)
    /* IAR for ARM also provides the GCC-style builtin */
    return (unsigned)__builtin_ctz(val);
#else
    return ZSTD_countTrailingZeros32_fallback(val);
#endif
}

ZSTD_countLeadingZeros32_fallback(U32 val)52*c0d9a071SXin LI MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
53*c0d9a071SXin LI {
54*c0d9a071SXin LI assert(val != 0);
55*c0d9a071SXin LI {
56*c0d9a071SXin LI static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
57*c0d9a071SXin LI 11, 14, 16, 18, 22, 25, 3, 30,
58*c0d9a071SXin LI 8, 12, 20, 28, 15, 17, 24, 7,
59*c0d9a071SXin LI 19, 27, 23, 6, 26, 5, 4, 31};
60*c0d9a071SXin LI val |= val >> 1;
61*c0d9a071SXin LI val |= val >> 2;
62*c0d9a071SXin LI val |= val >> 4;
63*c0d9a071SXin LI val |= val >> 8;
64*c0d9a071SXin LI val |= val >> 16;
65*c0d9a071SXin LI return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
66*c0d9a071SXin LI }
67*c0d9a071SXin LI }
68*c0d9a071SXin LI
/* ZSTD_countLeadingZeros32() :
 * Returns the number of consecutive zero bits at the top of `val`.
 * `val` must be non-zero (asserted; the non-BMI2 MSVC path additionally
 * marks the zero case unreachable with __assume(0)).
 * Dispatches to a compiler intrinsic when one is available, otherwise to
 * the portable De Bruijn fallback.
 */
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
    /* BMI2 guaranteed at compile time : lzcnt is a single instruction */
    return (unsigned)_lzcnt_u32(val);
# else
    if (val != 0) {
        unsigned long r;
        /* _BitScanReverse returns the index of the highest set bit;
         * 31 - index gives the leading-zero count. */
        _BitScanReverse(&r, val);
        return (unsigned)(31 - r);
    } else {
        __assume(0); /* Should not reach this code path */
    }
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_clz(val);
#elif defined(__ICCARM__)
    /* IAR for ARM also provides the GCC-style builtin */
    return (unsigned)__builtin_clz(val);
#else
    return ZSTD_countLeadingZeros32_fallback(val);
#endif
}

/* ZSTD_countTrailingZeros64() :
 * Returns the number of consecutive zero bits at the bottom of `val`,
 * i.e. the index of the lowest set bit.
 * `val` must be non-zero (asserted).
 * Uses a 64-bit intrinsic where available (MSVC x64, or GCC-compatible
 * compilers on LP64 targets); otherwise splits the value into two 32-bit
 * halves and reuses the 32-bit helper.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
    return (unsigned)_tzcnt_u64(val);
# else
    if (val != 0) {
        unsigned long r;
        _BitScanForward64(&r, val);
        return (unsigned)r;
    } else {
        __assume(0); /* Should not reach this code path */
    }
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
#elif defined(__ICCARM__)
    return (unsigned)__builtin_ctzll(val);
#else
    {
        U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (leastSignificantWord == 0) {
            /* All 32 low bits are zero; the answer lies in the high word. */
            return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
        } else {
            return ZSTD_countTrailingZeros32(leastSignificantWord);
        }
    }
#endif
}

/* ZSTD_countLeadingZeros64() :
 * Returns the number of consecutive zero bits at the top of `val`.
 * `val` must be non-zero (asserted).
 * Uses a 64-bit intrinsic where available (MSVC x64, or GCC-compatible
 * compilers); otherwise splits the value into two 32-bit halves and reuses
 * the 32-bit helper.
 */
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
    return (unsigned)_lzcnt_u64(val);
# else
    if (val != 0) {
        unsigned long r;
        /* _BitScanReverse64 returns the index of the highest set bit;
         * 63 - index gives the leading-zero count. */
        _BitScanReverse64(&r, val);
        return (unsigned)(63 - r);
    } else {
        __assume(0); /* Should not reach this code path */
    }
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
#elif defined(__ICCARM__)
    return (unsigned)(__builtin_clzll(val));
#else
    {
        U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (mostSignificantWord == 0) {
            /* All 32 high bits are zero; the answer lies in the low word. */
            return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
        } else {
            return ZSTD_countLeadingZeros32(mostSignificantWord);
        }
    }
#endif
}

ZSTD_NbCommonBytes(size_t val)157*c0d9a071SXin LI MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
158*c0d9a071SXin LI {
159*c0d9a071SXin LI if (MEM_isLittleEndian()) {
160*c0d9a071SXin LI if (MEM_64bits()) {
161*c0d9a071SXin LI return ZSTD_countTrailingZeros64((U64)val) >> 3;
162*c0d9a071SXin LI } else {
163*c0d9a071SXin LI return ZSTD_countTrailingZeros32((U32)val) >> 3;
164*c0d9a071SXin LI }
165*c0d9a071SXin LI } else { /* Big Endian CPU */
166*c0d9a071SXin LI if (MEM_64bits()) {
167*c0d9a071SXin LI return ZSTD_countLeadingZeros64((U64)val) >> 3;
168*c0d9a071SXin LI } else {
169*c0d9a071SXin LI return ZSTD_countLeadingZeros32((U32)val) >> 3;
170*c0d9a071SXin LI }
171*c0d9a071SXin LI }
172*c0d9a071SXin LI }
173*c0d9a071SXin LI
ZSTD_highbit32(U32 val)174*c0d9a071SXin LI MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
175*c0d9a071SXin LI {
176*c0d9a071SXin LI assert(val != 0);
177*c0d9a071SXin LI return 31 - ZSTD_countLeadingZeros32(val);
178*c0d9a071SXin LI }
179*c0d9a071SXin LI
/* ZSTD_rotateRight_*():
 * Rotates a bitfield to the right by "count" bits.
 * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
 */
184*c0d9a071SXin LI MEM_STATIC
ZSTD_rotateRight_U64(U64 const value,U32 count)185*c0d9a071SXin LI U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
186*c0d9a071SXin LI assert(count < 64);
187*c0d9a071SXin LI count &= 0x3F; /* for fickle pattern recognition */
188*c0d9a071SXin LI return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
189*c0d9a071SXin LI }
190*c0d9a071SXin LI
191*c0d9a071SXin LI MEM_STATIC
ZSTD_rotateRight_U32(U32 const value,U32 count)192*c0d9a071SXin LI U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
193*c0d9a071SXin LI assert(count < 32);
194*c0d9a071SXin LI count &= 0x1F; /* for fickle pattern recognition */
195*c0d9a071SXin LI return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
196*c0d9a071SXin LI }
197*c0d9a071SXin LI
198*c0d9a071SXin LI MEM_STATIC
ZSTD_rotateRight_U16(U16 const value,U32 count)199*c0d9a071SXin LI U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
200*c0d9a071SXin LI assert(count < 16);
201*c0d9a071SXin LI count &= 0x0F; /* for fickle pattern recognition */
202*c0d9a071SXin LI return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
203*c0d9a071SXin LI }

#endif /* ZSTD_BITS_H */