/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10 #ifndef __IMMINTRIN_H
11 #error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
12 #endif
13
14 #ifndef __BMI2INTRIN_H
15 #define __BMI2INTRIN_H
16
/* Define the default attributes for the functions in this file: every
 * intrinsic is always inlined, marked nodebug, and compiled with the "bmi2"
 * target feature. The macro is #undef'ed again at the end of the header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
19
20 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
21 /// starting at bit number \a __Y.
22 ///
23 /// \code{.operation}
24 /// i := __Y[7:0]
25 /// result := __X
26 /// IF i < 32
27 /// result[31:i] := 0
28 /// FI
29 /// \endcode
30 ///
31 /// \headerfile <immintrin.h>
32 ///
33 /// This intrinsic corresponds to the \c BZHI instruction.
34 ///
35 /// \param __X
36 /// The 32-bit source value to copy.
37 /// \param __Y
38 /// The lower 8 bits specify the bit number of the lowest bit to zero.
39 /// \returns The partially zeroed 32-bit value.
40 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bzhi_u32(unsigned int __X,unsigned int __Y)41 _bzhi_u32(unsigned int __X, unsigned int __Y)
42 {
43 return __builtin_ia32_bzhi_si(__X, __Y);
44 }
45
46 /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
47 /// into the 32-bit result, according to the mask in the unsigned 32-bit
48 /// integer \a __Y. All other bits of the result are zero.
49 ///
50 /// \code{.operation}
51 /// i := 0
52 /// result := 0
53 /// FOR m := 0 TO 31
54 /// IF __Y[m] == 1
55 /// result[m] := __X[i]
56 /// i := i + 1
57 /// ENDIF
58 /// ENDFOR
59 /// \endcode
60 ///
61 /// \headerfile <immintrin.h>
62 ///
63 /// This intrinsic corresponds to the \c PDEP instruction.
64 ///
65 /// \param __X
66 /// The 32-bit source value to copy.
67 /// \param __Y
68 /// The 32-bit mask specifying where to deposit source bits.
69 /// \returns The 32-bit result.
70 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pdep_u32(unsigned int __X,unsigned int __Y)71 _pdep_u32(unsigned int __X, unsigned int __Y)
72 {
73 return __builtin_ia32_pdep_si(__X, __Y);
74 }
75
76 /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
77 /// low-order bits of the 32-bit result, according to the mask in the
78 /// unsigned 32-bit integer \a __Y. All other bits of the result are zero.
79 ///
80 /// \code{.operation}
81 /// i := 0
82 /// result := 0
83 /// FOR m := 0 TO 31
84 /// IF __Y[m] == 1
85 /// result[i] := __X[m]
86 /// i := i + 1
87 /// ENDIF
88 /// ENDFOR
89 /// \endcode
90 ///
91 /// \headerfile <immintrin.h>
92 ///
93 /// This intrinsic corresponds to the \c PEXT instruction.
94 ///
95 /// \param __X
96 /// The 32-bit source value to copy.
97 /// \param __Y
98 /// The 32-bit mask specifying which source bits to extract.
99 /// \returns The 32-bit result.
100 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pext_u32(unsigned int __X,unsigned int __Y)101 _pext_u32(unsigned int __X, unsigned int __Y)
102 {
103 return __builtin_ia32_pext_si(__X, __Y);
104 }
105
106 /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
107 /// 64-bit product. Stores the upper 32 bits of the product in the
108 /// memory at \a __P and returns the lower 32 bits.
109 ///
110 /// \code{.operation}
111 /// Store32(__P, (__X * __Y)[63:32])
112 /// result := (__X * __Y)[31:0]
113 /// \endcode
114 ///
115 /// \headerfile <immintrin.h>
116 ///
117 /// This intrinsic corresponds to the \c MULX instruction.
118 ///
119 /// \param __X
120 /// An unsigned 32-bit multiplicand.
121 /// \param __Y
122 /// An unsigned 32-bit multiplicand.
123 /// \param __P
124 /// A pointer to memory for storing the upper half of the product.
125 /// \returns The lower half of the product.
126 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mulx_u32(unsigned int __X,unsigned int __Y,unsigned int * __P)127 _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
128 {
129 unsigned long long __res = (unsigned long long) __X * __Y;
130 *__P = (unsigned int)(__res >> 32);
131 return (unsigned int)__res;
132 }
133
134 #ifdef __x86_64__
135
136 /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
137 /// starting at bit number \a __Y.
138 ///
139 /// \code{.operation}
140 /// i := __Y[7:0]
141 /// result := __X
142 /// IF i < 64
143 /// result[63:i] := 0
144 /// FI
145 /// \endcode
146 ///
147 /// \headerfile <immintrin.h>
148 ///
149 /// This intrinsic corresponds to the \c BZHI instruction.
150 ///
151 /// \param __X
152 /// The 64-bit source value to copy.
153 /// \param __Y
154 /// The lower 8 bits specify the bit number of the lowest bit to zero.
155 /// \returns The partially zeroed 64-bit value.
156 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bzhi_u64(unsigned long long __X,unsigned long long __Y)157 _bzhi_u64(unsigned long long __X, unsigned long long __Y)
158 {
159 return __builtin_ia32_bzhi_di(__X, __Y);
160 }
161
162 /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
163 /// into the 64-bit result, according to the mask in the unsigned 64-bit
164 /// integer \a __Y. All other bits of the result are zero.
165 ///
166 /// \code{.operation}
167 /// i := 0
168 /// result := 0
169 /// FOR m := 0 TO 63
170 /// IF __Y[m] == 1
171 /// result[m] := __X[i]
172 /// i := i + 1
173 /// ENDIF
174 /// ENDFOR
175 /// \endcode
176 ///
177 /// \headerfile <immintrin.h>
178 ///
179 /// This intrinsic corresponds to the \c PDEP instruction.
180 ///
181 /// \param __X
182 /// The 64-bit source value to copy.
183 /// \param __Y
184 /// The 64-bit mask specifying where to deposit source bits.
185 /// \returns The 64-bit result.
186 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pdep_u64(unsigned long long __X,unsigned long long __Y)187 _pdep_u64(unsigned long long __X, unsigned long long __Y)
188 {
189 return __builtin_ia32_pdep_di(__X, __Y);
190 }
191
192 /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193 /// low-order bits of the 64-bit result, according to the mask in the
194 /// unsigned 64-bit integer \a __Y. All other bits of the result are zero.
195 ///
196 /// \code{.operation}
197 /// i := 0
198 /// result := 0
199 /// FOR m := 0 TO 63
200 /// IF __Y[m] == 1
201 /// result[i] := __X[m]
202 /// i := i + 1
203 /// ENDIF
204 /// ENDFOR
205 /// \endcode
206 ///
207 /// \headerfile <immintrin.h>
208 ///
209 /// This intrinsic corresponds to the \c PEXT instruction.
210 ///
211 /// \param __X
212 /// The 64-bit source value to copy.
213 /// \param __Y
214 /// The 64-bit mask specifying which source bits to extract.
215 /// \returns The 64-bit result.
216 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pext_u64(unsigned long long __X,unsigned long long __Y)217 _pext_u64(unsigned long long __X, unsigned long long __Y)
218 {
219 return __builtin_ia32_pext_di(__X, __Y);
220 }
221
222 /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
223 /// 128-bit product. Stores the upper 64 bits of the product to the
224 /// memory addressed by \a __P and returns the lower 64 bits.
225 ///
226 /// \code{.operation}
227 /// Store64(__P, (__X * __Y)[127:64])
228 /// result := (__X * __Y)[63:0]
229 /// \endcode
230 ///
231 /// \headerfile <immintrin.h>
232 ///
233 /// This intrinsic corresponds to the \c MULX instruction.
234 ///
235 /// \param __X
236 /// An unsigned 64-bit multiplicand.
237 /// \param __Y
238 /// An unsigned 64-bit multiplicand.
239 /// \param __P
240 /// A pointer to memory for storing the upper half of the product.
241 /// \returns The lower half of the product.
242 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mulx_u64(unsigned long long __X,unsigned long long __Y,unsigned long long * __P)243 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
244 unsigned long long *__P)
245 {
246 unsigned __int128 __res = (unsigned __int128) __X * __Y;
247 *__P = (unsigned long long) (__res >> 64);
248 return (unsigned long long) __res;
249 }
250
251 #endif /* __x86_64__ */
252
253 #undef __DEFAULT_FN_ATTRS
254
255 #endif /* __BMI2INTRIN_H */
256