xref: /freebsd/contrib/llvm-project/clang/lib/Headers/ia32intrin.h (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1 /* ===-------- ia32intrin.h ---------------------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __X86INTRIN_H
11 #error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __IA32INTRIN_H
15 #define __IA32INTRIN_H
16 
/** Returns the index of the least significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsfd(int __A)
{
  /* The builtin takes an unsigned operand; make the conversion explicit. */
  const unsigned int __bits = (unsigned int)__A;
  return __builtin_ctz(__bits);
}
33 
/** Returns the index of the most significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsrd(int __A)
{
  /* Leading-zero count of the operand, converted to a bit index below. */
  const int __lead = __builtin_clz((unsigned int)__A);
  return 31 - __lead;
}
50 
/** Reverses the byte order of the operand, converting little endian to big
 *  endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bswapd(int __A)
{
  /* The builtin operates on an unsigned 32-bit value. */
  return (int)__builtin_bswap32((unsigned int)__A);
}
66 
/** Reverses the byte order of the 32-bit operand by calling
 *  __builtin_bswap32, exactly as __bswapd does.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
_bswap(int __A)
{
  return (int)__builtin_bswap32((unsigned int)__A);
}
71 
/* Alternative spellings of the 32-bit bit-scan intrinsics. */
#define _bit_scan_forward(__V) __bsfd((__V))
#define _bit_scan_reverse(__V) __bsrd((__V))
74 
75 #ifdef __x86_64__
/** Returns the index of the least significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A)
{
  /* The builtin takes an unsigned operand; make the conversion explicit. */
  const unsigned long long __bits = (unsigned long long)__A;
  return __builtin_ctzll(__bits);
}
92 
/** Returns the index of the most significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A)
{
  /* Leading-zero count of the operand, converted to a bit index below. */
  const int __lead = __builtin_clzll((unsigned long long)__A);
  return 63 - __lead;
}
109 
/** Reverses the byte order of the operand, converting little endian to big
 *  endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long
__attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A)
{
  /* The builtin operates on an unsigned 64-bit value. */
  return (long long)__builtin_bswap64((unsigned long long)__A);
}

/* Alternative spelling of __bswapq. */
#define _bswap64(__V) __bswapq((__V))
127 #endif
128 
/** Counts the bits of the source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A) {
  const int __ones = __builtin_popcount(__A);
  return __ones;
}

/* Alternative spelling of __popcntd. */
#define _popcnt32(__V) __popcntd((__V))
148 
149 #ifdef __x86_64__
/** Counts the bits of the source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ long long
__attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A) {
  /* The builtin yields an int; widen it to the declared return type. */
  return (long long)__builtin_popcountll(__A);
}

/* Alternative spelling of __popcntq. */
#define _popcnt64(__V) __popcntq((__V))
169 #endif /* __x86_64__ */
170 
/* Flags-register accessors. The value type follows the target: 32 bits wide
 * on ia32 and 64 bits wide on x86_64, matching the builtin that is used. */
#ifndef __x86_64__
/** Returns the value produced by __builtin_ia32_readeflags_u32. */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  return __builtin_ia32_readeflags_u32();
}

/** Passes \a __f to __builtin_ia32_writeeflags_u32.
 *
 *  \param __f
 *     The 32-bit flags value to write.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f) {
  __builtin_ia32_writeeflags_u32(__f);
}

#else /* __x86_64__ */
/** Returns the value produced by __builtin_ia32_readeflags_u64. */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  return __builtin_ia32_readeflags_u64();
}

/** Passes \a __f to __builtin_ia32_writeeflags_u64.
 *
 *  \param __f
 *     The 64-bit flags value to write.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f) {
  __builtin_ia32_writeeflags_u64(__f);
}
#endif /* __x86_64__ */
197 
/** Cast a 32-bit float value to a 32-bit unsigned integer value. The bit
 *  pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* The local uses a reserved (double-underscore) name: a plain name such
   * as `D` could be rewritten by a macro the includer has defined. */
  unsigned int __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
214 
/** Cast a 64-bit float value to a 64-bit unsigned integer value. The bit
 *  pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* The local uses a reserved (double-underscore) name: a plain name such
   * as `D` could be rewritten by a macro the includer has defined. */
  unsigned long long __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
231 
/** Cast a 32-bit unsigned integer value to a 32-bit float value. The bit
 *  pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value containing the converted value.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* The local uses a reserved (double-underscore) name: a plain name such
   * as `D` could be rewritten by a macro the includer has defined. */
  float __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
248 
/** Cast a 64-bit unsigned integer value to a 64-bit float value. The bit
 *  pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value containing the converted value.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* The local uses a reserved (double-underscore) name: a plain name such
   * as `D` could be rewritten by a macro the includer has defined. */
  double __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
265 
266 /** Adds the unsigned integer operand to the CRC-32C checksum of the
267  *     unsigned char operand.
268  *
269  *  \headerfile <x86intrin.h>
270  *
271  *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
272  *
273  *  \param __C
274  *     An unsigned integer operand to add to the CRC-32C checksum of operand
275  *     \a  __D.
276  *  \param __D
277  *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
278  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
279  *     operand \a __D.
280  */
281 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
282 __crc32b(unsigned int __C, unsigned char __D)
283 {
284   return __builtin_ia32_crc32qi(__C, __D);
285 }
286 
287 /** Adds the unsigned integer operand to the CRC-32C checksum of the
288  *     unsigned short operand.
289  *
290  *  \headerfile <x86intrin.h>
291  *
292  *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
293  *
294  *  \param __C
295  *     An unsigned integer operand to add to the CRC-32C checksum of operand
296  *     \a  __D.
297  *  \param __D
298  *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
299  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
300  *     operand \a __D.
301  */
302 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
303 __crc32w(unsigned int __C, unsigned short __D)
304 {
305   return __builtin_ia32_crc32hi(__C, __D);
306 }
307 
308 /** Adds the unsigned integer operand to the CRC-32C checksum of the
309  *     second unsigned integer operand.
310  *
311  *  \headerfile <x86intrin.h>
312  *
313  *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
314  *
315  *  \param __C
316  *     An unsigned integer operand to add to the CRC-32C checksum of operand
317  *     \a  __D.
318  *  \param __D
319  *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
320  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
321  *     operand \a __D.
322  */
323 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
324 __crc32d(unsigned int __C, unsigned int __D)
325 {
326   return __builtin_ia32_crc32si(__C, __D);
327 }
328 
329 #ifdef __x86_64__
330 /** Adds the unsigned integer operand to the CRC-32C checksum of the
331  *     unsigned 64-bit integer operand.
332  *
333  *  \headerfile <x86intrin.h>
334  *
335  *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
336  *
337  *  \param __C
338  *     An unsigned integer operand to add to the CRC-32C checksum of operand
339  *     \a  __D.
340  *  \param __D
341  *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
342  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
343  *     operand \a __D.
344  */
345 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
346 __crc32q(unsigned long long __C, unsigned long long __D)
347 {
348   return __builtin_ia32_crc32di(__C, __D);
349 }
350 #endif /* __x86_64__ */
351 
/** Returns the value produced by __builtin_ia32_rdpmc for the given
 *  selector (presumably the <c> RDPMC </c> instruction; the selector
 *  encoding is not described in this header).
 *
 *  \param __A
 *     The selector passed through to the builtin.
 *  \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A)
{
  const unsigned long long __count = __builtin_ia32_rdpmc(__A);
  return __count;
}
356 
/** Returns the value produced by __builtin_ia32_rdtscp; the builtin also
 *  receives \a __A and may store an auxiliary value through it.
 *
 *  \param __A
 *     Pointer to an unsigned int handed to the builtin.
 *  \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A)
{
  const unsigned long long __stamp = __builtin_ia32_rdtscp(__A);
  return __stamp;
}
362 
/* Single-underscore spellings of the counter-reading intrinsics. */
#define _rdtsc() __rdtsc()

#define _rdpmc(__V) __rdpmc(__V)
366 
/** Invokes __builtin_ia32_wbinvd (presumably the <c> WBINVD </c>
 *  instruction). Takes no arguments and returns nothing.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  __builtin_ia32_wbinvd();
}
371 
372 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
373 __rolb(unsigned char __X, int __C) {
374   return __builtin_rotateleft8(__X, __C);
375 }
376 
377 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
378 __rorb(unsigned char __X, int __C) {
379   return __builtin_rotateright8(__X, __C);
380 }
381 
382 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
383 __rolw(unsigned short __X, int __C) {
384   return __builtin_rotateleft16(__X, __C);
385 }
386 
387 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
388 __rorw(unsigned short __X, int __C) {
389   return __builtin_rotateright16(__X, __C);
390 }
391 
392 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
393 __rold(unsigned int __X, int __C) {
394   return __builtin_rotateleft32(__X, __C);
395 }
396 
397 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
398 __rord(unsigned int __X, int __C) {
399   return __builtin_rotateright32(__X, __C);
400 }
401 
402 #ifdef __x86_64__
403 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
404 __rolq(unsigned long long __X, int __C) {
405   return __builtin_rotateleft64(__X, __C);
406 }
407 
408 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
409 __rorq(unsigned long long __X, int __C) {
410   return __builtin_rotateright64(__X, __C);
411 }
412 #endif /* __x86_64__ */
413 
#ifndef _MSC_VER
/* MSVC already provides these names as builtins, so they are only defined
 * for other compilers. */
/* _lrotl/_lrotr rotate a `long`: pick the 64-bit or 32-bit helper according
 * to the size of long. */
#ifdef __LP64__
#define _lrotl(__V, __N) __rolq((__V), (__N))
#define _lrotr(__V, __N) __rorq((__V), (__N))
#else
#define _lrotl(__V, __N) __rold((__V), (__N))
#define _lrotr(__V, __N) __rord((__V), (__N))
#endif
#define _rotl(__V, __N) __rold((__V), (__N))
#define _rotr(__V, __N) __rord((__V), (__N))
#endif /* _MSC_VER */
427 
/* The 16-bit rotate names are not builtins, so they are defined in every
 * mode. */
#define _rotwl(__V, __N) __rolw((__V), (__N))
#define _rotwr(__V, __N) __rorw((__V), (__N))
431 
432 #endif /* __IA32INTRIN_H */
433