xref: /freebsd/contrib/llvm-project/clang/lib/Headers/immintrin.h (revision b0d29bc47dba79f6f38e67eabadfb4b32ffd9390)
1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __IMMINTRIN_H
11 #define __IMMINTRIN_H
12 
13 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
14 #include <mmintrin.h>
15 #endif
16 
17 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
18 #include <xmmintrin.h>
19 #endif
20 
21 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
22 #include <emmintrin.h>
23 #endif
24 
25 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
26 #include <pmmintrin.h>
27 #endif
28 
29 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
30 #include <tmmintrin.h>
31 #endif
32 
33 #if !defined(_MSC_VER) || __has_feature(modules) || \
34     (defined(__SSE4_2__) || defined(__SSE4_1__))
35 #include <smmintrin.h>
36 #endif
37 
38 #if !defined(_MSC_VER) || __has_feature(modules) || \
39     (defined(__AES__) || defined(__PCLMUL__))
40 #include <wmmintrin.h>
41 #endif
42 
43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
44 #include <clflushoptintrin.h>
45 #endif
46 
47 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
48 #include <clwbintrin.h>
49 #endif
50 
51 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
52 #include <avxintrin.h>
53 #endif
54 
55 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
56 #include <avx2intrin.h>
57 #endif
58 
59 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
60 #include <f16cintrin.h>
61 #endif
62 
63 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
64 #include <vpclmulqdqintrin.h>
65 #endif
66 
67 /* No feature check desired due to internal checks */
68 #include <bmiintrin.h>
69 
70 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
71 #include <bmi2intrin.h>
72 #endif
73 
74 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
75 #include <lzcntintrin.h>
76 #endif
77 
78 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
79 #include <popcntintrin.h>
80 #endif
81 
82 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
83 #include <fmaintrin.h>
84 #endif
85 
86 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
87 #include <avx512fintrin.h>
88 #endif
89 
90 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
91 #include <avx512vlintrin.h>
92 #endif
93 
94 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
95 #include <avx512bwintrin.h>
96 #endif
97 
98 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
99 #include <avx512bitalgintrin.h>
100 #endif
101 
102 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
103 #include <avx512cdintrin.h>
104 #endif
105 
106 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
107 #include <avx512vpopcntdqintrin.h>
108 #endif
109 
110 #if !defined(_MSC_VER) || __has_feature(modules) || \
111     (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
112 #include <avx512vpopcntdqvlintrin.h>
113 #endif
114 
115 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
116 #include <avx512vnniintrin.h>
117 #endif
118 
119 #if !defined(_MSC_VER) || __has_feature(modules) || \
120     (defined(__AVX512VL__) && defined(__AVX512VNNI__))
121 #include <avx512vlvnniintrin.h>
122 #endif
123 
124 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
125 #include <avx512dqintrin.h>
126 #endif
127 
128 #if !defined(_MSC_VER) || __has_feature(modules) || \
129     (defined(__AVX512VL__) && defined(__AVX512BITALG__))
130 #include <avx512vlbitalgintrin.h>
131 #endif
132 
133 #if !defined(_MSC_VER) || __has_feature(modules) || \
134     (defined(__AVX512VL__) && defined(__AVX512BW__))
135 #include <avx512vlbwintrin.h>
136 #endif
137 
138 #if !defined(_MSC_VER) || __has_feature(modules) || \
139     (defined(__AVX512VL__) && defined(__AVX512CD__))
140 #include <avx512vlcdintrin.h>
141 #endif
142 
143 #if !defined(_MSC_VER) || __has_feature(modules) || \
144     (defined(__AVX512VL__) && defined(__AVX512DQ__))
145 #include <avx512vldqintrin.h>
146 #endif
147 
148 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
149 #include <avx512erintrin.h>
150 #endif
151 
152 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
153 #include <avx512ifmaintrin.h>
154 #endif
155 
156 #if !defined(_MSC_VER) || __has_feature(modules) || \
157     (defined(__AVX512IFMA__) && defined(__AVX512VL__))
158 #include <avx512ifmavlintrin.h>
159 #endif
160 
161 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
162 #include <avx512vbmiintrin.h>
163 #endif
164 
165 #if !defined(_MSC_VER) || __has_feature(modules) || \
166     (defined(__AVX512VBMI__) && defined(__AVX512VL__))
167 #include <avx512vbmivlintrin.h>
168 #endif
169 
170 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
171 #include <avx512vbmi2intrin.h>
172 #endif
173 
174 #if !defined(_MSC_VER) || __has_feature(modules) || \
175     (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
176 #include <avx512vlvbmi2intrin.h>
177 #endif
178 
179 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
180 #include <avx512pfintrin.h>
181 #endif
182 
183 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
184 #include <avx512bf16intrin.h>
185 #endif
186 
187 #if !defined(_MSC_VER) || __has_feature(modules) || \
188     (defined(__AVX512VL__) && defined(__AVX512BF16__))
189 #include <avx512vlbf16intrin.h>
190 #endif
191 
192 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
193 #include <pkuintrin.h>
194 #endif
195 
196 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
197 #include <vaesintrin.h>
198 #endif
199 
200 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
201 #include <gfniintrin.h>
202 #endif
203 
204 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Reads the IA32_TSC_AUX MSR (0xc0000103) and returns its value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value produced by the RDPID builtin.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
214 #endif // __RDPID__
215 
216 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// Requests a 16-bit hardware random number and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 16-bit value.
/// \returns The status reported by the RDRAND builtin (1 when a random value
///    was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __ok = __builtin_ia32_rdrand16_step(__p);
  return __ok;
}
222 
/// Requests a 32-bit hardware random number and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 32-bit value.
/// \returns The status reported by the RDRAND builtin (1 when a random value
///    was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __ok = __builtin_ia32_rdrand32_step(__p);
  return __ok;
}
228 
229 #ifdef __x86_64__
/// Requests a 64-bit hardware random number and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 64-bit value.
/// \returns The status reported by the RDRAND builtin (1 when a random value
///    was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  int __ok = __builtin_ia32_rdrand64_step(__p);
  return __ok;
}
235 #endif
236 #endif /* __RDRND__ */
237 
238 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
239 #ifdef __x86_64__
/// Reads the FS segment base through the 32-bit RDFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The 32-bit value produced by the builtin.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
245 
/// Reads the FS segment base through the 64-bit RDFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The 64-bit value produced by the builtin.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
251 
/// Reads the GS segment base through the 32-bit RDGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The 32-bit value produced by the builtin.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
257 
/// Reads the GS segment base through the 64-bit RDGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The 64-bit value produced by the builtin.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
263 
/// Writes the FS segment base through the 32-bit WRFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    32-bit value handed to the builtin as the new FS base.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
  return;
}
269 
/// Writes the FS segment base through the 64-bit WRFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    64-bit value handed to the builtin as the new FS base.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
  return;
}
275 
/// Writes the GS segment base through the 32-bit WRGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    32-bit value handed to the builtin as the new GS base.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
  return;
}
281 
/// Writes the GS segment base through the 64-bit WRGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    64-bit value handed to the builtin as the new GS base.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
  return;
}
287 
288 #endif
289 #endif /* __FSGSBASE__ */
290 
291 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
292 
293 /* The structs used below are to force the load/store to be unaligned. This
294  * is accomplished with the __packed__ attribute. The __may_alias__ prevents
295  * tbaa metadata from being generated based on the struct and the type of the
296  * field inside of it.
297  */
298 
/// Loads a 16-bit integer from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 16-bit value to read; it does not have to be aligned.
/// \returns The loaded value after a 16-bit byte swap.
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_src_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i16 *__src = (const struct __be_src_i16 *)__P;
  return __builtin_bswap16(__src->__v);
}
306 
/// Stores the 16-bit integer \a __D at \a __P with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address to write the 16-bit value to; it does not have to be aligned.
/// \param __D
///    Value whose byte-swapped form is stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_dst_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i16 *__dst = (struct __be_dst_i16 *)__P;
  __dst->__v = __builtin_bswap16(__D);
}
314 
/// Loads a 32-bit integer from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 32-bit value to read; it does not have to be aligned.
/// \returns The loaded value after a 32-bit byte swap.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_src_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i32 *__src = (const struct __be_src_i32 *)__P;
  return __builtin_bswap32(__src->__v);
}
322 
/// Stores the 32-bit integer \a __D at \a __P with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address to write the 32-bit value to; it does not have to be aligned.
/// \param __D
///    Value whose byte-swapped form is stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_dst_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i32 *__dst = (struct __be_dst_i32 *)__P;
  __dst->__v = __builtin_bswap32(__D);
}
330 
331 #ifdef __x86_64__
/// Loads a 64-bit integer from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 64-bit value to read; it does not have to be aligned.
/// \returns The loaded value after a 64-bit byte swap.
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_src_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i64 *__src = (const struct __be_src_i64 *)__P;
  return __builtin_bswap64(__src->__v);
}
339 
/// Stores the 64-bit integer \a __D at \a __P with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address to write the 64-bit value to; it does not have to be aligned.
/// \param __D
///    Value whose byte-swapped form is stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D)
{
  /* __packed__ removes the alignment requirement from the access and
   * __may_alias__ keeps the wrapper struct out of type-based alias analysis. */
  struct __be_dst_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i64 *__dst = (struct __be_dst_i64 *)__P;
  __dst->__v = __builtin_bswap64(__D);
}
347 #endif
348 #endif /* __MOVBE__ */
349 
350 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
351 #include <rtmintrin.h>
352 #include <xtestintrin.h>
353 #endif
354 
355 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
356 #include <shaintrin.h>
357 #endif
358 
359 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
360 #include <fxsrintrin.h>
361 #endif
362 
363 /* No feature check desired due to internal MSC_VER checks */
364 #include <xsaveintrin.h>
365 
366 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
367 #include <xsaveoptintrin.h>
368 #endif
369 
370 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
371 #include <xsavecintrin.h>
372 #endif
373 
374 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
375 #include <xsavesintrin.h>
376 #endif
377 
378 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
379 #include <cetintrin.h>
380 #endif
381 
382 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
383  * whereas others are also available at all times. */
384 #include <adxintrin.h>
385 
386 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
387 #include <rdseedintrin.h>
388 #endif
389 
390 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
391 #include <wbnoinvdintrin.h>
392 #endif
393 
394 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
395 #include <cldemoteintrin.h>
396 #endif
397 
398 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
399 #include <waitpkgintrin.h>
400 #endif
401 
402 #if !defined(_MSC_VER) || __has_feature(modules) || \
403   defined(__MOVDIRI__) || defined(__MOVDIR64B__)
404 #include <movdirintrin.h>
405 #endif
406 
407 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
408 #include <pconfigintrin.h>
409 #endif
410 
411 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
412 #include <sgxintrin.h>
413 #endif
414 
415 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
416 #include <ptwriteintrin.h>
417 #endif
418 
419 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
420 #include <invpcidintrin.h>
421 #endif
422 
423 #if !defined(_MSC_VER) || __has_feature(modules) || \
424   defined(__AVX512VP2INTERSECT__)
425 #include <avx512vp2intersectintrin.h>
426 #endif
427 
428 #if !defined(_MSC_VER) || __has_feature(modules) || \
429   (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
430 #include <avx512vlvp2intersectintrin.h>
431 #endif
432 
433 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
434 #include <enqcmdintrin.h>
435 #endif
436 
437 #if defined(_MSC_VER) && __has_extension(gnu_asm)
438 /* Define the default attributes for these intrinsics */
439 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
440 #ifdef __cplusplus
441 extern "C" {
442 #endif
443 /*----------------------------------------------------------------------------*\
444 |* Interlocked Exchange HLE
445 \*----------------------------------------------------------------------------*/
446 #if defined(__i386__) || defined(__x86_64__)
447 static __inline__ long __DEFAULT_FN_ATTRS
448 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
449   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
450                        : "+r" (_Value), "+m" (*_Target) :: "memory");
451   return _Value;
452 }
453 static __inline__ long __DEFAULT_FN_ATTRS
454 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
455   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
456                        : "+r" (_Value), "+m" (*_Target) :: "memory");
457   return _Value;
458 }
459 #endif
460 #if defined(__x86_64__)
461 static __inline__ __int64 __DEFAULT_FN_ATTRS
462 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
463   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
464                        : "+r" (_Value), "+m" (*_Target) :: "memory");
465   return _Value;
466 }
467 static __inline__ __int64 __DEFAULT_FN_ATTRS
468 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
469   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
470                        : "+r" (_Value), "+m" (*_Target) :: "memory");
471   return _Value;
472 }
473 #endif
474 /*----------------------------------------------------------------------------*\
475 |* Interlocked Compare Exchange HLE
476 \*----------------------------------------------------------------------------*/
477 #if defined(__i386__) || defined(__x86_64__)
478 static __inline__ long __DEFAULT_FN_ATTRS
479 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
480                               long _Exchange, long _Comparand) {
481   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
482                        : "+a" (_Comparand), "+m" (*_Destination)
483                        : "r" (_Exchange) : "memory");
484   return _Comparand;
485 }
486 static __inline__ long __DEFAULT_FN_ATTRS
487 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
488                               long _Exchange, long _Comparand) {
489   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
490                        : "+a" (_Comparand), "+m" (*_Destination)
491                        : "r" (_Exchange) : "memory");
492   return _Comparand;
493 }
494 #endif
495 #if defined(__x86_64__)
496 static __inline__ __int64 __DEFAULT_FN_ATTRS
497 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
498                               __int64 _Exchange, __int64 _Comparand) {
499   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
500                        : "+a" (_Comparand), "+m" (*_Destination)
501                        : "r" (_Exchange) : "memory");
502   return _Comparand;
503 }
504 static __inline__ __int64 __DEFAULT_FN_ATTRS
505 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
506                               __int64 _Exchange, __int64 _Comparand) {
507   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
508                        : "+a" (_Comparand), "+m" (*_Destination)
509                        : "r" (_Exchange) : "memory");
510   return _Comparand;
511 }
512 #endif
513 #ifdef __cplusplus
514 }
515 #endif
516 
517 #undef __DEFAULT_FN_ATTRS
518 
519 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
520 
521 #endif /* __IMMINTRIN_H */
522