xref: /freebsd/contrib/llvm-project/clang/lib/Headers/immintrin.h (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __IMMINTRIN_H
11 #define __IMMINTRIN_H
12 
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
16 
17 #include <x86gprintrin.h>
18 
19 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
20     defined(__MMX__)
21 #include <mmintrin.h>
22 #endif
23 
24 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
25     defined(__SSE__)
26 #include <xmmintrin.h>
27 #endif
28 
29 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
30     defined(__SSE2__)
31 #include <emmintrin.h>
32 #endif
33 
34 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
35     defined(__SSE3__)
36 #include <pmmintrin.h>
37 #endif
38 
39 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
40     defined(__SSSE3__)
41 #include <tmmintrin.h>
42 #endif
43 
44 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
45     (defined(__SSE4_2__) || defined(__SSE4_1__))
46 #include <smmintrin.h>
47 #endif
48 
49 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
50     (defined(__AES__) || defined(__PCLMUL__))
51 #include <wmmintrin.h>
52 #endif
53 
54 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
55     defined(__CLFLUSHOPT__)
56 #include <clflushoptintrin.h>
57 #endif
58 
59 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
60     defined(__CLWB__)
61 #include <clwbintrin.h>
62 #endif
63 
64 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
65     defined(__AVX__)
66 #include <avxintrin.h>
67 #endif
68 
69 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
70     defined(__AVX2__)
71 #include <avx2intrin.h>
72 #endif
73 
74 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
75     defined(__F16C__)
76 #include <f16cintrin.h>
77 #endif
78 
79 /* No feature check desired due to internal checks */
80 #include <bmiintrin.h>
81 
82 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
83     defined(__BMI2__)
84 #include <bmi2intrin.h>
85 #endif
86 
87 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
88     defined(__LZCNT__)
89 #include <lzcntintrin.h>
90 #endif
91 
92 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
93     defined(__POPCNT__)
94 #include <popcntintrin.h>
95 #endif
96 
97 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
98     defined(__FMA__)
99 #include <fmaintrin.h>
100 #endif
101 
102 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
103     defined(__AVX512F__)
104 #include <avx512fintrin.h>
105 #endif
106 
107 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
108     defined(__AVX512VL__)
109 #include <avx512vlintrin.h>
110 #endif
111 
112 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
113     defined(__AVX512BW__)
114 #include <avx512bwintrin.h>
115 #endif
116 
117 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
118     defined(__AVX512BITALG__)
119 #include <avx512bitalgintrin.h>
120 #endif
121 
122 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
123     defined(__AVX512CD__)
124 #include <avx512cdintrin.h>
125 #endif
126 
127 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
128     defined(__AVX512VPOPCNTDQ__)
129 #include <avx512vpopcntdqintrin.h>
130 #endif
131 
132 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
133     (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
134 #include <avx512vpopcntdqvlintrin.h>
135 #endif
136 
137 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
138     defined(__AVX512VNNI__)
139 #include <avx512vnniintrin.h>
140 #endif
141 
142 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
143     (defined(__AVX512VL__) && defined(__AVX512VNNI__))
144 #include <avx512vlvnniintrin.h>
145 #endif
146 
147 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
148     defined(__AVXVNNI__)
149 #include <avxvnniintrin.h>
150 #endif
151 
152 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
153     defined(__AVX512DQ__)
154 #include <avx512dqintrin.h>
155 #endif
156 
157 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
158     (defined(__AVX512VL__) && defined(__AVX512BITALG__))
159 #include <avx512vlbitalgintrin.h>
160 #endif
161 
162 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
163     (defined(__AVX512VL__) && defined(__AVX512BW__))
164 #include <avx512vlbwintrin.h>
165 #endif
166 
167 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
168     (defined(__AVX512VL__) && defined(__AVX512CD__))
169 #include <avx512vlcdintrin.h>
170 #endif
171 
172 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
173     (defined(__AVX512VL__) && defined(__AVX512DQ__))
174 #include <avx512vldqintrin.h>
175 #endif
176 
177 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
178     defined(__AVX512ER__)
179 #include <avx512erintrin.h>
180 #endif
181 
182 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
183     defined(__AVX512IFMA__)
184 #include <avx512ifmaintrin.h>
185 #endif
186 
187 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
188     (defined(__AVX512IFMA__) && defined(__AVX512VL__))
189 #include <avx512ifmavlintrin.h>
190 #endif
191 
192 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
193     defined(__AVXIFMA__)
194 #include <avxifmaintrin.h>
195 #endif
196 
197 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
198     defined(__AVX512VBMI__)
199 #include <avx512vbmiintrin.h>
200 #endif
201 
202 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
203     (defined(__AVX512VBMI__) && defined(__AVX512VL__))
204 #include <avx512vbmivlintrin.h>
205 #endif
206 
207 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
208     defined(__AVX512VBMI2__)
209 #include <avx512vbmi2intrin.h>
210 #endif
211 
212 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
213     (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
214 #include <avx512vlvbmi2intrin.h>
215 #endif
216 
217 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
218     defined(__AVX512PF__)
219 #include <avx512pfintrin.h>
220 #endif
221 
222 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
223     defined(__AVX512FP16__)
224 #include <avx512fp16intrin.h>
225 #endif
226 
227 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
228     (defined(__AVX512VL__) && defined(__AVX512FP16__))
229 #include <avx512vlfp16intrin.h>
230 #endif
231 
232 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
233     defined(__AVX512BF16__)
234 #include <avx512bf16intrin.h>
235 #endif
236 
237 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
238     (defined(__AVX512VL__) && defined(__AVX512BF16__))
239 #include <avx512vlbf16intrin.h>
240 #endif
241 
242 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
243     defined(__PKU__)
244 #include <pkuintrin.h>
245 #endif
246 
247 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
248     defined(__VPCLMULQDQ__)
249 #include <vpclmulqdqintrin.h>
250 #endif
251 
252 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
253     defined(__VAES__)
254 #include <vaesintrin.h>
255 #endif
256 
257 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
258     defined(__GFNI__)
259 #include <gfniintrin.h>
260 #endif
261 
262 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
263     defined(__AVXVNNIINT8__)
264 #include <avxvnniint8intrin.h>
265 #endif
266 
267 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
268     defined(__AVXNECONVERT__)
269 #include <avxneconvertintrin.h>
270 #endif
271 
272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
273     defined(__SHA512__)
274 #include <sha512intrin.h>
275 #endif
276 
277 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
278     defined(__SM3__)
279 #include <sm3intrin.h>
280 #endif
281 
282 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
283     defined(__SM4__)
284 #include <sm4intrin.h>
285 #endif
286 
287 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
288     defined(__AVXVNNIINT16__)
289 #include <avxvnniint16intrin.h>
290 #endif
291 
292 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
293     defined(__RDPID__)
/// Retrieves the contents of the IA32_TSC_AUX MSR (address 0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The MSR contents as a 32-bit unsigned value.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
305 #endif // __RDPID__
306 
307 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
308     defined(__RDRND__)
/// Requests a 16-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Address of the 16-bit location that receives the random value.
/// \returns 1 when a value was generated, 0 when generation failed.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  unsigned int __ok = __builtin_ia32_rdrand16_step(__p);
  return (int)__ok;
}
323 
/// Requests a 32-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Address of the 32-bit location that receives the random value.
/// \returns 1 when a value was generated, 0 when generation failed.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  unsigned int __ok = __builtin_ia32_rdrand32_step(__p);
  return (int)__ok;
}
338 
/// Requests a 64-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction. When not
/// compiling for x86-64, the result is assembled from two 32-bit requests;
/// both requests are always issued, and \a *__p is set to 0 if either fails.
///
/// \param __p
///    Address of the 64-bit location that receives the random value.
/// \returns 1 when a value was generated, 0 when generation failed.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
#ifdef __x86_64__
  unsigned int __ok = __builtin_ia32_rdrand64_step(__p);
  return (int)__ok;
#else
  // The 64-bit form is unavailable here, so combine two 32-bit draws.
  unsigned int __lo, __hi;
  unsigned int __ok_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __ok_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (!__ok_lo || !__ok_hi) {
    *__p = 0;
    return 0;
  }
  *__p = (unsigned long long)__lo | ((unsigned long long)__hi << 32);
  return 1;
#endif
}
368 #endif /* __RDRND__ */
369 
370 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
371     defined(__FSGSBASE__)
372 #ifdef __x86_64__
/// Fetches the low half of the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
385 
/// Fetches the full FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
398 
/// Fetches the low half of the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
411 
/// Fetches the full GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
424 
/// Sets the FS base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
438 
/// Sets the FS base register from a 64-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
452 
/// Sets the GS base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
466 
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
480 
481 #endif
482 #endif /* __FSGSBASE__ */
483 
484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
485     defined(__MOVBE__)
486 
487 /* The structs used below are to force the load/store to be unaligned. This
488  * is accomplished with the __packed__ attribute. The __may_alias__ prevents
489  * tbaa metadata from being generated based on the struct and the type of the
490  * field inside of it.
491  */
492 
/// Loads a 16-bit value from a possibly unaligned address and returns it with
/// its two bytes swapped.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 16-bit value to load. No alignment is required.
/// \returns The loaded value with its byte order reversed.
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P)
{
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  unsigned short __raw = __src->__v;
  return (short)__builtin_bswap16(__raw);
}
500 
/// Swaps the two bytes of a 16-bit value and stores the result to a possibly
/// unaligned address.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address that receives the 16-bit value. No alignment is required.
/// \param __D
///    Value whose bytes are reversed before being stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D)
{
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  unsigned short __swapped = __builtin_bswap16((unsigned short)__D);
  __dst->__v = __swapped;
}
508 
/// Loads a 32-bit value from a possibly unaligned address and returns it with
/// its four bytes in reversed order.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 32-bit value to load. No alignment is required.
/// \returns The loaded value with its byte order reversed.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P)
{
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  unsigned int __raw = __src->__v;
  return (int)__builtin_bswap32(__raw);
}
516 
/// Reverses the four bytes of a 32-bit value and stores the result to a
/// possibly unaligned address.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address that receives the 32-bit value. No alignment is required.
/// \param __D
///    Value whose bytes are reversed before being stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D)
{
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  unsigned int __swapped = __builtin_bswap32((unsigned int)__D);
  __dst->__v = __swapped;
}
524 
525 #ifdef __x86_64__
/// Loads a 64-bit value from a possibly unaligned address and returns it with
/// its eight bytes in reversed order.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address of the 64-bit value to load. No alignment is required.
/// \returns The loaded value with its byte order reversed.
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P)
{
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  unsigned long long __raw = __src->__v;
  return (long long)__builtin_bswap64(__raw);
}
533 
/// Reverses the eight bytes of a 64-bit value and stores the result to a
/// possibly unaligned address.
///
/// \headerfile <immintrin.h>
///
/// \param __P
///    Address that receives the 64-bit value. No alignment is required.
/// \param __D
///    Value whose bytes are reversed before being stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D)
{
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  unsigned long long __swapped = __builtin_bswap64((unsigned long long)__D);
  __dst->__v = __swapped;
}
541 #endif
#endif /* __MOVBE__ */
543 
544 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
545     defined(__RTM__)
546 #include <rtmintrin.h>
547 #include <xtestintrin.h>
548 #endif
549 
550 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
551     defined(__SHA__)
552 #include <shaintrin.h>
553 #endif
554 
555 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
556     defined(__FXSR__)
557 #include <fxsrintrin.h>
558 #endif
559 
/* No feature check desired due to internal _MSC_VER checks */
561 #include <xsaveintrin.h>
562 
563 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
564     defined(__XSAVEOPT__)
565 #include <xsaveoptintrin.h>
566 #endif
567 
568 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
569     defined(__XSAVEC__)
570 #include <xsavecintrin.h>
571 #endif
572 
573 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
574     defined(__XSAVES__)
575 #include <xsavesintrin.h>
576 #endif
577 
578 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
579     defined(__SHSTK__)
580 #include <cetintrin.h>
581 #endif
582 
583 /* Intrinsics inside adcintrin.h are available at all times. */
584 #include <adcintrin.h>
585 
586 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
587     defined(__ADX__)
588 #include <adxintrin.h>
589 #endif
590 
591 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
592     defined(__RDSEED__)
593 #include <rdseedintrin.h>
594 #endif
595 
596 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
597     defined(__WBNOINVD__)
598 #include <wbnoinvdintrin.h>
599 #endif
600 
601 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
602     defined(__CLDEMOTE__)
603 #include <cldemoteintrin.h>
604 #endif
605 
606 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
607     defined(__WAITPKG__)
608 #include <waitpkgintrin.h>
609 #endif
610 
611 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
612     defined(__MOVDIRI__) || defined(__MOVDIR64B__)
613 #include <movdirintrin.h>
614 #endif
615 
616 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
617     defined(__PCONFIG__)
618 #include <pconfigintrin.h>
619 #endif
620 
621 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
622     defined(__SGX__)
623 #include <sgxintrin.h>
624 #endif
625 
626 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
627     defined(__PTWRITE__)
628 #include <ptwriteintrin.h>
629 #endif
630 
631 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
632     defined(__INVPCID__)
633 #include <invpcidintrin.h>
634 #endif
635 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
636     defined(__AMX_FP16__)
637 #include <amxfp16intrin.h>
638 #endif
639 
640 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
641     defined(__KL__) || defined(__WIDEKL__)
642 #include <keylockerintrin.h>
643 #endif
644 
645 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
646     defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
647 #include <amxintrin.h>
648 #endif
649 
650 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
651     defined(__AMX_COMPLEX__)
652 #include <amxcomplexintrin.h>
653 #endif
654 
655 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
656     defined(__AVX512VP2INTERSECT__)
657 #include <avx512vp2intersectintrin.h>
658 #endif
659 
660 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
661     (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
662 #include <avx512vlvp2intersectintrin.h>
663 #endif
664 
665 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
666     defined(__ENQCMD__)
667 #include <enqcmdintrin.h>
668 #endif
669 
670 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
671     defined(__SERIALIZE__)
672 #include <serializeintrin.h>
673 #endif
674 
675 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
676     defined(__TSXLDTRK__)
677 #include <tsxldtrkintrin.h>
678 #endif
679 
680 #if defined(_MSC_VER) && __has_extension(gnu_asm)
681 /* Define the default attributes for these intrinsics */
682 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
683 #ifdef __cplusplus
684 extern "C" {
685 #endif
686 /*----------------------------------------------------------------------------*\
687 |* Interlocked Exchange HLE
688 \*----------------------------------------------------------------------------*/
689 #if defined(__i386__) || defined(__x86_64__)
690 static __inline__ long __DEFAULT_FN_ATTRS
691 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
692   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
693                        : "+r" (_Value), "+m" (*_Target) :: "memory");
694   return _Value;
695 }
696 static __inline__ long __DEFAULT_FN_ATTRS
697 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
698   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
699                        : "+r" (_Value), "+m" (*_Target) :: "memory");
700   return _Value;
701 }
702 #endif
703 #if defined(__x86_64__)
704 static __inline__ __int64 __DEFAULT_FN_ATTRS
705 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
706   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
707                        : "+r" (_Value), "+m" (*_Target) :: "memory");
708   return _Value;
709 }
710 static __inline__ __int64 __DEFAULT_FN_ATTRS
711 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
712   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
713                        : "+r" (_Value), "+m" (*_Target) :: "memory");
714   return _Value;
715 }
716 #endif
717 /*----------------------------------------------------------------------------*\
718 |* Interlocked Compare Exchange HLE
719 \*----------------------------------------------------------------------------*/
720 #if defined(__i386__) || defined(__x86_64__)
721 static __inline__ long __DEFAULT_FN_ATTRS
722 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
723                               long _Exchange, long _Comparand) {
724   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
725                        : "+a" (_Comparand), "+m" (*_Destination)
726                        : "r" (_Exchange) : "memory");
727   return _Comparand;
728 }
729 static __inline__ long __DEFAULT_FN_ATTRS
730 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
731                               long _Exchange, long _Comparand) {
732   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
733                        : "+a" (_Comparand), "+m" (*_Destination)
734                        : "r" (_Exchange) : "memory");
735   return _Comparand;
736 }
737 #endif
738 #if defined(__x86_64__)
739 static __inline__ __int64 __DEFAULT_FN_ATTRS
740 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
741                               __int64 _Exchange, __int64 _Comparand) {
742   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
743                        : "+a" (_Comparand), "+m" (*_Destination)
744                        : "r" (_Exchange) : "memory");
745   return _Comparand;
746 }
747 static __inline__ __int64 __DEFAULT_FN_ATTRS
748 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
749                               __int64 _Exchange, __int64 _Comparand) {
750   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
751                        : "+a" (_Comparand), "+m" (*_Destination)
752                        : "r" (_Exchange) : "memory");
753   return _Comparand;
754 }
755 #endif
756 #ifdef __cplusplus
757 }
758 #endif
759 
760 #undef __DEFAULT_FN_ATTRS
761 
762 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
763 
764 #endif /* __IMMINTRIN_H */
765