1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10 #ifndef __IMMINTRIN_H
11 #define __IMMINTRIN_H
12
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
16
17 #include <x86gprintrin.h>
18
19 #if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
20 #include <mmintrin.h>
21 #endif
22
23 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
24 #include <xmmintrin.h>
25 #endif
26
27 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
28 #include <emmintrin.h>
29 #endif
30
31 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
32 #include <pmmintrin.h>
33 #endif
34
35 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
36 #include <tmmintrin.h>
37 #endif
38
39 #if !defined(__SCE__) || __has_feature(modules) || \
40 (defined(__SSE4_2__) || defined(__SSE4_1__))
41 #include <smmintrin.h>
42 #endif
43
44 #if !defined(__SCE__) || __has_feature(modules) || \
45 (defined(__AES__) || defined(__PCLMUL__))
46 #include <wmmintrin.h>
47 #endif
48
49 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
50 #include <clflushoptintrin.h>
51 #endif
52
53 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
54 #include <clwbintrin.h>
55 #endif
56
57 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
58 #include <avxintrin.h>
59 #endif
60
61 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
62 #include <avx2intrin.h>
63 #endif
64
65 #if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
66 #include <f16cintrin.h>
67 #endif
68
69 /* No feature check desired due to internal checks */
70 #include <bmiintrin.h>
71
72 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
73 #include <bmi2intrin.h>
74 #endif
75
76 #if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
77 #include <lzcntintrin.h>
78 #endif
79
80 #if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
81 #include <popcntintrin.h>
82 #endif
83
84 #if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
85 #include <fmaintrin.h>
86 #endif
87
88 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
89 #include <avx512fintrin.h>
90 #endif
91
92 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
93 #include <avx512vlintrin.h>
94 #endif
95
96 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
97 #include <avx512bwintrin.h>
98 #endif
99
100 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
101 #include <avx512bitalgintrin.h>
102 #endif
103
104 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
105 #include <avx512cdintrin.h>
106 #endif
107
108 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
109 #include <avx512vpopcntdqintrin.h>
110 #endif
111
112 #if !defined(__SCE__) || __has_feature(modules) || \
113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
114 #include <avx512vpopcntdqvlintrin.h>
115 #endif
116
117 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
118 #include <avx512vnniintrin.h>
119 #endif
120
121 #if !defined(__SCE__) || __has_feature(modules) || \
122 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
123 #include <avx512vlvnniintrin.h>
124 #endif
125
126 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
127 #include <avxvnniintrin.h>
128 #endif
129
130 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
131 #include <avx512dqintrin.h>
132 #endif
133
134 #if !defined(__SCE__) || __has_feature(modules) || \
135 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
136 #include <avx512vlbitalgintrin.h>
137 #endif
138
139 #if !defined(__SCE__) || __has_feature(modules) || \
140 (defined(__AVX512VL__) && defined(__AVX512BW__))
141 #include <avx512vlbwintrin.h>
142 #endif
143
144 #if !defined(__SCE__) || __has_feature(modules) || \
145 (defined(__AVX512VL__) && defined(__AVX512CD__))
146 #include <avx512vlcdintrin.h>
147 #endif
148
149 #if !defined(__SCE__) || __has_feature(modules) || \
150 (defined(__AVX512VL__) && defined(__AVX512DQ__))
151 #include <avx512vldqintrin.h>
152 #endif
153
154 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
155 #include <avx512ifmaintrin.h>
156 #endif
157
158 #if !defined(__SCE__) || __has_feature(modules) || \
159 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
160 #include <avx512ifmavlintrin.h>
161 #endif
162
163 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
164 #include <avxifmaintrin.h>
165 #endif
166
167 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
168 #include <avx512vbmiintrin.h>
169 #endif
170
171 #if !defined(__SCE__) || __has_feature(modules) || \
172 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
173 #include <avx512vbmivlintrin.h>
174 #endif
175
176 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
177 #include <avx512vbmi2intrin.h>
178 #endif
179
180 #if !defined(__SCE__) || __has_feature(modules) || \
181 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
182 #include <avx512vlvbmi2intrin.h>
183 #endif
184
185 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
186 #include <avx512fp16intrin.h>
187 #endif
188
189 #if !defined(__SCE__) || __has_feature(modules) || \
190 (defined(__AVX512VL__) && defined(__AVX512FP16__))
191 #include <avx512vlfp16intrin.h>
192 #endif
193
194 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
195 #include <avx512bf16intrin.h>
196 #endif
197
198 #if !defined(__SCE__) || __has_feature(modules) || \
199 (defined(__AVX512VL__) && defined(__AVX512BF16__))
200 #include <avx512vlbf16intrin.h>
201 #endif
202
203 #if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
204 #include <pkuintrin.h>
205 #endif
206
207 #if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
208 #include <vpclmulqdqintrin.h>
209 #endif
210
211 #if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
212 #include <vaesintrin.h>
213 #endif
214
215 #if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
216 #include <gfniintrin.h>
217 #endif
218
219 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
220 #include <avxvnniint8intrin.h>
221 #endif
222
223 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
224 #include <avxneconvertintrin.h>
225 #endif
226
227 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
228 #include <sha512intrin.h>
229 #endif
230
231 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
232 #include <sm3intrin.h>
233 #endif
234
235 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
236 #include <sm4intrin.h>
237 #endif
238
239 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
240 #include <avxvnniint16intrin.h>
241 #endif
242
243 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
255 #endif // __RDPID__
256
257 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  /* The builtin's status result is converted to int for the caller. */
  return (int)__builtin_ia32_rdrand16_step(__p);
}
272
/// Returns a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  /* The builtin's status result is converted to int for the caller. */
  return (int)__builtin_ia32_rdrand32_step(__p);
}
287
/// Returns a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// When \c __x86_64__ is not defined, the value is assembled from two 32-bit
/// RDRAND steps (low half first, then high half). If either step fails, zero
/// is stored through \a __p and 0 is returned.
///
/// \param __p
///    A pointer to a 64-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
  // rdrand instructions.
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    *__p = 0;
    return 0;
  }
#endif
}
317 #endif /* __RDRND__ */
318
319 #if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
320 #ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
333
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
346
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
359
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
372
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
386
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
400
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
414
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
428
429 #endif
430 #endif /* __FSGSBASE__ */
431
432 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
433
434 /* The structs used below are to force the load/store to be unaligned. This
435 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
436 * tbaa metadata from being generated based on the struct and the type of the
437 * field inside of it.
438 */
439
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed so the load carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
456
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed so the store carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
474
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed so the load carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
491
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed so the store carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}
509
510 #ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed so the load carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
527
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed so the store carries no alignment requirement; may_alias so the
   * access is not typed by this wrapper struct. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
545 #endif
#endif /* __MOVBE__ */
547
548 #if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
549 #include <rtmintrin.h>
550 #include <xtestintrin.h>
551 #endif
552
553 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
554 #include <shaintrin.h>
555 #endif
556
557 #if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
558 #include <fxsrintrin.h>
559 #endif
560
/* No feature check desired due to internal _MSC_VER checks */
562 #include <xsaveintrin.h>
563
564 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
565 #include <xsaveoptintrin.h>
566 #endif
567
568 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
569 #include <xsavecintrin.h>
570 #endif
571
572 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
573 #include <xsavesintrin.h>
574 #endif
575
576 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
577 #include <cetintrin.h>
578 #endif
579
580 /* Intrinsics inside adcintrin.h are available at all times. */
581 #include <adcintrin.h>
582
583 #if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
584 #include <adxintrin.h>
585 #endif
586
587 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
588 #include <rdseedintrin.h>
589 #endif
590
591 #if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
592 #include <wbnoinvdintrin.h>
593 #endif
594
595 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
596 #include <cldemoteintrin.h>
597 #endif
598
599 #if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
600 #include <waitpkgintrin.h>
601 #endif
602
603 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
604 defined(__MOVDIR64B__)
605 #include <movdirintrin.h>
606 #endif
607
608 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
609 #include <pconfigintrin.h>
610 #endif
611
612 #if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
613 #include <sgxintrin.h>
614 #endif
615
616 #if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
617 #include <ptwriteintrin.h>
618 #endif
619
620 #if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
621 #include <invpcidintrin.h>
622 #endif
623 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
624 #include <amxfp16intrin.h>
625 #endif
626
627 #if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
628 defined(__WIDEKL__)
629 #include <keylockerintrin.h>
630 #endif
631
632 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
633 defined(__AMX_INT8__) || defined(__AMX_BF16__)
634 #include <amxintrin.h>
635 #endif
636
637 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
638 #include <amxcomplexintrin.h>
639 #endif
640
641 #if !defined(__SCE__) || __has_feature(modules) || \
642 defined(__AVX512VP2INTERSECT__)
643 #include <avx512vp2intersectintrin.h>
644 #endif
645
646 #if !defined(__SCE__) || __has_feature(modules) || \
647 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
648 #include <avx512vlvp2intersectintrin.h>
649 #endif
650
651 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
652 #include <enqcmdintrin.h>
653 #endif
654
655 #if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
656 #include <serializeintrin.h>
657 #endif
658
659 #if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
660 #include <tsxldtrkintrin.h>
661 #endif
662
663 #if defined(_MSC_VER) && __has_extension(gnu_asm)
664 /* Define the default attributes for these intrinsics */
665 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
666 #ifdef __cplusplus
667 extern "C" {
668 #endif
669 /*----------------------------------------------------------------------------*\
670 |* Interlocked Exchange HLE
671 \*----------------------------------------------------------------------------*/
672 #if defined(__i386__) || defined(__x86_64__)
673 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile * _Target,long _Value)674 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
675 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
676 : "+r" (_Value), "+m" (*_Target) :: "memory");
677 return _Value;
678 }
679 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile * _Target,long _Value)680 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
681 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
682 : "+r" (_Value), "+m" (*_Target) :: "memory");
683 return _Value;
684 }
685 #endif
686 #if defined(__x86_64__)
687 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile * _Target,__int64 _Value)688 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
689 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
690 : "+r" (_Value), "+m" (*_Target) :: "memory");
691 return _Value;
692 }
693 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile * _Target,__int64 _Value)694 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
695 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
696 : "+r" (_Value), "+m" (*_Target) :: "memory");
697 return _Value;
698 }
699 #endif
700 /*----------------------------------------------------------------------------*\
701 |* Interlocked Compare Exchange HLE
702 \*----------------------------------------------------------------------------*/
703 #if defined(__i386__) || defined(__x86_64__)
704 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile * _Destination,long _Exchange,long _Comparand)705 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
706 long _Exchange, long _Comparand) {
707 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
708 : "+a" (_Comparand), "+m" (*_Destination)
709 : "r" (_Exchange) : "memory");
710 return _Comparand;
711 }
712 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile * _Destination,long _Exchange,long _Comparand)713 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
714 long _Exchange, long _Comparand) {
715 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
716 : "+a" (_Comparand), "+m" (*_Destination)
717 : "r" (_Exchange) : "memory");
718 return _Comparand;
719 }
720 #endif
721 #if defined(__x86_64__)
722 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile * _Destination,__int64 _Exchange,__int64 _Comparand)723 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
724 __int64 _Exchange, __int64 _Comparand) {
725 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
726 : "+a" (_Comparand), "+m" (*_Destination)
727 : "r" (_Exchange) : "memory");
728 return _Comparand;
729 }
730 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile * _Destination,__int64 _Exchange,__int64 _Comparand)731 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
732 __int64 _Exchange, __int64 _Comparand) {
733 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
734 : "+a" (_Comparand), "+m" (*_Destination)
735 : "r" (_Exchange) : "memory");
736 return _Comparand;
737 }
738 #endif
739 #ifdef __cplusplus
740 }
741 #endif
742
743 #undef __DEFAULT_FN_ATTRS
744
745 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
746
747 #endif /* __IMMINTRIN_H */
748