1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #define __IMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <x86gprintrin.h> 18 19 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 20 defined(__MMX__) 21 #include <mmintrin.h> 22 #endif 23 24 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 25 defined(__SSE__) 26 #include <xmmintrin.h> 27 #endif 28 29 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 30 defined(__SSE2__) 31 #include <emmintrin.h> 32 #endif 33 34 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 35 defined(__SSE3__) 36 #include <pmmintrin.h> 37 #endif 38 39 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 40 defined(__SSSE3__) 41 #include <tmmintrin.h> 42 #endif 43 44 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 45 (defined(__SSE4_2__) || defined(__SSE4_1__)) 46 #include <smmintrin.h> 47 #endif 48 49 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 50 (defined(__AES__) || defined(__PCLMUL__)) 51 #include <wmmintrin.h> 52 #endif 53 54 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 55 defined(__CLFLUSHOPT__) 56 #include <clflushoptintrin.h> 57 #endif 58 59 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 60 defined(__CLWB__) 61 #include <clwbintrin.h> 62 #endif 63 64 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 65 defined(__AVX__) 66 #include <avxintrin.h> 67 #endif 68 69 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 70 defined(__AVX2__) 71 #include <avx2intrin.h> 72 #endif 73 74 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 75 defined(__F16C__) 76 #include <f16cintrin.h> 77 #endif 78 79 /* No feature check desired due to internal checks */ 80 #include <bmiintrin.h> 81 82 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 83 defined(__BMI2__) 84 #include <bmi2intrin.h> 85 #endif 86 87 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 88 defined(__LZCNT__) 89 #include <lzcntintrin.h> 90 #endif 91 92 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 93 defined(__POPCNT__) 94 #include <popcntintrin.h> 95 #endif 96 97 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 98 defined(__FMA__) 99 #include <fmaintrin.h> 100 #endif 101 102 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 103 defined(__AVX512F__) 104 #include <avx512fintrin.h> 105 #endif 106 107 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 108 defined(__AVX512VL__) 109 #include <avx512vlintrin.h> 110 #endif 111 112 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 113 defined(__AVX512BW__) 114 #include <avx512bwintrin.h> 115 #endif 116 117 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 118 defined(__AVX512BITALG__) 119 #include <avx512bitalgintrin.h> 120 #endif 121 122 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 123 defined(__AVX512CD__) 124 #include <avx512cdintrin.h> 125 #endif 126 127 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 128 defined(__AVX512VPOPCNTDQ__) 129 #include <avx512vpopcntdqintrin.h> 130 #endif 131 132 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 134 #include <avx512vpopcntdqvlintrin.h> 135 #endif 136 137 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 138 defined(__AVX512VNNI__) 139 #include <avx512vnniintrin.h> 140 #endif 141 142 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 143 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 144 #include <avx512vlvnniintrin.h> 145 #endif 146 147 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 148 defined(__AVXVNNI__) 149 #include <avxvnniintrin.h> 150 #endif 151 152 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 153 defined(__AVX512DQ__) 154 #include <avx512dqintrin.h> 155 #endif 156 157 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 158 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 159 #include <avx512vlbitalgintrin.h> 160 #endif 161 162 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 163 (defined(__AVX512VL__) && defined(__AVX512BW__)) 164 #include <avx512vlbwintrin.h> 165 #endif 166 167 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 168 (defined(__AVX512VL__) && defined(__AVX512CD__)) 169 #include <avx512vlcdintrin.h> 170 #endif 171 172 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 173 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 174 #include <avx512vldqintrin.h> 175 #endif 176 177 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 178 defined(__AVX512ER__) 179 #include <avx512erintrin.h> 180 #endif 181 182 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 183 defined(__AVX512IFMA__) 184 #include <avx512ifmaintrin.h> 185 #endif 186 187 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 188 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 189 #include <avx512ifmavlintrin.h> 190 #endif 191 192 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 193 defined(__AVXIFMA__) 194 #include <avxifmaintrin.h> 195 #endif 196 197 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 198 defined(__AVX512VBMI__) 199 #include <avx512vbmiintrin.h> 200 #endif 201 202 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 203 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 204 #include <avx512vbmivlintrin.h> 205 #endif 206 207 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 208 defined(__AVX512VBMI2__) 209 #include <avx512vbmi2intrin.h> 210 #endif 211 212 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 214 #include <avx512vlvbmi2intrin.h> 215 #endif 216 217 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 218 defined(__AVX512PF__) 219 #include <avx512pfintrin.h> 220 #endif 221 222 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 223 defined(__AVX512FP16__) 224 #include <avx512fp16intrin.h> 225 #endif 226 227 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 228 (defined(__AVX512VL__) && defined(__AVX512FP16__)) 229 #include <avx512vlfp16intrin.h> 230 #endif 231 232 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 233 defined(__AVX512BF16__) 234 #include <avx512bf16intrin.h> 235 #endif 236 237 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 238 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 239 #include <avx512vlbf16intrin.h> 240 #endif 241 242 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 243 defined(__PKU__) 244 #include <pkuintrin.h> 245 #endif 246 247 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 248 defined(__VPCLMULQDQ__) 249 #include <vpclmulqdqintrin.h> 250 #endif 251 252 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 253 defined(__VAES__) 254 #include <vaesintrin.h> 255 #endif 256 257 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 258 defined(__GFNI__) 259 #include <gfniintrin.h> 260 #endif 261 262 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 263 defined(__AVXVNNIINT8__) 264 #include <avxvnniint8intrin.h> 265 #endif 266 267 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 268 defined(__AVXNECONVERT__) 269 #include <avxneconvertintrin.h> 270 #endif 271 272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 273 defined(__SHA512__) 274 #include <sha512intrin.h> 275 #endif 276 277 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 278 defined(__SM3__) 279 #include <sm3intrin.h> 280 #endif 281 282 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 283 defined(__SM4__) 284 #include <sm4intrin.h> 285 #endif 286 287 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 288 defined(__AVXVNNIINT16__) 289 #include <avxvnniint16intrin.h> 290 #endif 291 292 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 293 defined(__RDPID__) 294 /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 295 /// 296 /// \headerfile <immintrin.h> 297 /// 298 /// This intrinsic corresponds to the <c> RDPID </c> instruction. 299 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 300 _rdpid_u32(void) { 301 return __builtin_ia32_rdpid(); 302 } 303 #endif // __RDPID__ 304 305 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 306 defined(__RDRND__) 307 /// Returns a 16-bit hardware-generated random value. 308 /// 309 /// \headerfile <immintrin.h> 310 /// 311 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 312 /// 313 /// \param __p 314 /// A pointer to a 16-bit memory location to place the random value. 315 /// \returns 1 if the value was successfully generated, 0 otherwise. 316 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 317 _rdrand16_step(unsigned short *__p) 318 { 319 return (int)__builtin_ia32_rdrand16_step(__p); 320 } 321 322 /// Returns a 32-bit hardware-generated random value. 323 /// 324 /// \headerfile <immintrin.h> 325 /// 326 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 327 /// 328 /// \param __p 329 /// A pointer to a 32-bit memory location to place the random value. 330 /// \returns 1 if the value was successfully generated, 0 otherwise. 331 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 332 _rdrand32_step(unsigned int *__p) 333 { 334 return (int)__builtin_ia32_rdrand32_step(__p); 335 } 336 337 /// Returns a 64-bit hardware-generated random value. 338 /// 339 /// \headerfile <immintrin.h> 340 /// 341 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 342 /// 343 /// \param __p 344 /// A pointer to a 64-bit memory location to place the random value. 345 /// \returns 1 if the value was successfully generated, 0 otherwise. 346 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 347 _rdrand64_step(unsigned long long *__p) 348 { 349 #ifdef __x86_64__ 350 return (int)__builtin_ia32_rdrand64_step(__p); 351 #else 352 // We need to emulate the functionality of 64-bit rdrand with 2 32-bit 353 // rdrand instructions. 354 unsigned int __lo, __hi; 355 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); 356 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); 357 if (__res_lo && __res_hi) { 358 *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; 359 return 1; 360 } else { 361 *__p = 0; 362 return 0; 363 } 364 #endif 365 } 366 #endif /* __RDRND__ */ 367 368 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 369 defined(__FSGSBASE__) 370 #ifdef __x86_64__ 371 /// Reads the FS base register. 372 /// 373 /// \headerfile <immintrin.h> 374 /// 375 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 376 /// 377 /// \returns The lower 32 bits of the FS base register. 378 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 379 _readfsbase_u32(void) 380 { 381 return __builtin_ia32_rdfsbase32(); 382 } 383 384 /// Reads the FS base register. 385 /// 386 /// \headerfile <immintrin.h> 387 /// 388 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 389 /// 390 /// \returns The contents of the FS base register. 391 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 392 _readfsbase_u64(void) 393 { 394 return __builtin_ia32_rdfsbase64(); 395 } 396 397 /// Reads the GS base register. 398 /// 399 /// \headerfile <immintrin.h> 400 /// 401 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 402 /// 403 /// \returns The lower 32 bits of the GS base register. 404 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 405 _readgsbase_u32(void) 406 { 407 return __builtin_ia32_rdgsbase32(); 408 } 409 410 /// Reads the GS base register. 411 /// 412 /// \headerfile <immintrin.h> 413 /// 414 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 415 /// 416 /// \returns The contents of the GS base register. 417 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 418 _readgsbase_u64(void) 419 { 420 return __builtin_ia32_rdgsbase64(); 421 } 422 423 /// Modifies the FS base register. 424 /// 425 /// \headerfile <immintrin.h> 426 /// 427 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 428 /// 429 /// \param __V 430 /// Value to use for the lower 32 bits of the FS base register. 431 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 432 _writefsbase_u32(unsigned int __V) 433 { 434 __builtin_ia32_wrfsbase32(__V); 435 } 436 437 /// Modifies the FS base register. 438 /// 439 /// \headerfile <immintrin.h> 440 /// 441 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 442 /// 443 /// \param __V 444 /// Value to use for the FS base register. 445 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 446 _writefsbase_u64(unsigned long long __V) 447 { 448 __builtin_ia32_wrfsbase64(__V); 449 } 450 451 /// Modifies the GS base register. 452 /// 453 /// \headerfile <immintrin.h> 454 /// 455 /// This intrinsic corresponds to the <c> WRGSBASE </c> instruction. 456 /// 457 /// \param __V 458 /// Value to use for the lower 32 bits of the GS base register. 459 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 460 _writegsbase_u32(unsigned int __V) 461 { 462 __builtin_ia32_wrgsbase32(__V); 463 } 464 465 /// Modifies the GS base register. 466 /// 467 /// \headerfile <immintrin.h> 468 /// 469 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 470 /// 471 /// \param __V 472 /// Value to use for GS base register. 473 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 474 _writegsbase_u64(unsigned long long __V) 475 { 476 __builtin_ia32_wrgsbase64(__V); 477 } 478 479 #endif 480 #endif /* __FSGSBASE__ */ 481 482 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 483 defined(__MOVBE__) 484 485 /* The structs used below are to force the load/store to be unaligned. This 486 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 487 * tbaa metadata from being generated based on the struct and the type of the 488 * field inside of it. 489 */ 490 491 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 492 _loadbe_i16(void const * __P) { 493 struct __loadu_i16 { 494 unsigned short __v; 495 } __attribute__((__packed__, __may_alias__)); 496 return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); 497 } 498 499 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 500 _storebe_i16(void * __P, short __D) { 501 struct __storeu_i16 { 502 unsigned short __v; 503 } __attribute__((__packed__, __may_alias__)); 504 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); 505 } 506 507 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 508 _loadbe_i32(void const * __P) { 509 struct __loadu_i32 { 510 unsigned int __v; 511 } __attribute__((__packed__, __may_alias__)); 512 return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); 513 } 514 515 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 516 _storebe_i32(void * __P, int __D) { 517 struct __storeu_i32 { 518 unsigned int __v; 519 } __attribute__((__packed__, __may_alias__)); 520 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); 521 } 522 523 #ifdef __x86_64__ 524 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 525 _loadbe_i64(void const * __P) { 526 struct __loadu_i64 { 527 unsigned long long __v; 528 } __attribute__((__packed__, __may_alias__)); 529 return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); 530 } 531 532 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 533 _storebe_i64(void * __P, long long __D) { 534 struct __storeu_i64 { 535 unsigned long long __v; 536 } __attribute__((__packed__, __may_alias__)); 537 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); 538 } 539 #endif 540 #endif /* __MOVBE */ 541 542 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 543 defined(__RTM__) 544 #include <rtmintrin.h> 545 #include <xtestintrin.h> 546 #endif 547 548 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 549 defined(__SHA__) 550 #include <shaintrin.h> 551 #endif 552 553 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 554 defined(__FXSR__) 555 #include <fxsrintrin.h> 556 #endif 557 558 /* No feature check desired due to internal MSC_VER checks */ 559 #include <xsaveintrin.h> 560 561 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 562 defined(__XSAVEOPT__) 563 #include <xsaveoptintrin.h> 564 #endif 565 566 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 567 defined(__XSAVEC__) 568 #include <xsavecintrin.h> 569 #endif 570 571 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 572 defined(__XSAVES__) 573 #include <xsavesintrin.h> 574 #endif 575 576 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 577 defined(__SHSTK__) 578 #include <cetintrin.h> 579 #endif 580 581 /* Some intrinsics inside adxintrin.h are available only on processors with ADX, 582 * whereas others are also available at all times. */ 583 #include <adxintrin.h> 584 585 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 586 defined(__RDSEED__) 587 #include <rdseedintrin.h> 588 #endif 589 590 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 591 defined(__WBNOINVD__) 592 #include <wbnoinvdintrin.h> 593 #endif 594 595 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 596 defined(__CLDEMOTE__) 597 #include <cldemoteintrin.h> 598 #endif 599 600 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 601 defined(__WAITPKG__) 602 #include <waitpkgintrin.h> 603 #endif 604 605 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 606 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 607 #include <movdirintrin.h> 608 #endif 609 610 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 611 defined(__PCONFIG__) 612 #include <pconfigintrin.h> 613 #endif 614 615 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 616 defined(__SGX__) 617 #include <sgxintrin.h> 618 #endif 619 620 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 621 defined(__PTWRITE__) 622 #include <ptwriteintrin.h> 623 #endif 624 625 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 626 defined(__INVPCID__) 627 #include <invpcidintrin.h> 628 #endif 629 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 630 defined(__AMX_FP16__) 631 #include <amxfp16intrin.h> 632 #endif 633 634 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 635 defined(__KL__) || defined(__WIDEKL__) 636 #include <keylockerintrin.h> 637 #endif 638 639 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 640 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) 641 #include <amxintrin.h> 642 #endif 643 644 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 645 defined(__AMX_COMPLEX__) 646 #include <amxcomplexintrin.h> 647 #endif 648 649 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 650 defined(__AVX512VP2INTERSECT__) 651 #include <avx512vp2intersectintrin.h> 652 #endif 653 654 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 655 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 656 #include <avx512vlvp2intersectintrin.h> 657 #endif 658 659 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 660 defined(__ENQCMD__) 661 #include <enqcmdintrin.h> 662 #endif 663 664 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 665 defined(__SERIALIZE__) 666 #include <serializeintrin.h> 667 #endif 668 669 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 670 defined(__TSXLDTRK__) 671 #include <tsxldtrkintrin.h> 672 #endif 673 674 #if defined(_MSC_VER) && __has_extension(gnu_asm) 675 /* Define the default attributes for these intrinsics */ 676 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 677 #ifdef __cplusplus 678 extern "C" { 679 #endif 680 /*----------------------------------------------------------------------------*\ 681 |* Interlocked Exchange HLE 682 \*----------------------------------------------------------------------------*/ 683 #if defined(__i386__) || defined(__x86_64__) 684 static __inline__ long __DEFAULT_FN_ATTRS 685 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 686 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 687 : "+r" (_Value), "+m" (*_Target) :: "memory"); 688 return _Value; 689 } 690 static __inline__ long __DEFAULT_FN_ATTRS 691 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 692 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 693 : "+r" (_Value), "+m" (*_Target) :: "memory"); 694 return _Value; 695 } 696 #endif 697 #if defined(__x86_64__) 698 static __inline__ __int64 __DEFAULT_FN_ATTRS 699 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 700 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 701 : "+r" (_Value), "+m" (*_Target) :: "memory"); 702 return _Value; 703 } 704 static __inline__ __int64 __DEFAULT_FN_ATTRS 705 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 706 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 707 : "+r" (_Value), "+m" (*_Target) :: "memory"); 708 return _Value; 709 } 710 #endif 711 /*----------------------------------------------------------------------------*\ 712 |* Interlocked Compare Exchange HLE 713 \*----------------------------------------------------------------------------*/ 714 #if defined(__i386__) || defined(__x86_64__) 715 static __inline__ long __DEFAULT_FN_ATTRS 716 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 717 long _Exchange, long _Comparand) { 718 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 719 : "+a" (_Comparand), "+m" (*_Destination) 720 : "r" (_Exchange) : "memory"); 721 return _Comparand; 722 } 723 static __inline__ long __DEFAULT_FN_ATTRS 724 _InterlockedCompareExchange_HLERelease(long volatile *_Destination, 725 long _Exchange, long _Comparand) { 726 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 727 : "+a" (_Comparand), "+m" (*_Destination) 728 : "r" (_Exchange) : "memory"); 729 return _Comparand; 730 } 731 #endif 732 #if defined(__x86_64__) 733 static __inline__ __int64 __DEFAULT_FN_ATTRS 734 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 735 __int64 _Exchange, __int64 _Comparand) { 736 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 737 : "+a" (_Comparand), "+m" (*_Destination) 738 : "r" (_Exchange) : "memory"); 739 return _Comparand; 740 } 741 static __inline__ __int64 __DEFAULT_FN_ATTRS 742 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 743 __int64 _Exchange, __int64 _Comparand) { 744 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 745 : "+a" (_Comparand), "+m" (*_Destination) 746 : "r" (_Exchange) : "memory"); 747 return _Comparand; 748 } 749 #endif 750 #ifdef __cplusplus 751 } 752 #endif 753 754 #undef __DEFAULT_FN_ATTRS 755 756 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 757 758 #endif /* __IMMINTRIN_H */ 759