1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #define __IMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <x86gprintrin.h> 18 19 #if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__) 20 #include <mmintrin.h> 21 #endif 22 23 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__) 24 #include <xmmintrin.h> 25 #endif 26 27 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__) 28 #include <emmintrin.h> 29 #endif 30 31 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__) 32 #include <pmmintrin.h> 33 #endif 34 35 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__) 36 #include <tmmintrin.h> 37 #endif 38 39 #if !defined(__SCE__) || __has_feature(modules) || \ 40 (defined(__SSE4_2__) || defined(__SSE4_1__)) 41 #include <smmintrin.h> 42 #endif 43 44 #if !defined(__SCE__) || __has_feature(modules) || \ 45 (defined(__AES__) || defined(__PCLMUL__)) 46 #include <wmmintrin.h> 47 #endif 48 49 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__) 50 #include <clflushoptintrin.h> 51 #endif 52 53 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__) 54 #include <clwbintrin.h> 55 #endif 56 57 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__) 58 #include <avxintrin.h> 59 #endif 60 61 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__) 62 #include <avx2intrin.h> 63 #endif 64 65 #if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__) 66 #include <f16cintrin.h> 
67 #endif 68 69 /* No feature check desired due to internal checks */ 70 #include <bmiintrin.h> 71 72 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__) 73 #include <bmi2intrin.h> 74 #endif 75 76 #if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__) 77 #include <lzcntintrin.h> 78 #endif 79 80 #if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__) 81 #include <popcntintrin.h> 82 #endif 83 84 #if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__) 85 #include <fmaintrin.h> 86 #endif 87 88 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__) 89 #include <avx512fintrin.h> 90 #endif 91 92 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__) 93 #include <avx512vlintrin.h> 94 #endif 95 96 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__) 97 #include <avx512bwintrin.h> 98 #endif 99 100 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__) 101 #include <avx512bitalgintrin.h> 102 #endif 103 104 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__) 105 #include <avx512cdintrin.h> 106 #endif 107 108 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) 109 #include <avx512vpopcntdqintrin.h> 110 #endif 111 112 #if !defined(__SCE__) || __has_feature(modules) || \ 113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 114 #include <avx512vpopcntdqvlintrin.h> 115 #endif 116 117 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__) 118 #include <avx512vnniintrin.h> 119 #endif 120 121 #if !defined(__SCE__) || __has_feature(modules) || \ 122 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 123 #include <avx512vlvnniintrin.h> 124 #endif 125 126 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__) 127 #include <avxvnniintrin.h> 128 #endif 129 130 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__) 131 #include <avx512dqintrin.h> 
132 #endif 133 134 #if !defined(__SCE__) || __has_feature(modules) || \ 135 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 136 #include <avx512vlbitalgintrin.h> 137 #endif 138 139 #if !defined(__SCE__) || __has_feature(modules) || \ 140 (defined(__AVX512VL__) && defined(__AVX512BW__)) 141 #include <avx512vlbwintrin.h> 142 #endif 143 144 #if !defined(__SCE__) || __has_feature(modules) || \ 145 (defined(__AVX512VL__) && defined(__AVX512CD__)) 146 #include <avx512vlcdintrin.h> 147 #endif 148 149 #if !defined(__SCE__) || __has_feature(modules) || \ 150 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 151 #include <avx512vldqintrin.h> 152 #endif 153 154 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__) 155 #include <avx512ifmaintrin.h> 156 #endif 157 158 #if !defined(__SCE__) || __has_feature(modules) || \ 159 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 160 #include <avx512ifmavlintrin.h> 161 #endif 162 163 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__) 164 #include <avxifmaintrin.h> 165 #endif 166 167 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__) 168 #include <avx512vbmiintrin.h> 169 #endif 170 171 #if !defined(__SCE__) || __has_feature(modules) || \ 172 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 173 #include <avx512vbmivlintrin.h> 174 #endif 175 176 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__) 177 #include <avx512vbmi2intrin.h> 178 #endif 179 180 #if !defined(__SCE__) || __has_feature(modules) || \ 181 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 182 #include <avx512vlvbmi2intrin.h> 183 #endif 184 185 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__) 186 #include <avx512fp16intrin.h> 187 #endif 188 189 #if !defined(__SCE__) || __has_feature(modules) || \ 190 (defined(__AVX512VL__) && defined(__AVX512FP16__)) 191 #include <avx512vlfp16intrin.h> 192 #endif 193 194 #if !defined(__SCE__) || 
__has_feature(modules) || defined(__AVX512BF16__) 195 #include <avx512bf16intrin.h> 196 #endif 197 198 #if !defined(__SCE__) || __has_feature(modules) || \ 199 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 200 #include <avx512vlbf16intrin.h> 201 #endif 202 203 #if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__) 204 #include <pkuintrin.h> 205 #endif 206 207 #if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__) 208 #include <vpclmulqdqintrin.h> 209 #endif 210 211 #if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__) 212 #include <vaesintrin.h> 213 #endif 214 215 #if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__) 216 #include <gfniintrin.h> 217 #endif 218 219 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__) 220 #include <avxvnniint8intrin.h> 221 #endif 222 223 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__) 224 #include <avxneconvertintrin.h> 225 #endif 226 227 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__) 228 #include <sha512intrin.h> 229 #endif 230 231 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__) 232 #include <sm3intrin.h> 233 #endif 234 235 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__) 236 #include <sm4intrin.h> 237 #endif 238 239 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__) 240 #include <avxvnniint16intrin.h> 241 #endif 242 243 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__) 244 /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). 245 /// 246 /// \headerfile <immintrin.h> 247 /// 248 /// This intrinsic corresponds to the <c> RDPID </c> instruction. 249 /// 250 /// \returns The 32-bit contents of the MSR. 
251 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 252 _rdpid_u32(void) { 253 return __builtin_ia32_rdpid(); 254 } 255 #endif // __RDPID__ 256 257 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__) 258 /// Returns a 16-bit hardware-generated random value. 259 /// 260 /// \headerfile <immintrin.h> 261 /// 262 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 263 /// 264 /// \param __p 265 /// A pointer to a 16-bit memory location to place the random value. 266 /// \returns 1 if the value was successfully generated, 0 otherwise. 267 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 268 _rdrand16_step(unsigned short *__p) 269 { 270 return (int)__builtin_ia32_rdrand16_step(__p); 271 } 272 273 /// Returns a 32-bit hardware-generated random value. 274 /// 275 /// \headerfile <immintrin.h> 276 /// 277 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 278 /// 279 /// \param __p 280 /// A pointer to a 32-bit memory location to place the random value. 281 /// \returns 1 if the value was successfully generated, 0 otherwise. 282 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 283 _rdrand32_step(unsigned int *__p) 284 { 285 return (int)__builtin_ia32_rdrand32_step(__p); 286 } 287 288 /// Returns a 64-bit hardware-generated random value. 289 /// 290 /// \headerfile <immintrin.h> 291 /// 292 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 293 /// 294 /// \param __p 295 /// A pointer to a 64-bit memory location to place the random value. 296 /// \returns 1 if the value was successfully generated, 0 otherwise. 
297 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 298 _rdrand64_step(unsigned long long *__p) 299 { 300 #ifdef __x86_64__ 301 return (int)__builtin_ia32_rdrand64_step(__p); 302 #else 303 // We need to emulate the functionality of 64-bit rdrand with 2 32-bit 304 // rdrand instructions. 305 unsigned int __lo, __hi; 306 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); 307 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); 308 if (__res_lo && __res_hi) { 309 *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; 310 return 1; 311 } else { 312 *__p = 0; 313 return 0; 314 } 315 #endif 316 } 317 #endif /* __RDRND__ */ 318 319 #if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__) 320 #ifdef __x86_64__ 321 /// Reads the FS base register. 322 /// 323 /// \headerfile <immintrin.h> 324 /// 325 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 326 /// 327 /// \returns The lower 32 bits of the FS base register. 328 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 329 _readfsbase_u32(void) 330 { 331 return __builtin_ia32_rdfsbase32(); 332 } 333 334 /// Reads the FS base register. 335 /// 336 /// \headerfile <immintrin.h> 337 /// 338 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 339 /// 340 /// \returns The contents of the FS base register. 341 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 342 _readfsbase_u64(void) 343 { 344 return __builtin_ia32_rdfsbase64(); 345 } 346 347 /// Reads the GS base register. 348 /// 349 /// \headerfile <immintrin.h> 350 /// 351 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 352 /// 353 /// \returns The lower 32 bits of the GS base register. 
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}

/// Reads the full GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}

/// Writes a 32-bit value to the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

/// Writes a 64-bit value to the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

/// Writes a 32-bit value to the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

/// Modifies the GS base register.
416 /// 417 /// \headerfile <immintrin.h> 418 /// 419 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 420 /// 421 /// \param __V 422 /// Value to use for GS base register. 423 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 424 _writegsbase_u64(unsigned long long __V) 425 { 426 __builtin_ia32_wrgsbase64(__V); 427 } 428 429 #endif 430 #endif /* __FSGSBASE__ */ 431 432 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__) 433 434 /* The structs used below are to force the load/store to be unaligned. This 435 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 436 * tbaa metadata from being generated based on the struct and the type of the 437 * field inside of it. 438 */ 439 440 /// Load a 16-bit value from memory and swap its bytes. 441 /// 442 /// \headerfile <x86intrin.h> 443 /// 444 /// This intrinsic corresponds to the MOVBE instruction. 445 /// 446 /// \param __P 447 /// A pointer to the 16-bit value to load. 448 /// \returns The byte-swapped value. 449 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 450 _loadbe_i16(void const * __P) { 451 struct __loadu_i16 { 452 unsigned short __v; 453 } __attribute__((__packed__, __may_alias__)); 454 return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); 455 } 456 457 /// Swap the bytes of a 16-bit value and store it to memory. 458 /// 459 /// \headerfile <x86intrin.h> 460 /// 461 /// This intrinsic corresponds to the MOVBE instruction. 462 /// 463 /// \param __P 464 /// A pointer to the memory for storing the swapped value. 465 /// \param __D 466 /// The 16-bit value to be byte-swapped. 
467 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 468 _storebe_i16(void * __P, short __D) { 469 struct __storeu_i16 { 470 unsigned short __v; 471 } __attribute__((__packed__, __may_alias__)); 472 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); 473 } 474 475 /// Load a 32-bit value from memory and swap its bytes. 476 /// 477 /// \headerfile <x86intrin.h> 478 /// 479 /// This intrinsic corresponds to the MOVBE instruction. 480 /// 481 /// \param __P 482 /// A pointer to the 32-bit value to load. 483 /// \returns The byte-swapped value. 484 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 485 _loadbe_i32(void const * __P) { 486 struct __loadu_i32 { 487 unsigned int __v; 488 } __attribute__((__packed__, __may_alias__)); 489 return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); 490 } 491 492 /// Swap the bytes of a 32-bit value and store it to memory. 493 /// 494 /// \headerfile <x86intrin.h> 495 /// 496 /// This intrinsic corresponds to the MOVBE instruction. 497 /// 498 /// \param __P 499 /// A pointer to the memory for storing the swapped value. 500 /// \param __D 501 /// The 32-bit value to be byte-swapped. 502 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 503 _storebe_i32(void * __P, int __D) { 504 struct __storeu_i32 { 505 unsigned int __v; 506 } __attribute__((__packed__, __may_alias__)); 507 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); 508 } 509 510 #ifdef __x86_64__ 511 /// Load a 64-bit value from memory and swap its bytes. 512 /// 513 /// \headerfile <x86intrin.h> 514 /// 515 /// This intrinsic corresponds to the MOVBE instruction. 516 /// 517 /// \param __P 518 /// A pointer to the 64-bit value to load. 519 /// \returns The byte-swapped value. 
520 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 521 _loadbe_i64(void const * __P) { 522 struct __loadu_i64 { 523 unsigned long long __v; 524 } __attribute__((__packed__, __may_alias__)); 525 return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); 526 } 527 528 /// Swap the bytes of a 64-bit value and store it to memory. 529 /// 530 /// \headerfile <x86intrin.h> 531 /// 532 /// This intrinsic corresponds to the MOVBE instruction. 533 /// 534 /// \param __P 535 /// A pointer to the memory for storing the swapped value. 536 /// \param __D 537 /// The 64-bit value to be byte-swapped. 538 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 539 _storebe_i64(void * __P, long long __D) { 540 struct __storeu_i64 { 541 unsigned long long __v; 542 } __attribute__((__packed__, __may_alias__)); 543 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); 544 } 545 #endif 546 #endif /* __MOVBE */ 547 548 #if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__) 549 #include <rtmintrin.h> 550 #include <xtestintrin.h> 551 #endif 552 553 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__) 554 #include <shaintrin.h> 555 #endif 556 557 #if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__) 558 #include <fxsrintrin.h> 559 #endif 560 561 /* No feature check desired due to internal MSC_VER checks */ 562 #include <xsaveintrin.h> 563 564 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__) 565 #include <xsaveoptintrin.h> 566 #endif 567 568 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__) 569 #include <xsavecintrin.h> 570 #endif 571 572 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__) 573 #include <xsavesintrin.h> 574 #endif 575 576 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__) 577 #include <cetintrin.h> 578 #endif 579 580 /* 
Intrinsics inside adcintrin.h are available at all times. */ 581 #include <adcintrin.h> 582 583 #if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__) 584 #include <adxintrin.h> 585 #endif 586 587 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__) 588 #include <rdseedintrin.h> 589 #endif 590 591 #if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__) 592 #include <wbnoinvdintrin.h> 593 #endif 594 595 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__) 596 #include <cldemoteintrin.h> 597 #endif 598 599 #if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__) 600 #include <waitpkgintrin.h> 601 #endif 602 603 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \ 604 defined(__MOVDIR64B__) 605 #include <movdirintrin.h> 606 #endif 607 608 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__) 609 #include <pconfigintrin.h> 610 #endif 611 612 #if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__) 613 #include <sgxintrin.h> 614 #endif 615 616 #if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__) 617 #include <ptwriteintrin.h> 618 #endif 619 620 #if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__) 621 #include <invpcidintrin.h> 622 #endif 623 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__) 624 #include <amxfp16intrin.h> 625 #endif 626 627 #if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \ 628 defined(__WIDEKL__) 629 #include <keylockerintrin.h> 630 #endif 631 632 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \ 633 defined(__AMX_INT8__) || defined(__AMX_BF16__) 634 #include <amxintrin.h> 635 #endif 636 637 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__) 638 #include <amxcomplexintrin.h> 639 #endif 640 641 #if !defined(__SCE__) || __has_feature(modules) || \ 642 defined(__AVX512VP2INTERSECT__) 643 
#include <avx512vp2intersectintrin.h> 644 #endif 645 646 #if !defined(__SCE__) || __has_feature(modules) || \ 647 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 648 #include <avx512vlvp2intersectintrin.h> 649 #endif 650 651 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) 652 #include <enqcmdintrin.h> 653 #endif 654 655 #if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__) 656 #include <serializeintrin.h> 657 #endif 658 659 #if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__) 660 #include <tsxldtrkintrin.h> 661 #endif 662 663 #if defined(_MSC_VER) && __has_extension(gnu_asm) 664 /* Define the default attributes for these intrinsics */ 665 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 666 #ifdef __cplusplus 667 extern "C" { 668 #endif 669 /*----------------------------------------------------------------------------*\ 670 |* Interlocked Exchange HLE 671 \*----------------------------------------------------------------------------*/ 672 #if defined(__i386__) || defined(__x86_64__) 673 static __inline__ long __DEFAULT_FN_ATTRS 674 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 675 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 676 : "+r" (_Value), "+m" (*_Target) :: "memory"); 677 return _Value; 678 } 679 static __inline__ long __DEFAULT_FN_ATTRS 680 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 681 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 682 : "+r" (_Value), "+m" (*_Target) :: "memory"); 683 return _Value; 684 } 685 #endif 686 #if defined(__x86_64__) 687 static __inline__ __int64 __DEFAULT_FN_ATTRS 688 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 689 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 690 : "+r" (_Value), "+m" (*_Target) :: "memory"); 691 return _Value; 692 } 693 static __inline__ __int64 __DEFAULT_FN_ATTRS 694 
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 695 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 696 : "+r" (_Value), "+m" (*_Target) :: "memory"); 697 return _Value; 698 } 699 #endif 700 /*----------------------------------------------------------------------------*\ 701 |* Interlocked Compare Exchange HLE 702 \*----------------------------------------------------------------------------*/ 703 #if defined(__i386__) || defined(__x86_64__) 704 static __inline__ long __DEFAULT_FN_ATTRS 705 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 706 long _Exchange, long _Comparand) { 707 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 708 : "+a" (_Comparand), "+m" (*_Destination) 709 : "r" (_Exchange) : "memory"); 710 return _Comparand; 711 } 712 static __inline__ long __DEFAULT_FN_ATTRS 713 _InterlockedCompareExchange_HLERelease(long volatile *_Destination, 714 long _Exchange, long _Comparand) { 715 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 716 : "+a" (_Comparand), "+m" (*_Destination) 717 : "r" (_Exchange) : "memory"); 718 return _Comparand; 719 } 720 #endif 721 #if defined(__x86_64__) 722 static __inline__ __int64 __DEFAULT_FN_ATTRS 723 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 724 __int64 _Exchange, __int64 _Comparand) { 725 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 726 : "+a" (_Comparand), "+m" (*_Destination) 727 : "r" (_Exchange) : "memory"); 728 return _Comparand; 729 } 730 static __inline__ __int64 __DEFAULT_FN_ATTRS 731 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 732 __int64 _Exchange, __int64 _Comparand) { 733 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 734 : "+a" (_Comparand), "+m" (*_Destination) 735 : "r" (_Exchange) : "memory"); 736 return _Comparand; 737 } 738 #endif 739 #ifdef __cplusplus 740 } 741 #endif 742 743 #undef __DEFAULT_FN_ATTRS 
744 745 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 746 747 #endif /* __IMMINTRIN_H */ 748