1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #define __IMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <x86gprintrin.h> 18 19 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 20 defined(__MMX__) 21 #include <mmintrin.h> 22 #endif 23 24 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 25 defined(__SSE__) 26 #include <xmmintrin.h> 27 #endif 28 29 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 30 defined(__SSE2__) 31 #include <emmintrin.h> 32 #endif 33 34 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 35 defined(__SSE3__) 36 #include <pmmintrin.h> 37 #endif 38 39 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 40 defined(__SSSE3__) 41 #include <tmmintrin.h> 42 #endif 43 44 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 45 (defined(__SSE4_2__) || defined(__SSE4_1__)) 46 #include <smmintrin.h> 47 #endif 48 49 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 50 (defined(__AES__) || defined(__PCLMUL__)) 51 #include <wmmintrin.h> 52 #endif 53 54 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 55 defined(__CLFLUSHOPT__) 56 #include <clflushoptintrin.h> 57 #endif 58 59 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 60 defined(__CLWB__) 61 #include <clwbintrin.h> 62 #endif 63 64 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 65 defined(__AVX__) 66 #include <avxintrin.h> 67 #endif 68 69 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 70 defined(__AVX2__) 71 #include <avx2intrin.h> 72 #endif 73 74 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 75 defined(__F16C__) 76 #include <f16cintrin.h> 77 #endif 78 79 /* No feature check desired due to internal checks */ 80 #include <bmiintrin.h> 81 82 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 83 defined(__BMI2__) 84 #include <bmi2intrin.h> 85 #endif 86 87 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 88 defined(__LZCNT__) 89 #include <lzcntintrin.h> 90 #endif 91 92 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 93 defined(__POPCNT__) 94 #include <popcntintrin.h> 95 #endif 96 97 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 98 defined(__FMA__) 99 #include <fmaintrin.h> 100 #endif 101 102 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 103 defined(__AVX512F__) 104 #include <avx512fintrin.h> 105 #endif 106 107 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 108 defined(__AVX512VL__) 109 #include <avx512vlintrin.h> 110 #endif 111 112 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 113 defined(__AVX512BW__) 114 #include <avx512bwintrin.h> 115 #endif 116 117 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 118 defined(__AVX512BITALG__) 119 #include <avx512bitalgintrin.h> 120 #endif 121 122 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 123 defined(__AVX512CD__) 124 #include <avx512cdintrin.h> 125 #endif 126 127 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 128 defined(__AVX512VPOPCNTDQ__) 129 #include <avx512vpopcntdqintrin.h> 130 #endif 131 132 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 134 #include <avx512vpopcntdqvlintrin.h> 135 #endif 136 137 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 138 defined(__AVX512VNNI__) 139 #include <avx512vnniintrin.h> 140 #endif 141 142 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 143 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 144 #include <avx512vlvnniintrin.h> 145 #endif 146 147 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 148 defined(__AVXVNNI__) 149 #include <avxvnniintrin.h> 150 #endif 151 152 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 153 defined(__AVX512DQ__) 154 #include <avx512dqintrin.h> 155 #endif 156 157 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 158 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 159 #include <avx512vlbitalgintrin.h> 160 #endif 161 162 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 163 (defined(__AVX512VL__) && defined(__AVX512BW__)) 164 #include <avx512vlbwintrin.h> 165 #endif 166 167 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 168 (defined(__AVX512VL__) && defined(__AVX512CD__)) 169 #include <avx512vlcdintrin.h> 170 #endif 171 172 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 173 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 174 #include <avx512vldqintrin.h> 175 #endif 176 177 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 178 defined(__AVX512ER__) 179 #include <avx512erintrin.h> 180 #endif 181 182 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 183 defined(__AVX512IFMA__) 184 #include <avx512ifmaintrin.h> 185 #endif 186 187 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 188 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 189 #include <avx512ifmavlintrin.h> 190 #endif 191 192 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 193 defined(__AVXIFMA__) 194 #include <avxifmaintrin.h> 195 #endif 196 197 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 198 defined(__AVX512VBMI__) 199 #include <avx512vbmiintrin.h> 200 #endif 201 202 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 203 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 204 #include <avx512vbmivlintrin.h> 205 #endif 206 207 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 208 defined(__AVX512VBMI2__) 209 #include <avx512vbmi2intrin.h> 210 #endif 211 212 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 214 #include <avx512vlvbmi2intrin.h> 215 #endif 216 217 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 218 defined(__AVX512PF__) 219 #include <avx512pfintrin.h> 220 #endif 221 222 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 223 defined(__AVX512FP16__) 224 #include <avx512fp16intrin.h> 225 #endif 226 227 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 228 (defined(__AVX512VL__) && defined(__AVX512FP16__)) 229 #include <avx512vlfp16intrin.h> 230 #endif 231 232 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 233 defined(__AVX512BF16__) 234 #include <avx512bf16intrin.h> 235 #endif 236 237 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 238 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 239 #include <avx512vlbf16intrin.h> 240 #endif 241 242 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 243 defined(__PKU__) 244 #include <pkuintrin.h> 245 #endif 246 247 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 248 defined(__VPCLMULQDQ__) 249 #include <vpclmulqdqintrin.h> 250 #endif 251 252 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 253 defined(__VAES__) 254 #include <vaesintrin.h> 255 #endif 256 257 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 258 defined(__GFNI__) 259 #include <gfniintrin.h> 260 #endif 261 262 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 263 defined(__AVXVNNIINT8__) 264 #include <avxvnniint8intrin.h> 265 #endif 266 267 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 268 defined(__AVXNECONVERT__) 269 #include <avxneconvertintrin.h> 270 #endif 271 272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 273 defined(__SHA512__) 274 #include <sha512intrin.h> 275 #endif 276 277 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 278 defined(__SM3__) 279 #include <sm3intrin.h> 280 #endif 281 282 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 283 defined(__SM4__) 284 #include <sm4intrin.h> 285 #endif 286 287 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 288 defined(__AVXVNNIINT16__) 289 #include <avxvnniint16intrin.h> 290 #endif 291 292 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 293 defined(__RDPID__) 294 /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). 295 /// 296 /// \headerfile <immintrin.h> 297 /// 298 /// This intrinsic corresponds to the <c> RDPID </c> instruction. 299 /// 300 /// \returns The 32-bit contents of the MSR. 301 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 302 _rdpid_u32(void) { 303 return __builtin_ia32_rdpid(); 304 } 305 #endif // __RDPID__ 306 307 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 308 defined(__RDRND__) 309 /// Returns a 16-bit hardware-generated random value. 310 /// 311 /// \headerfile <immintrin.h> 312 /// 313 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 314 /// 315 /// \param __p 316 /// A pointer to a 16-bit memory location to place the random value. 317 /// \returns 1 if the value was successfully generated, 0 otherwise. 318 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 319 _rdrand16_step(unsigned short *__p) 320 { 321 return (int)__builtin_ia32_rdrand16_step(__p); 322 } 323 324 /// Returns a 32-bit hardware-generated random value. 325 /// 326 /// \headerfile <immintrin.h> 327 /// 328 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 329 /// 330 /// \param __p 331 /// A pointer to a 32-bit memory location to place the random value. 332 /// \returns 1 if the value was successfully generated, 0 otherwise. 333 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 334 _rdrand32_step(unsigned int *__p) 335 { 336 return (int)__builtin_ia32_rdrand32_step(__p); 337 } 338 339 /// Returns a 64-bit hardware-generated random value. 340 /// 341 /// \headerfile <immintrin.h> 342 /// 343 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 344 /// 345 /// \param __p 346 /// A pointer to a 64-bit memory location to place the random value. 347 /// \returns 1 if the value was successfully generated, 0 otherwise. 348 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 349 _rdrand64_step(unsigned long long *__p) 350 { 351 #ifdef __x86_64__ 352 return (int)__builtin_ia32_rdrand64_step(__p); 353 #else 354 // We need to emulate the functionality of 64-bit rdrand with 2 32-bit 355 // rdrand instructions. 356 unsigned int __lo, __hi; 357 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); 358 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); 359 if (__res_lo && __res_hi) { 360 *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; 361 return 1; 362 } else { 363 *__p = 0; 364 return 0; 365 } 366 #endif 367 } 368 #endif /* __RDRND__ */ 369 370 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 371 defined(__FSGSBASE__) 372 #ifdef __x86_64__ 373 /// Reads the FS base register. 374 /// 375 /// \headerfile <immintrin.h> 376 /// 377 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 378 /// 379 /// \returns The lower 32 bits of the FS base register. 380 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 381 _readfsbase_u32(void) 382 { 383 return __builtin_ia32_rdfsbase32(); 384 } 385 386 /// Reads the FS base register. 387 /// 388 /// \headerfile <immintrin.h> 389 /// 390 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 391 /// 392 /// \returns The contents of the FS base register. 393 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 394 _readfsbase_u64(void) 395 { 396 return __builtin_ia32_rdfsbase64(); 397 } 398 399 /// Reads the GS base register. 400 /// 401 /// \headerfile <immintrin.h> 402 /// 403 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 404 /// 405 /// \returns The lower 32 bits of the GS base register. 406 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 407 _readgsbase_u32(void) 408 { 409 return __builtin_ia32_rdgsbase32(); 410 } 411 412 /// Reads the GS base register. 413 /// 414 /// \headerfile <immintrin.h> 415 /// 416 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 417 /// 418 /// \returns The contents of the GS base register. 419 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 420 _readgsbase_u64(void) 421 { 422 return __builtin_ia32_rdgsbase64(); 423 } 424 425 /// Modifies the FS base register. 426 /// 427 /// \headerfile <immintrin.h> 428 /// 429 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 430 /// 431 /// \param __V 432 /// Value to use for the lower 32 bits of the FS base register. 433 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 434 _writefsbase_u32(unsigned int __V) 435 { 436 __builtin_ia32_wrfsbase32(__V); 437 } 438 439 /// Modifies the FS base register. 440 /// 441 /// \headerfile <immintrin.h> 442 /// 443 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 444 /// 445 /// \param __V 446 /// Value to use for the FS base register. 447 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 448 _writefsbase_u64(unsigned long long __V) 449 { 450 __builtin_ia32_wrfsbase64(__V); 451 } 452 453 /// Modifies the GS base register. 454 /// 455 /// \headerfile <immintrin.h> 456 /// 457 /// This intrinsic corresponds to the <c> WRGSBASE </c> instruction. 458 /// 459 /// \param __V 460 /// Value to use for the lower 32 bits of the GS base register. 461 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 462 _writegsbase_u32(unsigned int __V) 463 { 464 __builtin_ia32_wrgsbase32(__V); 465 } 466 467 /// Modifies the GS base register. 468 /// 469 /// \headerfile <immintrin.h> 470 /// 471 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 472 /// 473 /// \param __V 474 /// Value to use for GS base register. 475 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 476 _writegsbase_u64(unsigned long long __V) 477 { 478 __builtin_ia32_wrgsbase64(__V); 479 } 480 481 #endif 482 #endif /* __FSGSBASE__ */ 483 484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 485 defined(__MOVBE__) 486 487 /* The structs used below are to force the load/store to be unaligned. This 488 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 489 * tbaa metadata from being generated based on the struct and the type of the 490 * field inside of it. 491 */ 492 493 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 494 _loadbe_i16(void const * __P) { 495 struct __loadu_i16 { 496 unsigned short __v; 497 } __attribute__((__packed__, __may_alias__)); 498 return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); 499 } 500 501 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 502 _storebe_i16(void * __P, short __D) { 503 struct __storeu_i16 { 504 unsigned short __v; 505 } __attribute__((__packed__, __may_alias__)); 506 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); 507 } 508 509 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 510 _loadbe_i32(void const * __P) { 511 struct __loadu_i32 { 512 unsigned int __v; 513 } __attribute__((__packed__, __may_alias__)); 514 return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); 515 } 516 517 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 518 _storebe_i32(void * __P, int __D) { 519 struct __storeu_i32 { 520 unsigned int __v; 521 } __attribute__((__packed__, __may_alias__)); 522 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); 523 } 524 525 #ifdef __x86_64__ 526 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 527 _loadbe_i64(void const * __P) { 528 struct __loadu_i64 { 529 unsigned long long __v; 530 } __attribute__((__packed__, __may_alias__)); 531 return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); 532 } 533 534 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 535 _storebe_i64(void * __P, long long __D) { 536 struct __storeu_i64 { 537 unsigned long long __v; 538 } __attribute__((__packed__, __may_alias__)); 539 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); 540 } 541 #endif 542 #endif /* __MOVBE */ 543 544 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 545 defined(__RTM__) 546 #include <rtmintrin.h> 547 #include <xtestintrin.h> 548 #endif 549 550 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 551 defined(__SHA__) 552 #include <shaintrin.h> 553 #endif 554 555 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 556 defined(__FXSR__) 557 #include <fxsrintrin.h> 558 #endif 559 560 /* No feature check desired due to internal MSC_VER checks */ 561 #include <xsaveintrin.h> 562 563 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 564 defined(__XSAVEOPT__) 565 #include <xsaveoptintrin.h> 566 #endif 567 568 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 569 defined(__XSAVEC__) 570 #include <xsavecintrin.h> 571 #endif 572 573 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 574 defined(__XSAVES__) 575 #include <xsavesintrin.h> 576 #endif 577 578 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 579 defined(__SHSTK__) 580 #include <cetintrin.h> 581 #endif 582 583 /* Intrinsics inside adcintrin.h are available at all times. */ 584 #include <adcintrin.h> 585 586 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 587 defined(__ADX__) 588 #include <adxintrin.h> 589 #endif 590 591 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 592 defined(__RDSEED__) 593 #include <rdseedintrin.h> 594 #endif 595 596 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 597 defined(__WBNOINVD__) 598 #include <wbnoinvdintrin.h> 599 #endif 600 601 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 602 defined(__CLDEMOTE__) 603 #include <cldemoteintrin.h> 604 #endif 605 606 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 607 defined(__WAITPKG__) 608 #include <waitpkgintrin.h> 609 #endif 610 611 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 612 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 613 #include <movdirintrin.h> 614 #endif 615 616 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 617 defined(__PCONFIG__) 618 #include <pconfigintrin.h> 619 #endif 620 621 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 622 defined(__SGX__) 623 #include <sgxintrin.h> 624 #endif 625 626 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 627 defined(__PTWRITE__) 628 #include <ptwriteintrin.h> 629 #endif 630 631 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 632 defined(__INVPCID__) 633 #include <invpcidintrin.h> 634 #endif 635 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 636 defined(__AMX_FP16__) 637 #include <amxfp16intrin.h> 638 #endif 639 640 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 641 defined(__KL__) || defined(__WIDEKL__) 642 #include <keylockerintrin.h> 643 #endif 644 645 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 646 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) 647 #include <amxintrin.h> 648 #endif 649 650 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 651 defined(__AMX_COMPLEX__) 652 #include <amxcomplexintrin.h> 653 #endif 654 655 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 656 defined(__AVX512VP2INTERSECT__) 657 #include <avx512vp2intersectintrin.h> 658 #endif 659 660 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 661 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 662 #include <avx512vlvp2intersectintrin.h> 663 #endif 664 665 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 666 defined(__ENQCMD__) 667 #include <enqcmdintrin.h> 668 #endif 669 670 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 671 defined(__SERIALIZE__) 672 #include <serializeintrin.h> 673 #endif 674 675 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 676 defined(__TSXLDTRK__) 677 #include <tsxldtrkintrin.h> 678 #endif 679 680 #if defined(_MSC_VER) && __has_extension(gnu_asm) 681 /* Define the default attributes for these intrinsics */ 682 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 683 #ifdef __cplusplus 684 extern "C" { 685 #endif 686 /*----------------------------------------------------------------------------*\ 687 |* Interlocked Exchange HLE 688 \*----------------------------------------------------------------------------*/ 689 #if defined(__i386__) || defined(__x86_64__) 690 static __inline__ long __DEFAULT_FN_ATTRS 691 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 692 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 693 : "+r" (_Value), "+m" (*_Target) :: "memory"); 694 return _Value; 695 } 696 static __inline__ long __DEFAULT_FN_ATTRS 697 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 698 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 699 : "+r" (_Value), "+m" (*_Target) :: "memory"); 700 return _Value; 701 } 702 #endif 703 #if defined(__x86_64__) 704 static __inline__ __int64 __DEFAULT_FN_ATTRS 705 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 706 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 707 : "+r" (_Value), "+m" (*_Target) :: "memory"); 708 return _Value; 709 } 710 static __inline__ __int64 __DEFAULT_FN_ATTRS 711 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 712 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 713 : "+r" (_Value), "+m" (*_Target) :: "memory"); 714 return _Value; 715 } 716 #endif 717 /*----------------------------------------------------------------------------*\ 718 |* Interlocked Compare Exchange HLE 719 \*----------------------------------------------------------------------------*/ 720 #if defined(__i386__) || defined(__x86_64__) 721 static __inline__ long __DEFAULT_FN_ATTRS 722 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 723 long _Exchange, long _Comparand) { 724 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 725 : "+a" (_Comparand), "+m" (*_Destination) 726 : "r" (_Exchange) : "memory"); 727 return _Comparand; 728 } 729 static __inline__ long __DEFAULT_FN_ATTRS 730 _InterlockedCompareExchange_HLERelease(long volatile *_Destination, 731 long _Exchange, long _Comparand) { 732 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 733 : "+a" (_Comparand), "+m" (*_Destination) 734 : "r" (_Exchange) : "memory"); 735 return _Comparand; 736 } 737 #endif 738 #if defined(__x86_64__) 739 static __inline__ __int64 __DEFAULT_FN_ATTRS 740 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 741 __int64 _Exchange, __int64 _Comparand) { 742 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 743 : "+a" (_Comparand), "+m" (*_Destination) 744 : "r" (_Exchange) : "memory"); 745 return _Comparand; 746 } 747 static __inline__ __int64 __DEFAULT_FN_ATTRS 748 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 749 __int64 _Exchange, __int64 _Comparand) { 750 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 751 : "+a" (_Comparand), "+m" (*_Destination) 752 : "r" (_Exchange) : "memory"); 753 return _Comparand; 754 } 755 #endif 756 #ifdef __cplusplus 757 } 758 #endif 759 760 #undef __DEFAULT_FN_ATTRS 761 762 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 763 764 #endif /* __IMMINTRIN_H */ 765