1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #define __IMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <x86gprintrin.h> 18 19 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 20 defined(__MMX__) 21 #include <mmintrin.h> 22 #endif 23 24 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 25 defined(__SSE__) 26 #include <xmmintrin.h> 27 #endif 28 29 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 30 defined(__SSE2__) 31 #include <emmintrin.h> 32 #endif 33 34 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 35 defined(__SSE3__) 36 #include <pmmintrin.h> 37 #endif 38 39 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 40 defined(__SSSE3__) 41 #include <tmmintrin.h> 42 #endif 43 44 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 45 (defined(__SSE4_2__) || defined(__SSE4_1__)) 46 #include <smmintrin.h> 47 #endif 48 49 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 50 (defined(__AES__) || defined(__PCLMUL__)) 51 #include <wmmintrin.h> 52 #endif 53 54 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 55 defined(__CLFLUSHOPT__) 56 #include <clflushoptintrin.h> 57 #endif 58 59 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 60 defined(__CLWB__) 61 #include <clwbintrin.h> 62 #endif 63 64 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 65 defined(__AVX__) 66 #include <avxintrin.h> 67 #endif 68 69 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 70 defined(__AVX2__) 71 #include <avx2intrin.h> 72 #endif 73 74 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 75 defined(__F16C__) 76 #include <f16cintrin.h> 77 #endif 78 79 /* No feature check desired due to internal checks */ 80 #include <bmiintrin.h> 81 82 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 83 defined(__BMI2__) 84 #include <bmi2intrin.h> 85 #endif 86 87 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 88 defined(__LZCNT__) 89 #include <lzcntintrin.h> 90 #endif 91 92 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 93 defined(__POPCNT__) 94 #include <popcntintrin.h> 95 #endif 96 97 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 98 defined(__FMA__) 99 #include <fmaintrin.h> 100 #endif 101 102 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 103 defined(__AVX512F__) 104 #include <avx512fintrin.h> 105 #endif 106 107 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 108 defined(__AVX512VL__) 109 #include <avx512vlintrin.h> 110 #endif 111 112 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 113 defined(__AVX512BW__) 114 #include <avx512bwintrin.h> 115 #endif 116 117 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 118 defined(__AVX512BITALG__) 119 #include <avx512bitalgintrin.h> 120 #endif 121 122 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 123 defined(__AVX512CD__) 124 #include <avx512cdintrin.h> 125 #endif 126 127 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 128 defined(__AVX512VPOPCNTDQ__) 129 #include <avx512vpopcntdqintrin.h> 130 #endif 131 132 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 134 #include <avx512vpopcntdqvlintrin.h> 135 #endif 136 137 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 138 defined(__AVX512VNNI__) 139 #include <avx512vnniintrin.h> 140 #endif 141 142 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 143 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 144 #include <avx512vlvnniintrin.h> 145 #endif 146 147 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 148 defined(__AVXVNNI__) 149 #include <avxvnniintrin.h> 150 #endif 151 152 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 153 defined(__AVX512DQ__) 154 #include <avx512dqintrin.h> 155 #endif 156 157 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 158 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 159 #include <avx512vlbitalgintrin.h> 160 #endif 161 162 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 163 (defined(__AVX512VL__) && defined(__AVX512BW__)) 164 #include <avx512vlbwintrin.h> 165 #endif 166 167 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 168 (defined(__AVX512VL__) && defined(__AVX512CD__)) 169 #include <avx512vlcdintrin.h> 170 #endif 171 172 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 173 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 174 #include <avx512vldqintrin.h> 175 #endif 176 177 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 178 defined(__AVX512ER__) 179 #include <avx512erintrin.h> 180 #endif 181 182 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 183 defined(__AVX512IFMA__) 184 #include <avx512ifmaintrin.h> 185 #endif 186 187 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 188 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 189 #include <avx512ifmavlintrin.h> 190 #endif 191 192 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 193 defined(__AVXIFMA__) 194 #include <avxifmaintrin.h> 195 #endif 196 197 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 198 defined(__AVX512VBMI__) 199 #include <avx512vbmiintrin.h> 200 #endif 201 202 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 203 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 204 #include <avx512vbmivlintrin.h> 205 #endif 206 207 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 208 defined(__AVX512VBMI2__) 209 #include <avx512vbmi2intrin.h> 210 #endif 211 212 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 214 #include <avx512vlvbmi2intrin.h> 215 #endif 216 217 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 218 defined(__AVX512PF__) 219 #include <avx512pfintrin.h> 220 #endif 221 222 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 223 defined(__AVX512FP16__) 224 #include <avx512fp16intrin.h> 225 #endif 226 227 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 228 (defined(__AVX512VL__) && defined(__AVX512FP16__)) 229 #include <avx512vlfp16intrin.h> 230 #endif 231 232 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 233 defined(__AVX512BF16__) 234 #include <avx512bf16intrin.h> 235 #endif 236 237 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 238 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 239 #include <avx512vlbf16intrin.h> 240 #endif 241 242 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 243 defined(__PKU__) 244 #include <pkuintrin.h> 245 #endif 246 247 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 248 defined(__VPCLMULQDQ__) 249 #include <vpclmulqdqintrin.h> 250 #endif 251 252 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 253 defined(__VAES__) 254 #include <vaesintrin.h> 255 #endif 256 257 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 258 defined(__GFNI__) 259 #include <gfniintrin.h> 260 #endif 261 262 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 263 defined(__AVXVNNIINT8__) 264 #include <avxvnniint8intrin.h> 265 #endif 266 267 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 268 defined(__AVXNECONVERT__) 269 #include <avxneconvertintrin.h> 270 #endif 271 272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 273 defined(__SHA512__) 274 #include <sha512intrin.h> 275 #endif 276 277 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 278 defined(__SM3__) 279 #include <sm3intrin.h> 280 #endif 281 282 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 283 defined(__SM4__) 284 #include <sm4intrin.h> 285 #endif 286 287 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 288 defined(__AVXVNNIINT16__) 289 #include <avxvnniint16intrin.h> 290 #endif 291 292 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 293 defined(__RDPID__) 294 /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). 295 /// 296 /// \headerfile <immintrin.h> 297 /// 298 /// This intrinsic corresponds to the <c> RDPID </c> instruction. 299 /// 300 /// \returns The 32-bit contents of the MSR. 301 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 302 _rdpid_u32(void) { 303 return __builtin_ia32_rdpid(); 304 } 305 #endif // __RDPID__ 306 307 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 308 defined(__RDRND__) 309 /// Returns a 16-bit hardware-generated random value. 310 /// 311 /// \headerfile <immintrin.h> 312 /// 313 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 314 /// 315 /// \param __p 316 /// A pointer to a 16-bit memory location to place the random value. 317 /// \returns 1 if the value was successfully generated, 0 otherwise. 318 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 319 _rdrand16_step(unsigned short *__p) 320 { 321 return (int)__builtin_ia32_rdrand16_step(__p); 322 } 323 324 /// Returns a 32-bit hardware-generated random value. 325 /// 326 /// \headerfile <immintrin.h> 327 /// 328 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 329 /// 330 /// \param __p 331 /// A pointer to a 32-bit memory location to place the random value. 332 /// \returns 1 if the value was successfully generated, 0 otherwise. 333 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 334 _rdrand32_step(unsigned int *__p) 335 { 336 return (int)__builtin_ia32_rdrand32_step(__p); 337 } 338 339 /// Returns a 64-bit hardware-generated random value. 340 /// 341 /// \headerfile <immintrin.h> 342 /// 343 /// This intrinsic corresponds to the <c> RDRAND </c> instruction. 344 /// 345 /// \param __p 346 /// A pointer to a 64-bit memory location to place the random value. 347 /// \returns 1 if the value was successfully generated, 0 otherwise. 348 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 349 _rdrand64_step(unsigned long long *__p) 350 { 351 #ifdef __x86_64__ 352 return (int)__builtin_ia32_rdrand64_step(__p); 353 #else 354 // We need to emulate the functionality of 64-bit rdrand with 2 32-bit 355 // rdrand instructions. 356 unsigned int __lo, __hi; 357 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); 358 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); 359 if (__res_lo && __res_hi) { 360 *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; 361 return 1; 362 } else { 363 *__p = 0; 364 return 0; 365 } 366 #endif 367 } 368 #endif /* __RDRND__ */ 369 370 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 371 defined(__FSGSBASE__) 372 #ifdef __x86_64__ 373 /// Reads the FS base register. 374 /// 375 /// \headerfile <immintrin.h> 376 /// 377 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 378 /// 379 /// \returns The lower 32 bits of the FS base register. 380 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 381 _readfsbase_u32(void) 382 { 383 return __builtin_ia32_rdfsbase32(); 384 } 385 386 /// Reads the FS base register. 387 /// 388 /// \headerfile <immintrin.h> 389 /// 390 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. 391 /// 392 /// \returns The contents of the FS base register. 393 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 394 _readfsbase_u64(void) 395 { 396 return __builtin_ia32_rdfsbase64(); 397 } 398 399 /// Reads the GS base register. 400 /// 401 /// \headerfile <immintrin.h> 402 /// 403 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 404 /// 405 /// \returns The lower 32 bits of the GS base register. 406 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 407 _readgsbase_u32(void) 408 { 409 return __builtin_ia32_rdgsbase32(); 410 } 411 412 /// Reads the GS base register. 413 /// 414 /// \headerfile <immintrin.h> 415 /// 416 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. 417 /// 418 /// \returns The contents of the GS base register. 419 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 420 _readgsbase_u64(void) 421 { 422 return __builtin_ia32_rdgsbase64(); 423 } 424 425 /// Modifies the FS base register. 426 /// 427 /// \headerfile <immintrin.h> 428 /// 429 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 430 /// 431 /// \param __V 432 /// Value to use for the lower 32 bits of the FS base register. 433 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 434 _writefsbase_u32(unsigned int __V) 435 { 436 __builtin_ia32_wrfsbase32(__V); 437 } 438 439 /// Modifies the FS base register. 440 /// 441 /// \headerfile <immintrin.h> 442 /// 443 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 444 /// 445 /// \param __V 446 /// Value to use for the FS base register. 447 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 448 _writefsbase_u64(unsigned long long __V) 449 { 450 __builtin_ia32_wrfsbase64(__V); 451 } 452 453 /// Modifies the GS base register. 454 /// 455 /// \headerfile <immintrin.h> 456 /// 457 /// This intrinsic corresponds to the <c> WRGSBASE </c> instruction. 458 /// 459 /// \param __V 460 /// Value to use for the lower 32 bits of the GS base register. 461 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 462 _writegsbase_u32(unsigned int __V) 463 { 464 __builtin_ia32_wrgsbase32(__V); 465 } 466 467 /// Modifies the GS base register. 468 /// 469 /// \headerfile <immintrin.h> 470 /// 471 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. 472 /// 473 /// \param __V 474 /// Value to use for GS base register. 475 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 476 _writegsbase_u64(unsigned long long __V) 477 { 478 __builtin_ia32_wrgsbase64(__V); 479 } 480 481 #endif 482 #endif /* __FSGSBASE__ */ 483 484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 485 defined(__MOVBE__) 486 487 /* The structs used below are to force the load/store to be unaligned. This 488 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 489 * tbaa metadata from being generated based on the struct and the type of the 490 * field inside of it. 491 */ 492 493 /// Load a 16-bit value from memory and swap its bytes. 494 /// 495 /// \headerfile <x86intrin.h> 496 /// 497 /// This intrinsic corresponds to the MOVBE instruction. 498 /// 499 /// \param __P 500 /// A pointer to the 16-bit value to load. 501 /// \returns The byte-swapped value. 502 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 503 _loadbe_i16(void const * __P) { 504 struct __loadu_i16 { 505 unsigned short __v; 506 } __attribute__((__packed__, __may_alias__)); 507 return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); 508 } 509 510 /// Swap the bytes of a 16-bit value and store it to memory. 511 /// 512 /// \headerfile <x86intrin.h> 513 /// 514 /// This intrinsic corresponds to the MOVBE instruction. 515 /// 516 /// \param __P 517 /// A pointer to the memory for storing the swapped value. 518 /// \param __D 519 /// The 16-bit value to be byte-swapped. 520 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 521 _storebe_i16(void * __P, short __D) { 522 struct __storeu_i16 { 523 unsigned short __v; 524 } __attribute__((__packed__, __may_alias__)); 525 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); 526 } 527 528 /// Load a 32-bit value from memory and swap its bytes. 529 /// 530 /// \headerfile <x86intrin.h> 531 /// 532 /// This intrinsic corresponds to the MOVBE instruction. 533 /// 534 /// \param __P 535 /// A pointer to the 32-bit value to load. 536 /// \returns The byte-swapped value. 537 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 538 _loadbe_i32(void const * __P) { 539 struct __loadu_i32 { 540 unsigned int __v; 541 } __attribute__((__packed__, __may_alias__)); 542 return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); 543 } 544 545 /// Swap the bytes of a 32-bit value and store it to memory. 546 /// 547 /// \headerfile <x86intrin.h> 548 /// 549 /// This intrinsic corresponds to the MOVBE instruction. 550 /// 551 /// \param __P 552 /// A pointer to the memory for storing the swapped value. 553 /// \param __D 554 /// The 32-bit value to be byte-swapped. 555 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 556 _storebe_i32(void * __P, int __D) { 557 struct __storeu_i32 { 558 unsigned int __v; 559 } __attribute__((__packed__, __may_alias__)); 560 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); 561 } 562 563 #ifdef __x86_64__ 564 /// Load a 64-bit value from memory and swap its bytes. 565 /// 566 /// \headerfile <x86intrin.h> 567 /// 568 /// This intrinsic corresponds to the MOVBE instruction. 569 /// 570 /// \param __P 571 /// A pointer to the 64-bit value to load. 572 /// \returns The byte-swapped value. 573 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 574 _loadbe_i64(void const * __P) { 575 struct __loadu_i64 { 576 unsigned long long __v; 577 } __attribute__((__packed__, __may_alias__)); 578 return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); 579 } 580 581 /// Swap the bytes of a 64-bit value and store it to memory. 582 /// 583 /// \headerfile <x86intrin.h> 584 /// 585 /// This intrinsic corresponds to the MOVBE instruction. 586 /// 587 /// \param __P 588 /// A pointer to the memory for storing the swapped value. 589 /// \param __D 590 /// The 64-bit value to be byte-swapped. 591 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 592 _storebe_i64(void * __P, long long __D) { 593 struct __storeu_i64 { 594 unsigned long long __v; 595 } __attribute__((__packed__, __may_alias__)); 596 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); 597 } 598 #endif 599 #endif /* __MOVBE */ 600 601 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 602 defined(__RTM__) 603 #include <rtmintrin.h> 604 #include <xtestintrin.h> 605 #endif 606 607 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 608 defined(__SHA__) 609 #include <shaintrin.h> 610 #endif 611 612 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 613 defined(__FXSR__) 614 #include <fxsrintrin.h> 615 #endif 616 617 /* No feature check desired due to internal MSC_VER checks */ 618 #include <xsaveintrin.h> 619 620 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 621 defined(__XSAVEOPT__) 622 #include <xsaveoptintrin.h> 623 #endif 624 625 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 626 defined(__XSAVEC__) 627 #include <xsavecintrin.h> 628 #endif 629 630 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 631 defined(__XSAVES__) 632 #include <xsavesintrin.h> 633 #endif 634 635 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 636 defined(__SHSTK__) 637 #include <cetintrin.h> 638 #endif 639 640 /* Intrinsics inside adcintrin.h are available at all times. */ 641 #include <adcintrin.h> 642 643 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 644 defined(__ADX__) 645 #include <adxintrin.h> 646 #endif 647 648 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 649 defined(__RDSEED__) 650 #include <rdseedintrin.h> 651 #endif 652 653 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 654 defined(__WBNOINVD__) 655 #include <wbnoinvdintrin.h> 656 #endif 657 658 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 659 defined(__CLDEMOTE__) 660 #include <cldemoteintrin.h> 661 #endif 662 663 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 664 defined(__WAITPKG__) 665 #include <waitpkgintrin.h> 666 #endif 667 668 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 669 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 670 #include <movdirintrin.h> 671 #endif 672 673 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 674 defined(__PCONFIG__) 675 #include <pconfigintrin.h> 676 #endif 677 678 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 679 defined(__SGX__) 680 #include <sgxintrin.h> 681 #endif 682 683 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 684 defined(__PTWRITE__) 685 #include <ptwriteintrin.h> 686 #endif 687 688 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 689 defined(__INVPCID__) 690 #include <invpcidintrin.h> 691 #endif 692 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 693 defined(__AMX_FP16__) 694 #include <amxfp16intrin.h> 695 #endif 696 697 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 698 defined(__KL__) || defined(__WIDEKL__) 699 #include <keylockerintrin.h> 700 #endif 701 702 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 703 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) 704 #include <amxintrin.h> 705 #endif 706 707 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 708 defined(__AMX_COMPLEX__) 709 #include <amxcomplexintrin.h> 710 #endif 711 712 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 713 defined(__AVX512VP2INTERSECT__) 714 #include <avx512vp2intersectintrin.h> 715 #endif 716 717 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 718 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 719 #include <avx512vlvp2intersectintrin.h> 720 #endif 721 722 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 723 defined(__ENQCMD__) 724 #include <enqcmdintrin.h> 725 #endif 726 727 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 728 defined(__SERIALIZE__) 729 #include <serializeintrin.h> 730 #endif 731 732 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ 733 defined(__TSXLDTRK__) 734 #include <tsxldtrkintrin.h> 735 #endif 736 737 #if defined(_MSC_VER) && __has_extension(gnu_asm) 738 /* Define the default attributes for these intrinsics */ 739 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 740 #ifdef __cplusplus 741 extern "C" { 742 #endif 743 /*----------------------------------------------------------------------------*\ 744 |* Interlocked Exchange HLE 745 \*----------------------------------------------------------------------------*/ 746 #if defined(__i386__) || defined(__x86_64__) 747 static __inline__ long __DEFAULT_FN_ATTRS 748 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 749 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 750 : "+r" (_Value), "+m" (*_Target) :: "memory"); 751 return _Value; 752 } 753 static __inline__ long __DEFAULT_FN_ATTRS 754 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 755 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 756 : "+r" (_Value), "+m" (*_Target) :: "memory"); 757 return _Value; 758 } 759 #endif 760 #if defined(__x86_64__) 761 static __inline__ __int64 __DEFAULT_FN_ATTRS 762 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 763 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" 764 : "+r" (_Value), "+m" (*_Target) :: "memory"); 765 return _Value; 766 } 767 static __inline__ __int64 __DEFAULT_FN_ATTRS 768 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 769 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" 770 : "+r" (_Value), "+m" (*_Target) :: "memory"); 771 return _Value; 772 } 773 #endif 774 /*----------------------------------------------------------------------------*\ 775 |* Interlocked Compare Exchange HLE 776 \*----------------------------------------------------------------------------*/ 777 #if defined(__i386__) || defined(__x86_64__) 778 static __inline__ long __DEFAULT_FN_ATTRS 779 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 780 long _Exchange, long _Comparand) { 781 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 782 : "+a" (_Comparand), "+m" (*_Destination) 783 : "r" (_Exchange) : "memory"); 784 return _Comparand; 785 } 786 static __inline__ long __DEFAULT_FN_ATTRS 787 _InterlockedCompareExchange_HLERelease(long volatile *_Destination, 788 long _Exchange, long _Comparand) { 789 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 790 : "+a" (_Comparand), "+m" (*_Destination) 791 : "r" (_Exchange) : "memory"); 792 return _Comparand; 793 } 794 #endif 795 #if defined(__x86_64__) 796 static __inline__ __int64 __DEFAULT_FN_ATTRS 797 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 798 __int64 _Exchange, __int64 _Comparand) { 799 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" 800 : "+a" (_Comparand), "+m" (*_Destination) 801 : "r" (_Exchange) : "memory"); 802 return _Comparand; 803 } 804 static __inline__ __int64 __DEFAULT_FN_ATTRS 805 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 806 __int64 _Exchange, __int64 _Comparand) { 807 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" 808 : "+a" (_Comparand), "+m" (*_Destination) 809 : "r" (_Exchange) : "memory"); 810 return _Comparand; 811 } 812 #endif 813 #ifdef __cplusplus 814 } 815 #endif 816 817 #undef __DEFAULT_FN_ATTRS 818 819 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 820 821 #endif /* __IMMINTRIN_H */ 822