1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #define __IMMINTRIN_H 12 13 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) 14 #include <mmintrin.h> 15 #endif 16 17 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) 18 #include <xmmintrin.h> 19 #endif 20 21 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) 22 #include <emmintrin.h> 23 #endif 24 25 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) 26 #include <pmmintrin.h> 27 #endif 28 29 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) 30 #include <tmmintrin.h> 31 #endif 32 33 #if !defined(_MSC_VER) || __has_feature(modules) || \ 34 (defined(__SSE4_2__) || defined(__SSE4_1__)) 35 #include <smmintrin.h> 36 #endif 37 38 #if !defined(_MSC_VER) || __has_feature(modules) || \ 39 (defined(__AES__) || defined(__PCLMUL__)) 40 #include <wmmintrin.h> 41 #endif 42 43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) 44 #include <clflushoptintrin.h> 45 #endif 46 47 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) 48 #include <clwbintrin.h> 49 #endif 50 51 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) 52 #include <avxintrin.h> 53 #endif 54 55 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) 56 #include <avx2intrin.h> 57 #endif 58 59 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) 60 #include <f16cintrin.h> 61 #endif 62 63 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) 64 #include <vpclmulqdqintrin.h> 65 #endif 66 67 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) 68 #include <bmiintrin.h> 69 #endif 70 71 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) 72 #include <bmi2intrin.h> 73 #endif 74 75 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) 76 #include <lzcntintrin.h> 77 #endif 78 79 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) 80 #include <popcntintrin.h> 81 #endif 82 83 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) 84 #include <fmaintrin.h> 85 #endif 86 87 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) 88 #include <avx512fintrin.h> 89 #endif 90 91 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) 92 #include <avx512vlintrin.h> 93 #endif 94 95 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) 96 #include <avx512bwintrin.h> 97 #endif 98 99 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) 100 #include <avx512bitalgintrin.h> 101 #endif 102 103 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) 104 #include <avx512cdintrin.h> 105 #endif 106 107 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) 108 #include <avx512vpopcntdqintrin.h> 109 #endif 110 111 #if !defined(_MSC_VER) || __has_feature(modules) || \ 112 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 113 #include <avx512vpopcntdqvlintrin.h> 114 #endif 115 116 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) 117 #include <avx512vnniintrin.h> 118 #endif 119 120 #if !defined(_MSC_VER) || __has_feature(modules) || \ 121 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 122 #include <avx512vlvnniintrin.h> 123 #endif 124 125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) 126 #include <avx512dqintrin.h> 127 #endif 128 129 #if !defined(_MSC_VER) || __has_feature(modules) || \ 130 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 131 #include <avx512vlbitalgintrin.h> 132 #endif 133 134 #if !defined(_MSC_VER) || __has_feature(modules) || \ 135 (defined(__AVX512VL__) && defined(__AVX512BW__)) 136 #include <avx512vlbwintrin.h> 137 #endif 138 139 #if !defined(_MSC_VER) || __has_feature(modules) || \ 140 (defined(__AVX512VL__) && defined(__AVX512CD__)) 141 #include <avx512vlcdintrin.h> 142 #endif 143 144 #if !defined(_MSC_VER) || __has_feature(modules) || \ 145 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 146 #include <avx512vldqintrin.h> 147 #endif 148 149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) 150 #include <avx512erintrin.h> 151 #endif 152 153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) 154 #include <avx512ifmaintrin.h> 155 #endif 156 157 #if !defined(_MSC_VER) || __has_feature(modules) || \ 158 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 159 #include <avx512ifmavlintrin.h> 160 #endif 161 162 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) 163 #include <avx512vbmiintrin.h> 164 #endif 165 166 #if !defined(_MSC_VER) || __has_feature(modules) || \ 167 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 168 #include <avx512vbmivlintrin.h> 169 #endif 170 171 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) 172 #include <avx512vbmi2intrin.h> 173 #endif 174 175 #if !defined(_MSC_VER) || __has_feature(modules) || \ 176 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 177 #include <avx512vlvbmi2intrin.h> 178 #endif 179 180 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) 181 #include <avx512pfintrin.h> 182 #endif 183 184 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__) 185 #include <avx512bf16intrin.h> 186 #endif 187 188 #if !defined(_MSC_VER) || __has_feature(modules) || \ 189 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 190 #include <avx512vlbf16intrin.h> 191 #endif 192 193 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) 194 #include <pkuintrin.h> 195 #endif 196 197 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) 198 #include <vaesintrin.h> 199 #endif 200 201 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) 202 #include <gfniintrin.h> 203 #endif 204 205 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__) 206 /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 207 /// 208 /// \headerfile <immintrin.h> 209 /// 210 /// This intrinsic corresponds to the <c> RDPID </c> instruction. 211 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 212 _rdpid_u32(void) { 213 return __builtin_ia32_rdpid(); 214 } 215 #endif // __RDPID__ 216 217 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 218 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 219 _rdrand16_step(unsigned short *__p) 220 { 221 return __builtin_ia32_rdrand16_step(__p); 222 } 223 224 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 225 _rdrand32_step(unsigned int *__p) 226 { 227 return __builtin_ia32_rdrand32_step(__p); 228 } 229 230 #ifdef __x86_64__ 231 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 232 _rdrand64_step(unsigned long long *__p) 233 { 234 return __builtin_ia32_rdrand64_step(__p); 235 } 236 #endif 237 #endif /* __RDRND__ */ 238 239 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 240 #ifdef __x86_64__ 241 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 242 _readfsbase_u32(void) 243 { 244 return __builtin_ia32_rdfsbase32(); 245 } 246 247 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 248 _readfsbase_u64(void) 249 { 250 return __builtin_ia32_rdfsbase64(); 251 } 252 253 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 254 _readgsbase_u32(void) 255 { 256 return __builtin_ia32_rdgsbase32(); 257 } 258 259 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 260 _readgsbase_u64(void) 261 { 262 return __builtin_ia32_rdgsbase64(); 263 } 264 265 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 266 _writefsbase_u32(unsigned int __V) 267 { 268 __builtin_ia32_wrfsbase32(__V); 269 } 270 271 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 272 _writefsbase_u64(unsigned long long __V) 273 { 274 __builtin_ia32_wrfsbase64(__V); 275 } 276 277 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 278 _writegsbase_u32(unsigned int __V) 279 { 280 __builtin_ia32_wrgsbase32(__V); 281 } 282 283 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 284 _writegsbase_u64(unsigned long long __V) 285 { 286 __builtin_ia32_wrgsbase64(__V); 287 } 288 289 #endif 290 #endif /* __FSGSBASE__ */ 291 292 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) 293 294 /* The structs used below are to force the load/store to be unaligned. This 295 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 296 * tbaa metadata from being generated based on the struct and the type of the 297 * field inside of it. 298 */ 299 300 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 301 _loadbe_i16(void const * __P) { 302 struct __loadu_i16 { 303 short __v; 304 } __attribute__((__packed__, __may_alias__)); 305 return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); 306 } 307 308 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 309 _storebe_i16(void * __P, short __D) { 310 struct __storeu_i16 { 311 short __v; 312 } __attribute__((__packed__, __may_alias__)); 313 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); 314 } 315 316 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 317 _loadbe_i32(void const * __P) { 318 struct __loadu_i32 { 319 int __v; 320 } __attribute__((__packed__, __may_alias__)); 321 return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); 322 } 323 324 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 325 _storebe_i32(void * __P, int __D) { 326 struct __storeu_i32 { 327 int __v; 328 } __attribute__((__packed__, __may_alias__)); 329 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); 330 } 331 332 #ifdef __x86_64__ 333 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 334 _loadbe_i64(void const * __P) { 335 struct __loadu_i64 { 336 long long __v; 337 } __attribute__((__packed__, __may_alias__)); 338 return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); 339 } 340 341 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 342 _storebe_i64(void * __P, long long __D) { 343 struct __storeu_i64 { 344 long long __v; 345 } __attribute__((__packed__, __may_alias__)); 346 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); 347 } 348 #endif 349 #endif /* __MOVBE */ 350 351 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 352 #include <rtmintrin.h> 353 #include <xtestintrin.h> 354 #endif 355 356 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 357 #include <shaintrin.h> 358 #endif 359 360 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 361 #include <fxsrintrin.h> 362 #endif 363 364 /* No feature check desired due to internal MSC_VER checks */ 365 #include <xsaveintrin.h> 366 367 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 368 #include <xsaveoptintrin.h> 369 #endif 370 371 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 372 #include <xsavecintrin.h> 373 #endif 374 375 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 376 #include <xsavesintrin.h> 377 #endif 378 379 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__) 380 #include <cetintrin.h> 381 #endif 382 383 /* Some intrinsics inside adxintrin.h are available only on processors with ADX, 384 * whereas others are also available at all times. */ 385 #include <adxintrin.h> 386 387 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) 388 #include <rdseedintrin.h> 389 #endif 390 391 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) 392 #include <wbnoinvdintrin.h> 393 #endif 394 395 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) 396 #include <cldemoteintrin.h> 397 #endif 398 399 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) 400 #include <waitpkgintrin.h> 401 #endif 402 403 #if !defined(_MSC_VER) || __has_feature(modules) || \ 404 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 405 #include <movdirintrin.h> 406 #endif 407 408 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) 409 #include <pconfigintrin.h> 410 #endif 411 412 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) 413 #include <sgxintrin.h> 414 #endif 415 416 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) 417 #include <ptwriteintrin.h> 418 #endif 419 420 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__) 421 #include <invpcidintrin.h> 422 #endif 423 424 #if !defined(_MSC_VER) || __has_feature(modules) || \ 425 defined(__AVX512VP2INTERSECT__) 426 #include <avx512vp2intersectintrin.h> 427 #endif 428 429 #if !defined(_MSC_VER) || __has_feature(modules) || \ 430 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 431 #include <avx512vlvp2intersectintrin.h> 432 #endif 433 434 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__) 435 #include <enqcmdintrin.h> 436 #endif 437 438 #if defined(_MSC_VER) && __has_extension(gnu_asm) 439 /* Define the default attributes for these intrinsics */ 440 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 441 #ifdef __cplusplus 442 extern "C" { 443 #endif 444 /*----------------------------------------------------------------------------*\ 445 |* Interlocked Exchange HLE 446 \*----------------------------------------------------------------------------*/ 447 #if defined(__i386__) || defined(__x86_64__) 448 static __inline__ long __DEFAULT_FN_ATTRS 449 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 450 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 451 : "+r" (_Value), "+m" (*_Target) :: "memory"); 452 return _Value; 453 } 454 static __inline__ long __DEFAULT_FN_ATTRS 455 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 456 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 457 : "+r" (_Value), "+m" (*_Target) :: "memory"); 458 return _Value; 459 } 460 #endif 461 #if defined(__x86_64__) 462 static __inline__ __int64 __DEFAULT_FN_ATTRS 463 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 464 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 465 : "+r" (_Value), "+m" (*_Target) :: "memory"); 466 return _Value; 467 } 468 static __inline__ __int64 __DEFAULT_FN_ATTRS 469 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 470 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 471 : "+r" (_Value), "+m" (*_Target) :: "memory"); 472 return _Value; 473 } 474 #endif 475 /*----------------------------------------------------------------------------*\ 476 |* Interlocked Compare Exchange HLE 477 \*----------------------------------------------------------------------------*/ 478 #if defined(__i386__) || defined(__x86_64__) 479 static __inline__ long __DEFAULT_FN_ATTRS 480 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 481 long _Exchange, long _Comparand) { 482 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 483 : "+a" (_Comparand), "+m" (*_Destination) 484 : "r" (_Exchange) : "memory"); 485 return _Comparand; 486 } 487 static __inline__ long __DEFAULT_FN_ATTRS 488 _InterlockedCompareExchange_HLERelease(long volatile *_Destination, 489 long _Exchange, long _Comparand) { 490 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 491 : "+a" (_Comparand), "+m" (*_Destination) 492 : "r" (_Exchange) : "memory"); 493 return _Comparand; 494 } 495 #endif 496 #if defined(__x86_64__) 497 static __inline__ __int64 __DEFAULT_FN_ATTRS 498 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 499 __int64 _Exchange, __int64 _Comparand) { 500 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 501 : "+a" (_Comparand), "+m" (*_Destination) 502 : "r" (_Exchange) : "memory"); 503 return _Comparand; 504 } 505 static __inline__ __int64 __DEFAULT_FN_ATTRS 506 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 507 __int64 _Exchange, __int64 _Comparand) { 508 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 509 : "+a" (_Comparand), "+m" (*_Destination) 510 : "r" (_Exchange) : "memory"); 511 return _Comparand; 512 } 513 #endif 514 #ifdef __cplusplus 515 } 516 #endif 517 518 #undef __DEFAULT_FN_ATTRS 519 520 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 521 522 #endif /* __IMMINTRIN_H */ 523