/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
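
/* Each sub-header below is included unconditionally for non-MS-compatible
 * builds (and for modules builds); under MSVC compatibility it is only
 * included when the corresponding feature macro is defined. */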

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

/* No feature check desired due to internal checks */
#include <bmiintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
#endif // __RDPID__

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}

#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif
#endif /* __RDRND__ */
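
/* Illustrative usage note (not an intrinsic itself): each *_step intrinsic
 * returns 1 and stores a random value through its pointer argument on
 * success, or returns 0 if the hardware could not supply a value yet, so
 * callers typically retry:
 *
 *   unsigned int __r;
 *   while (!_rdrand32_step(&__r))
 *     ;  // retry until RDRAND succeeds
 */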

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}

#endif
#endif /* __FSGSBASE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below are to force the load/store to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
 * tbaa metadata from being generated based on the struct and the type of the
 * field inside of it.
 */

static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}

#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif
#endif /* __MOVBE */
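
/* Illustrative usage note (not an intrinsic itself): the load/store pairs
 * above combine a possibly-unaligned access with a byte swap, which is
 * convenient for big-endian (network byte order) fields in a byte buffer.
 * Assuming a hypothetical buffer pointer __buf:
 *
 *   int __len = _loadbe_i32(__buf);   // read a 32-bit big-endian field
 *   _storebe_i32(__buf, __len + 1);   // write it back in big-endian order
 */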

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif

/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are also available at all times. */
#include <adxintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif

#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
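/* The 0xf2 and 0xf3 bytes emitted below are the XACQUIRE and XRELEASE
 * Hardware Lock Elision hint prefixes; on processors without HLE support
 * they are ignored, so these sequences still execute as ordinary locked
 * (atomic) operations. */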
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */

#endif /* __IMMINTRIN_H */