/*===---- immintrin.h - Intel intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MMX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__SSE4_2__) || defined(__SSE4_1__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AES__) || defined(__PCLMUL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLFLUSHOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLWB__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__F16C__) #include #endif /* No feature check desired due to internal checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__BMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__LZCNT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__POPCNT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512F__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BITALG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512CD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VPOPCNTDQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512DQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512CD__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512ER__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512IFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXIFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512PF__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PKU__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VPCLMULQDQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VAES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__GFNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT8__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXNECONVERT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA512__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM4__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile /// /// This intrinsic corresponds to the RDPID instruction. /// /// \returns The 32-bit contents of the MSR. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void) { return __builtin_ia32_rdpid(); } #endif // __RDPID__ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDRND__) /// Returns a 16-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 16-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) { return (int)__builtin_ia32_rdrand16_step(__p); } /// Returns a 32-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 32-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand32_step(unsigned int *__p) { return (int)__builtin_ia32_rdrand32_step(__p); } /// Returns a 64-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 64-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand64_step(unsigned long long *__p) { #ifdef __x86_64__ return (int)__builtin_ia32_rdrand64_step(__p); #else // We need to emulate the functionality of 64-bit rdrand with 2 32-bit // rdrand instructions. unsigned int __lo, __hi; unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); if (__res_lo && __res_hi) { *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; return 1; } else { *__p = 0; return 0; } #endif } #endif /* __RDRND__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FSGSBASE__) #ifdef __x86_64__ /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The lower 32 bits of the FS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u32(void) { return __builtin_ia32_rdfsbase32(); } /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The contents of the FS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u64(void) { return __builtin_ia32_rdfsbase64(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The lower 32 bits of the GS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u32(void) { return __builtin_ia32_rdgsbase32(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The contents of the GS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u64(void) { return __builtin_ia32_rdgsbase64(); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u32(unsigned int __V) { __builtin_ia32_wrfsbase32(__V); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { __builtin_ia32_wrfsbase64(__V); } /// Modifies the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRGSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { __builtin_ia32_wrgsbase32(__V); } /// Modifies the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u64(unsigned long long __V) { __builtin_ia32_wrgsbase64(__V); } #endif #endif /* __FSGSBASE__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. The __may_alias__ prevents * tbaa metadata from being generated based on the struct and the type of the * field inside of it. */ static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i16(void const * __P) { struct __loadu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i16(void * __P, short __D) { struct __storeu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); } static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i32(void const * __P) { struct __loadu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i32(void * __P, int __D) { struct __storeu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); } #ifdef __x86_64__ static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i64(void const * __P) { struct __loadu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i64(void * __P, long long __D) { struct __storeu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif #endif /* __MOVBE */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RTM__) #include #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FXSR__) #include #endif /* No feature check desired due to internal MSC_VER checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEC__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHSTK__) #include #endif /* Intrinsics inside adcintrin.h are available at all times. */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__ADX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDSEED__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WBNOINVD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLDEMOTE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WAITPKG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVDIRI__) || defined(__MOVDIR64B__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PCONFIG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SGX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PTWRITE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__INVPCID__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) || defined(__WIDEKL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_COMPLEX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__ENQCMD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SERIALIZE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__TSXLDTRK__) #include #endif #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #ifdef __cplusplus extern "C" { #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLERelease(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #ifdef __cplusplus } #endif #undef __DEFAULT_FN_ATTRS #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ #endif /* __IMMINTRIN_H */