1 /*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __IFMAVLINTRIN_H 15 #define __IFMAVLINTRIN_H 16 17 /* Define the default attributes for the functions in this file. */ 18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) 19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) 20 21 22 23 static __inline__ __m128i __DEFAULT_FN_ATTRS128 24 _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) 25 { 26 return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y, 27 (__v2di) __Z); 28 } 29 30 static __inline__ __m128i __DEFAULT_FN_ATTRS128 31 _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 32 { 33 return (__m128i)__builtin_ia32_selectq_128(__M, 34 (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), 35 (__v2di)__W); 36 } 37 38 static __inline__ __m128i __DEFAULT_FN_ATTRS128 39 _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 40 { 41 return (__m128i)__builtin_ia32_selectq_128(__M, 42 (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), 43 (__v2di)_mm_setzero_si128()); 44 } 45 46 static __inline__ __m256i __DEFAULT_FN_ATTRS256 47 _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) 48 { 49 return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, 50 (__v4di)__Z); 51 } 52 53 static __inline__ __m256i __DEFAULT_FN_ATTRS256 54 _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 55 { 56 return (__m256i)__builtin_ia32_selectq_256(__M, 57 (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), 58 (__v4di)__W); 59 } 60 61 static __inline__ __m256i __DEFAULT_FN_ATTRS256 62 _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 63 { 64 return (__m256i)__builtin_ia32_selectq_256(__M, 65 (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), 66 (__v4di)_mm256_setzero_si256()); 67 } 68 69 static __inline__ __m128i __DEFAULT_FN_ATTRS128 70 _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) 71 { 72 return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, 73 (__v2di)__Z); 74 } 75 76 static __inline__ __m128i __DEFAULT_FN_ATTRS128 77 _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 78 { 79 return (__m128i)__builtin_ia32_selectq_128(__M, 80 (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), 81 (__v2di)__W); 82 } 83 84 static __inline__ __m128i __DEFAULT_FN_ATTRS128 85 _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 86 { 87 return (__m128i)__builtin_ia32_selectq_128(__M, 88 (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), 89 (__v2di)_mm_setzero_si128()); 90 } 91 92 static __inline__ __m256i __DEFAULT_FN_ATTRS256 93 _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) 94 { 95 return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, 96 (__v4di)__Z); 97 } 98 99 static __inline__ __m256i __DEFAULT_FN_ATTRS256 100 _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 101 { 102 return (__m256i)__builtin_ia32_selectq_256(__M, 103 (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), 104 (__v4di)__W); 105 } 106 107 static __inline__ __m256i __DEFAULT_FN_ATTRS256 108 _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 109 { 110 return (__m256i)__builtin_ia32_selectq_256(__M, 111 (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), 112 (__v4di)_mm256_setzero_si256()); 113 } 114 115 116 #undef __DEFAULT_FN_ATTRS128 117 #undef __DEFAULT_FN_ATTRS256 118 119 #endif 120