10b57cec5SDimitry Andric /*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __IFMAVLINTRIN_H 150b57cec5SDimitry Andric #define __IFMAVLINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512ifma,avx512vl,no-evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(128))) 22*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 23*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 24*5f757f3fSDimitry Andric __target__("avx512ifma,avx512vl,no-evex512"), \ 25*5f757f3fSDimitry Andric __min_vector_width__(256))) 260b57cec5SDimitry Andric 27bdd1243dSDimitry Andric #define _mm_madd52hi_epu64(X, Y, Z) \ 28bdd1243dSDimitry Andric ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ 29bdd1243dSDimitry Andric (__v2di)(Z))) 300b57cec5SDimitry Andric 31bdd1243dSDimitry Andric #define _mm256_madd52hi_epu64(X, Y, Z) \ 32bdd1243dSDimitry Andric ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \ 33bdd1243dSDimitry Andric (__v4di)(Z))) 340b57cec5SDimitry Andric 35bdd1243dSDimitry Andric #define _mm_madd52lo_epu64(X, Y, Z) \ 36bdd1243dSDimitry Andric ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \ 37bdd1243dSDimitry Andric (__v2di)(Z))) 38bdd1243dSDimitry Andric 39bdd1243dSDimitry Andric #define _mm256_madd52lo_epu64(X, Y, Z) \ 40bdd1243dSDimitry Andric ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \ 41bdd1243dSDimitry Andric (__v4di)(Z))) 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 440b57cec5SDimitry Andric _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 450b57cec5SDimitry Andric { 460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 470b57cec5SDimitry Andric (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), 480b57cec5SDimitry Andric (__v2di)__W); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 520b57cec5SDimitry Andric _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 530b57cec5SDimitry Andric { 540b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 550b57cec5SDimitry Andric (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), 560b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 600b57cec5SDimitry Andric _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 610b57cec5SDimitry Andric { 620b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 630b57cec5SDimitry Andric (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), 640b57cec5SDimitry Andric (__v4di)__W); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 680b57cec5SDimitry Andric _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 690b57cec5SDimitry Andric { 700b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 710b57cec5SDimitry Andric (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), 720b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 760b57cec5SDimitry Andric _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 770b57cec5SDimitry Andric { 780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 790b57cec5SDimitry Andric (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), 800b57cec5SDimitry Andric (__v2di)__W); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 840b57cec5SDimitry Andric _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 850b57cec5SDimitry Andric { 860b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 870b57cec5SDimitry Andric (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), 880b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 920b57cec5SDimitry Andric _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 930b57cec5SDimitry Andric { 940b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 950b57cec5SDimitry Andric (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), 960b57cec5SDimitry Andric (__v4di)__W); 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1000b57cec5SDimitry Andric _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 1010b57cec5SDimitry Andric { 1020b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 1030b57cec5SDimitry Andric (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), 1040b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 1090b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric #endif 112