xref: /freebsd/contrib/llvm-project/clang/lib/Headers/avx512fintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
100b57cec5SDimitry Andric #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
110b57cec5SDimitry Andric #endif
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef __AVX512FINTRIN_H
140b57cec5SDimitry Andric #define __AVX512FINTRIN_H
150b57cec5SDimitry Andric 
/* Internal 64-byte vector views, one per element type:
 * 64 x char, 32 x short, 8 x double, 16 x float, 8 x long long, 16 x int. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
220b57cec5SDimitry Andric 
/* Unsigned types: 64-byte vector views with unsigned element types. */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
280b57cec5SDimitry Andric 
/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));
3204eeddc0SDimitry Andric 
/* Public 512-bit vector types (float, double, integer), 64 bytes wide and
 * declared with 64-byte alignment. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
360b57cec5SDimitry Andric 
/* Same 64-byte vector types, but declared with 1-byte alignment
 * (__aligned__(1)). */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
400b57cec5SDimitry Andric 
/* Mask types: one bit per lane for 8-lane and 16-lane operations. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
430b57cec5SDimitry Andric 
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
500b57cec5SDimitry Andric 
/* Constants for integer comparison predicates.
 * Enumerators are numbered consecutively from 0 (EQ = 0 ... NLE = 6);
 * _MM_CMPINT_GE and _MM_CMPINT_GT are macro aliases for NLT and NLE. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
630b57cec5SDimitry Andric 
/* 8-bit permutation immediates.
 * Each of the four letters in a name encodes a 2-bit field (A = 0, B = 1,
 * C = 2, D = 3); the leftmost letter occupies bits 7:6 and the rightmost
 * bits 1:0, e.g. _MM_PERM_ABCD = 0x1B and _MM_PERM_DCBA = 0xE4. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
1530b57cec5SDimitry Andric 
/* Mantissa normalization intervals; enumerators are numbered 0..3 in the
 * order listed. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
1610b57cec5SDimitry Andric 
/* Mantissa sign-handling selectors; enumerators are numbered 0..2 in the
 * order listed. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
1680b57cec5SDimitry Andric 
/* Define the default attributes for the functions in this file.
 *   __DEFAULT_FN_ATTRS512: target "avx512f,evex512", min vector width 512.
 *   __DEFAULT_FN_ATTRS128: target "avx512f,no-evex512", min vector width 128.
 *   __DEFAULT_FN_ATTRS:    target "avx512f,no-evex512", no width requirement.
 * All three also force inlining and suppress debug info. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512")))
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric /* Create vectors with repeated elements */
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric static  __inline __m512i __DEFAULT_FN_ATTRS512
1810b57cec5SDimitry Andric _mm512_setzero_si512(void)
1820b57cec5SDimitry Andric {
1830b57cec5SDimitry Andric   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
1840b57cec5SDimitry Andric }
1850b57cec5SDimitry Andric 
/* Alias: _mm512_setzero_epi32 expands to _mm512_setzero_si512. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
1890b57cec5SDimitry Andric _mm512_undefined_pd(void)
1900b57cec5SDimitry Andric {
1910b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_undef512();
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
1950b57cec5SDimitry Andric _mm512_undefined(void)
1960b57cec5SDimitry Andric {
1970b57cec5SDimitry Andric   return (__m512)__builtin_ia32_undef512();
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
2010b57cec5SDimitry Andric _mm512_undefined_ps(void)
2020b57cec5SDimitry Andric {
2030b57cec5SDimitry Andric   return (__m512)__builtin_ia32_undef512();
2040b57cec5SDimitry Andric }
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2070b57cec5SDimitry Andric _mm512_undefined_epi32(void)
2080b57cec5SDimitry Andric {
2090b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_undef512();
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2130b57cec5SDimitry Andric _mm512_broadcastd_epi32 (__m128i __A)
2140b57cec5SDimitry Andric {
2150b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
2160b57cec5SDimitry Andric                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2200b57cec5SDimitry Andric _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
2210b57cec5SDimitry Andric {
2220b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__M,
2230b57cec5SDimitry Andric                                              (__v16si) _mm512_broadcastd_epi32(__A),
2240b57cec5SDimitry Andric                                              (__v16si) __O);
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2280b57cec5SDimitry Andric _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
2290b57cec5SDimitry Andric {
2300b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__M,
2310b57cec5SDimitry Andric                                              (__v16si) _mm512_broadcastd_epi32(__A),
2320b57cec5SDimitry Andric                                              (__v16si) _mm512_setzero_si512());
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2360b57cec5SDimitry Andric _mm512_broadcastq_epi64 (__m128i __A)
2370b57cec5SDimitry Andric {
2380b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
2390b57cec5SDimitry Andric                                           0, 0, 0, 0, 0, 0, 0, 0);
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2430b57cec5SDimitry Andric _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
2440b57cec5SDimitry Andric {
2450b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__M,
2460b57cec5SDimitry Andric                                              (__v8di) _mm512_broadcastq_epi64(__A),
2470b57cec5SDimitry Andric                                              (__v8di) __O);
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric 
2510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
2520b57cec5SDimitry Andric _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2530b57cec5SDimitry Andric {
2540b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__M,
2550b57cec5SDimitry Andric                                              (__v8di) _mm512_broadcastq_epi64(__A),
2560b57cec5SDimitry Andric                                              (__v8di) _mm512_setzero_si512());
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
2610b57cec5SDimitry Andric _mm512_setzero_ps(void)
2620b57cec5SDimitry Andric {
263bdd1243dSDimitry Andric   return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
264bdd1243dSDimitry Andric                                  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
2650b57cec5SDimitry Andric }
2660b57cec5SDimitry Andric 
/* Alias: _mm512_setzero expands to _mm512_setzero_ps. */
#define _mm512_setzero _mm512_setzero_ps
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric static  __inline __m512d __DEFAULT_FN_ATTRS512
2700b57cec5SDimitry Andric _mm512_setzero_pd(void)
2710b57cec5SDimitry Andric {
2720b57cec5SDimitry Andric   return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
2760b57cec5SDimitry Andric _mm512_set1_ps(float __w)
2770b57cec5SDimitry Andric {
2780b57cec5SDimitry Andric   return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
2790b57cec5SDimitry Andric                                  __w, __w, __w, __w, __w, __w, __w, __w  };
2800b57cec5SDimitry Andric }
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
2830b57cec5SDimitry Andric _mm512_set1_pd(double __w)
2840b57cec5SDimitry Andric {
2850b57cec5SDimitry Andric   return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
2860b57cec5SDimitry Andric }
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
2890b57cec5SDimitry Andric _mm512_set1_epi8(char __w)
2900b57cec5SDimitry Andric {
2910b57cec5SDimitry Andric   return __extension__ (__m512i)(__v64qi){
2920b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2930b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2940b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2950b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2960b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2970b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2980b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
2990b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w  };
3000b57cec5SDimitry Andric }
3010b57cec5SDimitry Andric 
3020b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3030b57cec5SDimitry Andric _mm512_set1_epi16(short __w)
3040b57cec5SDimitry Andric {
3050b57cec5SDimitry Andric   return __extension__ (__m512i)(__v32hi){
3060b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
3070b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
3080b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w,
3090b57cec5SDimitry Andric     __w, __w, __w, __w, __w, __w, __w, __w };
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3130b57cec5SDimitry Andric _mm512_set1_epi32(int __s)
3140b57cec5SDimitry Andric {
3150b57cec5SDimitry Andric   return __extension__ (__m512i)(__v16si){
3160b57cec5SDimitry Andric     __s, __s, __s, __s, __s, __s, __s, __s,
3170b57cec5SDimitry Andric     __s, __s, __s, __s, __s, __s, __s, __s };
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric 
3200b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3210b57cec5SDimitry Andric _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
3220b57cec5SDimitry Andric {
3230b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__M,
3240b57cec5SDimitry Andric                                              (__v16si)_mm512_set1_epi32(__A),
3250b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
3260b57cec5SDimitry Andric }
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3290b57cec5SDimitry Andric _mm512_set1_epi64(long long __d)
3300b57cec5SDimitry Andric {
3310b57cec5SDimitry Andric   return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
3320b57cec5SDimitry Andric }
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3350b57cec5SDimitry Andric _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
3360b57cec5SDimitry Andric {
3370b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__M,
3380b57cec5SDimitry Andric                                              (__v8di)_mm512_set1_epi64(__A),
3390b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
3400b57cec5SDimitry Andric }
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
3430b57cec5SDimitry Andric _mm512_broadcastss_ps(__m128 __A)
3440b57cec5SDimitry Andric {
3450b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
3460b57cec5SDimitry Andric                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3500b57cec5SDimitry Andric _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
3510b57cec5SDimitry Andric {
3520b57cec5SDimitry Andric   return __extension__ (__m512i)(__v16si)
3530b57cec5SDimitry Andric    { __D, __C, __B, __A, __D, __C, __B, __A,
3540b57cec5SDimitry Andric      __D, __C, __B, __A, __D, __C, __B, __A };
3550b57cec5SDimitry Andric }
3560b57cec5SDimitry Andric 
3570b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
3580b57cec5SDimitry Andric _mm512_set4_epi64 (long long __A, long long __B, long long __C,
3590b57cec5SDimitry Andric        long long __D)
3600b57cec5SDimitry Andric {
3610b57cec5SDimitry Andric   return __extension__ (__m512i) (__v8di)
3620b57cec5SDimitry Andric    { __D, __C, __B, __A, __D, __C, __B, __A };
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
3660b57cec5SDimitry Andric _mm512_set4_pd (double __A, double __B, double __C, double __D)
3670b57cec5SDimitry Andric {
3680b57cec5SDimitry Andric   return __extension__ (__m512d)
3690b57cec5SDimitry Andric    { __D, __C, __B, __A, __D, __C, __B, __A };
3700b57cec5SDimitry Andric }
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
3730b57cec5SDimitry Andric _mm512_set4_ps (float __A, float __B, float __C, float __D)
3740b57cec5SDimitry Andric {
3750b57cec5SDimitry Andric   return __extension__ (__m512)
3760b57cec5SDimitry Andric    { __D, __C, __B, __A, __D, __C, __B, __A,
3770b57cec5SDimitry Andric      __D, __C, __B, __A, __D, __C, __B, __A };
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric 
/* Reversed-argument forms of the _mm512_set4_* functions: the arguments are
 * forwarded in the opposite order, so e0 lands in element 0 of each
 * repeated group and e3 in element 3. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
3910b57cec5SDimitry Andric 
3920b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
3930b57cec5SDimitry Andric _mm512_broadcastsd_pd(__m128d __A)
3940b57cec5SDimitry Andric {
3950b57cec5SDimitry Andric   return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
3960b57cec5SDimitry Andric                                           0, 0, 0, 0, 0, 0, 0, 0);
3970b57cec5SDimitry Andric }
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric /* Cast between vector types */
4000b57cec5SDimitry Andric 
4010b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
4020b57cec5SDimitry Andric _mm512_castpd256_pd512(__m256d __a)
4030b57cec5SDimitry Andric {
40406c3fb27SDimitry Andric   return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
40506c3fb27SDimitry Andric                                  1, 2, 3, 4, 5, 6, 7);
4060b57cec5SDimitry Andric }
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
4090b57cec5SDimitry Andric _mm512_castps256_ps512(__m256 __a)
4100b57cec5SDimitry Andric {
41106c3fb27SDimitry Andric   return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
41206c3fb27SDimitry Andric                                  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4130b57cec5SDimitry Andric }
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric static __inline __m128d __DEFAULT_FN_ATTRS512
4160b57cec5SDimitry Andric _mm512_castpd512_pd128(__m512d __a)
4170b57cec5SDimitry Andric {
4180b57cec5SDimitry Andric   return __builtin_shufflevector(__a, __a, 0, 1);
4190b57cec5SDimitry Andric }
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric static __inline __m256d __DEFAULT_FN_ATTRS512
4220b57cec5SDimitry Andric _mm512_castpd512_pd256 (__m512d __A)
4230b57cec5SDimitry Andric {
4240b57cec5SDimitry Andric   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric static __inline __m128 __DEFAULT_FN_ATTRS512
4280b57cec5SDimitry Andric _mm512_castps512_ps128(__m512 __a)
4290b57cec5SDimitry Andric {
4300b57cec5SDimitry Andric   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
4310b57cec5SDimitry Andric }
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric static __inline __m256 __DEFAULT_FN_ATTRS512
4340b57cec5SDimitry Andric _mm512_castps512_ps256 (__m512 __A)
4350b57cec5SDimitry Andric {
4360b57cec5SDimitry Andric   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
4370b57cec5SDimitry Andric }
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
4400b57cec5SDimitry Andric _mm512_castpd_ps (__m512d __A)
4410b57cec5SDimitry Andric {
4420b57cec5SDimitry Andric   return (__m512) (__A);
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
4460b57cec5SDimitry Andric _mm512_castpd_si512 (__m512d __A)
4470b57cec5SDimitry Andric {
4480b57cec5SDimitry Andric   return (__m512i) (__A);
4490b57cec5SDimitry Andric }
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
4520b57cec5SDimitry Andric _mm512_castpd128_pd512 (__m128d __A)
4530b57cec5SDimitry Andric {
45406c3fb27SDimitry Andric   __m256d __B = __builtin_nondeterministic_value(__B);
45506c3fb27SDimitry Andric   return __builtin_shufflevector(
45606c3fb27SDimitry Andric       __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
45706c3fb27SDimitry Andric       __B, 0, 1, 2, 3, 4, 5, 6, 7);
4580b57cec5SDimitry Andric }
4590b57cec5SDimitry Andric 
4600b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
4610b57cec5SDimitry Andric _mm512_castps_pd (__m512 __A)
4620b57cec5SDimitry Andric {
4630b57cec5SDimitry Andric   return (__m512d) (__A);
4640b57cec5SDimitry Andric }
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
4670b57cec5SDimitry Andric _mm512_castps_si512 (__m512 __A)
4680b57cec5SDimitry Andric {
4690b57cec5SDimitry Andric   return (__m512i) (__A);
4700b57cec5SDimitry Andric }
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
4730b57cec5SDimitry Andric _mm512_castps128_ps512 (__m128 __A)
4740b57cec5SDimitry Andric {
47506c3fb27SDimitry Andric   __m256 __B = __builtin_nondeterministic_value(__B);
47606c3fb27SDimitry Andric   return __builtin_shufflevector(
47706c3fb27SDimitry Andric       __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
47806c3fb27SDimitry Andric       __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4790b57cec5SDimitry Andric }
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
4820b57cec5SDimitry Andric _mm512_castsi128_si512 (__m128i __A)
4830b57cec5SDimitry Andric {
48406c3fb27SDimitry Andric   __m256i __B = __builtin_nondeterministic_value(__B);
48506c3fb27SDimitry Andric   return __builtin_shufflevector(
48606c3fb27SDimitry Andric       __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
48706c3fb27SDimitry Andric       __B, 0, 1, 2, 3, 4, 5, 6, 7);
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
4910b57cec5SDimitry Andric _mm512_castsi256_si512 (__m256i __A)
4920b57cec5SDimitry Andric {
49306c3fb27SDimitry Andric    return  __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
4970b57cec5SDimitry Andric _mm512_castsi512_ps (__m512i __A)
4980b57cec5SDimitry Andric {
4990b57cec5SDimitry Andric   return (__m512) (__A);
5000b57cec5SDimitry Andric }
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
5030b57cec5SDimitry Andric _mm512_castsi512_pd (__m512i __A)
5040b57cec5SDimitry Andric {
5050b57cec5SDimitry Andric   return (__m512d) (__A);
5060b57cec5SDimitry Andric }
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS512
5090b57cec5SDimitry Andric _mm512_castsi512_si128 (__m512i __A)
5100b57cec5SDimitry Andric {
5110b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
5120b57cec5SDimitry Andric }
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS512
5150b57cec5SDimitry Andric _mm512_castsi512_si256 (__m512i __A)
5160b57cec5SDimitry Andric {
5170b57cec5SDimitry Andric   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
5180b57cec5SDimitry Andric }
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5210b57cec5SDimitry Andric _mm512_int2mask(int __a)
5220b57cec5SDimitry Andric {
5230b57cec5SDimitry Andric   return (__mmask16)__a;
5240b57cec5SDimitry Andric }
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
5270b57cec5SDimitry Andric _mm512_mask2int(__mmask16 __a)
5280b57cec5SDimitry Andric {
5290b57cec5SDimitry Andric   return (int)__a;
5300b57cec5SDimitry Andric }
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [8 x double] from a
5330b57cec5SDimitry Andric ///    128-bit floating-point vector of [2 x double]. The lower 128 bits
5340b57cec5SDimitry Andric ///    contain the value of the source vector. The upper 384 bits are set
5350b57cec5SDimitry Andric ///    to zero.
5360b57cec5SDimitry Andric ///
5370b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5380b57cec5SDimitry Andric ///
5390b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
5400b57cec5SDimitry Andric ///
5410b57cec5SDimitry Andric /// \param __a
5420b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
5430b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
5440b57cec5SDimitry Andric ///    contain the value of the parameter. The upper 384 bits are set to zero.
5450b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
5460b57cec5SDimitry Andric _mm512_zextpd128_pd512(__m128d __a)
5470b57cec5SDimitry Andric {
5480b57cec5SDimitry Andric   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
5490b57cec5SDimitry Andric }
5500b57cec5SDimitry Andric 
5510b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [8 x double] from a
5520b57cec5SDimitry Andric ///    256-bit floating-point vector of [4 x double]. The lower 256 bits
5530b57cec5SDimitry Andric ///    contain the value of the source vector. The upper 256 bits are set
5540b57cec5SDimitry Andric ///    to zero.
5550b57cec5SDimitry Andric ///
5560b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5570b57cec5SDimitry Andric ///
5580b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
5590b57cec5SDimitry Andric ///
5600b57cec5SDimitry Andric /// \param __a
5610b57cec5SDimitry Andric ///    A 256-bit vector of [4 x double].
5620b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
5630b57cec5SDimitry Andric ///    contain the value of the parameter. The upper 256 bits are set to zero.
5640b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
5650b57cec5SDimitry Andric _mm512_zextpd256_pd512(__m256d __a)
5660b57cec5SDimitry Andric {
5670b57cec5SDimitry Andric   return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
5680b57cec5SDimitry Andric }
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [16 x float] from a
5710b57cec5SDimitry Andric ///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
5720b57cec5SDimitry Andric ///    the value of the source vector. The upper 384 bits are set to zero.
5730b57cec5SDimitry Andric ///
5740b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5750b57cec5SDimitry Andric ///
5760b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
5770b57cec5SDimitry Andric ///
5780b57cec5SDimitry Andric /// \param __a
5790b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float].
5800b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
5810b57cec5SDimitry Andric ///    contain the value of the parameter. The upper 384 bits are set to zero.
5820b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
5830b57cec5SDimitry Andric _mm512_zextps128_ps512(__m128 __a)
5840b57cec5SDimitry Andric {
5850b57cec5SDimitry Andric   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
5860b57cec5SDimitry Andric }
5870b57cec5SDimitry Andric 
5880b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [16 x float] from a
5890b57cec5SDimitry Andric ///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
5900b57cec5SDimitry Andric ///    the value of the source vector. The upper 256 bits are set to zero.
5910b57cec5SDimitry Andric ///
5920b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5930b57cec5SDimitry Andric ///
5940b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
5950b57cec5SDimitry Andric ///
5960b57cec5SDimitry Andric /// \param __a
5970b57cec5SDimitry Andric ///    A 256-bit vector of [8 x float].
5980b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
5990b57cec5SDimitry Andric ///    contain the value of the parameter. The upper 256 bits are set to zero.
6000b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
6010b57cec5SDimitry Andric _mm512_zextps256_ps512(__m256 __a)
6020b57cec5SDimitry Andric {
6030b57cec5SDimitry Andric   return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6040b57cec5SDimitry Andric }
6050b57cec5SDimitry Andric 
6060b57cec5SDimitry Andric /// Constructs a 512-bit integer vector from a 128-bit integer vector.
6070b57cec5SDimitry Andric ///    The lower 128 bits contain the value of the source vector. The upper
6080b57cec5SDimitry Andric ///    384 bits are set to zero.
6090b57cec5SDimitry Andric ///
6100b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6110b57cec5SDimitry Andric ///
6120b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
6130b57cec5SDimitry Andric ///
6140b57cec5SDimitry Andric /// \param __a
6150b57cec5SDimitry Andric ///    A 128-bit integer vector.
6160b57cec5SDimitry Andric /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
6170b57cec5SDimitry Andric ///    the parameter. The upper 384 bits are set to zero.
6180b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
6190b57cec5SDimitry Andric _mm512_zextsi128_si512(__m128i __a)
6200b57cec5SDimitry Andric {
6210b57cec5SDimitry Andric   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
6220b57cec5SDimitry Andric }
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric /// Constructs a 512-bit integer vector from a 256-bit integer vector.
6250b57cec5SDimitry Andric ///    The lower 256 bits contain the value of the source vector. The upper
6260b57cec5SDimitry Andric ///    256 bits are set to zero.
6270b57cec5SDimitry Andric ///
6280b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6290b57cec5SDimitry Andric ///
6300b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
6310b57cec5SDimitry Andric ///
6320b57cec5SDimitry Andric /// \param __a
6330b57cec5SDimitry Andric ///    A 256-bit integer vector.
6340b57cec5SDimitry Andric /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
6350b57cec5SDimitry Andric ///    the parameter. The upper 256 bits are set to zero.
6360b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
6370b57cec5SDimitry Andric _mm512_zextsi256_si512(__m256i __a)
6380b57cec5SDimitry Andric {
6390b57cec5SDimitry Andric   return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
6400b57cec5SDimitry Andric }
6410b57cec5SDimitry Andric 
6420b57cec5SDimitry Andric /* Bitwise operators */
6430b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6440b57cec5SDimitry Andric _mm512_and_epi32(__m512i __a, __m512i __b)
6450b57cec5SDimitry Andric {
6460b57cec5SDimitry Andric   return (__m512i)((__v16su)__a & (__v16su)__b);
6470b57cec5SDimitry Andric }
6480b57cec5SDimitry Andric 
6490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6500b57cec5SDimitry Andric _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
6510b57cec5SDimitry Andric {
6520b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
6530b57cec5SDimitry Andric                 (__v16si) _mm512_and_epi32(__a, __b),
6540b57cec5SDimitry Andric                 (__v16si) __src);
6550b57cec5SDimitry Andric }
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6580b57cec5SDimitry Andric _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
6590b57cec5SDimitry Andric {
6600b57cec5SDimitry Andric   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
6610b57cec5SDimitry Andric                                          __k, __a, __b);
6620b57cec5SDimitry Andric }
6630b57cec5SDimitry Andric 
6640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6650b57cec5SDimitry Andric _mm512_and_epi64(__m512i __a, __m512i __b)
6660b57cec5SDimitry Andric {
6670b57cec5SDimitry Andric   return (__m512i)((__v8du)__a & (__v8du)__b);
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6710b57cec5SDimitry Andric _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
6720b57cec5SDimitry Andric {
6730b57cec5SDimitry Andric     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
6740b57cec5SDimitry Andric                 (__v8di) _mm512_and_epi64(__a, __b),
6750b57cec5SDimitry Andric                 (__v8di) __src);
6760b57cec5SDimitry Andric }
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6790b57cec5SDimitry Andric _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
6800b57cec5SDimitry Andric {
6810b57cec5SDimitry Andric   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
6820b57cec5SDimitry Andric                                          __k, __a, __b);
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric 
6850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6860b57cec5SDimitry Andric _mm512_andnot_si512 (__m512i __A, __m512i __B)
6870b57cec5SDimitry Andric {
6880b57cec5SDimitry Andric   return (__m512i)(~(__v8du)__A & (__v8du)__B);
6890b57cec5SDimitry Andric }
6900b57cec5SDimitry Andric 
6910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6920b57cec5SDimitry Andric _mm512_andnot_epi32 (__m512i __A, __m512i __B)
6930b57cec5SDimitry Andric {
6940b57cec5SDimitry Andric   return (__m512i)(~(__v16su)__A & (__v16su)__B);
6950b57cec5SDimitry Andric }
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
6980b57cec5SDimitry Andric _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6990b57cec5SDimitry Andric {
7000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
7010b57cec5SDimitry Andric                                          (__v16si)_mm512_andnot_epi32(__A, __B),
7020b57cec5SDimitry Andric                                          (__v16si)__W);
7030b57cec5SDimitry Andric }
7040b57cec5SDimitry Andric 
7050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7060b57cec5SDimitry Andric _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
7070b57cec5SDimitry Andric {
7080b57cec5SDimitry Andric   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
7090b57cec5SDimitry Andric                                            __U, __A, __B);
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7130b57cec5SDimitry Andric _mm512_andnot_epi64(__m512i __A, __m512i __B)
7140b57cec5SDimitry Andric {
7150b57cec5SDimitry Andric   return (__m512i)(~(__v8du)__A & (__v8du)__B);
7160b57cec5SDimitry Andric }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7190b57cec5SDimitry Andric _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7200b57cec5SDimitry Andric {
7210b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
7220b57cec5SDimitry Andric                                           (__v8di)_mm512_andnot_epi64(__A, __B),
7230b57cec5SDimitry Andric                                           (__v8di)__W);
7240b57cec5SDimitry Andric }
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7270b57cec5SDimitry Andric _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
7280b57cec5SDimitry Andric {
7290b57cec5SDimitry Andric   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
7300b57cec5SDimitry Andric                                            __U, __A, __B);
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric 
7330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7340b57cec5SDimitry Andric _mm512_or_epi32(__m512i __a, __m512i __b)
7350b57cec5SDimitry Andric {
7360b57cec5SDimitry Andric   return (__m512i)((__v16su)__a | (__v16su)__b);
7370b57cec5SDimitry Andric }
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7400b57cec5SDimitry Andric _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
7410b57cec5SDimitry Andric {
7420b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
7430b57cec5SDimitry Andric                                              (__v16si)_mm512_or_epi32(__a, __b),
7440b57cec5SDimitry Andric                                              (__v16si)__src);
7450b57cec5SDimitry Andric }
7460b57cec5SDimitry Andric 
7470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7480b57cec5SDimitry Andric _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
7490b57cec5SDimitry Andric {
7500b57cec5SDimitry Andric   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
7510b57cec5SDimitry Andric }
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7540b57cec5SDimitry Andric _mm512_or_epi64(__m512i __a, __m512i __b)
7550b57cec5SDimitry Andric {
7560b57cec5SDimitry Andric   return (__m512i)((__v8du)__a | (__v8du)__b);
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric 
7590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7600b57cec5SDimitry Andric _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
7610b57cec5SDimitry Andric {
7620b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
7630b57cec5SDimitry Andric                                              (__v8di)_mm512_or_epi64(__a, __b),
7640b57cec5SDimitry Andric                                              (__v8di)__src);
7650b57cec5SDimitry Andric }
7660b57cec5SDimitry Andric 
7670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7680b57cec5SDimitry Andric _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
7690b57cec5SDimitry Andric {
7700b57cec5SDimitry Andric   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
7710b57cec5SDimitry Andric }
7720b57cec5SDimitry Andric 
7730b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7740b57cec5SDimitry Andric _mm512_xor_epi32(__m512i __a, __m512i __b)
7750b57cec5SDimitry Andric {
7760b57cec5SDimitry Andric   return (__m512i)((__v16su)__a ^ (__v16su)__b);
7770b57cec5SDimitry Andric }
7780b57cec5SDimitry Andric 
7790b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7800b57cec5SDimitry Andric _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
7810b57cec5SDimitry Andric {
7820b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
7830b57cec5SDimitry Andric                                             (__v16si)_mm512_xor_epi32(__a, __b),
7840b57cec5SDimitry Andric                                             (__v16si)__src);
7850b57cec5SDimitry Andric }
7860b57cec5SDimitry Andric 
7870b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7880b57cec5SDimitry Andric _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
7890b57cec5SDimitry Andric {
7900b57cec5SDimitry Andric   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
7910b57cec5SDimitry Andric }
7920b57cec5SDimitry Andric 
7930b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
7940b57cec5SDimitry Andric _mm512_xor_epi64(__m512i __a, __m512i __b)
7950b57cec5SDimitry Andric {
7960b57cec5SDimitry Andric   return (__m512i)((__v8du)__a ^ (__v8du)__b);
7970b57cec5SDimitry Andric }
7980b57cec5SDimitry Andric 
7990b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8000b57cec5SDimitry Andric _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
8010b57cec5SDimitry Andric {
8020b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
8030b57cec5SDimitry Andric                                              (__v8di)_mm512_xor_epi64(__a, __b),
8040b57cec5SDimitry Andric                                              (__v8di)__src);
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric 
8070b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8080b57cec5SDimitry Andric _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
8090b57cec5SDimitry Andric {
8100b57cec5SDimitry Andric   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
8110b57cec5SDimitry Andric }
8120b57cec5SDimitry Andric 
8130b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8140b57cec5SDimitry Andric _mm512_and_si512(__m512i __a, __m512i __b)
8150b57cec5SDimitry Andric {
8160b57cec5SDimitry Andric   return (__m512i)((__v8du)__a & (__v8du)__b);
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric 
8190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8200b57cec5SDimitry Andric _mm512_or_si512(__m512i __a, __m512i __b)
8210b57cec5SDimitry Andric {
8220b57cec5SDimitry Andric   return (__m512i)((__v8du)__a | (__v8du)__b);
8230b57cec5SDimitry Andric }
8240b57cec5SDimitry Andric 
8250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8260b57cec5SDimitry Andric _mm512_xor_si512(__m512i __a, __m512i __b)
8270b57cec5SDimitry Andric {
8280b57cec5SDimitry Andric   return (__m512i)((__v8du)__a ^ (__v8du)__b);
8290b57cec5SDimitry Andric }
8300b57cec5SDimitry Andric 
8310b57cec5SDimitry Andric /* Arithmetic */
8320b57cec5SDimitry Andric 
8330b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
8340b57cec5SDimitry Andric _mm512_add_pd(__m512d __a, __m512d __b)
8350b57cec5SDimitry Andric {
8360b57cec5SDimitry Andric   return (__m512d)((__v8df)__a + (__v8df)__b);
8370b57cec5SDimitry Andric }
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
8400b57cec5SDimitry Andric _mm512_add_ps(__m512 __a, __m512 __b)
8410b57cec5SDimitry Andric {
8420b57cec5SDimitry Andric   return (__m512)((__v16sf)__a + (__v16sf)__b);
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric 
8450b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
8460b57cec5SDimitry Andric _mm512_mul_pd(__m512d __a, __m512d __b)
8470b57cec5SDimitry Andric {
8480b57cec5SDimitry Andric   return (__m512d)((__v8df)__a * (__v8df)__b);
8490b57cec5SDimitry Andric }
8500b57cec5SDimitry Andric 
8510b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
8520b57cec5SDimitry Andric _mm512_mul_ps(__m512 __a, __m512 __b)
8530b57cec5SDimitry Andric {
8540b57cec5SDimitry Andric   return (__m512)((__v16sf)__a * (__v16sf)__b);
8550b57cec5SDimitry Andric }
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
8580b57cec5SDimitry Andric _mm512_sub_pd(__m512d __a, __m512d __b)
8590b57cec5SDimitry Andric {
8600b57cec5SDimitry Andric   return (__m512d)((__v8df)__a - (__v8df)__b);
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric 
8630b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
8640b57cec5SDimitry Andric _mm512_sub_ps(__m512 __a, __m512 __b)
8650b57cec5SDimitry Andric {
8660b57cec5SDimitry Andric   return (__m512)((__v16sf)__a - (__v16sf)__b);
8670b57cec5SDimitry Andric }
8680b57cec5SDimitry Andric 
8690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8700b57cec5SDimitry Andric _mm512_add_epi64 (__m512i __A, __m512i __B)
8710b57cec5SDimitry Andric {
8720b57cec5SDimitry Andric   return (__m512i) ((__v8du) __A + (__v8du) __B);
8730b57cec5SDimitry Andric }
8740b57cec5SDimitry Andric 
8750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8760b57cec5SDimitry Andric _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8770b57cec5SDimitry Andric {
8780b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
8790b57cec5SDimitry Andric                                              (__v8di)_mm512_add_epi64(__A, __B),
8800b57cec5SDimitry Andric                                              (__v8di)__W);
8810b57cec5SDimitry Andric }
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8840b57cec5SDimitry Andric _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
8850b57cec5SDimitry Andric {
8860b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
8870b57cec5SDimitry Andric                                              (__v8di)_mm512_add_epi64(__A, __B),
8880b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
8890b57cec5SDimitry Andric }
8900b57cec5SDimitry Andric 
8910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8920b57cec5SDimitry Andric _mm512_sub_epi64 (__m512i __A, __m512i __B)
8930b57cec5SDimitry Andric {
8940b57cec5SDimitry Andric   return (__m512i) ((__v8du) __A - (__v8du) __B);
8950b57cec5SDimitry Andric }
8960b57cec5SDimitry Andric 
8970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
8980b57cec5SDimitry Andric _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8990b57cec5SDimitry Andric {
9000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
9010b57cec5SDimitry Andric                                              (__v8di)_mm512_sub_epi64(__A, __B),
9020b57cec5SDimitry Andric                                              (__v8di)__W);
9030b57cec5SDimitry Andric }
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9060b57cec5SDimitry Andric _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
9070b57cec5SDimitry Andric {
9080b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
9090b57cec5SDimitry Andric                                              (__v8di)_mm512_sub_epi64(__A, __B),
9100b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
9110b57cec5SDimitry Andric }
9120b57cec5SDimitry Andric 
9130b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9140b57cec5SDimitry Andric _mm512_add_epi32 (__m512i __A, __m512i __B)
9150b57cec5SDimitry Andric {
9160b57cec5SDimitry Andric   return (__m512i) ((__v16su) __A + (__v16su) __B);
9170b57cec5SDimitry Andric }
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9200b57cec5SDimitry Andric _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
9210b57cec5SDimitry Andric {
9220b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
9230b57cec5SDimitry Andric                                              (__v16si)_mm512_add_epi32(__A, __B),
9240b57cec5SDimitry Andric                                              (__v16si)__W);
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric 
9270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9280b57cec5SDimitry Andric _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
9290b57cec5SDimitry Andric {
9300b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
9310b57cec5SDimitry Andric                                              (__v16si)_mm512_add_epi32(__A, __B),
9320b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
9330b57cec5SDimitry Andric }
9340b57cec5SDimitry Andric 
9350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9360b57cec5SDimitry Andric _mm512_sub_epi32 (__m512i __A, __m512i __B)
9370b57cec5SDimitry Andric {
9380b57cec5SDimitry Andric   return (__m512i) ((__v16su) __A - (__v16su) __B);
9390b57cec5SDimitry Andric }
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9420b57cec5SDimitry Andric _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
9430b57cec5SDimitry Andric {
9440b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
9450b57cec5SDimitry Andric                                              (__v16si)_mm512_sub_epi32(__A, __B),
9460b57cec5SDimitry Andric                                              (__v16si)__W);
9470b57cec5SDimitry Andric }
9480b57cec5SDimitry Andric 
9490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
9500b57cec5SDimitry Andric _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
9510b57cec5SDimitry Andric {
9520b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
9530b57cec5SDimitry Andric                                              (__v16si)_mm512_sub_epi32(__A, __B),
9540b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
9550b57cec5SDimitry Andric }
9560b57cec5SDimitry Andric 
/* Forwards A and B (as [8 x double]) and the rounding argument R to
   __builtin_ia32_maxpd512. Kept as a macro so R reaches the builtin as
   written by the caller. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
9600b57cec5SDimitry Andric 
/* Masked form of _mm512_max_round_pd: __builtin_ia32_selectpd_512 chooses,
   per element under mask U, between the max result and W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_max_round_pd((A), (B), (R)), \
      (__v8df)(W)))
9650b57cec5SDimitry Andric 
/* Zeroing form of _mm512_max_round_pd: __builtin_ia32_selectpd_512 chooses,
   per element under mask U, between the max result and
   _mm512_setzero_pd(). */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_max_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
9700b57cec5SDimitry Andric 
9710b57cec5SDimitry Andric static  __inline__ __m512d __DEFAULT_FN_ATTRS512
9720b57cec5SDimitry Andric _mm512_max_pd(__m512d __A, __m512d __B)
9730b57cec5SDimitry Andric {
9740b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
9750b57cec5SDimitry Andric                                            _MM_FROUND_CUR_DIRECTION);
9760b57cec5SDimitry Andric }
9770b57cec5SDimitry Andric 
9780b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
9790b57cec5SDimitry Andric _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
9800b57cec5SDimitry Andric {
9810b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
9820b57cec5SDimitry Andric                                               (__v8df)_mm512_max_pd(__A, __B),
9830b57cec5SDimitry Andric                                               (__v8df)__W);
9840b57cec5SDimitry Andric }
9850b57cec5SDimitry Andric 
9860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
9870b57cec5SDimitry Andric _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
9880b57cec5SDimitry Andric {
9890b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
9900b57cec5SDimitry Andric                                               (__v8df)_mm512_max_pd(__A, __B),
9910b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
9920b57cec5SDimitry Andric }
9930b57cec5SDimitry Andric 
/* Forwards A and B (as [16 x float]) and the rounding argument R to
   __builtin_ia32_maxps512. Kept as a macro so R reaches the builtin as
   written by the caller. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
9970b57cec5SDimitry Andric 
/* Masked form of _mm512_max_round_ps: __builtin_ia32_selectps_512 chooses,
   per element under mask U, between the max result and W. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
      (__v16sf)(W)))
10020b57cec5SDimitry Andric 
/* Zeroing form of _mm512_max_round_ps: __builtin_ia32_selectps_512 chooses,
   per element under mask U, between the max result and
   _mm512_setzero_ps(). */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
10070b57cec5SDimitry Andric 
10080b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
10090b57cec5SDimitry Andric _mm512_max_ps(__m512 __A, __m512 __B)
10100b57cec5SDimitry Andric {
10110b57cec5SDimitry Andric   return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
10120b57cec5SDimitry Andric                                           _MM_FROUND_CUR_DIRECTION);
10130b57cec5SDimitry Andric }
10140b57cec5SDimitry Andric 
10150b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
10160b57cec5SDimitry Andric _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10170b57cec5SDimitry Andric {
10180b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
10190b57cec5SDimitry Andric                                              (__v16sf)_mm512_max_ps(__A, __B),
10200b57cec5SDimitry Andric                                              (__v16sf)__W);
10210b57cec5SDimitry Andric }
10220b57cec5SDimitry Andric 
10230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
10240b57cec5SDimitry Andric _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10250b57cec5SDimitry Andric {
10260b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
10270b57cec5SDimitry Andric                                              (__v16sf)_mm512_max_ps(__A, __B),
10280b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
10290b57cec5SDimitry Andric }
10300b57cec5SDimitry Andric 
10310b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
10320b57cec5SDimitry Andric _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
10330b57cec5SDimitry Andric   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
10340b57cec5SDimitry Andric                 (__v4sf) __B,
10350b57cec5SDimitry Andric                 (__v4sf) __W,
10360b57cec5SDimitry Andric                 (__mmask8) __U,
10370b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
10380b57cec5SDimitry Andric }
10390b57cec5SDimitry Andric 
10400b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
10410b57cec5SDimitry Andric _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
10420b57cec5SDimitry Andric   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
10430b57cec5SDimitry Andric                 (__v4sf) __B,
10440b57cec5SDimitry Andric                 (__v4sf)  _mm_setzero_ps (),
10450b57cec5SDimitry Andric                 (__mmask8) __U,
10460b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
10470b57cec5SDimitry Andric }
10480b57cec5SDimitry Andric 
/* Unmasked scalar float max: calls the maxss_round_mask builtin with an
 * all-ones mask, a zero fallback vector, and R as the control argument. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
10540b57cec5SDimitry Andric 
/* Masked scalar float max with explicit control argument R; W is the
 * fallback operand and U the mask handed to the builtin. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
10600b57cec5SDimitry Andric 
/* Zero-masked scalar float max with explicit control argument R; the
 * fallback operand is _mm_setzero_ps(). */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
10660b57cec5SDimitry Andric 
10670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
10680b57cec5SDimitry Andric _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
10690b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
10700b57cec5SDimitry Andric                 (__v2df) __B,
10710b57cec5SDimitry Andric                 (__v2df) __W,
10720b57cec5SDimitry Andric                 (__mmask8) __U,
10730b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
10740b57cec5SDimitry Andric }
10750b57cec5SDimitry Andric 
10760b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
10770b57cec5SDimitry Andric _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
10780b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
10790b57cec5SDimitry Andric                 (__v2df) __B,
10800b57cec5SDimitry Andric                 (__v2df)  _mm_setzero_pd (),
10810b57cec5SDimitry Andric                 (__mmask8) __U,
10820b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
10830b57cec5SDimitry Andric }
10840b57cec5SDimitry Andric 
/* Unmasked scalar double max: calls the maxsd_round_mask builtin with an
 * all-ones mask, a zero fallback vector, and R as the control argument. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
10900b57cec5SDimitry Andric 
/* Masked scalar double max with explicit control argument R; W is the
 * fallback operand and U the mask handed to the builtin. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
10960b57cec5SDimitry Andric 
/* Zero-masked scalar double max with explicit control argument R; the
 * fallback operand is _mm_setzero_pd(). */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
11020b57cec5SDimitry Andric 
11030b57cec5SDimitry Andric static __inline __m512i
11040b57cec5SDimitry Andric __DEFAULT_FN_ATTRS512
11050b57cec5SDimitry Andric _mm512_max_epi32(__m512i __A, __m512i __B)
11060b57cec5SDimitry Andric {
110704eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
11080b57cec5SDimitry Andric }
11090b57cec5SDimitry Andric 
11100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11110b57cec5SDimitry Andric _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11120b57cec5SDimitry Andric {
11130b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
11140b57cec5SDimitry Andric                                             (__v16si)_mm512_max_epi32(__A, __B),
11150b57cec5SDimitry Andric                                             (__v16si)__W);
11160b57cec5SDimitry Andric }
11170b57cec5SDimitry Andric 
11180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11190b57cec5SDimitry Andric _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11200b57cec5SDimitry Andric {
11210b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
11220b57cec5SDimitry Andric                                             (__v16si)_mm512_max_epi32(__A, __B),
11230b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512());
11240b57cec5SDimitry Andric }
11250b57cec5SDimitry Andric 
11260b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
11270b57cec5SDimitry Andric _mm512_max_epu32(__m512i __A, __m512i __B)
11280b57cec5SDimitry Andric {
112904eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
11300b57cec5SDimitry Andric }
11310b57cec5SDimitry Andric 
11320b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11330b57cec5SDimitry Andric _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11340b57cec5SDimitry Andric {
11350b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
11360b57cec5SDimitry Andric                                             (__v16si)_mm512_max_epu32(__A, __B),
11370b57cec5SDimitry Andric                                             (__v16si)__W);
11380b57cec5SDimitry Andric }
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11410b57cec5SDimitry Andric _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11420b57cec5SDimitry Andric {
11430b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
11440b57cec5SDimitry Andric                                             (__v16si)_mm512_max_epu32(__A, __B),
11450b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512());
11460b57cec5SDimitry Andric }
11470b57cec5SDimitry Andric 
11480b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
11490b57cec5SDimitry Andric _mm512_max_epi64(__m512i __A, __m512i __B)
11500b57cec5SDimitry Andric {
115104eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
11520b57cec5SDimitry Andric }
11530b57cec5SDimitry Andric 
11540b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11550b57cec5SDimitry Andric _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11560b57cec5SDimitry Andric {
11570b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
11580b57cec5SDimitry Andric                                              (__v8di)_mm512_max_epi64(__A, __B),
11590b57cec5SDimitry Andric                                              (__v8di)__W);
11600b57cec5SDimitry Andric }
11610b57cec5SDimitry Andric 
11620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11630b57cec5SDimitry Andric _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11640b57cec5SDimitry Andric {
11650b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
11660b57cec5SDimitry Andric                                              (__v8di)_mm512_max_epi64(__A, __B),
11670b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
11680b57cec5SDimitry Andric }
11690b57cec5SDimitry Andric 
11700b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
11710b57cec5SDimitry Andric _mm512_max_epu64(__m512i __A, __m512i __B)
11720b57cec5SDimitry Andric {
117304eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
11740b57cec5SDimitry Andric }
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11770b57cec5SDimitry Andric _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11780b57cec5SDimitry Andric {
11790b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
11800b57cec5SDimitry Andric                                              (__v8di)_mm512_max_epu64(__A, __B),
11810b57cec5SDimitry Andric                                              (__v8di)__W);
11820b57cec5SDimitry Andric }
11830b57cec5SDimitry Andric 
11840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
11850b57cec5SDimitry Andric _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11860b57cec5SDimitry Andric {
11870b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
11880b57cec5SDimitry Andric                                              (__v8di)_mm512_max_epu64(__A, __B),
11890b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
11900b57cec5SDimitry Andric }
11910b57cec5SDimitry Andric 
/* Minimum of two 8 x double vectors via the minpd512 builtin; R is forwarded
 * as the builtin's trailing int control argument. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
11950b57cec5SDimitry Andric 
/* Merge-masked form of _mm512_min_round_pd: lanes whose bit in U is set take
 * the min result, the other lanes are taken from W. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)(W)))
12000b57cec5SDimitry Andric 
/* Zero-masked form of _mm512_min_round_pd: lanes whose bit in U is set take
 * the min result, the other lanes come from _mm512_setzero_pd(). */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
12050b57cec5SDimitry Andric 
12060b57cec5SDimitry Andric static  __inline__ __m512d __DEFAULT_FN_ATTRS512
12070b57cec5SDimitry Andric _mm512_min_pd(__m512d __A, __m512d __B)
12080b57cec5SDimitry Andric {
12090b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
12100b57cec5SDimitry Andric                                            _MM_FROUND_CUR_DIRECTION);
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
12140b57cec5SDimitry Andric _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12150b57cec5SDimitry Andric {
12160b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
12170b57cec5SDimitry Andric                                               (__v8df)_mm512_min_pd(__A, __B),
12180b57cec5SDimitry Andric                                               (__v8df)__W);
12190b57cec5SDimitry Andric }
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
12220b57cec5SDimitry Andric _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12230b57cec5SDimitry Andric {
12240b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
12250b57cec5SDimitry Andric                                               (__v8df)_mm512_min_pd(__A, __B),
12260b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
12270b57cec5SDimitry Andric }
12280b57cec5SDimitry Andric 
/* Minimum of two 16 x float vectors via the minps512 builtin; R is forwarded
 * as the builtin's trailing int control argument. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
12320b57cec5SDimitry Andric 
/* Merge-masked form of _mm512_min_round_ps: lanes whose bit in U is set take
 * the min result, the other lanes are taken from W. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)(W)))
12370b57cec5SDimitry Andric 
/* Zero-masked form of _mm512_min_round_ps: lanes whose bit in U is set take
 * the min result, the other lanes come from _mm512_setzero_ps(). */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
12420b57cec5SDimitry Andric 
12430b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
12440b57cec5SDimitry Andric _mm512_min_ps(__m512 __A, __m512 __B)
12450b57cec5SDimitry Andric {
12460b57cec5SDimitry Andric   return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
12470b57cec5SDimitry Andric                                           _MM_FROUND_CUR_DIRECTION);
12480b57cec5SDimitry Andric }
12490b57cec5SDimitry Andric 
12500b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
12510b57cec5SDimitry Andric _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12520b57cec5SDimitry Andric {
12530b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
12540b57cec5SDimitry Andric                                              (__v16sf)_mm512_min_ps(__A, __B),
12550b57cec5SDimitry Andric                                              (__v16sf)__W);
12560b57cec5SDimitry Andric }
12570b57cec5SDimitry Andric 
12580b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
12590b57cec5SDimitry Andric _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12600b57cec5SDimitry Andric {
12610b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
12620b57cec5SDimitry Andric                                              (__v16sf)_mm512_min_ps(__A, __B),
12630b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
12640b57cec5SDimitry Andric }
12650b57cec5SDimitry Andric 
12660b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
12670b57cec5SDimitry Andric _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
12680b57cec5SDimitry Andric   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
12690b57cec5SDimitry Andric                 (__v4sf) __B,
12700b57cec5SDimitry Andric                 (__v4sf) __W,
12710b57cec5SDimitry Andric                 (__mmask8) __U,
12720b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
12730b57cec5SDimitry Andric }
12740b57cec5SDimitry Andric 
12750b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
12760b57cec5SDimitry Andric _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
12770b57cec5SDimitry Andric   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
12780b57cec5SDimitry Andric                 (__v4sf) __B,
12790b57cec5SDimitry Andric                 (__v4sf)  _mm_setzero_ps (),
12800b57cec5SDimitry Andric                 (__mmask8) __U,
12810b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
12820b57cec5SDimitry Andric }
12830b57cec5SDimitry Andric 
/* Unmasked scalar float min: calls the minss_round_mask builtin with an
 * all-ones mask, a zero fallback vector, and R as the control argument. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
12890b57cec5SDimitry Andric 
/* Masked scalar float min with explicit control argument R; W is the
 * fallback operand and U the mask handed to the builtin. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
12950b57cec5SDimitry Andric 
/* Zero-masked scalar float min with explicit control argument R; the
 * fallback operand is _mm_setzero_ps(). */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
13010b57cec5SDimitry Andric 
13020b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
13030b57cec5SDimitry Andric _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
13040b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
13050b57cec5SDimitry Andric                 (__v2df) __B,
13060b57cec5SDimitry Andric                 (__v2df) __W,
13070b57cec5SDimitry Andric                 (__mmask8) __U,
13080b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
13090b57cec5SDimitry Andric }
13100b57cec5SDimitry Andric 
13110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
13120b57cec5SDimitry Andric _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
13130b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
13140b57cec5SDimitry Andric                 (__v2df) __B,
13150b57cec5SDimitry Andric                 (__v2df)  _mm_setzero_pd (),
13160b57cec5SDimitry Andric                 (__mmask8) __U,
13170b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
13180b57cec5SDimitry Andric }
13190b57cec5SDimitry Andric 
/* Unmasked scalar double min: calls the minsd_round_mask builtin with an
 * all-ones mask, a zero fallback vector, and R as the control argument. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
13250b57cec5SDimitry Andric 
/* Masked scalar double min with explicit control argument R; W is the
 * fallback operand and U the mask handed to the builtin. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
13310b57cec5SDimitry Andric 
/* Zero-masked scalar double min with explicit control argument R; the
 * fallback operand is _mm_setzero_pd(). */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric static __inline __m512i
13390b57cec5SDimitry Andric __DEFAULT_FN_ATTRS512
13400b57cec5SDimitry Andric _mm512_min_epi32(__m512i __A, __m512i __B)
13410b57cec5SDimitry Andric {
134204eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
13430b57cec5SDimitry Andric }
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13460b57cec5SDimitry Andric _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
13470b57cec5SDimitry Andric {
13480b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
13490b57cec5SDimitry Andric                                             (__v16si)_mm512_min_epi32(__A, __B),
13500b57cec5SDimitry Andric                                             (__v16si)__W);
13510b57cec5SDimitry Andric }
13520b57cec5SDimitry Andric 
13530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13540b57cec5SDimitry Andric _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
13550b57cec5SDimitry Andric {
13560b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
13570b57cec5SDimitry Andric                                             (__v16si)_mm512_min_epi32(__A, __B),
13580b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512());
13590b57cec5SDimitry Andric }
13600b57cec5SDimitry Andric 
13610b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
13620b57cec5SDimitry Andric _mm512_min_epu32(__m512i __A, __m512i __B)
13630b57cec5SDimitry Andric {
136404eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
13650b57cec5SDimitry Andric }
13660b57cec5SDimitry Andric 
13670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13680b57cec5SDimitry Andric _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
13690b57cec5SDimitry Andric {
13700b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
13710b57cec5SDimitry Andric                                             (__v16si)_mm512_min_epu32(__A, __B),
13720b57cec5SDimitry Andric                                             (__v16si)__W);
13730b57cec5SDimitry Andric }
13740b57cec5SDimitry Andric 
13750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13760b57cec5SDimitry Andric _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
13770b57cec5SDimitry Andric {
13780b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
13790b57cec5SDimitry Andric                                             (__v16si)_mm512_min_epu32(__A, __B),
13800b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512());
13810b57cec5SDimitry Andric }
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
13840b57cec5SDimitry Andric _mm512_min_epi64(__m512i __A, __m512i __B)
13850b57cec5SDimitry Andric {
138604eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
13870b57cec5SDimitry Andric }
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13900b57cec5SDimitry Andric _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
13910b57cec5SDimitry Andric {
13920b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
13930b57cec5SDimitry Andric                                              (__v8di)_mm512_min_epi64(__A, __B),
13940b57cec5SDimitry Andric                                              (__v8di)__W);
13950b57cec5SDimitry Andric }
13960b57cec5SDimitry Andric 
13970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
13980b57cec5SDimitry Andric _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
13990b57cec5SDimitry Andric {
14000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14010b57cec5SDimitry Andric                                              (__v8di)_mm512_min_epi64(__A, __B),
14020b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
14030b57cec5SDimitry Andric }
14040b57cec5SDimitry Andric 
14050b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14060b57cec5SDimitry Andric _mm512_min_epu64(__m512i __A, __m512i __B)
14070b57cec5SDimitry Andric {
140804eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
14090b57cec5SDimitry Andric }
14100b57cec5SDimitry Andric 
14110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
14120b57cec5SDimitry Andric _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
14130b57cec5SDimitry Andric {
14140b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14150b57cec5SDimitry Andric                                              (__v8di)_mm512_min_epu64(__A, __B),
14160b57cec5SDimitry Andric                                              (__v8di)__W);
14170b57cec5SDimitry Andric }
14180b57cec5SDimitry Andric 
14190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
14200b57cec5SDimitry Andric _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
14210b57cec5SDimitry Andric {
14220b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14230b57cec5SDimitry Andric                                              (__v8di)_mm512_min_epu64(__A, __B),
14240b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
14250b57cec5SDimitry Andric }
14260b57cec5SDimitry Andric 
14270b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14280b57cec5SDimitry Andric _mm512_mul_epi32(__m512i __X, __m512i __Y)
14290b57cec5SDimitry Andric {
14300b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
14310b57cec5SDimitry Andric }
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14340b57cec5SDimitry Andric _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
14350b57cec5SDimitry Andric {
14360b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14370b57cec5SDimitry Andric                                              (__v8di)_mm512_mul_epi32(__X, __Y),
14380b57cec5SDimitry Andric                                              (__v8di)__W);
14390b57cec5SDimitry Andric }
14400b57cec5SDimitry Andric 
14410b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14420b57cec5SDimitry Andric _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
14430b57cec5SDimitry Andric {
14440b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14450b57cec5SDimitry Andric                                              (__v8di)_mm512_mul_epi32(__X, __Y),
14460b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512 ());
14470b57cec5SDimitry Andric }
14480b57cec5SDimitry Andric 
14490b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14500b57cec5SDimitry Andric _mm512_mul_epu32(__m512i __X, __m512i __Y)
14510b57cec5SDimitry Andric {
14520b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
14530b57cec5SDimitry Andric }
14540b57cec5SDimitry Andric 
14550b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14560b57cec5SDimitry Andric _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
14570b57cec5SDimitry Andric {
14580b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14590b57cec5SDimitry Andric                                              (__v8di)_mm512_mul_epu32(__X, __Y),
14600b57cec5SDimitry Andric                                              (__v8di)__W);
14610b57cec5SDimitry Andric }
14620b57cec5SDimitry Andric 
14630b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14640b57cec5SDimitry Andric _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
14650b57cec5SDimitry Andric {
14660b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
14670b57cec5SDimitry Andric                                              (__v8di)_mm512_mul_epu32(__X, __Y),
14680b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512 ());
14690b57cec5SDimitry Andric }
14700b57cec5SDimitry Andric 
14710b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14720b57cec5SDimitry Andric _mm512_mullo_epi32 (__m512i __A, __m512i __B)
14730b57cec5SDimitry Andric {
14740b57cec5SDimitry Andric   return (__m512i) ((__v16su) __A * (__v16su) __B);
14750b57cec5SDimitry Andric }
14760b57cec5SDimitry Andric 
14770b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14780b57cec5SDimitry Andric _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
14790b57cec5SDimitry Andric {
14800b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
14810b57cec5SDimitry Andric                                              (__v16si)_mm512_mullo_epi32(__A, __B),
14820b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
14830b57cec5SDimitry Andric }
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
14860b57cec5SDimitry Andric _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
14870b57cec5SDimitry Andric {
14880b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
14890b57cec5SDimitry Andric                                              (__v16si)_mm512_mullo_epi32(__A, __B),
14900b57cec5SDimitry Andric                                              (__v16si)__W);
14910b57cec5SDimitry Andric }
14920b57cec5SDimitry Andric 
14930b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
14940b57cec5SDimitry Andric _mm512_mullox_epi64 (__m512i __A, __m512i __B) {
14950b57cec5SDimitry Andric   return (__m512i) ((__v8du) __A * (__v8du) __B);
14960b57cec5SDimitry Andric }
14970b57cec5SDimitry Andric 
14980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
14990b57cec5SDimitry Andric _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
15000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
15010b57cec5SDimitry Andric                                              (__v8di)_mm512_mullox_epi64(__A, __B),
15020b57cec5SDimitry Andric                                              (__v8di)__W);
15030b57cec5SDimitry Andric }
15040b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_sqrtpd512 applied to A (as __v8df), with R cast
 * to int as the builtin's second argument. Kept as a single expression so
 * that R is passed straight through to the builtin. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512( \
      (__v8df)(__m512d)(A), \
      (int)(R)))
15070b57cec5SDimitry Andric 
/* Masked form of _mm512_sqrt_round_pd: its result and W (as __v8df) are
 * handed to __builtin_ia32_selectpd_512 together with U cast to __mmask8. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)(__m512d)(W)))
15120b57cec5SDimitry Andric 
/* Zero-source form of _mm512_sqrt_round_pd: its result and an all-zero
 * vector are handed to __builtin_ia32_selectpd_512 together with U cast to
 * __mmask8. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)_mm512_setzero_pd()))
15170b57cec5SDimitry Andric 
15180b57cec5SDimitry Andric static  __inline__ __m512d __DEFAULT_FN_ATTRS512
15190b57cec5SDimitry Andric _mm512_sqrt_pd(__m512d __A)
15200b57cec5SDimitry Andric {
15210b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
15220b57cec5SDimitry Andric                                            _MM_FROUND_CUR_DIRECTION);
15230b57cec5SDimitry Andric }
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
15260b57cec5SDimitry Andric _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
15270b57cec5SDimitry Andric {
15280b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
15290b57cec5SDimitry Andric                                               (__v8df)_mm512_sqrt_pd(__A),
15300b57cec5SDimitry Andric                                               (__v8df)__W);
15310b57cec5SDimitry Andric }
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
15340b57cec5SDimitry Andric _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
15350b57cec5SDimitry Andric {
15360b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
15370b57cec5SDimitry Andric                                               (__v8df)_mm512_sqrt_pd(__A),
15380b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
15390b57cec5SDimitry Andric }
15400b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_sqrtps512 applied to A (as __v16sf), with R cast
 * to int as the builtin's second argument. Kept as a single expression so
 * that R is passed straight through to the builtin. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512( \
      (__v16sf)(__m512)(A), \
      (int)(R)))
15430b57cec5SDimitry Andric 
/* Masked form of _mm512_sqrt_round_ps: its result and W (as __v16sf) are
 * handed to __builtin_ia32_selectps_512 together with U cast to __mmask16. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)(__m512)(W)))
15480b57cec5SDimitry Andric 
/* Zero-source form of _mm512_sqrt_round_ps: its result and an all-zero
 * vector are handed to __builtin_ia32_selectps_512 together with U cast to
 * __mmask16. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)_mm512_setzero_ps()))
15530b57cec5SDimitry Andric 
15540b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
15550b57cec5SDimitry Andric _mm512_sqrt_ps(__m512 __A)
15560b57cec5SDimitry Andric {
15570b57cec5SDimitry Andric   return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
15580b57cec5SDimitry Andric                                           _MM_FROUND_CUR_DIRECTION);
15590b57cec5SDimitry Andric }
15600b57cec5SDimitry Andric 
15610b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
15620b57cec5SDimitry Andric _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
15630b57cec5SDimitry Andric {
15640b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
15650b57cec5SDimitry Andric                                              (__v16sf)_mm512_sqrt_ps(__A),
15660b57cec5SDimitry Andric                                              (__v16sf)__W);
15670b57cec5SDimitry Andric }
15680b57cec5SDimitry Andric 
15690b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
15700b57cec5SDimitry Andric _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
15710b57cec5SDimitry Andric {
15720b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
15730b57cec5SDimitry Andric                                              (__v16sf)_mm512_sqrt_ps(__A),
15740b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
15750b57cec5SDimitry Andric }
15760b57cec5SDimitry Andric 
15770b57cec5SDimitry Andric static  __inline__ __m512d __DEFAULT_FN_ATTRS512
15780b57cec5SDimitry Andric _mm512_rsqrt14_pd(__m512d __A)
15790b57cec5SDimitry Andric {
15800b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
15810b57cec5SDimitry Andric                  (__v8df)
15820b57cec5SDimitry Andric                  _mm512_setzero_pd (),
15830b57cec5SDimitry Andric                  (__mmask8) -1);}
15840b57cec5SDimitry Andric 
15850b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
15860b57cec5SDimitry Andric _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
15870b57cec5SDimitry Andric {
15880b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
15890b57cec5SDimitry Andric                   (__v8df) __W,
15900b57cec5SDimitry Andric                   (__mmask8) __U);
15910b57cec5SDimitry Andric }
15920b57cec5SDimitry Andric 
15930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
15940b57cec5SDimitry Andric _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
15950b57cec5SDimitry Andric {
15960b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
15970b57cec5SDimitry Andric                   (__v8df)
15980b57cec5SDimitry Andric                   _mm512_setzero_pd (),
15990b57cec5SDimitry Andric                   (__mmask8) __U);
16000b57cec5SDimitry Andric }
16010b57cec5SDimitry Andric 
16020b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
16030b57cec5SDimitry Andric _mm512_rsqrt14_ps(__m512 __A)
16040b57cec5SDimitry Andric {
16050b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
16060b57cec5SDimitry Andric                 (__v16sf)
16070b57cec5SDimitry Andric                 _mm512_setzero_ps (),
16080b57cec5SDimitry Andric                 (__mmask16) -1);
16090b57cec5SDimitry Andric }
16100b57cec5SDimitry Andric 
16110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
16120b57cec5SDimitry Andric _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
16130b57cec5SDimitry Andric {
16140b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
16150b57cec5SDimitry Andric                  (__v16sf) __W,
16160b57cec5SDimitry Andric                  (__mmask16) __U);
16170b57cec5SDimitry Andric }
16180b57cec5SDimitry Andric 
16190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
16200b57cec5SDimitry Andric _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
16210b57cec5SDimitry Andric {
16220b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
16230b57cec5SDimitry Andric                  (__v16sf)
16240b57cec5SDimitry Andric                  _mm512_setzero_ps (),
16250b57cec5SDimitry Andric                  (__mmask16) __U);
16260b57cec5SDimitry Andric }
16270b57cec5SDimitry Andric 
16280b57cec5SDimitry Andric static  __inline__ __m128 __DEFAULT_FN_ATTRS128
16290b57cec5SDimitry Andric _mm_rsqrt14_ss(__m128 __A, __m128 __B)
16300b57cec5SDimitry Andric {
16310b57cec5SDimitry Andric   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
16320b57cec5SDimitry Andric              (__v4sf) __B,
16330b57cec5SDimitry Andric              (__v4sf)
16340b57cec5SDimitry Andric              _mm_setzero_ps (),
16350b57cec5SDimitry Andric              (__mmask8) -1);
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric 
16380b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
16390b57cec5SDimitry Andric _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
16400b57cec5SDimitry Andric {
16410b57cec5SDimitry Andric  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
16420b57cec5SDimitry Andric           (__v4sf) __B,
16430b57cec5SDimitry Andric           (__v4sf) __W,
16440b57cec5SDimitry Andric           (__mmask8) __U);
16450b57cec5SDimitry Andric }
16460b57cec5SDimitry Andric 
16470b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
16480b57cec5SDimitry Andric _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
16490b57cec5SDimitry Andric {
16500b57cec5SDimitry Andric  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
16510b57cec5SDimitry Andric           (__v4sf) __B,
16520b57cec5SDimitry Andric           (__v4sf) _mm_setzero_ps (),
16530b57cec5SDimitry Andric           (__mmask8) __U);
16540b57cec5SDimitry Andric }
16550b57cec5SDimitry Andric 
16560b57cec5SDimitry Andric static  __inline__ __m128d __DEFAULT_FN_ATTRS128
16570b57cec5SDimitry Andric _mm_rsqrt14_sd(__m128d __A, __m128d __B)
16580b57cec5SDimitry Andric {
16590b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
16600b57cec5SDimitry Andric               (__v2df) __B,
16610b57cec5SDimitry Andric               (__v2df)
16620b57cec5SDimitry Andric               _mm_setzero_pd (),
16630b57cec5SDimitry Andric               (__mmask8) -1);
16640b57cec5SDimitry Andric }
16650b57cec5SDimitry Andric 
16660b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
16670b57cec5SDimitry Andric _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
16680b57cec5SDimitry Andric {
16690b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
16700b57cec5SDimitry Andric           (__v2df) __B,
16710b57cec5SDimitry Andric           (__v2df) __W,
16720b57cec5SDimitry Andric           (__mmask8) __U);
16730b57cec5SDimitry Andric }
16740b57cec5SDimitry Andric 
16750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
16760b57cec5SDimitry Andric _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
16770b57cec5SDimitry Andric {
16780b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
16790b57cec5SDimitry Andric           (__v2df) __B,
16800b57cec5SDimitry Andric           (__v2df) _mm_setzero_pd (),
16810b57cec5SDimitry Andric           (__mmask8) __U);
16820b57cec5SDimitry Andric }
16830b57cec5SDimitry Andric 
16840b57cec5SDimitry Andric static  __inline__ __m512d __DEFAULT_FN_ATTRS512
16850b57cec5SDimitry Andric _mm512_rcp14_pd(__m512d __A)
16860b57cec5SDimitry Andric {
16870b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
16880b57cec5SDimitry Andric                (__v8df)
16890b57cec5SDimitry Andric                _mm512_setzero_pd (),
16900b57cec5SDimitry Andric                (__mmask8) -1);
16910b57cec5SDimitry Andric }
16920b57cec5SDimitry Andric 
16930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
16940b57cec5SDimitry Andric _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
16950b57cec5SDimitry Andric {
16960b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
16970b57cec5SDimitry Andric                 (__v8df) __W,
16980b57cec5SDimitry Andric                 (__mmask8) __U);
16990b57cec5SDimitry Andric }
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
17020b57cec5SDimitry Andric _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
17030b57cec5SDimitry Andric {
17040b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
17050b57cec5SDimitry Andric                 (__v8df)
17060b57cec5SDimitry Andric                 _mm512_setzero_pd (),
17070b57cec5SDimitry Andric                 (__mmask8) __U);
17080b57cec5SDimitry Andric }
17090b57cec5SDimitry Andric 
17100b57cec5SDimitry Andric static  __inline__ __m512 __DEFAULT_FN_ATTRS512
17110b57cec5SDimitry Andric _mm512_rcp14_ps(__m512 __A)
17120b57cec5SDimitry Andric {
17130b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
17140b57cec5SDimitry Andric               (__v16sf)
17150b57cec5SDimitry Andric               _mm512_setzero_ps (),
17160b57cec5SDimitry Andric               (__mmask16) -1);
17170b57cec5SDimitry Andric }
17180b57cec5SDimitry Andric 
17190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
17200b57cec5SDimitry Andric _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
17210b57cec5SDimitry Andric {
17220b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
17230b57cec5SDimitry Andric                    (__v16sf) __W,
17240b57cec5SDimitry Andric                    (__mmask16) __U);
17250b57cec5SDimitry Andric }
17260b57cec5SDimitry Andric 
17270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
17280b57cec5SDimitry Andric _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
17290b57cec5SDimitry Andric {
17300b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
17310b57cec5SDimitry Andric                    (__v16sf)
17320b57cec5SDimitry Andric                    _mm512_setzero_ps (),
17330b57cec5SDimitry Andric                    (__mmask16) __U);
17340b57cec5SDimitry Andric }
17350b57cec5SDimitry Andric 
17360b57cec5SDimitry Andric static  __inline__ __m128 __DEFAULT_FN_ATTRS128
17370b57cec5SDimitry Andric _mm_rcp14_ss(__m128 __A, __m128 __B)
17380b57cec5SDimitry Andric {
17390b57cec5SDimitry Andric   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
17400b57cec5SDimitry Andric                  (__v4sf) __B,
17410b57cec5SDimitry Andric                  (__v4sf)
17420b57cec5SDimitry Andric                  _mm_setzero_ps (),
17430b57cec5SDimitry Andric                  (__mmask8) -1);
17440b57cec5SDimitry Andric }
17450b57cec5SDimitry Andric 
17460b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
17470b57cec5SDimitry Andric _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
17480b57cec5SDimitry Andric {
17490b57cec5SDimitry Andric  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
17500b57cec5SDimitry Andric           (__v4sf) __B,
17510b57cec5SDimitry Andric           (__v4sf) __W,
17520b57cec5SDimitry Andric           (__mmask8) __U);
17530b57cec5SDimitry Andric }
17540b57cec5SDimitry Andric 
17550b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
17560b57cec5SDimitry Andric _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
17570b57cec5SDimitry Andric {
17580b57cec5SDimitry Andric  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
17590b57cec5SDimitry Andric           (__v4sf) __B,
17600b57cec5SDimitry Andric           (__v4sf) _mm_setzero_ps (),
17610b57cec5SDimitry Andric           (__mmask8) __U);
17620b57cec5SDimitry Andric }
17630b57cec5SDimitry Andric 
17640b57cec5SDimitry Andric static  __inline__ __m128d __DEFAULT_FN_ATTRS128
17650b57cec5SDimitry Andric _mm_rcp14_sd(__m128d __A, __m128d __B)
17660b57cec5SDimitry Andric {
17670b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
17680b57cec5SDimitry Andric             (__v2df) __B,
17690b57cec5SDimitry Andric             (__v2df)
17700b57cec5SDimitry Andric             _mm_setzero_pd (),
17710b57cec5SDimitry Andric             (__mmask8) -1);
17720b57cec5SDimitry Andric }
17730b57cec5SDimitry Andric 
17740b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
17750b57cec5SDimitry Andric _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
17760b57cec5SDimitry Andric {
17770b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
17780b57cec5SDimitry Andric           (__v2df) __B,
17790b57cec5SDimitry Andric           (__v2df) __W,
17800b57cec5SDimitry Andric           (__mmask8) __U);
17810b57cec5SDimitry Andric }
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
17840b57cec5SDimitry Andric _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
17850b57cec5SDimitry Andric {
17860b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
17870b57cec5SDimitry Andric           (__v2df) __B,
17880b57cec5SDimitry Andric           (__v2df) _mm_setzero_pd (),
17890b57cec5SDimitry Andric           (__mmask8) __U);
17900b57cec5SDimitry Andric }
17910b57cec5SDimitry Andric 
17920b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
17930b57cec5SDimitry Andric _mm512_floor_ps(__m512 __A)
17940b57cec5SDimitry Andric {
17950b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
17960b57cec5SDimitry Andric                                                   _MM_FROUND_FLOOR,
179781ad6265SDimitry Andric                                                   (__v16sf) __A, (unsigned short)-1,
17980b57cec5SDimitry Andric                                                   _MM_FROUND_CUR_DIRECTION);
17990b57cec5SDimitry Andric }
18000b57cec5SDimitry Andric 
18010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
18020b57cec5SDimitry Andric _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
18030b57cec5SDimitry Andric {
18040b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
18050b57cec5SDimitry Andric                    _MM_FROUND_FLOOR,
18060b57cec5SDimitry Andric                    (__v16sf) __W, __U,
18070b57cec5SDimitry Andric                    _MM_FROUND_CUR_DIRECTION);
18080b57cec5SDimitry Andric }
18090b57cec5SDimitry Andric 
18100b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
18110b57cec5SDimitry Andric _mm512_floor_pd(__m512d __A)
18120b57cec5SDimitry Andric {
18130b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
18140b57cec5SDimitry Andric                                                    _MM_FROUND_FLOOR,
181581ad6265SDimitry Andric                                                    (__v8df) __A, (unsigned char)-1,
18160b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
18170b57cec5SDimitry Andric }
18180b57cec5SDimitry Andric 
18190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
18200b57cec5SDimitry Andric _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
18210b57cec5SDimitry Andric {
18220b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
18230b57cec5SDimitry Andric                 _MM_FROUND_FLOOR,
18240b57cec5SDimitry Andric                 (__v8df) __W, __U,
18250b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
18260b57cec5SDimitry Andric }
18270b57cec5SDimitry Andric 
18280b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
18290b57cec5SDimitry Andric _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
18300b57cec5SDimitry Andric {
18310b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
18320b57cec5SDimitry Andric                    _MM_FROUND_CEIL,
18330b57cec5SDimitry Andric                    (__v16sf) __W, __U,
18340b57cec5SDimitry Andric                    _MM_FROUND_CUR_DIRECTION);
18350b57cec5SDimitry Andric }
18360b57cec5SDimitry Andric 
18370b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
18380b57cec5SDimitry Andric _mm512_ceil_ps(__m512 __A)
18390b57cec5SDimitry Andric {
18400b57cec5SDimitry Andric   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
18410b57cec5SDimitry Andric                                                   _MM_FROUND_CEIL,
184281ad6265SDimitry Andric                                                   (__v16sf) __A, (unsigned short)-1,
18430b57cec5SDimitry Andric                                                   _MM_FROUND_CUR_DIRECTION);
18440b57cec5SDimitry Andric }
18450b57cec5SDimitry Andric 
18460b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
18470b57cec5SDimitry Andric _mm512_ceil_pd(__m512d __A)
18480b57cec5SDimitry Andric {
18490b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
18500b57cec5SDimitry Andric                                                    _MM_FROUND_CEIL,
185181ad6265SDimitry Andric                                                    (__v8df) __A, (unsigned char)-1,
18520b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
18530b57cec5SDimitry Andric }
18540b57cec5SDimitry Andric 
18550b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
18560b57cec5SDimitry Andric _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
18570b57cec5SDimitry Andric {
18580b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
18590b57cec5SDimitry Andric                 _MM_FROUND_CEIL,
18600b57cec5SDimitry Andric                 (__v8df) __W, __U,
18610b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
18620b57cec5SDimitry Andric }
18630b57cec5SDimitry Andric 
18640b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
18650b57cec5SDimitry Andric _mm512_abs_epi64(__m512i __A)
18660b57cec5SDimitry Andric {
186704eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_abs((__v8di)__A);
18680b57cec5SDimitry Andric }
18690b57cec5SDimitry Andric 
18700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
18710b57cec5SDimitry Andric _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
18720b57cec5SDimitry Andric {
18730b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
18740b57cec5SDimitry Andric                                              (__v8di)_mm512_abs_epi64(__A),
18750b57cec5SDimitry Andric                                              (__v8di)__W);
18760b57cec5SDimitry Andric }
18770b57cec5SDimitry Andric 
18780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
18790b57cec5SDimitry Andric _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
18800b57cec5SDimitry Andric {
18810b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
18820b57cec5SDimitry Andric                                              (__v8di)_mm512_abs_epi64(__A),
18830b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
18840b57cec5SDimitry Andric }
18850b57cec5SDimitry Andric 
18860b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
18870b57cec5SDimitry Andric _mm512_abs_epi32(__m512i __A)
18880b57cec5SDimitry Andric {
188904eeddc0SDimitry Andric   return (__m512i)__builtin_elementwise_abs((__v16si) __A);
18900b57cec5SDimitry Andric }
18910b57cec5SDimitry Andric 
18920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
18930b57cec5SDimitry Andric _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
18940b57cec5SDimitry Andric {
18950b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
18960b57cec5SDimitry Andric                                              (__v16si)_mm512_abs_epi32(__A),
18970b57cec5SDimitry Andric                                              (__v16si)__W);
18980b57cec5SDimitry Andric }
18990b57cec5SDimitry Andric 
19000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
19010b57cec5SDimitry Andric _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
19020b57cec5SDimitry Andric {
19030b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
19040b57cec5SDimitry Andric                                              (__v16si)_mm512_abs_epi32(__A),
19050b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
19060b57cec5SDimitry Andric }
19070b57cec5SDimitry Andric 
19080b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
19090b57cec5SDimitry Andric _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
19100b57cec5SDimitry Andric   __A = _mm_add_ss(__A, __B);
19110b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, __W);
19120b57cec5SDimitry Andric }
19130b57cec5SDimitry Andric 
19140b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
19150b57cec5SDimitry Andric _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
19160b57cec5SDimitry Andric   __A = _mm_add_ss(__A, __B);
19170b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
19180b57cec5SDimitry Andric }
19190b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_addss_round_mask on A and B (as __v4sf) with an
 * all-zero third operand, an all-ones __mmask8, and R cast to int as the
 * final argument. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)))
19250b57cec5SDimitry Andric 
/* Masked form: expands to __builtin_ia32_addss_round_mask on A and B (as
 * __v4sf) with W as the third operand, U cast to __mmask8 as the mask, and R
 * cast to int as the final argument. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)))
19310b57cec5SDimitry Andric 
/* Zero-source form: expands to __builtin_ia32_addss_round_mask on A and B
 * (as __v4sf) with an all-zero third operand, U cast to __mmask8 as the
 * mask, and R cast to int as the final argument. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)))
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
19390b57cec5SDimitry Andric _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
19400b57cec5SDimitry Andric   __A = _mm_add_sd(__A, __B);
19410b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, __W);
19420b57cec5SDimitry Andric }
19430b57cec5SDimitry Andric 
19440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
19450b57cec5SDimitry Andric _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
19460b57cec5SDimitry Andric   __A = _mm_add_sd(__A, __B);
19470b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
19480b57cec5SDimitry Andric }
/* Expands to __builtin_ia32_addsd_round_mask on A and B (as __v2df) with an
 * all-zero third operand, an all-ones __mmask8, and R cast to int as the
 * final argument. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)))
19540b57cec5SDimitry Andric 
/* Masked form: expands to __builtin_ia32_addsd_round_mask on A and B (as
 * __v2df) with W as the third operand, U cast to __mmask8 as the mask, and R
 * cast to int as the final argument. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)))
19600b57cec5SDimitry Andric 
/* Zero-source form: expands to __builtin_ia32_addsd_round_mask on A and B
 * (as __v2df) with an all-zero third operand, U cast to __mmask8 as the
 * mask, and R cast to int as the final argument. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
19660b57cec5SDimitry Andric 
19670b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
19680b57cec5SDimitry Andric _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
19690b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
19700b57cec5SDimitry Andric                                               (__v8df)_mm512_add_pd(__A, __B),
19710b57cec5SDimitry Andric                                               (__v8df)__W);
19720b57cec5SDimitry Andric }
19730b57cec5SDimitry Andric 
19740b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
19750b57cec5SDimitry Andric _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
19760b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
19770b57cec5SDimitry Andric                                               (__v8df)_mm512_add_pd(__A, __B),
19780b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
19790b57cec5SDimitry Andric }
19800b57cec5SDimitry Andric 
19810b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
19820b57cec5SDimitry Andric _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
19830b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
19840b57cec5SDimitry Andric                                              (__v16sf)_mm512_add_ps(__A, __B),
19850b57cec5SDimitry Andric                                              (__v16sf)__W);
19860b57cec5SDimitry Andric }
19870b57cec5SDimitry Andric 
19880b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
19890b57cec5SDimitry Andric _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
19900b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
19910b57cec5SDimitry Andric                                              (__v16sf)_mm512_add_ps(__A, __B),
19920b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
19930b57cec5SDimitry Andric }
19940b57cec5SDimitry Andric 
/* Packed-double add via __builtin_ia32_addpd512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
19980b57cec5SDimitry Andric 
/* Writemask form of _mm512_add_round_pd: select under mask U between the
 * rounded sum and the pass-through vector W. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
20030b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_add_round_pd: select under mask U between the
 * rounded sum and _mm512_setzero_pd(). */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
20080b57cec5SDimitry Andric 
/* Packed-single add via __builtin_ia32_addps512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
20120b57cec5SDimitry Andric 
/* Writemask form of _mm512_add_round_ps: select under mask U between the
 * rounded sum and the pass-through vector W. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
20170b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_add_round_ps: select under mask U between the
 * rounded sum and _mm512_setzero_ps(). */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
20220b57cec5SDimitry Andric 
20230b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
20240b57cec5SDimitry Andric _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
20250b57cec5SDimitry Andric   __A = _mm_sub_ss(__A, __B);
20260b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, __W);
20270b57cec5SDimitry Andric }
20280b57cec5SDimitry Andric 
20290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
20300b57cec5SDimitry Andric _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
20310b57cec5SDimitry Andric   __A = _mm_sub_ss(__A, __B);
20320b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
20330b57cec5SDimitry Andric }
/* Scalar-single subtract through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
20390b57cec5SDimitry Andric 
/* Scalar-single subtract through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
20450b57cec5SDimitry Andric 
/* Scalar-single subtract through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
20510b57cec5SDimitry Andric 
20520b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
20530b57cec5SDimitry Andric _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
20540b57cec5SDimitry Andric   __A = _mm_sub_sd(__A, __B);
20550b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, __W);
20560b57cec5SDimitry Andric }
20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
20590b57cec5SDimitry Andric _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
20600b57cec5SDimitry Andric   __A = _mm_sub_sd(__A, __B);
20610b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
20620b57cec5SDimitry Andric }
20630b57cec5SDimitry Andric 
/* Scalar-double subtract through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
20690b57cec5SDimitry Andric 
/* Scalar-double subtract through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
20750b57cec5SDimitry Andric 
/* Scalar-double subtract through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
20810b57cec5SDimitry Andric 
20820b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
20830b57cec5SDimitry Andric _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
20840b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
20850b57cec5SDimitry Andric                                               (__v8df)_mm512_sub_pd(__A, __B),
20860b57cec5SDimitry Andric                                               (__v8df)__W);
20870b57cec5SDimitry Andric }
20880b57cec5SDimitry Andric 
20890b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
20900b57cec5SDimitry Andric _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
20910b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
20920b57cec5SDimitry Andric                                               (__v8df)_mm512_sub_pd(__A, __B),
20930b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
20940b57cec5SDimitry Andric }
20950b57cec5SDimitry Andric 
20960b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
20970b57cec5SDimitry Andric _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
20980b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
20990b57cec5SDimitry Andric                                              (__v16sf)_mm512_sub_ps(__A, __B),
21000b57cec5SDimitry Andric                                              (__v16sf)__W);
21010b57cec5SDimitry Andric }
21020b57cec5SDimitry Andric 
21030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
21040b57cec5SDimitry Andric _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
21050b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
21060b57cec5SDimitry Andric                                              (__v16sf)_mm512_sub_ps(__A, __B),
21070b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
21080b57cec5SDimitry Andric }
21090b57cec5SDimitry Andric 
/* Packed-double subtract via __builtin_ia32_subpd512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
21130b57cec5SDimitry Andric 
/* Writemask form of _mm512_sub_round_pd: select under mask U between the
 * rounded difference and the pass-through vector W. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
21180b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_sub_round_pd: select under mask U between the
 * rounded difference and _mm512_setzero_pd(). */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
21230b57cec5SDimitry Andric 
/* Packed-single subtract via __builtin_ia32_subps512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
21270b57cec5SDimitry Andric 
/* Writemask form of _mm512_sub_round_ps: select under mask U between the
 * rounded difference and the pass-through vector W. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
21320b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_sub_round_ps: select under mask U between the
 * rounded difference and _mm512_setzero_ps(). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
21370b57cec5SDimitry Andric 
21380b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
21390b57cec5SDimitry Andric _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
21400b57cec5SDimitry Andric   __A = _mm_mul_ss(__A, __B);
21410b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, __W);
21420b57cec5SDimitry Andric }
21430b57cec5SDimitry Andric 
21440b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
21450b57cec5SDimitry Andric _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
21460b57cec5SDimitry Andric   __A = _mm_mul_ss(__A, __B);
21470b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
21480b57cec5SDimitry Andric }
/* Scalar-single multiply through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
21540b57cec5SDimitry Andric 
/* Scalar-single multiply through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
21600b57cec5SDimitry Andric 
/* Scalar-single multiply through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
21660b57cec5SDimitry Andric 
21670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
21680b57cec5SDimitry Andric _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
21690b57cec5SDimitry Andric   __A = _mm_mul_sd(__A, __B);
21700b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, __W);
21710b57cec5SDimitry Andric }
21720b57cec5SDimitry Andric 
21730b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
21740b57cec5SDimitry Andric _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
21750b57cec5SDimitry Andric   __A = _mm_mul_sd(__A, __B);
21760b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
21770b57cec5SDimitry Andric }
21780b57cec5SDimitry Andric 
/* Scalar-double multiply through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
21840b57cec5SDimitry Andric 
/* Scalar-double multiply through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
21900b57cec5SDimitry Andric 
/* Scalar-double multiply through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
21960b57cec5SDimitry Andric 
21970b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
21980b57cec5SDimitry Andric _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
21990b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
22000b57cec5SDimitry Andric                                               (__v8df)_mm512_mul_pd(__A, __B),
22010b57cec5SDimitry Andric                                               (__v8df)__W);
22020b57cec5SDimitry Andric }
22030b57cec5SDimitry Andric 
22040b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
22050b57cec5SDimitry Andric _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
22060b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
22070b57cec5SDimitry Andric                                               (__v8df)_mm512_mul_pd(__A, __B),
22080b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
22090b57cec5SDimitry Andric }
22100b57cec5SDimitry Andric 
22110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
22120b57cec5SDimitry Andric _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
22130b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
22140b57cec5SDimitry Andric                                              (__v16sf)_mm512_mul_ps(__A, __B),
22150b57cec5SDimitry Andric                                              (__v16sf)__W);
22160b57cec5SDimitry Andric }
22170b57cec5SDimitry Andric 
22180b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
22190b57cec5SDimitry Andric _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
22200b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
22210b57cec5SDimitry Andric                                              (__v16sf)_mm512_mul_ps(__A, __B),
22220b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
22230b57cec5SDimitry Andric }
22240b57cec5SDimitry Andric 
/* Packed-double multiply via __builtin_ia32_mulpd512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
22280b57cec5SDimitry Andric 
/* Writemask form of _mm512_mul_round_pd: select under mask U between the
 * rounded product and the pass-through vector W. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
22330b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_mul_round_pd: select under mask U between the
 * rounded product and _mm512_setzero_pd(). */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
22380b57cec5SDimitry Andric 
/* Packed-single multiply via __builtin_ia32_mulps512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
22420b57cec5SDimitry Andric 
/* Writemask form of _mm512_mul_round_ps: select under mask U between the
 * rounded product and the pass-through vector W. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
22470b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_mul_round_ps: select under mask U between the
 * rounded product and _mm512_setzero_ps(). */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
22520b57cec5SDimitry Andric 
22530b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
22540b57cec5SDimitry Andric _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
22550b57cec5SDimitry Andric   __A = _mm_div_ss(__A, __B);
22560b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, __W);
22570b57cec5SDimitry Andric }
22580b57cec5SDimitry Andric 
22590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
22600b57cec5SDimitry Andric _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
22610b57cec5SDimitry Andric   __A = _mm_div_ss(__A, __B);
22620b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
22630b57cec5SDimitry Andric }
22640b57cec5SDimitry Andric 
/* Scalar-single divide through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
22700b57cec5SDimitry Andric 
/* Scalar-single divide through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
22760b57cec5SDimitry Andric 
/* Scalar-single divide through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
22820b57cec5SDimitry Andric 
22830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
22840b57cec5SDimitry Andric _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
22850b57cec5SDimitry Andric   __A = _mm_div_sd(__A, __B);
22860b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, __W);
22870b57cec5SDimitry Andric }
22880b57cec5SDimitry Andric 
22890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
22900b57cec5SDimitry Andric _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
22910b57cec5SDimitry Andric   __A = _mm_div_sd(__A, __B);
22920b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
22930b57cec5SDimitry Andric }
22940b57cec5SDimitry Andric 
/* Scalar-double divide through the masked builtin, using an all-ones mask
 * and a zero pass-through; R is forwarded as the final int operand. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
23000b57cec5SDimitry Andric 
/* Scalar-double divide through the masked builtin, with W as the
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
23060b57cec5SDimitry Andric 
/* Scalar-double divide through the masked builtin, with a zero
 * pass-through operand and U as the mask; R is the final int operand. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
23120b57cec5SDimitry Andric 
23130b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
23140b57cec5SDimitry Andric _mm512_div_pd(__m512d __a, __m512d __b)
23150b57cec5SDimitry Andric {
23160b57cec5SDimitry Andric   return (__m512d)((__v8df)__a/(__v8df)__b);
23170b57cec5SDimitry Andric }
23180b57cec5SDimitry Andric 
23190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
23200b57cec5SDimitry Andric _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
23210b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
23220b57cec5SDimitry Andric                                               (__v8df)_mm512_div_pd(__A, __B),
23230b57cec5SDimitry Andric                                               (__v8df)__W);
23240b57cec5SDimitry Andric }
23250b57cec5SDimitry Andric 
23260b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
23270b57cec5SDimitry Andric _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
23280b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
23290b57cec5SDimitry Andric                                               (__v8df)_mm512_div_pd(__A, __B),
23300b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
23310b57cec5SDimitry Andric }
23320b57cec5SDimitry Andric 
23330b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
23340b57cec5SDimitry Andric _mm512_div_ps(__m512 __a, __m512 __b)
23350b57cec5SDimitry Andric {
23360b57cec5SDimitry Andric   return (__m512)((__v16sf)__a/(__v16sf)__b);
23370b57cec5SDimitry Andric }
23380b57cec5SDimitry Andric 
23390b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
23400b57cec5SDimitry Andric _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
23410b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
23420b57cec5SDimitry Andric                                              (__v16sf)_mm512_div_ps(__A, __B),
23430b57cec5SDimitry Andric                                              (__v16sf)__W);
23440b57cec5SDimitry Andric }
23450b57cec5SDimitry Andric 
23460b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
23470b57cec5SDimitry Andric _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
23480b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
23490b57cec5SDimitry Andric                                              (__v16sf)_mm512_div_ps(__A, __B),
23500b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
23510b57cec5SDimitry Andric }
23520b57cec5SDimitry Andric 
/* Packed-double divide via __builtin_ia32_divpd512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
23560b57cec5SDimitry Andric 
/* Writemask form of _mm512_div_round_pd: select under mask U between the
 * rounded quotient and the pass-through vector W. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
23610b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_div_round_pd: select under mask U between the
 * rounded quotient and _mm512_setzero_pd(). */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
23660b57cec5SDimitry Andric 
/* Packed-single divide via __builtin_ia32_divps512; R is forwarded as the
 * builtin's trailing int operand. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
23700b57cec5SDimitry Andric 
/* Writemask form of _mm512_div_round_ps: select under mask U between the
 * rounded quotient and the pass-through vector W. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
23750b57cec5SDimitry Andric 
/* Zero-mask form of _mm512_div_round_ps: select under mask U between the
 * rounded quotient and _mm512_setzero_ps(). */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
23800b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm: all-ones mask, undefined
 * pass-through operand, and _MM_FROUND_CUR_DIRECTION as the last operand. */
#define _mm512_roundscale_ps(A, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
23860b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm; W is the pass-through
 * operand and U the mask (argument order: W, U, A, imm). The last builtin
 * operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_ps(W, U, A, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)(__m512)(W), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
23910b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm; U is the mask and the
 * pass-through operand is _mm512_setzero_ps() (argument order: U, A, imm).
 * The last builtin operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_ps(U, A, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
23970b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm; W is the pass-through
 * operand, U the mask, and R the builtin's final int operand
 * (argument order: W, U, A, imm, R). */
#define _mm512_mask_roundscale_round_ps(W, U, A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
24020b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm; U is the mask, the
 * pass-through operand is _mm512_setzero_ps(), and R is the builtin's final
 * int operand (argument order: U, A, imm, R). */
#define _mm512_maskz_roundscale_round_ps(U, A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
24070b57cec5SDimitry Andric 
/* Packed-single roundscale of A with immediate imm: all-ones mask, undefined
 * pass-through operand, and R as the builtin's final int operand. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))
24120b57cec5SDimitry Andric 
/* Packed-double roundscale of A with immediate imm: all-ones mask, undefined
 * pass-through operand, and _MM_FROUND_CUR_DIRECTION as the last operand. */
#define _mm512_roundscale_pd(A, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
24180b57cec5SDimitry Andric 
/* Packed-double roundscale of A with immediate imm; W is the pass-through
 * operand and U the mask (argument order: W, U, A, imm). The last builtin
 * operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_pd(W, U, A, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v8df)(__m512d)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
24230b57cec5SDimitry Andric 
/* Zero-masked round-scale on an 8 x double vector.  Forwards B (source), imm,
 * a zero vector from _mm512_setzero_pd(), A (as __mmask8) and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_rndscalepd_mask.
 */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION))
24290b57cec5SDimitry Andric 
/* Masked round-scale on an 8 x double vector with rounding argument R.
 * Forwards C (source), imm, A, B (as __mmask8) and R to
 * __builtin_ia32_rndscalepd_mask; A fills the builtin's third operand slot.
 */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), \
      (int)(imm), \
      (__v8df)(__m512d)(A), \
      (__mmask8)(B), \
      (int)(R)))
24340b57cec5SDimitry Andric 
/* Zero-masked round-scale on an 8 x double vector with rounding argument R.
 * Forwards B (source), imm, a zero vector from _mm512_setzero_pd(), A (as
 * __mmask8) and R to __builtin_ia32_rndscalepd_mask.
 */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      (int)(R)))
24390b57cec5SDimitry Andric 
/* Unmasked round-scale on an 8 x double vector with rounding argument R.
 * Forwards A, imm, _mm512_undefined_pd() and an all-ones __mmask8 to
 * __builtin_ia32_rndscalepd_mask.
 */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))
24440b57cec5SDimitry Andric 
/* Unmasked fmadd on 8 x double with rounding argument R: hands A, B and C
 * (each viewed as __v8df) to __builtin_ia32_vfmaddpd512_mask together with an
 * all-ones __mmask8.
 */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)))
24500b57cec5SDimitry Andric 
24510b57cec5SDimitry Andric 
/* Masked fmadd on 8 x double with rounding argument R: hands A, B, C and the
 * mask U (as __mmask8) to __builtin_ia32_vfmaddpd512_mask.
 */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
24570b57cec5SDimitry Andric 
24580b57cec5SDimitry Andric 
/* "mask3" fmadd on 8 x double with rounding argument R: hands A, B, C and the
 * mask U (as __mmask8) to __builtin_ia32_vfmaddpd512_mask3.
 */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
24640b57cec5SDimitry Andric 
24650b57cec5SDimitry Andric 
/* "maskz" fmadd on 8 x double with rounding argument R: hands A, B, C and the
 * mask U (as __mmask8) to __builtin_ia32_vfmaddpd512_maskz.
 */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
24710b57cec5SDimitry Andric 
24720b57cec5SDimitry Andric 
/* Unmasked fmsub on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_mask by negating the addend C; the mask is an
 * all-ones __mmask8.
 */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)))
24780b57cec5SDimitry Andric 
24790b57cec5SDimitry Andric 
/* Masked fmsub on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_mask by negating the addend C; U is passed as
 * the __mmask8.
 */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
24850b57cec5SDimitry Andric 
24860b57cec5SDimitry Andric 
/* "maskz" fmsub on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_maskz by negating the addend C; U is passed as
 * the __mmask8.
 */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
24920b57cec5SDimitry Andric 
24930b57cec5SDimitry Andric 
/* Unmasked fnmadd on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_mask by negating the first multiplicand A; the
 * mask is an all-ones __mmask8.
 */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)))
24990b57cec5SDimitry Andric 
25000b57cec5SDimitry Andric 
/* "mask3" fnmadd on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_mask3 by negating the first multiplicand A; U is
 * passed as the __mmask8.
 */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
25060b57cec5SDimitry Andric 
25070b57cec5SDimitry Andric 
/* "maskz" fnmadd on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_maskz by negating the first multiplicand A; U is
 * passed as the __mmask8.
 */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
25130b57cec5SDimitry Andric 
25140b57cec5SDimitry Andric 
/* Unmasked fnmsub on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_mask by negating both the first multiplicand A
 * and the addend C; the mask is an all-ones __mmask8.
 */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)))
25200b57cec5SDimitry Andric 
25210b57cec5SDimitry Andric 
/* "maskz" fnmsub on 8 x double with rounding argument R, expressed through
 * __builtin_ia32_vfmaddpd512_maskz by negating both the first multiplicand A
 * and the addend C; U is passed as the __mmask8.
 */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)))
25270b57cec5SDimitry Andric 
25280b57cec5SDimitry Andric 
25290b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25300b57cec5SDimitry Andric _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
25310b57cec5SDimitry Andric {
25320b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
25330b57cec5SDimitry Andric                                                     (__v8df) __B,
25340b57cec5SDimitry Andric                                                     (__v8df) __C,
25350b57cec5SDimitry Andric                                                     (__mmask8) -1,
25360b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
25370b57cec5SDimitry Andric }
25380b57cec5SDimitry Andric 
25390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25400b57cec5SDimitry Andric _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
25410b57cec5SDimitry Andric {
25420b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
25430b57cec5SDimitry Andric                                                     (__v8df) __B,
25440b57cec5SDimitry Andric                                                     (__v8df) __C,
25450b57cec5SDimitry Andric                                                     (__mmask8) __U,
25460b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
25470b57cec5SDimitry Andric }
25480b57cec5SDimitry Andric 
25490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25500b57cec5SDimitry Andric _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
25510b57cec5SDimitry Andric {
25520b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
25530b57cec5SDimitry Andric                                                      (__v8df) __B,
25540b57cec5SDimitry Andric                                                      (__v8df) __C,
25550b57cec5SDimitry Andric                                                      (__mmask8) __U,
25560b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
25570b57cec5SDimitry Andric }
25580b57cec5SDimitry Andric 
25590b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25600b57cec5SDimitry Andric _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
25610b57cec5SDimitry Andric {
25620b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
25630b57cec5SDimitry Andric                                                      (__v8df) __B,
25640b57cec5SDimitry Andric                                                      (__v8df) __C,
25650b57cec5SDimitry Andric                                                      (__mmask8) __U,
25660b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
25670b57cec5SDimitry Andric }
25680b57cec5SDimitry Andric 
25690b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25700b57cec5SDimitry Andric _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
25710b57cec5SDimitry Andric {
25720b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
25730b57cec5SDimitry Andric                                                     (__v8df) __B,
25740b57cec5SDimitry Andric                                                     -(__v8df) __C,
25750b57cec5SDimitry Andric                                                     (__mmask8) -1,
25760b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
25770b57cec5SDimitry Andric }
25780b57cec5SDimitry Andric 
25790b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25800b57cec5SDimitry Andric _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
25810b57cec5SDimitry Andric {
25820b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
25830b57cec5SDimitry Andric                                                     (__v8df) __B,
25840b57cec5SDimitry Andric                                                     -(__v8df) __C,
25850b57cec5SDimitry Andric                                                     (__mmask8) __U,
25860b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
25870b57cec5SDimitry Andric }
25880b57cec5SDimitry Andric 
25890b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
25900b57cec5SDimitry Andric _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
25910b57cec5SDimitry Andric {
25920b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
25930b57cec5SDimitry Andric                                                      (__v8df) __B,
25940b57cec5SDimitry Andric                                                      -(__v8df) __C,
25950b57cec5SDimitry Andric                                                      (__mmask8) __U,
25960b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
25970b57cec5SDimitry Andric }
25980b57cec5SDimitry Andric 
25990b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
26000b57cec5SDimitry Andric _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
26010b57cec5SDimitry Andric {
26020b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
26030b57cec5SDimitry Andric                                                     -(__v8df) __B,
26040b57cec5SDimitry Andric                                                     (__v8df) __C,
26050b57cec5SDimitry Andric                                                     (__mmask8) -1,
26060b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
26070b57cec5SDimitry Andric }
26080b57cec5SDimitry Andric 
26090b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
26100b57cec5SDimitry Andric _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
26110b57cec5SDimitry Andric {
26120b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
26130b57cec5SDimitry Andric                                                      (__v8df) __B,
26140b57cec5SDimitry Andric                                                      (__v8df) __C,
26150b57cec5SDimitry Andric                                                      (__mmask8) __U,
26160b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
26170b57cec5SDimitry Andric }
26180b57cec5SDimitry Andric 
26190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
26200b57cec5SDimitry Andric _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
26210b57cec5SDimitry Andric {
26220b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
26230b57cec5SDimitry Andric                                                      (__v8df) __B,
26240b57cec5SDimitry Andric                                                      (__v8df) __C,
26250b57cec5SDimitry Andric                                                      (__mmask8) __U,
26260b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
26270b57cec5SDimitry Andric }
26280b57cec5SDimitry Andric 
26290b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
26300b57cec5SDimitry Andric _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
26310b57cec5SDimitry Andric {
26320b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
26330b57cec5SDimitry Andric                                                     -(__v8df) __B,
26340b57cec5SDimitry Andric                                                     -(__v8df) __C,
26350b57cec5SDimitry Andric                                                     (__mmask8) -1,
26360b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
26370b57cec5SDimitry Andric }
26380b57cec5SDimitry Andric 
26390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
26400b57cec5SDimitry Andric _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
26410b57cec5SDimitry Andric {
26420b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
26430b57cec5SDimitry Andric                                                      (__v8df) __B,
26440b57cec5SDimitry Andric                                                      -(__v8df) __C,
26450b57cec5SDimitry Andric                                                      (__mmask8) __U,
26460b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
26470b57cec5SDimitry Andric }
26480b57cec5SDimitry Andric 
/* Unmasked fmadd on 16 x float with rounding argument R: hands A, B and C
 * (each viewed as __v16sf) to __builtin_ia32_vfmaddps512_mask together with
 * an all-ones __mmask16.
 */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)))
26540b57cec5SDimitry Andric 
26550b57cec5SDimitry Andric 
/* Masked fmadd on 16 x float with rounding argument R: hands A, B, C and the
 * mask U (as __mmask16) to __builtin_ia32_vfmaddps512_mask.
 */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
26610b57cec5SDimitry Andric 
26620b57cec5SDimitry Andric 
/* "mask3" fmadd on 16 x float with rounding argument R: hands A, B, C and the
 * mask U (as __mmask16) to __builtin_ia32_vfmaddps512_mask3.
 */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
26680b57cec5SDimitry Andric 
26690b57cec5SDimitry Andric 
/* "maskz" fmadd on 16 x float with rounding argument R: hands A, B, C and the
 * mask U (as __mmask16) to __builtin_ia32_vfmaddps512_maskz.
 */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
26750b57cec5SDimitry Andric 
26760b57cec5SDimitry Andric 
/* Unmasked fmsub on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_mask by negating the addend C; the mask is an
 * all-ones __mmask16.
 */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)))
26820b57cec5SDimitry Andric 
26830b57cec5SDimitry Andric 
/* Masked fmsub on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_mask by negating the addend C; U is passed as
 * the __mmask16.
 */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
26890b57cec5SDimitry Andric 
26900b57cec5SDimitry Andric 
/* "maskz" fmsub on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_maskz by negating the addend C; U is passed as
 * the __mmask16.
 */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
26960b57cec5SDimitry Andric 
26970b57cec5SDimitry Andric 
/* Unmasked fnmadd on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_mask by negating the second multiplicand B; the
 * mask is an all-ones __mmask16.
 */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      -(__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)))
27030b57cec5SDimitry Andric 
27040b57cec5SDimitry Andric 
/* "mask3" fnmadd on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_mask3 by negating the first multiplicand A; U is
 * passed as the __mmask16.
 */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
27100b57cec5SDimitry Andric 
27110b57cec5SDimitry Andric 
/* "maskz" fnmadd on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_maskz by negating the first multiplicand A; U is
 * passed as the __mmask16.
 */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
27170b57cec5SDimitry Andric 
27180b57cec5SDimitry Andric 
/* Unmasked fnmsub on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_mask by negating both the second multiplicand B
 * and the addend C; the mask is an all-ones __mmask16.
 */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      -(__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)))
27240b57cec5SDimitry Andric 
27250b57cec5SDimitry Andric 
/* "maskz" fnmsub on 16 x float with rounding argument R, expressed through
 * __builtin_ia32_vfmaddps512_maskz by negating both the first multiplicand A
 * and the addend C; U is passed as the __mmask16.
 */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)))
27310b57cec5SDimitry Andric 
27320b57cec5SDimitry Andric 
27330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27340b57cec5SDimitry Andric _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
27350b57cec5SDimitry Andric {
27360b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
27370b57cec5SDimitry Andric                                                    (__v16sf) __B,
27380b57cec5SDimitry Andric                                                    (__v16sf) __C,
27390b57cec5SDimitry Andric                                                    (__mmask16) -1,
27400b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
27410b57cec5SDimitry Andric }
27420b57cec5SDimitry Andric 
27430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27440b57cec5SDimitry Andric _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
27450b57cec5SDimitry Andric {
27460b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
27470b57cec5SDimitry Andric                                                    (__v16sf) __B,
27480b57cec5SDimitry Andric                                                    (__v16sf) __C,
27490b57cec5SDimitry Andric                                                    (__mmask16) __U,
27500b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
27510b57cec5SDimitry Andric }
27520b57cec5SDimitry Andric 
27530b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27540b57cec5SDimitry Andric _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
27550b57cec5SDimitry Andric {
27560b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
27570b57cec5SDimitry Andric                                                     (__v16sf) __B,
27580b57cec5SDimitry Andric                                                     (__v16sf) __C,
27590b57cec5SDimitry Andric                                                     (__mmask16) __U,
27600b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
27610b57cec5SDimitry Andric }
27620b57cec5SDimitry Andric 
27630b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27640b57cec5SDimitry Andric _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
27650b57cec5SDimitry Andric {
27660b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
27670b57cec5SDimitry Andric                                                     (__v16sf) __B,
27680b57cec5SDimitry Andric                                                     (__v16sf) __C,
27690b57cec5SDimitry Andric                                                     (__mmask16) __U,
27700b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
27710b57cec5SDimitry Andric }
27720b57cec5SDimitry Andric 
27730b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27740b57cec5SDimitry Andric _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
27750b57cec5SDimitry Andric {
27760b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
27770b57cec5SDimitry Andric                                                    (__v16sf) __B,
27780b57cec5SDimitry Andric                                                    -(__v16sf) __C,
27790b57cec5SDimitry Andric                                                    (__mmask16) -1,
27800b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
27810b57cec5SDimitry Andric }
27820b57cec5SDimitry Andric 
27830b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27840b57cec5SDimitry Andric _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
27850b57cec5SDimitry Andric {
27860b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
27870b57cec5SDimitry Andric                                                    (__v16sf) __B,
27880b57cec5SDimitry Andric                                                    -(__v16sf) __C,
27890b57cec5SDimitry Andric                                                    (__mmask16) __U,
27900b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
27910b57cec5SDimitry Andric }
27920b57cec5SDimitry Andric 
27930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
27940b57cec5SDimitry Andric _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
27950b57cec5SDimitry Andric {
27960b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
27970b57cec5SDimitry Andric                                                     (__v16sf) __B,
27980b57cec5SDimitry Andric                                                     -(__v16sf) __C,
27990b57cec5SDimitry Andric                                                     (__mmask16) __U,
28000b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
28010b57cec5SDimitry Andric }
28020b57cec5SDimitry Andric 
28030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
28040b57cec5SDimitry Andric _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
28050b57cec5SDimitry Andric {
28060b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
28070b57cec5SDimitry Andric                                                    -(__v16sf) __B,
28080b57cec5SDimitry Andric                                                    (__v16sf) __C,
28090b57cec5SDimitry Andric                                                    (__mmask16) -1,
28100b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
28110b57cec5SDimitry Andric }
28120b57cec5SDimitry Andric 
28130b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
28140b57cec5SDimitry Andric _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
28150b57cec5SDimitry Andric {
28160b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
28170b57cec5SDimitry Andric                                                     (__v16sf) __B,
28180b57cec5SDimitry Andric                                                     (__v16sf) __C,
28190b57cec5SDimitry Andric                                                     (__mmask16) __U,
28200b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
28210b57cec5SDimitry Andric }
28220b57cec5SDimitry Andric 
28230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
28240b57cec5SDimitry Andric _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
28250b57cec5SDimitry Andric {
28260b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
28270b57cec5SDimitry Andric                                                     (__v16sf) __B,
28280b57cec5SDimitry Andric                                                     (__v16sf) __C,
28290b57cec5SDimitry Andric                                                     (__mmask16) __U,
28300b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
28310b57cec5SDimitry Andric }
28320b57cec5SDimitry Andric 
28330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
28340b57cec5SDimitry Andric _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
28350b57cec5SDimitry Andric {
28360b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
28370b57cec5SDimitry Andric                                                    -(__v16sf) __B,
28380b57cec5SDimitry Andric                                                    -(__v16sf) __C,
28390b57cec5SDimitry Andric                                                    (__mmask16) -1,
28400b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric 
28430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
28440b57cec5SDimitry Andric _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
28450b57cec5SDimitry Andric {
28460b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
28470b57cec5SDimitry Andric                                                     (__v16sf) __B,
28480b57cec5SDimitry Andric                                                     -(__v16sf) __C,
28490b57cec5SDimitry Andric                                                     (__mmask16) __U,
28500b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
28510b57cec5SDimitry Andric }
28520b57cec5SDimitry Andric 
/* fmaddsub on 8 packed doubles with caller-supplied rounding argument R.
 * Expands to the vfmaddsubpd512 mask builtin with an all-ones mask. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
28580b57cec5SDimitry Andric 
28590b57cec5SDimitry Andric 
/* fmaddsub on 8 packed doubles with mask U and rounding argument R.
 * Expands to the vfmaddsubpd512 mask builtin on A, B, C. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
28650b57cec5SDimitry Andric 
28660b57cec5SDimitry Andric 
/* fmaddsub on 8 packed doubles, mask3 form, with mask U and rounding
 * argument R.  Expands to the vfmaddsubpd512 mask3 builtin on A, B, C. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
28720b57cec5SDimitry Andric 
28730b57cec5SDimitry Andric 
/* fmaddsub on 8 packed doubles, maskz form, with mask U and rounding
 * argument R.  Expands to the vfmaddsubpd512 maskz builtin on A, B, C. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
28790b57cec5SDimitry Andric 
28800b57cec5SDimitry Andric 
/* fmsubadd on 8 packed doubles with rounding argument R.  Implemented by
 * negating C and reusing the vfmaddsubpd512 mask builtin with an all-ones
 * mask. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
28860b57cec5SDimitry Andric 
28870b57cec5SDimitry Andric 
/* fmsubadd on 8 packed doubles with mask U and rounding argument R.
 * Implemented by negating C and reusing the vfmaddsubpd512 mask builtin. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
28930b57cec5SDimitry Andric 
28940b57cec5SDimitry Andric 
/* fmsubadd on 8 packed doubles, maskz form, with mask U and rounding
 * argument R.  Implemented by negating C and reusing the vfmaddsubpd512
 * maskz builtin. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
29000b57cec5SDimitry Andric 
29010b57cec5SDimitry Andric 
29020b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29030b57cec5SDimitry Andric _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
29040b57cec5SDimitry Andric {
29050b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
29060b57cec5SDimitry Andric                                                       (__v8df) __B,
29070b57cec5SDimitry Andric                                                       (__v8df) __C,
29080b57cec5SDimitry Andric                                                       (__mmask8) -1,
29090b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
29100b57cec5SDimitry Andric }
29110b57cec5SDimitry Andric 
29120b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29130b57cec5SDimitry Andric _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
29140b57cec5SDimitry Andric {
29150b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
29160b57cec5SDimitry Andric                                                       (__v8df) __B,
29170b57cec5SDimitry Andric                                                       (__v8df) __C,
29180b57cec5SDimitry Andric                                                       (__mmask8) __U,
29190b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
29200b57cec5SDimitry Andric }
29210b57cec5SDimitry Andric 
29220b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29230b57cec5SDimitry Andric _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
29240b57cec5SDimitry Andric {
29250b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
29260b57cec5SDimitry Andric                                                        (__v8df) __B,
29270b57cec5SDimitry Andric                                                        (__v8df) __C,
29280b57cec5SDimitry Andric                                                        (__mmask8) __U,
29290b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
29300b57cec5SDimitry Andric }
29310b57cec5SDimitry Andric 
29320b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29330b57cec5SDimitry Andric _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
29340b57cec5SDimitry Andric {
29350b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
29360b57cec5SDimitry Andric                                                        (__v8df) __B,
29370b57cec5SDimitry Andric                                                        (__v8df) __C,
29380b57cec5SDimitry Andric                                                        (__mmask8) __U,
29390b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
29400b57cec5SDimitry Andric }
29410b57cec5SDimitry Andric 
29420b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29430b57cec5SDimitry Andric _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
29440b57cec5SDimitry Andric {
29450b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
29460b57cec5SDimitry Andric                                                        (__v8df) __B,
29470b57cec5SDimitry Andric                                                        -(__v8df) __C,
29480b57cec5SDimitry Andric                                                        (__mmask8) -1,
29490b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
29500b57cec5SDimitry Andric }
29510b57cec5SDimitry Andric 
29520b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29530b57cec5SDimitry Andric _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
29540b57cec5SDimitry Andric {
29550b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
29560b57cec5SDimitry Andric                                                        (__v8df) __B,
29570b57cec5SDimitry Andric                                                        -(__v8df) __C,
29580b57cec5SDimitry Andric                                                        (__mmask8) __U,
29590b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
29600b57cec5SDimitry Andric }
29610b57cec5SDimitry Andric 
29620b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
29630b57cec5SDimitry Andric _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
29640b57cec5SDimitry Andric {
29650b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
29660b57cec5SDimitry Andric                                                         (__v8df) __B,
29670b57cec5SDimitry Andric                                                         -(__v8df) __C,
29680b57cec5SDimitry Andric                                                         (__mmask8) __U,
29690b57cec5SDimitry Andric                                                         _MM_FROUND_CUR_DIRECTION);
29700b57cec5SDimitry Andric }
29710b57cec5SDimitry Andric 
/* fmaddsub on 16 packed floats with caller-supplied rounding argument R.
 * Expands to the vfmaddsubps512 mask builtin with an all-ones mask. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
29770b57cec5SDimitry Andric 
29780b57cec5SDimitry Andric 
/* fmaddsub on 16 packed floats with mask U and rounding argument R.
 * Expands to the vfmaddsubps512 mask builtin on A, B, C. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
29840b57cec5SDimitry Andric 
29850b57cec5SDimitry Andric 
/* fmaddsub on 16 packed floats, mask3 form, with mask U and rounding
 * argument R.  Expands to the vfmaddsubps512 mask3 builtin on A, B, C. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
29910b57cec5SDimitry Andric 
29920b57cec5SDimitry Andric 
/* fmaddsub on 16 packed floats, maskz form, with mask U and rounding
 * argument R.  Expands to the vfmaddsubps512 maskz builtin on A, B, C. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
29980b57cec5SDimitry Andric 
29990b57cec5SDimitry Andric 
/* fmsubadd on 16 packed floats with rounding argument R.  Implemented by
 * negating C and reusing the vfmaddsubps512 mask builtin with an all-ones
 * mask. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
30050b57cec5SDimitry Andric 
30060b57cec5SDimitry Andric 
/* fmsubadd on 16 packed floats with mask U and rounding argument R.
 * Implemented by negating C and reusing the vfmaddsubps512 mask builtin. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
30120b57cec5SDimitry Andric 
30130b57cec5SDimitry Andric 
/* fmsubadd on 16 packed floats, maskz form, with mask U and rounding
 * argument R.  Implemented by negating C and reusing the vfmaddsubps512
 * maskz builtin. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric 
30210b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30220b57cec5SDimitry Andric _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
30230b57cec5SDimitry Andric {
30240b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
30250b57cec5SDimitry Andric                                                       (__v16sf) __B,
30260b57cec5SDimitry Andric                                                       (__v16sf) __C,
30270b57cec5SDimitry Andric                                                       (__mmask16) -1,
30280b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
30290b57cec5SDimitry Andric }
30300b57cec5SDimitry Andric 
30310b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30320b57cec5SDimitry Andric _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
30330b57cec5SDimitry Andric {
30340b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
30350b57cec5SDimitry Andric                                                       (__v16sf) __B,
30360b57cec5SDimitry Andric                                                       (__v16sf) __C,
30370b57cec5SDimitry Andric                                                       (__mmask16) __U,
30380b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
30390b57cec5SDimitry Andric }
30400b57cec5SDimitry Andric 
30410b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30420b57cec5SDimitry Andric _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
30430b57cec5SDimitry Andric {
30440b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
30450b57cec5SDimitry Andric                                                        (__v16sf) __B,
30460b57cec5SDimitry Andric                                                        (__v16sf) __C,
30470b57cec5SDimitry Andric                                                        (__mmask16) __U,
30480b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
30490b57cec5SDimitry Andric }
30500b57cec5SDimitry Andric 
30510b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30520b57cec5SDimitry Andric _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
30530b57cec5SDimitry Andric {
30540b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
30550b57cec5SDimitry Andric                                                        (__v16sf) __B,
30560b57cec5SDimitry Andric                                                        (__v16sf) __C,
30570b57cec5SDimitry Andric                                                        (__mmask16) __U,
30580b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
30590b57cec5SDimitry Andric }
30600b57cec5SDimitry Andric 
30610b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30620b57cec5SDimitry Andric _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
30630b57cec5SDimitry Andric {
30640b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
30650b57cec5SDimitry Andric                                                       (__v16sf) __B,
30660b57cec5SDimitry Andric                                                       -(__v16sf) __C,
30670b57cec5SDimitry Andric                                                       (__mmask16) -1,
30680b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
30690b57cec5SDimitry Andric }
30700b57cec5SDimitry Andric 
30710b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30720b57cec5SDimitry Andric _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
30730b57cec5SDimitry Andric {
30740b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
30750b57cec5SDimitry Andric                                                       (__v16sf) __B,
30760b57cec5SDimitry Andric                                                       -(__v16sf) __C,
30770b57cec5SDimitry Andric                                                       (__mmask16) __U,
30780b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
30790b57cec5SDimitry Andric }
30800b57cec5SDimitry Andric 
30810b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
30820b57cec5SDimitry Andric _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
30830b57cec5SDimitry Andric {
30840b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
30850b57cec5SDimitry Andric                                                        (__v16sf) __B,
30860b57cec5SDimitry Andric                                                        -(__v16sf) __C,
30870b57cec5SDimitry Andric                                                        (__mmask16) __U,
30880b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
30890b57cec5SDimitry Andric }
30900b57cec5SDimitry Andric 
/* fmsub on 8 packed doubles, mask3 form, with mask U and rounding argument R.
 * Expands to the dedicated vfmsubpd512 mask3 builtin on A, B, C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
30960b57cec5SDimitry Andric 
30970b57cec5SDimitry Andric 
30980b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
30990b57cec5SDimitry Andric _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
31000b57cec5SDimitry Andric {
31010b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
31020b57cec5SDimitry Andric                                                     (__v8df) __B,
31030b57cec5SDimitry Andric                                                     (__v8df) __C,
31040b57cec5SDimitry Andric                                                     (__mmask8) __U,
31050b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
31060b57cec5SDimitry Andric }
31070b57cec5SDimitry Andric 
/* fmsub on 16 packed floats, mask3 form, with mask U and rounding argument R.
 * Expands to the dedicated vfmsubps512 mask3 builtin on A, B, C. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
31130b57cec5SDimitry Andric 
31140b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
31150b57cec5SDimitry Andric _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
31160b57cec5SDimitry Andric {
31170b57cec5SDimitry Andric   return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
31180b57cec5SDimitry Andric                                                    (__v16sf) __B,
31190b57cec5SDimitry Andric                                                    (__v16sf) __C,
31200b57cec5SDimitry Andric                                                    (__mmask16) __U,
31210b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
31220b57cec5SDimitry Andric }
31230b57cec5SDimitry Andric 
/* fmsubadd on 8 packed doubles, mask3 form, with mask U and rounding
 * argument R.  Expands to the dedicated vfmsubaddpd512 mask3 builtin on
 * A, B, C (no operand is negated here). */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
31290b57cec5SDimitry Andric 
31300b57cec5SDimitry Andric 
31310b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
31320b57cec5SDimitry Andric _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
31330b57cec5SDimitry Andric {
31340b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
31350b57cec5SDimitry Andric                                                        (__v8df) __B,
31360b57cec5SDimitry Andric                                                        (__v8df) __C,
31370b57cec5SDimitry Andric                                                        (__mmask8) __U,
31380b57cec5SDimitry Andric                                                        _MM_FROUND_CUR_DIRECTION);
31390b57cec5SDimitry Andric }
31400b57cec5SDimitry Andric 
/* fmsubadd on 16 packed floats, mask3 form, with mask U and rounding
 * argument R.  Expands to the dedicated vfmsubaddps512 mask3 builtin on
 * A, B, C (no operand is negated here). */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
31460b57cec5SDimitry Andric 
31470b57cec5SDimitry Andric 
31480b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
31490b57cec5SDimitry Andric _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
31500b57cec5SDimitry Andric {
31510b57cec5SDimitry Andric   return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
31520b57cec5SDimitry Andric                                                       (__v16sf) __B,
31530b57cec5SDimitry Andric                                                       (__v16sf) __C,
31540b57cec5SDimitry Andric                                                       (__mmask16) __U,
31550b57cec5SDimitry Andric                                                       _MM_FROUND_CUR_DIRECTION);
31560b57cec5SDimitry Andric }
31570b57cec5SDimitry Andric 
/* fnmadd on 8 packed doubles with mask U and rounding argument R.
 * Implemented by negating B and reusing the vfmaddpd512 mask builtin.
 * NOTE(review): presumably B rather than A is negated because the mask
 * builtin also takes its pass-through lanes from the first operand; confirm. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
31630b57cec5SDimitry Andric 
31640b57cec5SDimitry Andric 
31650b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
31660b57cec5SDimitry Andric _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
31670b57cec5SDimitry Andric {
31680b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
31690b57cec5SDimitry Andric                                                     -(__v8df) __B,
31700b57cec5SDimitry Andric                                                     (__v8df) __C,
31710b57cec5SDimitry Andric                                                     (__mmask8) __U,
31720b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
31730b57cec5SDimitry Andric }
31740b57cec5SDimitry Andric 
/* fnmadd on 16 packed floats with mask U and rounding argument R.
 * Implemented by negating B and reusing the vfmaddps512 mask builtin.
 * NOTE(review): presumably B rather than A is negated because the mask
 * builtin also takes its pass-through lanes from the first operand; confirm. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
31800b57cec5SDimitry Andric 
31810b57cec5SDimitry Andric 
31820b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
31830b57cec5SDimitry Andric _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
31840b57cec5SDimitry Andric {
31850b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
31860b57cec5SDimitry Andric                                                    -(__v16sf) __B,
31870b57cec5SDimitry Andric                                                    (__v16sf) __C,
31880b57cec5SDimitry Andric                                                    (__mmask16) __U,
31890b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
31900b57cec5SDimitry Andric }
31910b57cec5SDimitry Andric 
/* fnmsub on 8 packed doubles with mask U and rounding argument R.
 * Implemented by negating both B and C and reusing the vfmaddpd512 mask
 * builtin. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
31970b57cec5SDimitry Andric 
31980b57cec5SDimitry Andric 
/* fnmsub on 8 packed doubles, mask3 form, with mask U and rounding
 * argument R.  Implemented by negating A and reusing the vfmsubpd512 mask3
 * builtin. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
32040b57cec5SDimitry Andric 
32050b57cec5SDimitry Andric 
32060b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
32070b57cec5SDimitry Andric _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
32080b57cec5SDimitry Andric {
32090b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
32100b57cec5SDimitry Andric                                                     -(__v8df) __B,
32110b57cec5SDimitry Andric                                                     -(__v8df) __C,
32120b57cec5SDimitry Andric                                                     (__mmask8) __U,
32130b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
32140b57cec5SDimitry Andric }
32150b57cec5SDimitry Andric 
32160b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
32170b57cec5SDimitry Andric _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
32180b57cec5SDimitry Andric {
32190b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
32200b57cec5SDimitry Andric                                                      (__v8df) __B,
32210b57cec5SDimitry Andric                                                      (__v8df) __C,
32220b57cec5SDimitry Andric                                                      (__mmask8) __U,
32230b57cec5SDimitry Andric                                                      _MM_FROUND_CUR_DIRECTION);
32240b57cec5SDimitry Andric }
32250b57cec5SDimitry Andric 
/* fnmsub on 16 packed floats with mask U and rounding argument R.
 * Implemented by negating both B and C and reusing the vfmaddps512 mask
 * builtin. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
32310b57cec5SDimitry Andric 
32320b57cec5SDimitry Andric 
/* fnmsub on 16 packed floats, mask3 form, with mask U and rounding
 * argument R.  Implemented by negating A and reusing the vfmsubps512 mask3
 * builtin. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
32380b57cec5SDimitry Andric 
32390b57cec5SDimitry Andric 
32400b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
32410b57cec5SDimitry Andric _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
32420b57cec5SDimitry Andric {
32430b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
32440b57cec5SDimitry Andric                                                    -(__v16sf) __B,
32450b57cec5SDimitry Andric                                                    -(__v16sf) __C,
32460b57cec5SDimitry Andric                                                    (__mmask16) __U,
32470b57cec5SDimitry Andric                                                    _MM_FROUND_CUR_DIRECTION);
32480b57cec5SDimitry Andric }
32490b57cec5SDimitry Andric 
32500b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
32510b57cec5SDimitry Andric _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
32520b57cec5SDimitry Andric {
32530b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
32540b57cec5SDimitry Andric                                                     (__v16sf) __B,
32550b57cec5SDimitry Andric                                                     (__v16sf) __C,
32560b57cec5SDimitry Andric                                                     (__mmask16) __U,
32570b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
32580b57cec5SDimitry Andric }
32590b57cec5SDimitry Andric 
32600b57cec5SDimitry Andric 
32610b57cec5SDimitry Andric 
32620b57cec5SDimitry Andric /* Vector permutations */
32630b57cec5SDimitry Andric 
32640b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
32650b57cec5SDimitry Andric _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
32660b57cec5SDimitry Andric {
32670b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
32680b57cec5SDimitry Andric                                                 (__v16si) __B);
32690b57cec5SDimitry Andric }
32700b57cec5SDimitry Andric 
32710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
32720b57cec5SDimitry Andric _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
32730b57cec5SDimitry Andric                                __m512i __B)
32740b57cec5SDimitry Andric {
32750b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
32760b57cec5SDimitry Andric                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
32770b57cec5SDimitry Andric                               (__v16si)__A);
32780b57cec5SDimitry Andric }
32790b57cec5SDimitry Andric 
32800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
32810b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
32820b57cec5SDimitry Andric                                 __m512i __B)
32830b57cec5SDimitry Andric {
32840b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
32850b57cec5SDimitry Andric                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
32860b57cec5SDimitry Andric                               (__v16si)__I);
32870b57cec5SDimitry Andric }
32880b57cec5SDimitry Andric 
32890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
32900b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
32910b57cec5SDimitry Andric                                 __m512i __B)
32920b57cec5SDimitry Andric {
32930b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
32940b57cec5SDimitry Andric                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
32950b57cec5SDimitry Andric                               (__v16si)_mm512_setzero_si512());
32960b57cec5SDimitry Andric }
32970b57cec5SDimitry Andric 
32980b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
32990b57cec5SDimitry Andric _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
33000b57cec5SDimitry Andric {
33010b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
33020b57cec5SDimitry Andric                                                 (__v8di) __B);
33030b57cec5SDimitry Andric }
33040b57cec5SDimitry Andric 
33050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
33060b57cec5SDimitry Andric _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
33070b57cec5SDimitry Andric                                __m512i __B)
33080b57cec5SDimitry Andric {
33090b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
33100b57cec5SDimitry Andric                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
33110b57cec5SDimitry Andric                                (__v8di)__A);
33120b57cec5SDimitry Andric }
33130b57cec5SDimitry Andric 
33140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
33150b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
33160b57cec5SDimitry Andric                                 __m512i __B)
33170b57cec5SDimitry Andric {
33180b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
33190b57cec5SDimitry Andric                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
33200b57cec5SDimitry Andric                                (__v8di)__I);
33210b57cec5SDimitry Andric }
33220b57cec5SDimitry Andric 
33230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
33240b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
33250b57cec5SDimitry Andric                                 __m512i __B)
33260b57cec5SDimitry Andric {
33270b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
33280b57cec5SDimitry Andric                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
33290b57cec5SDimitry Andric                                (__v8di)_mm512_setzero_si512());
33300b57cec5SDimitry Andric }
33310b57cec5SDimitry Andric 
/* alignr on 64-bit elements.  The base macro forwards A and B (viewed as
 * __v8di) and the immediate I to __builtin_ia32_alignq512. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(I)))

/* Masked form: selects between the alignr result and W under the 8-bit
 * mask U via __builtin_ia32_selectq_512. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

/* Zero-masked form: same select, with an all-zero vector in place of W. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
33450b57cec5SDimitry Andric 
/* alignr on 32-bit elements.  The base macro forwards A and B (viewed as
 * __v16si) and the immediate I to __builtin_ia32_alignd512. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(I)))

/* Masked form: selects between the alignr result and W under the 16-bit
 * mask U via __builtin_ia32_selectd_512. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

/* Zero-masked form: same select, with an all-zero vector in place of W. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
/* Vector Extract */
33600b57cec5SDimitry Andric 
/* Extract a 256-bit group of doubles from the 512-bit vector A, chosen by the
 * immediate.  All three forms call __builtin_ia32_extractf64x4_mask; they
 * differ only in the third and fourth arguments.
 *
 * Unmasked: undefined vector as third argument, all-ones mask. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(I), \
      (__v4df)_mm256_undefined_pd(), \
      (__mmask8)-1))

/* Masked: W as third argument, mask U. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

/* Zero-masked: all-zero vector as third argument, mask U. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
33750b57cec5SDimitry Andric 
/* Extract a 128-bit group of floats from the 512-bit vector A, chosen by the
 * immediate.  All three forms call __builtin_ia32_extractf32x4_mask; they
 * differ only in the third and fourth arguments.
 *
 * Unmasked: undefined vector as third argument, all-ones mask. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(I), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1))

/* Masked: W as third argument, mask U. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

/* Zero-masked: all-zero vector as third argument, mask U. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
33900b57cec5SDimitry Andric 
/* Vector Blend */
33920b57cec5SDimitry Andric 
33930b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
33940b57cec5SDimitry Andric _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
33950b57cec5SDimitry Andric {
33960b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
33970b57cec5SDimitry Andric                  (__v8df) __W,
33980b57cec5SDimitry Andric                  (__v8df) __A);
33990b57cec5SDimitry Andric }
34000b57cec5SDimitry Andric 
34010b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
34020b57cec5SDimitry Andric _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
34030b57cec5SDimitry Andric {
34040b57cec5SDimitry Andric   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
34050b57cec5SDimitry Andric                 (__v16sf) __W,
34060b57cec5SDimitry Andric                 (__v16sf) __A);
34070b57cec5SDimitry Andric }
34080b57cec5SDimitry Andric 
34090b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
34100b57cec5SDimitry Andric _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
34110b57cec5SDimitry Andric {
34120b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
34130b57cec5SDimitry Andric                 (__v8di) __W,
34140b57cec5SDimitry Andric                 (__v8di) __A);
34150b57cec5SDimitry Andric }
34160b57cec5SDimitry Andric 
34170b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
34180b57cec5SDimitry Andric _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
34190b57cec5SDimitry Andric {
34200b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
34210b57cec5SDimitry Andric                 (__v16si) __W,
34220b57cec5SDimitry Andric                 (__v16si) __A);
34230b57cec5SDimitry Andric }
34240b57cec5SDimitry Andric 
/* Compare */
34260b57cec5SDimitry Andric 
/* Compare packed floats A and B with predicate P and rounding/exception
 * control R, producing a 16-bit mask via __builtin_ia32_cmpps512_mask.
 * The unmasked form passes an all-ones input mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)-1, (int)(R)))

/* Same comparison with the caller-supplied input mask U. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)(U), (int)(R)))
34360b57cec5SDimitry Andric 
/* Non-"round" float compares: forward to the *_round_* macros with
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), \
                           _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), \
                                _MM_FROUND_CUR_DIRECTION)
34410b57cec5SDimitry Andric 
/* Fixed-predicate shorthands for packed-float compares.  Each pair expands to
 * _mm512_cmp_ps_mask / _mm512_mask_cmp_ps_mask with one _CMP_* constant:
 *
 *   cmpeq    -> _CMP_EQ_OQ      cmpneq -> _CMP_NEQ_UQ
 *   cmplt    -> _CMP_LT_OS      cmpnlt -> _CMP_NLT_US
 *   cmple    -> _CMP_LE_OS      cmpnle -> _CMP_NLE_US
 *   cmpunord -> _CMP_UNORD_Q    cmpord -> _CMP_ORD_Q
 */

/* Equal. */
#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

/* Less-than. */
#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

/* Less-than-or-equal. */
#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

/* Unordered. */
#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

/* Not-equal. */
#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

/* Not-less-than. */
#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

/* Not-less-than-or-equal. */
#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

/* Ordered. */
#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
34810b57cec5SDimitry Andric 
/* Compare packed doubles A and B with predicate P and rounding/exception
 * control R, producing an 8-bit mask via __builtin_ia32_cmppd512_mask.
 * The unmasked form passes an all-ones input mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)-1, (int)(R)))

/* Same comparison with the caller-supplied input mask U. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)(U), (int)(R)))
34910b57cec5SDimitry Andric 
/* Non-"round" double compares: forward to the *_round_* macros with
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), \
                           _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), \
                                _MM_FROUND_CUR_DIRECTION)
34960b57cec5SDimitry Andric 
/* Fixed-predicate shorthands for packed-double compares.  Each pair expands
 * to _mm512_cmp_pd_mask / _mm512_mask_cmp_pd_mask with one _CMP_* constant:
 *
 *   cmpeq    -> _CMP_EQ_OQ      cmpneq -> _CMP_NEQ_UQ
 *   cmplt    -> _CMP_LT_OS      cmpnlt -> _CMP_NLT_US
 *   cmple    -> _CMP_LE_OS      cmpnle -> _CMP_NLE_US
 *   cmpunord -> _CMP_UNORD_Q    cmpord -> _CMP_ORD_Q
 */

/* Equal. */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

/* Less-than. */
#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

/* Less-than-or-equal. */
#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

/* Unordered. */
#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

/* Not-equal. */
#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

/* Not-less-than. */
#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

/* Not-less-than-or-equal. */
#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

/* Ordered. */
#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
35360b57cec5SDimitry Andric 
/* Conversion */
35380b57cec5SDimitry Andric 
/* Truncating float -> unsigned 32-bit conversion with explicit control R.
 * All three forms call __builtin_ia32_cvttps2udq512_mask; they differ only in
 * the second (source) and third (mask) arguments.
 *
 * Unmasked: undefined source vector, all-ones mask. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)))

/* Masked: W as source vector, mask U. */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked: all-zero source vector, mask U. */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
35530b57cec5SDimitry Andric 
35540b57cec5SDimitry Andric 
35550b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
35560b57cec5SDimitry Andric _mm512_cvttps_epu32(__m512 __A)
35570b57cec5SDimitry Andric {
35580b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
35590b57cec5SDimitry Andric                   (__v16si)
35600b57cec5SDimitry Andric                   _mm512_setzero_si512 (),
35610b57cec5SDimitry Andric                   (__mmask16) -1,
35620b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
35630b57cec5SDimitry Andric }
35640b57cec5SDimitry Andric 
35650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
35660b57cec5SDimitry Andric _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
35670b57cec5SDimitry Andric {
35680b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
35690b57cec5SDimitry Andric                    (__v16si) __W,
35700b57cec5SDimitry Andric                    (__mmask16) __U,
35710b57cec5SDimitry Andric                    _MM_FROUND_CUR_DIRECTION);
35720b57cec5SDimitry Andric }
35730b57cec5SDimitry Andric 
35740b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
35750b57cec5SDimitry Andric _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
35760b57cec5SDimitry Andric {
35770b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
35780b57cec5SDimitry Andric                    (__v16si) _mm512_setzero_si512 (),
35790b57cec5SDimitry Andric                    (__mmask16) __U,
35800b57cec5SDimitry Andric                    _MM_FROUND_CUR_DIRECTION);
35810b57cec5SDimitry Andric }
35820b57cec5SDimitry Andric 
/* Signed 32-bit integer -> float conversion with explicit rounding control R.
 * All three forms call __builtin_ia32_cvtdq2ps512_mask; they differ only in
 * the second (source) and third (mask) arguments.
 *
 * Unmasked: all-zero source vector, all-ones mask. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Masked: W as source vector, mask U. */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked: all-zero source vector, mask U. */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
35970b57cec5SDimitry Andric 
/* Unsigned 32-bit integer -> float conversion with explicit rounding control
 * R.  All three forms call __builtin_ia32_cvtudq2ps512_mask; they differ only
 * in the second (source) and third (mask) arguments.
 *
 * Unmasked: all-zero source vector, all-ones mask. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Masked: W as source vector, mask U. */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked: all-zero source vector, mask U. */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
36120b57cec5SDimitry Andric 
36130b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36140b57cec5SDimitry Andric _mm512_cvtepu32_ps (__m512i __A)
36150b57cec5SDimitry Andric {
36160b57cec5SDimitry Andric   return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
36170b57cec5SDimitry Andric }
36180b57cec5SDimitry Andric 
36190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36200b57cec5SDimitry Andric _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
36210b57cec5SDimitry Andric {
36220b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
36230b57cec5SDimitry Andric                                              (__v16sf)_mm512_cvtepu32_ps(__A),
36240b57cec5SDimitry Andric                                              (__v16sf)__W);
36250b57cec5SDimitry Andric }
36260b57cec5SDimitry Andric 
36270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36280b57cec5SDimitry Andric _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
36290b57cec5SDimitry Andric {
36300b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
36310b57cec5SDimitry Andric                                              (__v16sf)_mm512_cvtepu32_ps(__A),
36320b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
36330b57cec5SDimitry Andric }
36340b57cec5SDimitry Andric 
36350b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
36360b57cec5SDimitry Andric _mm512_cvtepi32_pd(__m256i __A)
36370b57cec5SDimitry Andric {
36380b57cec5SDimitry Andric   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
36390b57cec5SDimitry Andric }
36400b57cec5SDimitry Andric 
36410b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
36420b57cec5SDimitry Andric _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
36430b57cec5SDimitry Andric {
36440b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
36450b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtepi32_pd(__A),
36460b57cec5SDimitry Andric                                               (__v8df)__W);
36470b57cec5SDimitry Andric }
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
36500b57cec5SDimitry Andric _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
36510b57cec5SDimitry Andric {
36520b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
36530b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtepi32_pd(__A),
36540b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
36550b57cec5SDimitry Andric }
36560b57cec5SDimitry Andric 
36570b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
36580b57cec5SDimitry Andric _mm512_cvtepi32lo_pd(__m512i __A)
36590b57cec5SDimitry Andric {
36600b57cec5SDimitry Andric   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
36610b57cec5SDimitry Andric }
36620b57cec5SDimitry Andric 
36630b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
36640b57cec5SDimitry Andric _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
36650b57cec5SDimitry Andric {
36660b57cec5SDimitry Andric   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
36670b57cec5SDimitry Andric }
36680b57cec5SDimitry Andric 
36690b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36700b57cec5SDimitry Andric _mm512_cvtepi32_ps (__m512i __A)
36710b57cec5SDimitry Andric {
36720b57cec5SDimitry Andric   return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
36730b57cec5SDimitry Andric }
36740b57cec5SDimitry Andric 
36750b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36760b57cec5SDimitry Andric _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
36770b57cec5SDimitry Andric {
36780b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
36790b57cec5SDimitry Andric                                              (__v16sf)_mm512_cvtepi32_ps(__A),
36800b57cec5SDimitry Andric                                              (__v16sf)__W);
36810b57cec5SDimitry Andric }
36820b57cec5SDimitry Andric 
36830b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
36840b57cec5SDimitry Andric _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
36850b57cec5SDimitry Andric {
36860b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
36870b57cec5SDimitry Andric                                              (__v16sf)_mm512_cvtepi32_ps(__A),
36880b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
36890b57cec5SDimitry Andric }
36900b57cec5SDimitry Andric 
36910b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
36920b57cec5SDimitry Andric _mm512_cvtepu32_pd(__m256i __A)
36930b57cec5SDimitry Andric {
36940b57cec5SDimitry Andric   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
36950b57cec5SDimitry Andric }
36960b57cec5SDimitry Andric 
36970b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
36980b57cec5SDimitry Andric _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
36990b57cec5SDimitry Andric {
37000b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
37010b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtepu32_pd(__A),
37020b57cec5SDimitry Andric                                               (__v8df)__W);
37030b57cec5SDimitry Andric }
37040b57cec5SDimitry Andric 
37050b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
37060b57cec5SDimitry Andric _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
37070b57cec5SDimitry Andric {
37080b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
37090b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtepu32_pd(__A),
37100b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
37110b57cec5SDimitry Andric }
37120b57cec5SDimitry Andric 
37130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
37140b57cec5SDimitry Andric _mm512_cvtepu32lo_pd(__m512i __A)
37150b57cec5SDimitry Andric {
37160b57cec5SDimitry Andric   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
37170b57cec5SDimitry Andric }
37180b57cec5SDimitry Andric 
37190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
37200b57cec5SDimitry Andric _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
37210b57cec5SDimitry Andric {
37220b57cec5SDimitry Andric   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
37230b57cec5SDimitry Andric }
37240b57cec5SDimitry Andric 
/* Double -> float conversion (512-bit input, 256-bit result) with explicit
 * rounding control R.  All three forms call __builtin_ia32_cvtpd2ps512_mask;
 * they differ only in the second (source) and third (mask) arguments.
 *
 * Unmasked: all-zero source vector, all-ones mask. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Masked: W as source vector, mask U. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero source vector, mask U. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
37390b57cec5SDimitry Andric 
37400b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512
37410b57cec5SDimitry Andric _mm512_cvtpd_ps (__m512d __A)
37420b57cec5SDimitry Andric {
37430b57cec5SDimitry Andric   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
37440b57cec5SDimitry Andric                 (__v8sf) _mm256_undefined_ps (),
37450b57cec5SDimitry Andric                 (__mmask8) -1,
37460b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
37470b57cec5SDimitry Andric }
37480b57cec5SDimitry Andric 
37490b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512
37500b57cec5SDimitry Andric _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
37510b57cec5SDimitry Andric {
37520b57cec5SDimitry Andric   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
37530b57cec5SDimitry Andric                 (__v8sf) __W,
37540b57cec5SDimitry Andric                 (__mmask8) __U,
37550b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
37560b57cec5SDimitry Andric }
37570b57cec5SDimitry Andric 
37580b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512
37590b57cec5SDimitry Andric _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
37600b57cec5SDimitry Andric {
37610b57cec5SDimitry Andric   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
37620b57cec5SDimitry Andric                 (__v8sf) _mm256_setzero_ps (),
37630b57cec5SDimitry Andric                 (__mmask8) __U,
37640b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
37650b57cec5SDimitry Andric }
37660b57cec5SDimitry Andric 
37670b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
37680b57cec5SDimitry Andric _mm512_cvtpd_pslo (__m512d __A)
37690b57cec5SDimitry Andric {
37700b57cec5SDimitry Andric   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
37710b57cec5SDimitry Andric                 (__v8sf) _mm256_setzero_ps (),
37720b57cec5SDimitry Andric                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
37730b57cec5SDimitry Andric }
37740b57cec5SDimitry Andric 
37750b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
37760b57cec5SDimitry Andric _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
37770b57cec5SDimitry Andric {
37780b57cec5SDimitry Andric   return (__m512) __builtin_shufflevector (
37790b57cec5SDimitry Andric                 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
37800b57cec5SDimitry Andric                                                __U, __A),
37810b57cec5SDimitry Andric                 (__v8sf) _mm256_setzero_ps (),
37820b57cec5SDimitry Andric                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
37830b57cec5SDimitry Andric }
37840b57cec5SDimitry Andric 
/* __m512 -> __m256i conversions through __builtin_ia32_vcvtps2ph512_mask,
 * whose arguments are (source, int control, pass-through vector, mask).
 *
 * Macro parameter names follow the convention used by the other mask/maskz
 * macros in this header: W is the __m256i pass-through vector, U is the
 * __mmask16 mask, A is the __m512 source and I is the int control value.
 * Only the names changed; argument positions and expansions are the same.
 *   plain : _mm256_undefined_si256() pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))

/* The non-"round" spellings are plain aliases and take the same arguments. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
38030b57cec5SDimitry Andric 
/* __m256i -> __m512 conversions through __builtin_ia32_vcvtph2ps512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : _mm512_undefined_ps() pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)(__m512)(W), (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), \
      (int)(R)))
38180b57cec5SDimitry Andric 
38190b57cec5SDimitry Andric 
38200b57cec5SDimitry Andric static  __inline __m512 __DEFAULT_FN_ATTRS512
38210b57cec5SDimitry Andric _mm512_cvtph_ps(__m256i __A)
38220b57cec5SDimitry Andric {
38230b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
38240b57cec5SDimitry Andric                 (__v16sf)
38250b57cec5SDimitry Andric                 _mm512_setzero_ps (),
38260b57cec5SDimitry Andric                 (__mmask16) -1,
38270b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
38280b57cec5SDimitry Andric }
38290b57cec5SDimitry Andric 
38300b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
38310b57cec5SDimitry Andric _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
38320b57cec5SDimitry Andric {
38330b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
38340b57cec5SDimitry Andric                  (__v16sf) __W,
38350b57cec5SDimitry Andric                  (__mmask16) __U,
38360b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
38370b57cec5SDimitry Andric }
38380b57cec5SDimitry Andric 
38390b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
38400b57cec5SDimitry Andric _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
38410b57cec5SDimitry Andric {
38420b57cec5SDimitry Andric   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
38430b57cec5SDimitry Andric                  (__v16sf) _mm512_setzero_ps (),
38440b57cec5SDimitry Andric                  (__mmask16) __U,
38450b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
38460b57cec5SDimitry Andric }
38470b57cec5SDimitry Andric 
/* __m512d -> __m256i conversions through __builtin_ia32_cvttpd2dq512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
      (int)(R)))
38620b57cec5SDimitry Andric 
38630b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS512
38640b57cec5SDimitry Andric _mm512_cvttpd_epi32(__m512d __a)
38650b57cec5SDimitry Andric {
38660b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
38670b57cec5SDimitry Andric                                                    (__v8si)_mm256_setzero_si256(),
38680b57cec5SDimitry Andric                                                    (__mmask8) -1,
38690b57cec5SDimitry Andric                                                     _MM_FROUND_CUR_DIRECTION);
38700b57cec5SDimitry Andric }
38710b57cec5SDimitry Andric 
38720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
38730b57cec5SDimitry Andric _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
38740b57cec5SDimitry Andric {
38750b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
38760b57cec5SDimitry Andric                   (__v8si) __W,
38770b57cec5SDimitry Andric                   (__mmask8) __U,
38780b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
38790b57cec5SDimitry Andric }
38800b57cec5SDimitry Andric 
38810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
38820b57cec5SDimitry Andric _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
38830b57cec5SDimitry Andric {
38840b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
38850b57cec5SDimitry Andric                   (__v8si) _mm256_setzero_si256 (),
38860b57cec5SDimitry Andric                   (__mmask8) __U,
38870b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
38880b57cec5SDimitry Andric }
38890b57cec5SDimitry Andric 
/* __m512 -> __m512i conversions through __builtin_ia32_cvttps2dq512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
      (int)(R)))
39040b57cec5SDimitry Andric 
39050b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
39060b57cec5SDimitry Andric _mm512_cvttps_epi32(__m512 __a)
39070b57cec5SDimitry Andric {
39080b57cec5SDimitry Andric   return (__m512i)
39090b57cec5SDimitry Andric     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
39100b57cec5SDimitry Andric                                      (__v16si) _mm512_setzero_si512 (),
39110b57cec5SDimitry Andric                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
39120b57cec5SDimitry Andric }
39130b57cec5SDimitry Andric 
39140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
39150b57cec5SDimitry Andric _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
39160b57cec5SDimitry Andric {
39170b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
39180b57cec5SDimitry Andric                   (__v16si) __W,
39190b57cec5SDimitry Andric                   (__mmask16) __U,
39200b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
39210b57cec5SDimitry Andric }
39220b57cec5SDimitry Andric 
39230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
39240b57cec5SDimitry Andric _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
39250b57cec5SDimitry Andric {
39260b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
39270b57cec5SDimitry Andric                   (__v16si) _mm512_setzero_si512 (),
39280b57cec5SDimitry Andric                   (__mmask16) __U,
39290b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
39300b57cec5SDimitry Andric }
39310b57cec5SDimitry Andric 
/* __m512 -> __m512i conversions through __builtin_ia32_cvtps2dq512_mask with
 * a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
      (int)(R)))
39460b57cec5SDimitry Andric 
39470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
39480b57cec5SDimitry Andric _mm512_cvtps_epi32 (__m512 __A)
39490b57cec5SDimitry Andric {
39500b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
39510b57cec5SDimitry Andric                  (__v16si) _mm512_undefined_epi32 (),
39520b57cec5SDimitry Andric                  (__mmask16) -1,
39530b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
39540b57cec5SDimitry Andric }
39550b57cec5SDimitry Andric 
39560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
39570b57cec5SDimitry Andric _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
39580b57cec5SDimitry Andric {
39590b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
39600b57cec5SDimitry Andric                  (__v16si) __W,
39610b57cec5SDimitry Andric                  (__mmask16) __U,
39620b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
39630b57cec5SDimitry Andric }
39640b57cec5SDimitry Andric 
39650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
39660b57cec5SDimitry Andric _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
39670b57cec5SDimitry Andric {
39680b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
39690b57cec5SDimitry Andric                  (__v16si)
39700b57cec5SDimitry Andric                  _mm512_setzero_si512 (),
39710b57cec5SDimitry Andric                  (__mmask16) __U,
39720b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
39730b57cec5SDimitry Andric }
39740b57cec5SDimitry Andric 
/* __m512d -> __m256i conversions through __builtin_ia32_cvtpd2dq512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
      (int)(R)))
39890b57cec5SDimitry Andric 
39900b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
39910b57cec5SDimitry Andric _mm512_cvtpd_epi32 (__m512d __A)
39920b57cec5SDimitry Andric {
39930b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
39940b57cec5SDimitry Andric                  (__v8si)
39950b57cec5SDimitry Andric                  _mm256_undefined_si256 (),
39960b57cec5SDimitry Andric                  (__mmask8) -1,
39970b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
39980b57cec5SDimitry Andric }
39990b57cec5SDimitry Andric 
40000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
40010b57cec5SDimitry Andric _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
40020b57cec5SDimitry Andric {
40030b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
40040b57cec5SDimitry Andric                  (__v8si) __W,
40050b57cec5SDimitry Andric                  (__mmask8) __U,
40060b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
40070b57cec5SDimitry Andric }
40080b57cec5SDimitry Andric 
40090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
40100b57cec5SDimitry Andric _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
40110b57cec5SDimitry Andric {
40120b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
40130b57cec5SDimitry Andric                  (__v8si)
40140b57cec5SDimitry Andric                  _mm256_setzero_si256 (),
40150b57cec5SDimitry Andric                  (__mmask8) __U,
40160b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
40170b57cec5SDimitry Andric }
40180b57cec5SDimitry Andric 
/* __m512 -> __m512i conversions through __builtin_ia32_cvtps2udq512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
      (int)(R)))
40330b57cec5SDimitry Andric 
40340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
40350b57cec5SDimitry Andric _mm512_cvtps_epu32 ( __m512 __A)
40360b57cec5SDimitry Andric {
40370b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
40380b57cec5SDimitry Andric                   (__v16si)\
40390b57cec5SDimitry Andric                   _mm512_undefined_epi32 (),
40400b57cec5SDimitry Andric                   (__mmask16) -1,\
40410b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
40420b57cec5SDimitry Andric }
40430b57cec5SDimitry Andric 
40440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
40450b57cec5SDimitry Andric _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
40460b57cec5SDimitry Andric {
40470b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
40480b57cec5SDimitry Andric                   (__v16si) __W,
40490b57cec5SDimitry Andric                   (__mmask16) __U,
40500b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
40510b57cec5SDimitry Andric }
40520b57cec5SDimitry Andric 
40530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
40540b57cec5SDimitry Andric _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
40550b57cec5SDimitry Andric {
40560b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
40570b57cec5SDimitry Andric                   (__v16si)
40580b57cec5SDimitry Andric                   _mm512_setzero_si512 (),
40590b57cec5SDimitry Andric                   (__mmask16) __U ,
40600b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
40610b57cec5SDimitry Andric }
40620b57cec5SDimitry Andric 
/* __m512d -> __m256i conversions through __builtin_ia32_cvtpd2udq512_mask
 * with a caller-supplied control R (cast to int, passed last).
 *   plain : zero pass-through, all-ones mask
 *   mask  : W as pass-through, U as mask
 *   maskz : zero pass-through, U as mask */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
      (int)(R)))
40770b57cec5SDimitry Andric 
40780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
40790b57cec5SDimitry Andric _mm512_cvtpd_epu32 (__m512d __A)
40800b57cec5SDimitry Andric {
40810b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
40820b57cec5SDimitry Andric                   (__v8si)
40830b57cec5SDimitry Andric                   _mm256_undefined_si256 (),
40840b57cec5SDimitry Andric                   (__mmask8) -1,
40850b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
40860b57cec5SDimitry Andric }
40870b57cec5SDimitry Andric 
40880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
40890b57cec5SDimitry Andric _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
40900b57cec5SDimitry Andric {
40910b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
40920b57cec5SDimitry Andric                   (__v8si) __W,
40930b57cec5SDimitry Andric                   (__mmask8) __U,
40940b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
40950b57cec5SDimitry Andric }
40960b57cec5SDimitry Andric 
40970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
40980b57cec5SDimitry Andric _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
40990b57cec5SDimitry Andric {
41000b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
41010b57cec5SDimitry Andric                   (__v8si)
41020b57cec5SDimitry Andric                   _mm256_setzero_si256 (),
41030b57cec5SDimitry Andric                   (__mmask8) __U,
41040b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
41050b57cec5SDimitry Andric }
41060b57cec5SDimitry Andric 
41070b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
41080b57cec5SDimitry Andric _mm512_cvtsd_f64(__m512d __a)
41090b57cec5SDimitry Andric {
41100b57cec5SDimitry Andric   return __a[0];
41110b57cec5SDimitry Andric }
41120b57cec5SDimitry Andric 
41130b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
41140b57cec5SDimitry Andric _mm512_cvtss_f32(__m512 __a)
41150b57cec5SDimitry Andric {
41160b57cec5SDimitry Andric   return __a[0];
41170b57cec5SDimitry Andric }
41180b57cec5SDimitry Andric 
41190b57cec5SDimitry Andric /* Unpack and Interleave */
41200b57cec5SDimitry Andric 
41210b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
41220b57cec5SDimitry Andric _mm512_unpackhi_pd(__m512d __a, __m512d __b)
41230b57cec5SDimitry Andric {
41240b57cec5SDimitry Andric   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41250b57cec5SDimitry Andric                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
41260b57cec5SDimitry Andric }
41270b57cec5SDimitry Andric 
41280b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
41290b57cec5SDimitry Andric _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
41300b57cec5SDimitry Andric {
41310b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
41320b57cec5SDimitry Andric                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
41330b57cec5SDimitry Andric                                            (__v8df)__W);
41340b57cec5SDimitry Andric }
41350b57cec5SDimitry Andric 
41360b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
41370b57cec5SDimitry Andric _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
41380b57cec5SDimitry Andric {
41390b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
41400b57cec5SDimitry Andric                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
41410b57cec5SDimitry Andric                                            (__v8df)_mm512_setzero_pd());
41420b57cec5SDimitry Andric }
41430b57cec5SDimitry Andric 
41440b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
41450b57cec5SDimitry Andric _mm512_unpacklo_pd(__m512d __a, __m512d __b)
41460b57cec5SDimitry Andric {
41470b57cec5SDimitry Andric   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41480b57cec5SDimitry Andric                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
41490b57cec5SDimitry Andric }
41500b57cec5SDimitry Andric 
41510b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
41520b57cec5SDimitry Andric _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
41530b57cec5SDimitry Andric {
41540b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
41550b57cec5SDimitry Andric                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
41560b57cec5SDimitry Andric                                            (__v8df)__W);
41570b57cec5SDimitry Andric }
41580b57cec5SDimitry Andric 
41590b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
41600b57cec5SDimitry Andric _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
41610b57cec5SDimitry Andric {
41620b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
41630b57cec5SDimitry Andric                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
41640b57cec5SDimitry Andric                                            (__v8df)_mm512_setzero_pd());
41650b57cec5SDimitry Andric }
41660b57cec5SDimitry Andric 
41670b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
41680b57cec5SDimitry Andric _mm512_unpackhi_ps(__m512 __a, __m512 __b)
41690b57cec5SDimitry Andric {
41700b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
41710b57cec5SDimitry Andric                                          2,    18,    3,    19,
41720b57cec5SDimitry Andric                                          2+4,  18+4,  3+4,  19+4,
41730b57cec5SDimitry Andric                                          2+8,  18+8,  3+8,  19+8,
41740b57cec5SDimitry Andric                                          2+12, 18+12, 3+12, 19+12);
41750b57cec5SDimitry Andric }
41760b57cec5SDimitry Andric 
41770b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
41780b57cec5SDimitry Andric _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
41790b57cec5SDimitry Andric {
41800b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
41810b57cec5SDimitry Andric                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
41820b57cec5SDimitry Andric                                           (__v16sf)__W);
41830b57cec5SDimitry Andric }
41840b57cec5SDimitry Andric 
41850b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
41860b57cec5SDimitry Andric _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
41870b57cec5SDimitry Andric {
41880b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
41890b57cec5SDimitry Andric                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
41900b57cec5SDimitry Andric                                           (__v16sf)_mm512_setzero_ps());
41910b57cec5SDimitry Andric }
41920b57cec5SDimitry Andric 
41930b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
41940b57cec5SDimitry Andric _mm512_unpacklo_ps(__m512 __a, __m512 __b)
41950b57cec5SDimitry Andric {
41960b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
41970b57cec5SDimitry Andric                                          0,    16,    1,    17,
41980b57cec5SDimitry Andric                                          0+4,  16+4,  1+4,  17+4,
41990b57cec5SDimitry Andric                                          0+8,  16+8,  1+8,  17+8,
42000b57cec5SDimitry Andric                                          0+12, 16+12, 1+12, 17+12);
42010b57cec5SDimitry Andric }
42020b57cec5SDimitry Andric 
42030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
42040b57cec5SDimitry Andric _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
42050b57cec5SDimitry Andric {
42060b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
42070b57cec5SDimitry Andric                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
42080b57cec5SDimitry Andric                                           (__v16sf)__W);
42090b57cec5SDimitry Andric }
42100b57cec5SDimitry Andric 
42110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
42120b57cec5SDimitry Andric _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
42130b57cec5SDimitry Andric {
42140b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
42150b57cec5SDimitry Andric                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
42160b57cec5SDimitry Andric                                           (__v16sf)_mm512_setzero_ps());
42170b57cec5SDimitry Andric }
42180b57cec5SDimitry Andric 
42190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42200b57cec5SDimitry Andric _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
42210b57cec5SDimitry Andric {
42220b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
42230b57cec5SDimitry Andric                                           2,    18,    3,    19,
42240b57cec5SDimitry Andric                                           2+4,  18+4,  3+4,  19+4,
42250b57cec5SDimitry Andric                                           2+8,  18+8,  3+8,  19+8,
42260b57cec5SDimitry Andric                                           2+12, 18+12, 3+12, 19+12);
42270b57cec5SDimitry Andric }
42280b57cec5SDimitry Andric 
42290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42300b57cec5SDimitry Andric _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
42310b57cec5SDimitry Andric {
42320b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
42330b57cec5SDimitry Andric                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
42340b57cec5SDimitry Andric                                        (__v16si)__W);
42350b57cec5SDimitry Andric }
42360b57cec5SDimitry Andric 
42370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42380b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
42390b57cec5SDimitry Andric {
42400b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
42410b57cec5SDimitry Andric                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
42420b57cec5SDimitry Andric                                        (__v16si)_mm512_setzero_si512());
42430b57cec5SDimitry Andric }
42440b57cec5SDimitry Andric 
42450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42460b57cec5SDimitry Andric _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
42470b57cec5SDimitry Andric {
42480b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
42490b57cec5SDimitry Andric                                           0,    16,    1,    17,
42500b57cec5SDimitry Andric                                           0+4,  16+4,  1+4,  17+4,
42510b57cec5SDimitry Andric                                           0+8,  16+8,  1+8,  17+8,
42520b57cec5SDimitry Andric                                           0+12, 16+12, 1+12, 17+12);
42530b57cec5SDimitry Andric }
42540b57cec5SDimitry Andric 
42550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42560b57cec5SDimitry Andric _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
42570b57cec5SDimitry Andric {
42580b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
42590b57cec5SDimitry Andric                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
42600b57cec5SDimitry Andric                                        (__v16si)__W);
42610b57cec5SDimitry Andric }
42620b57cec5SDimitry Andric 
42630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42640b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
42650b57cec5SDimitry Andric {
42660b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
42670b57cec5SDimitry Andric                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
42680b57cec5SDimitry Andric                                        (__v16si)_mm512_setzero_si512());
42690b57cec5SDimitry Andric }
42700b57cec5SDimitry Andric 
42710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42720b57cec5SDimitry Andric _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
42730b57cec5SDimitry Andric {
42740b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
42750b57cec5SDimitry Andric                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
42760b57cec5SDimitry Andric }
42770b57cec5SDimitry Andric 
42780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42790b57cec5SDimitry Andric _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
42800b57cec5SDimitry Andric {
42810b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
42820b57cec5SDimitry Andric                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
42830b57cec5SDimitry Andric                                         (__v8di)__W);
42840b57cec5SDimitry Andric }
42850b57cec5SDimitry Andric 
42860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42870b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
42880b57cec5SDimitry Andric {
42890b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
42900b57cec5SDimitry Andric                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
42910b57cec5SDimitry Andric                                         (__v8di)_mm512_setzero_si512());
42920b57cec5SDimitry Andric }
42930b57cec5SDimitry Andric 
42940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
42950b57cec5SDimitry Andric _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
42960b57cec5SDimitry Andric {
42970b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
42980b57cec5SDimitry Andric                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
42990b57cec5SDimitry Andric }
43000b57cec5SDimitry Andric 
43010b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
43020b57cec5SDimitry Andric _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
43030b57cec5SDimitry Andric {
43040b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
43050b57cec5SDimitry Andric                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
43060b57cec5SDimitry Andric                                         (__v8di)__W);
43070b57cec5SDimitry Andric }
43080b57cec5SDimitry Andric 
43090b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
43100b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
43110b57cec5SDimitry Andric {
43120b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
43130b57cec5SDimitry Andric                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
43140b57cec5SDimitry Andric                                         (__v8di)_mm512_setzero_si512());
43150b57cec5SDimitry Andric }
43160b57cec5SDimitry Andric 
43170b57cec5SDimitry Andric 
43180b57cec5SDimitry Andric /* SIMD load ops */
43190b57cec5SDimitry Andric 
43200b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43210b57cec5SDimitry Andric _mm512_loadu_si512 (void const *__P)
43220b57cec5SDimitry Andric {
43230b57cec5SDimitry Andric   struct __loadu_si512 {
43240b57cec5SDimitry Andric     __m512i_u __v;
43250b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4326480093f4SDimitry Andric   return ((const struct __loadu_si512*)__P)->__v;
43270b57cec5SDimitry Andric }
43280b57cec5SDimitry Andric 
43290b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43300b57cec5SDimitry Andric _mm512_loadu_epi32 (void const *__P)
43310b57cec5SDimitry Andric {
43320b57cec5SDimitry Andric   struct __loadu_epi32 {
43330b57cec5SDimitry Andric     __m512i_u __v;
43340b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4335480093f4SDimitry Andric   return ((const struct __loadu_epi32*)__P)->__v;
43360b57cec5SDimitry Andric }
43370b57cec5SDimitry Andric 
43380b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43390b57cec5SDimitry Andric _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
43400b57cec5SDimitry Andric {
43410b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
43420b57cec5SDimitry Andric                   (__v16si) __W,
43430b57cec5SDimitry Andric                   (__mmask16) __U);
43440b57cec5SDimitry Andric }
43450b57cec5SDimitry Andric 
43460b57cec5SDimitry Andric 
43470b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43480b57cec5SDimitry Andric _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
43490b57cec5SDimitry Andric {
43500b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
43510b57cec5SDimitry Andric                                                      (__v16si)
43520b57cec5SDimitry Andric                                                      _mm512_setzero_si512 (),
43530b57cec5SDimitry Andric                                                      (__mmask16) __U);
43540b57cec5SDimitry Andric }
43550b57cec5SDimitry Andric 
43560b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43570b57cec5SDimitry Andric _mm512_loadu_epi64 (void const *__P)
43580b57cec5SDimitry Andric {
43590b57cec5SDimitry Andric   struct __loadu_epi64 {
43600b57cec5SDimitry Andric     __m512i_u __v;
43610b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4362480093f4SDimitry Andric   return ((const struct __loadu_epi64*)__P)->__v;
43630b57cec5SDimitry Andric }
43640b57cec5SDimitry Andric 
43650b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43660b57cec5SDimitry Andric _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
43670b57cec5SDimitry Andric {
43680b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
43690b57cec5SDimitry Andric                   (__v8di) __W,
43700b57cec5SDimitry Andric                   (__mmask8) __U);
43710b57cec5SDimitry Andric }
43720b57cec5SDimitry Andric 
43730b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
43740b57cec5SDimitry Andric _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
43750b57cec5SDimitry Andric {
43760b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
43770b57cec5SDimitry Andric                                                      (__v8di)
43780b57cec5SDimitry Andric                                                      _mm512_setzero_si512 (),
43790b57cec5SDimitry Andric                                                      (__mmask8) __U);
43800b57cec5SDimitry Andric }
43810b57cec5SDimitry Andric 
43820b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
43830b57cec5SDimitry Andric _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
43840b57cec5SDimitry Andric {
43850b57cec5SDimitry Andric   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
43860b57cec5SDimitry Andric                    (__v16sf) __W,
43870b57cec5SDimitry Andric                    (__mmask16) __U);
43880b57cec5SDimitry Andric }
43890b57cec5SDimitry Andric 
43900b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
43910b57cec5SDimitry Andric _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
43920b57cec5SDimitry Andric {
43930b57cec5SDimitry Andric   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
43940b57cec5SDimitry Andric                                                   (__v16sf)
43950b57cec5SDimitry Andric                                                   _mm512_setzero_ps (),
43960b57cec5SDimitry Andric                                                   (__mmask16) __U);
43970b57cec5SDimitry Andric }
43980b57cec5SDimitry Andric 
43990b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44000b57cec5SDimitry Andric _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
44010b57cec5SDimitry Andric {
44020b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
44030b57cec5SDimitry Andric                 (__v8df) __W,
44040b57cec5SDimitry Andric                 (__mmask8) __U);
44050b57cec5SDimitry Andric }
44060b57cec5SDimitry Andric 
44070b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44080b57cec5SDimitry Andric _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
44090b57cec5SDimitry Andric {
44100b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
44110b57cec5SDimitry Andric                                                    (__v8df)
44120b57cec5SDimitry Andric                                                    _mm512_setzero_pd (),
44130b57cec5SDimitry Andric                                                    (__mmask8) __U);
44140b57cec5SDimitry Andric }
44150b57cec5SDimitry Andric 
44160b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44170b57cec5SDimitry Andric _mm512_loadu_pd(void const *__p)
44180b57cec5SDimitry Andric {
44190b57cec5SDimitry Andric   struct __loadu_pd {
44200b57cec5SDimitry Andric     __m512d_u __v;
44210b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4422480093f4SDimitry Andric   return ((const struct __loadu_pd*)__p)->__v;
44230b57cec5SDimitry Andric }
44240b57cec5SDimitry Andric 
44250b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
44260b57cec5SDimitry Andric _mm512_loadu_ps(void const *__p)
44270b57cec5SDimitry Andric {
44280b57cec5SDimitry Andric   struct __loadu_ps {
44290b57cec5SDimitry Andric     __m512_u __v;
44300b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4431480093f4SDimitry Andric   return ((const struct __loadu_ps*)__p)->__v;
44320b57cec5SDimitry Andric }
44330b57cec5SDimitry Andric 
44340b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
44350b57cec5SDimitry Andric _mm512_load_ps(void const *__p)
44360b57cec5SDimitry Andric {
4437480093f4SDimitry Andric   return *(const __m512*)__p;
44380b57cec5SDimitry Andric }
44390b57cec5SDimitry Andric 
44400b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
44410b57cec5SDimitry Andric _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
44420b57cec5SDimitry Andric {
44430b57cec5SDimitry Andric   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
44440b57cec5SDimitry Andric                    (__v16sf) __W,
44450b57cec5SDimitry Andric                    (__mmask16) __U);
44460b57cec5SDimitry Andric }
44470b57cec5SDimitry Andric 
44480b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
44490b57cec5SDimitry Andric _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
44500b57cec5SDimitry Andric {
44510b57cec5SDimitry Andric   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
44520b57cec5SDimitry Andric                                                   (__v16sf)
44530b57cec5SDimitry Andric                                                   _mm512_setzero_ps (),
44540b57cec5SDimitry Andric                                                   (__mmask16) __U);
44550b57cec5SDimitry Andric }
44560b57cec5SDimitry Andric 
44570b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44580b57cec5SDimitry Andric _mm512_load_pd(void const *__p)
44590b57cec5SDimitry Andric {
4460480093f4SDimitry Andric   return *(const __m512d*)__p;
44610b57cec5SDimitry Andric }
44620b57cec5SDimitry Andric 
44630b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44640b57cec5SDimitry Andric _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
44650b57cec5SDimitry Andric {
44660b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
44670b57cec5SDimitry Andric                           (__v8df) __W,
44680b57cec5SDimitry Andric                           (__mmask8) __U);
44690b57cec5SDimitry Andric }
44700b57cec5SDimitry Andric 
44710b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
44720b57cec5SDimitry Andric _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
44730b57cec5SDimitry Andric {
44740b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
44750b57cec5SDimitry Andric                                                    (__v8df)
44760b57cec5SDimitry Andric                                                    _mm512_setzero_pd (),
44770b57cec5SDimitry Andric                                                    (__mmask8) __U);
44780b57cec5SDimitry Andric }
44790b57cec5SDimitry Andric 
44800b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
44810b57cec5SDimitry Andric _mm512_load_si512 (void const *__P)
44820b57cec5SDimitry Andric {
4483480093f4SDimitry Andric   return *(const __m512i *) __P;
44840b57cec5SDimitry Andric }
44850b57cec5SDimitry Andric 
44860b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
44870b57cec5SDimitry Andric _mm512_load_epi32 (void const *__P)
44880b57cec5SDimitry Andric {
4489480093f4SDimitry Andric   return *(const __m512i *) __P;
44900b57cec5SDimitry Andric }
44910b57cec5SDimitry Andric 
44920b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
44930b57cec5SDimitry Andric _mm512_load_epi64 (void const *__P)
44940b57cec5SDimitry Andric {
4495480093f4SDimitry Andric   return *(const __m512i *) __P;
44960b57cec5SDimitry Andric }
44970b57cec5SDimitry Andric 
44980b57cec5SDimitry Andric /* SIMD store ops */
44990b57cec5SDimitry Andric 
45000b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45010b57cec5SDimitry Andric _mm512_storeu_epi64 (void *__P, __m512i __A)
45020b57cec5SDimitry Andric {
45030b57cec5SDimitry Andric   struct __storeu_epi64 {
45040b57cec5SDimitry Andric     __m512i_u __v;
45050b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
45060b57cec5SDimitry Andric   ((struct __storeu_epi64*)__P)->__v = __A;
45070b57cec5SDimitry Andric }
45080b57cec5SDimitry Andric 
45090b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45100b57cec5SDimitry Andric _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
45110b57cec5SDimitry Andric {
45120b57cec5SDimitry Andric   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
45130b57cec5SDimitry Andric                                      (__mmask8) __U);
45140b57cec5SDimitry Andric }
45150b57cec5SDimitry Andric 
45160b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45170b57cec5SDimitry Andric _mm512_storeu_si512 (void *__P, __m512i __A)
45180b57cec5SDimitry Andric {
45190b57cec5SDimitry Andric   struct __storeu_si512 {
45200b57cec5SDimitry Andric     __m512i_u __v;
45210b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
45220b57cec5SDimitry Andric   ((struct __storeu_si512*)__P)->__v = __A;
45230b57cec5SDimitry Andric }
45240b57cec5SDimitry Andric 
45250b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45260b57cec5SDimitry Andric _mm512_storeu_epi32 (void *__P, __m512i __A)
45270b57cec5SDimitry Andric {
45280b57cec5SDimitry Andric   struct __storeu_epi32 {
45290b57cec5SDimitry Andric     __m512i_u __v;
45300b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
45310b57cec5SDimitry Andric   ((struct __storeu_epi32*)__P)->__v = __A;
45320b57cec5SDimitry Andric }
45330b57cec5SDimitry Andric 
45340b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45350b57cec5SDimitry Andric _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
45360b57cec5SDimitry Andric {
45370b57cec5SDimitry Andric   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
45380b57cec5SDimitry Andric                                      (__mmask16) __U);
45390b57cec5SDimitry Andric }
45400b57cec5SDimitry Andric 
45410b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45420b57cec5SDimitry Andric _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
45430b57cec5SDimitry Andric {
45440b57cec5SDimitry Andric   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
45450b57cec5SDimitry Andric }
45460b57cec5SDimitry Andric 
45470b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45480b57cec5SDimitry Andric _mm512_storeu_pd(void *__P, __m512d __A)
45490b57cec5SDimitry Andric {
45500b57cec5SDimitry Andric   struct __storeu_pd {
45510b57cec5SDimitry Andric     __m512d_u __v;
45520b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
45530b57cec5SDimitry Andric   ((struct __storeu_pd*)__P)->__v = __A;
45540b57cec5SDimitry Andric }
45550b57cec5SDimitry Andric 
45560b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45570b57cec5SDimitry Andric _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
45580b57cec5SDimitry Andric {
45590b57cec5SDimitry Andric   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
45600b57cec5SDimitry Andric                                    (__mmask16) __U);
45610b57cec5SDimitry Andric }
45620b57cec5SDimitry Andric 
45630b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45640b57cec5SDimitry Andric _mm512_storeu_ps(void *__P, __m512 __A)
45650b57cec5SDimitry Andric {
45660b57cec5SDimitry Andric   struct __storeu_ps {
45670b57cec5SDimitry Andric     __m512_u __v;
45680b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
45690b57cec5SDimitry Andric   ((struct __storeu_ps*)__P)->__v = __A;
45700b57cec5SDimitry Andric }
45710b57cec5SDimitry Andric 
45720b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45730b57cec5SDimitry Andric _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
45740b57cec5SDimitry Andric {
45750b57cec5SDimitry Andric   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
45760b57cec5SDimitry Andric }
45770b57cec5SDimitry Andric 
45780b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45790b57cec5SDimitry Andric _mm512_store_pd(void *__P, __m512d __A)
45800b57cec5SDimitry Andric {
45810b57cec5SDimitry Andric   *(__m512d*)__P = __A;
45820b57cec5SDimitry Andric }
45830b57cec5SDimitry Andric 
45840b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45850b57cec5SDimitry Andric _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
45860b57cec5SDimitry Andric {
45870b57cec5SDimitry Andric   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
45880b57cec5SDimitry Andric                                    (__mmask16) __U);
45890b57cec5SDimitry Andric }
45900b57cec5SDimitry Andric 
45910b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45920b57cec5SDimitry Andric _mm512_store_ps(void *__P, __m512 __A)
45930b57cec5SDimitry Andric {
45940b57cec5SDimitry Andric   *(__m512*)__P = __A;
45950b57cec5SDimitry Andric }
45960b57cec5SDimitry Andric 
45970b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
45980b57cec5SDimitry Andric _mm512_store_si512 (void *__P, __m512i __A)
45990b57cec5SDimitry Andric {
46000b57cec5SDimitry Andric   *(__m512i *) __P = __A;
46010b57cec5SDimitry Andric }
46020b57cec5SDimitry Andric 
46030b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
46040b57cec5SDimitry Andric _mm512_store_epi32 (void *__P, __m512i __A)
46050b57cec5SDimitry Andric {
46060b57cec5SDimitry Andric   *(__m512i *) __P = __A;
46070b57cec5SDimitry Andric }
46080b57cec5SDimitry Andric 
46090b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512
46100b57cec5SDimitry Andric _mm512_store_epi64 (void *__P, __m512i __A)
46110b57cec5SDimitry Andric {
46120b57cec5SDimitry Andric   *(__m512i *) __P = __A;
46130b57cec5SDimitry Andric }
46140b57cec5SDimitry Andric 
46150b57cec5SDimitry Andric /* Mask ops */
46160b57cec5SDimitry Andric 
46170b57cec5SDimitry Andric static __inline __mmask16 __DEFAULT_FN_ATTRS
46180b57cec5SDimitry Andric _mm512_knot(__mmask16 __M)
46190b57cec5SDimitry Andric {
46200b57cec5SDimitry Andric   return __builtin_ia32_knothi(__M);
46210b57cec5SDimitry Andric }
46220b57cec5SDimitry Andric 
46230b57cec5SDimitry Andric /* Integer compare */
46240b57cec5SDimitry Andric 
/* epi32 comparison shorthands.  Each macro forwards its operands to
   _mm512_cmp_epi32_mask (or, for the mask_ forms, to
   _mm512_mask_cmp_epi32_mask with k first) with a fixed _MM_CMPINT_*
   predicate. */

/* EQ */
#define _mm512_cmpeq_epi32_mask(A, B)                                         \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)

/* GE */
#define _mm512_cmpge_epi32_mask(A, B)                                         \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)

/* GT */
#define _mm512_cmpgt_epi32_mask(A, B)                                         \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)

/* LE */
#define _mm512_cmple_epi32_mask(A, B)                                         \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)

/* LT */
#define _mm512_cmplt_epi32_mask(A, B)                                         \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)

/* NE */
#define _mm512_cmpneq_epi32_mask(A, B)                                        \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)                                \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
46490b57cec5SDimitry Andric 
/* epu32 comparison shorthands.  Each macro forwards its operands to
   _mm512_cmp_epu32_mask (or, for the mask_ forms, to
   _mm512_mask_cmp_epu32_mask with k first) with a fixed _MM_CMPINT_*
   predicate. */

/* EQ */
#define _mm512_cmpeq_epu32_mask(A, B)                                         \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)

/* GE */
#define _mm512_cmpge_epu32_mask(A, B)                                         \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)

/* GT */
#define _mm512_cmpgt_epu32_mask(A, B)                                         \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)

/* LE */
#define _mm512_cmple_epu32_mask(A, B)                                         \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)

/* LT */
#define _mm512_cmplt_epu32_mask(A, B)                                         \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)

/* NE */
#define _mm512_cmpneq_epu32_mask(A, B)                                        \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B)                                \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
46740b57cec5SDimitry Andric 
/* epi64 comparison shorthands.  Each macro forwards its operands to
   _mm512_cmp_epi64_mask (or, for the mask_ forms, to
   _mm512_mask_cmp_epi64_mask with k first) with a fixed _MM_CMPINT_*
   predicate. */

/* EQ */
#define _mm512_cmpeq_epi64_mask(A, B)                                         \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)

/* GE */
#define _mm512_cmpge_epi64_mask(A, B)                                         \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)

/* GT */
#define _mm512_cmpgt_epi64_mask(A, B)                                         \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)

/* LE */
#define _mm512_cmple_epi64_mask(A, B)                                         \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)

/* LT */
#define _mm512_cmplt_epi64_mask(A, B)                                         \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)

/* NE */
#define _mm512_cmpneq_epi64_mask(A, B)                                        \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)                                \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
46990b57cec5SDimitry Andric 
/* epu64 comparison shorthands.  Each macro forwards its operands to
   _mm512_cmp_epu64_mask (or, for the mask_ forms, to
   _mm512_mask_cmp_epu64_mask with k first) with a fixed _MM_CMPINT_*
   predicate. */

/* EQ */
#define _mm512_cmpeq_epu64_mask(A, B)                                         \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)

/* GE */
#define _mm512_cmpge_epu64_mask(A, B)                                         \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)

/* GT */
#define _mm512_cmpgt_epu64_mask(A, B)                                         \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)

/* LE */
#define _mm512_cmple_epu64_mask(A, B)                                         \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)

/* LT */
#define _mm512_cmplt_epu64_mask(A, B)                                         \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B)                                 \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)

/* NE */
#define _mm512_cmpneq_epu64_mask(A, B)                                        \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B)                                \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
47240b57cec5SDimitry Andric 
47250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47260b57cec5SDimitry Andric _mm512_cvtepi8_epi32(__m128i __A)
47270b57cec5SDimitry Andric {
47280b57cec5SDimitry Andric   /* This function always performs a signed extension, but __v16qi is a char
47290b57cec5SDimitry Andric      which may be signed or unsigned, so use __v16qs. */
47300b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
47310b57cec5SDimitry Andric }
47320b57cec5SDimitry Andric 
47330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47340b57cec5SDimitry Andric _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
47350b57cec5SDimitry Andric {
47360b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
47370b57cec5SDimitry Andric                                              (__v16si)_mm512_cvtepi8_epi32(__A),
47380b57cec5SDimitry Andric                                              (__v16si)__W);
47390b57cec5SDimitry Andric }
47400b57cec5SDimitry Andric 
47410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47420b57cec5SDimitry Andric _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
47430b57cec5SDimitry Andric {
47440b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
47450b57cec5SDimitry Andric                                              (__v16si)_mm512_cvtepi8_epi32(__A),
47460b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
47470b57cec5SDimitry Andric }
47480b57cec5SDimitry Andric 
47490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47500b57cec5SDimitry Andric _mm512_cvtepi8_epi64(__m128i __A)
47510b57cec5SDimitry Andric {
47520b57cec5SDimitry Andric   /* This function always performs a signed extension, but __v16qi is a char
47530b57cec5SDimitry Andric      which may be signed or unsigned, so use __v16qs. */
47540b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
47550b57cec5SDimitry Andric }
47560b57cec5SDimitry Andric 
47570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47580b57cec5SDimitry Andric _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
47590b57cec5SDimitry Andric {
47600b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
47610b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi8_epi64(__A),
47620b57cec5SDimitry Andric                                              (__v8di)__W);
47630b57cec5SDimitry Andric }
47640b57cec5SDimitry Andric 
47650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47660b57cec5SDimitry Andric _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
47670b57cec5SDimitry Andric {
47680b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
47690b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi8_epi64(__A),
47700b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512 ());
47710b57cec5SDimitry Andric }
47720b57cec5SDimitry Andric 
47730b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47740b57cec5SDimitry Andric _mm512_cvtepi32_epi64(__m256i __X)
47750b57cec5SDimitry Andric {
47760b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
47770b57cec5SDimitry Andric }
47780b57cec5SDimitry Andric 
47790b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47800b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
47810b57cec5SDimitry Andric {
47820b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
47830b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi32_epi64(__X),
47840b57cec5SDimitry Andric                                              (__v8di)__W);
47850b57cec5SDimitry Andric }
47860b57cec5SDimitry Andric 
47870b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47880b57cec5SDimitry Andric _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
47890b57cec5SDimitry Andric {
47900b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
47910b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi32_epi64(__X),
47920b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
47930b57cec5SDimitry Andric }
47940b57cec5SDimitry Andric 
47950b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
47960b57cec5SDimitry Andric _mm512_cvtepi16_epi32(__m256i __A)
47970b57cec5SDimitry Andric {
47980b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
47990b57cec5SDimitry Andric }
48000b57cec5SDimitry Andric 
48010b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48020b57cec5SDimitry Andric _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
48030b57cec5SDimitry Andric {
48040b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
48050b57cec5SDimitry Andric                                             (__v16si)_mm512_cvtepi16_epi32(__A),
48060b57cec5SDimitry Andric                                             (__v16si)__W);
48070b57cec5SDimitry Andric }
48080b57cec5SDimitry Andric 
48090b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48100b57cec5SDimitry Andric _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
48110b57cec5SDimitry Andric {
48120b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
48130b57cec5SDimitry Andric                                             (__v16si)_mm512_cvtepi16_epi32(__A),
48140b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512 ());
48150b57cec5SDimitry Andric }
48160b57cec5SDimitry Andric 
48170b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48180b57cec5SDimitry Andric _mm512_cvtepi16_epi64(__m128i __A)
48190b57cec5SDimitry Andric {
48200b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
48210b57cec5SDimitry Andric }
48220b57cec5SDimitry Andric 
48230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48240b57cec5SDimitry Andric _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
48250b57cec5SDimitry Andric {
48260b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
48270b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi16_epi64(__A),
48280b57cec5SDimitry Andric                                              (__v8di)__W);
48290b57cec5SDimitry Andric }
48300b57cec5SDimitry Andric 
48310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48320b57cec5SDimitry Andric _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
48330b57cec5SDimitry Andric {
48340b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
48350b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepi16_epi64(__A),
48360b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
48370b57cec5SDimitry Andric }
48380b57cec5SDimitry Andric 
48390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48400b57cec5SDimitry Andric _mm512_cvtepu8_epi32(__m128i __A)
48410b57cec5SDimitry Andric {
48420b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
48430b57cec5SDimitry Andric }
48440b57cec5SDimitry Andric 
48450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48460b57cec5SDimitry Andric _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
48470b57cec5SDimitry Andric {
48480b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
48490b57cec5SDimitry Andric                                              (__v16si)_mm512_cvtepu8_epi32(__A),
48500b57cec5SDimitry Andric                                              (__v16si)__W);
48510b57cec5SDimitry Andric }
48520b57cec5SDimitry Andric 
48530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48540b57cec5SDimitry Andric _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
48550b57cec5SDimitry Andric {
48560b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
48570b57cec5SDimitry Andric                                              (__v16si)_mm512_cvtepu8_epi32(__A),
48580b57cec5SDimitry Andric                                              (__v16si)_mm512_setzero_si512());
48590b57cec5SDimitry Andric }
48600b57cec5SDimitry Andric 
48610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48620b57cec5SDimitry Andric _mm512_cvtepu8_epi64(__m128i __A)
48630b57cec5SDimitry Andric {
48640b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
48650b57cec5SDimitry Andric }
48660b57cec5SDimitry Andric 
48670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48680b57cec5SDimitry Andric _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
48690b57cec5SDimitry Andric {
48700b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
48710b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu8_epi64(__A),
48720b57cec5SDimitry Andric                                              (__v8di)__W);
48730b57cec5SDimitry Andric }
48740b57cec5SDimitry Andric 
48750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48760b57cec5SDimitry Andric _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
48770b57cec5SDimitry Andric {
48780b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
48790b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu8_epi64(__A),
48800b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
48810b57cec5SDimitry Andric }
48820b57cec5SDimitry Andric 
48830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48840b57cec5SDimitry Andric _mm512_cvtepu32_epi64(__m256i __X)
48850b57cec5SDimitry Andric {
48860b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
48870b57cec5SDimitry Andric }
48880b57cec5SDimitry Andric 
48890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48900b57cec5SDimitry Andric _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
48910b57cec5SDimitry Andric {
48920b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
48930b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu32_epi64(__X),
48940b57cec5SDimitry Andric                                              (__v8di)__W);
48950b57cec5SDimitry Andric }
48960b57cec5SDimitry Andric 
48970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
48980b57cec5SDimitry Andric _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
48990b57cec5SDimitry Andric {
49000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
49010b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu32_epi64(__X),
49020b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
49030b57cec5SDimitry Andric }
49040b57cec5SDimitry Andric 
49050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49060b57cec5SDimitry Andric _mm512_cvtepu16_epi32(__m256i __A)
49070b57cec5SDimitry Andric {
49080b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
49090b57cec5SDimitry Andric }
49100b57cec5SDimitry Andric 
49110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49120b57cec5SDimitry Andric _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
49130b57cec5SDimitry Andric {
49140b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
49150b57cec5SDimitry Andric                                             (__v16si)_mm512_cvtepu16_epi32(__A),
49160b57cec5SDimitry Andric                                             (__v16si)__W);
49170b57cec5SDimitry Andric }
49180b57cec5SDimitry Andric 
49190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49200b57cec5SDimitry Andric _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
49210b57cec5SDimitry Andric {
49220b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
49230b57cec5SDimitry Andric                                             (__v16si)_mm512_cvtepu16_epi32(__A),
49240b57cec5SDimitry Andric                                             (__v16si)_mm512_setzero_si512());
49250b57cec5SDimitry Andric }
49260b57cec5SDimitry Andric 
49270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49280b57cec5SDimitry Andric _mm512_cvtepu16_epi64(__m128i __A)
49290b57cec5SDimitry Andric {
49300b57cec5SDimitry Andric   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
49310b57cec5SDimitry Andric }
49320b57cec5SDimitry Andric 
49330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49340b57cec5SDimitry Andric _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
49350b57cec5SDimitry Andric {
49360b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
49370b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu16_epi64(__A),
49380b57cec5SDimitry Andric                                              (__v8di)__W);
49390b57cec5SDimitry Andric }
49400b57cec5SDimitry Andric 
49410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49420b57cec5SDimitry Andric _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
49430b57cec5SDimitry Andric {
49440b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
49450b57cec5SDimitry Andric                                              (__v8di)_mm512_cvtepu16_epi64(__A),
49460b57cec5SDimitry Andric                                              (__v8di)_mm512_setzero_si512());
49470b57cec5SDimitry Andric }
49480b57cec5SDimitry Andric 
49490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49500b57cec5SDimitry Andric _mm512_rorv_epi32 (__m512i __A, __m512i __B)
49510b57cec5SDimitry Andric {
49520b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
49530b57cec5SDimitry Andric }
49540b57cec5SDimitry Andric 
49550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49560b57cec5SDimitry Andric _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
49570b57cec5SDimitry Andric {
49580b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
49590b57cec5SDimitry Andric                                            (__v16si)_mm512_rorv_epi32(__A, __B),
49600b57cec5SDimitry Andric                                            (__v16si)__W);
49610b57cec5SDimitry Andric }
49620b57cec5SDimitry Andric 
49630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49640b57cec5SDimitry Andric _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
49650b57cec5SDimitry Andric {
49660b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
49670b57cec5SDimitry Andric                                            (__v16si)_mm512_rorv_epi32(__A, __B),
49680b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
49690b57cec5SDimitry Andric }
49700b57cec5SDimitry Andric 
49710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49720b57cec5SDimitry Andric _mm512_rorv_epi64 (__m512i __A, __m512i __B)
49730b57cec5SDimitry Andric {
49740b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
49750b57cec5SDimitry Andric }
49760b57cec5SDimitry Andric 
49770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49780b57cec5SDimitry Andric _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
49790b57cec5SDimitry Andric {
49800b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
49810b57cec5SDimitry Andric                                             (__v8di)_mm512_rorv_epi64(__A, __B),
49820b57cec5SDimitry Andric                                             (__v8di)__W);
49830b57cec5SDimitry Andric }
49840b57cec5SDimitry Andric 
49850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
49860b57cec5SDimitry Andric _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
49870b57cec5SDimitry Andric {
49880b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
49890b57cec5SDimitry Andric                                             (__v8di)_mm512_rorv_epi64(__A, __B),
49900b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
49910b57cec5SDimitry Andric }
49920b57cec5SDimitry Andric 
49930b57cec5SDimitry Andric 
49940b57cec5SDimitry Andric 
/* Generic packed-integer comparisons producing a bitmask.
   a, b: the __m512i operands; p: the comparison predicate, cast to int
   (the callers visible in this file pass _MM_CMPINT_* values).
   The result is __mmask16 for 32-bit elements and __mmask8 for 64-bit
   elements.  The epu forms use the same signed vector casts as the epi
   forms; they differ only in calling the ucmp builtin instead of cmp.
   These are macros rather than functions; presumably the builtins need p as
   a compile-time constant -- TODO confirm. */

/* Unmasked forms: an all-ones mask is passed as the builtin's last
   argument. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

/* Masked forms: the caller's mask m is passed as the builtin's last argument
   in place of the all-ones mask. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
50340b57cec5SDimitry Andric 
/* Rotate-left by a single count b applied to every element of a.
   b is cast to int before reaching the builtin.  These are macros rather
   than functions; presumably the builtin needs b as a compile-time
   constant -- TODO confirm.
   mask_ forms: result elements whose bit in U is clear come from W.
   maskz_ forms: result elements whose bit in U is clear are zero. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
50600b57cec5SDimitry Andric 
50610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50620b57cec5SDimitry Andric _mm512_rolv_epi32 (__m512i __A, __m512i __B)
50630b57cec5SDimitry Andric {
50640b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
50650b57cec5SDimitry Andric }
50660b57cec5SDimitry Andric 
50670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50680b57cec5SDimitry Andric _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
50690b57cec5SDimitry Andric {
50700b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
50710b57cec5SDimitry Andric                                            (__v16si)_mm512_rolv_epi32(__A, __B),
50720b57cec5SDimitry Andric                                            (__v16si)__W);
50730b57cec5SDimitry Andric }
50740b57cec5SDimitry Andric 
50750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50760b57cec5SDimitry Andric _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
50770b57cec5SDimitry Andric {
50780b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
50790b57cec5SDimitry Andric                                            (__v16si)_mm512_rolv_epi32(__A, __B),
50800b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
50810b57cec5SDimitry Andric }
50820b57cec5SDimitry Andric 
50830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50840b57cec5SDimitry Andric _mm512_rolv_epi64 (__m512i __A, __m512i __B)
50850b57cec5SDimitry Andric {
50860b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
50870b57cec5SDimitry Andric }
50880b57cec5SDimitry Andric 
50890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50900b57cec5SDimitry Andric _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
50910b57cec5SDimitry Andric {
50920b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
50930b57cec5SDimitry Andric                                             (__v8di)_mm512_rolv_epi64(__A, __B),
50940b57cec5SDimitry Andric                                             (__v8di)__W);
50950b57cec5SDimitry Andric }
50960b57cec5SDimitry Andric 
50970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
50980b57cec5SDimitry Andric _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
50990b57cec5SDimitry Andric {
51000b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512(__U,
51010b57cec5SDimitry Andric                                             (__v8di)_mm512_rolv_epi64(__A, __B),
51020b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
51030b57cec5SDimitry Andric }
51040b57cec5SDimitry Andric 
/* Rotate-right by a single count B applied to every element of A.
   B is cast to int before reaching the builtin.  These are macros rather
   than functions; presumably the builtin needs B as a compile-time
   constant -- TODO confirm.
   mask_ forms: result elements whose bit in U is clear come from W.
   maskz_ forms: result elements whose bit in U is clear are zero. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
51300b57cec5SDimitry Andric 
51310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51325ffd83dbSDimitry Andric _mm512_slli_epi32(__m512i __A, unsigned int __B)
51330b57cec5SDimitry Andric {
513481ad6265SDimitry Andric   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
51350b57cec5SDimitry Andric }
51360b57cec5SDimitry Andric 
51370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51385ffd83dbSDimitry Andric _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
51395ffd83dbSDimitry Andric                        unsigned int __B)
51400b57cec5SDimitry Andric {
51410b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
51420b57cec5SDimitry Andric                                          (__v16si)_mm512_slli_epi32(__A, __B),
51430b57cec5SDimitry Andric                                          (__v16si)__W);
51440b57cec5SDimitry Andric }
51450b57cec5SDimitry Andric 
51460b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51475ffd83dbSDimitry Andric _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
51480b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
51490b57cec5SDimitry Andric                                          (__v16si)_mm512_slli_epi32(__A, __B),
51500b57cec5SDimitry Andric                                          (__v16si)_mm512_setzero_si512());
51510b57cec5SDimitry Andric }
51520b57cec5SDimitry Andric 
51530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51545ffd83dbSDimitry Andric _mm512_slli_epi64(__m512i __A, unsigned int __B)
51550b57cec5SDimitry Andric {
515681ad6265SDimitry Andric   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
51570b57cec5SDimitry Andric }
51580b57cec5SDimitry Andric 
51590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51605ffd83dbSDimitry Andric _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
51610b57cec5SDimitry Andric {
51620b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
51630b57cec5SDimitry Andric                                           (__v8di)_mm512_slli_epi64(__A, __B),
51640b57cec5SDimitry Andric                                           (__v8di)__W);
51650b57cec5SDimitry Andric }
51660b57cec5SDimitry Andric 
51670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51685ffd83dbSDimitry Andric _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
51690b57cec5SDimitry Andric {
51700b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
51710b57cec5SDimitry Andric                                           (__v8di)_mm512_slli_epi64(__A, __B),
51720b57cec5SDimitry Andric                                           (__v8di)_mm512_setzero_si512());
51730b57cec5SDimitry Andric }
51740b57cec5SDimitry Andric 
51750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51765ffd83dbSDimitry Andric _mm512_srli_epi32(__m512i __A, unsigned int __B)
51770b57cec5SDimitry Andric {
517881ad6265SDimitry Andric   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
51790b57cec5SDimitry Andric }
51800b57cec5SDimitry Andric 
51810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51825ffd83dbSDimitry Andric _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
51835ffd83dbSDimitry Andric                        unsigned int __B)
51840b57cec5SDimitry Andric {
51850b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
51860b57cec5SDimitry Andric                                          (__v16si)_mm512_srli_epi32(__A, __B),
51870b57cec5SDimitry Andric                                          (__v16si)__W);
51880b57cec5SDimitry Andric }
51890b57cec5SDimitry Andric 
51900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51915ffd83dbSDimitry Andric _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
51920b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
51930b57cec5SDimitry Andric                                          (__v16si)_mm512_srli_epi32(__A, __B),
51940b57cec5SDimitry Andric                                          (__v16si)_mm512_setzero_si512());
51950b57cec5SDimitry Andric }
51960b57cec5SDimitry Andric 
51970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
51985ffd83dbSDimitry Andric _mm512_srli_epi64(__m512i __A, unsigned int __B)
51990b57cec5SDimitry Andric {
520081ad6265SDimitry Andric   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
52010b57cec5SDimitry Andric }
52020b57cec5SDimitry Andric 
52030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52045ffd83dbSDimitry Andric _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
52055ffd83dbSDimitry Andric                        unsigned int __B)
52060b57cec5SDimitry Andric {
52070b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
52080b57cec5SDimitry Andric                                           (__v8di)_mm512_srli_epi64(__A, __B),
52090b57cec5SDimitry Andric                                           (__v8di)__W);
52100b57cec5SDimitry Andric }
52110b57cec5SDimitry Andric 
52120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52135ffd83dbSDimitry Andric _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
52145ffd83dbSDimitry Andric                         unsigned int __B)
52150b57cec5SDimitry Andric {
52160b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
52170b57cec5SDimitry Andric                                           (__v8di)_mm512_srli_epi64(__A, __B),
52180b57cec5SDimitry Andric                                           (__v8di)_mm512_setzero_si512());
52190b57cec5SDimitry Andric }
52200b57cec5SDimitry Andric 
52210b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52220b57cec5SDimitry Andric _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
52230b57cec5SDimitry Andric {
52240b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
52250b57cec5SDimitry Andric               (__v16si) __W,
52260b57cec5SDimitry Andric               (__mmask16) __U);
52270b57cec5SDimitry Andric }
52280b57cec5SDimitry Andric 
52290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52300b57cec5SDimitry Andric _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
52310b57cec5SDimitry Andric {
52320b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
52330b57cec5SDimitry Andric               (__v16si)
52340b57cec5SDimitry Andric               _mm512_setzero_si512 (),
52350b57cec5SDimitry Andric               (__mmask16) __U);
52360b57cec5SDimitry Andric }
52370b57cec5SDimitry Andric 
52380b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
52390b57cec5SDimitry Andric _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
52400b57cec5SDimitry Andric {
52410b57cec5SDimitry Andric   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
52420b57cec5SDimitry Andric           (__mmask16) __U);
52430b57cec5SDimitry Andric }
52440b57cec5SDimitry Andric 
52450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52460b57cec5SDimitry Andric _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
52470b57cec5SDimitry Andric {
52480b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
52490b57cec5SDimitry Andric                  (__v16si) __A,
52500b57cec5SDimitry Andric                  (__v16si) __W);
52510b57cec5SDimitry Andric }
52520b57cec5SDimitry Andric 
52530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52540b57cec5SDimitry Andric _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
52550b57cec5SDimitry Andric {
52560b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
52570b57cec5SDimitry Andric                  (__v16si) __A,
52580b57cec5SDimitry Andric                  (__v16si) _mm512_setzero_si512 ());
52590b57cec5SDimitry Andric }
52600b57cec5SDimitry Andric 
52610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52620b57cec5SDimitry Andric _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
52630b57cec5SDimitry Andric {
52640b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
52650b57cec5SDimitry Andric                  (__v8di) __A,
52660b57cec5SDimitry Andric                  (__v8di) __W);
52670b57cec5SDimitry Andric }
52680b57cec5SDimitry Andric 
52690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52700b57cec5SDimitry Andric _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
52710b57cec5SDimitry Andric {
52720b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
52730b57cec5SDimitry Andric                  (__v8di) __A,
52740b57cec5SDimitry Andric                  (__v8di) _mm512_setzero_si512 ());
52750b57cec5SDimitry Andric }
52760b57cec5SDimitry Andric 
52770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52780b57cec5SDimitry Andric _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
52790b57cec5SDimitry Andric {
52800b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
52810b57cec5SDimitry Andric               (__v8di) __W,
52820b57cec5SDimitry Andric               (__mmask8) __U);
52830b57cec5SDimitry Andric }
52840b57cec5SDimitry Andric 
52850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
52860b57cec5SDimitry Andric _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
52870b57cec5SDimitry Andric {
52880b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
52890b57cec5SDimitry Andric               (__v8di)
52900b57cec5SDimitry Andric               _mm512_setzero_si512 (),
52910b57cec5SDimitry Andric               (__mmask8) __U);
52920b57cec5SDimitry Andric }
52930b57cec5SDimitry Andric 
52940b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
52950b57cec5SDimitry Andric _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
52960b57cec5SDimitry Andric {
52970b57cec5SDimitry Andric   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
52980b57cec5SDimitry Andric           (__mmask8) __U);
52990b57cec5SDimitry Andric }
53000b57cec5SDimitry Andric 
53010b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
53020b57cec5SDimitry Andric _mm512_movedup_pd (__m512d __A)
53030b57cec5SDimitry Andric {
53040b57cec5SDimitry Andric   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
53050b57cec5SDimitry Andric                                           0, 0, 2, 2, 4, 4, 6, 6);
53060b57cec5SDimitry Andric }
53070b57cec5SDimitry Andric 
53080b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
53090b57cec5SDimitry Andric _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
53100b57cec5SDimitry Andric {
53110b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
53120b57cec5SDimitry Andric                                               (__v8df)_mm512_movedup_pd(__A),
53130b57cec5SDimitry Andric                                               (__v8df)__W);
53140b57cec5SDimitry Andric }
53150b57cec5SDimitry Andric 
53160b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
53170b57cec5SDimitry Andric _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
53180b57cec5SDimitry Andric {
53190b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
53200b57cec5SDimitry Andric                                               (__v8df)_mm512_movedup_pd(__A),
53210b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
53220b57cec5SDimitry Andric }
53230b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_mask with A and B as __v8df, C as
 * __v8di, imm and R as int, and (__mmask8)-1 as the mask operand. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)))
53290b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_mask with A and B as __v8df, C as
 * __v8di, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
53350b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_mask with A and B as __v8df, C as
 * __v8di, imm as int, (__mmask8)-1 as the mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
53420b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_mask with A and B as __v8df, C as
 * __v8di, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
53490b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_maskz with A and B as __v8df, C as
 * __v8di, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
53560b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmpd512_maskz with A and B as __v8df, C as
 * __v8di, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
53630b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_mask with A and B as __v16sf, C as
 * __v16si, imm and R as int, and (__mmask16)-1 as the mask operand. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      (int)(R)))
53690b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_mask with A and B as __v16sf, C as
 * __v16si, imm and R as int, and U as the __mmask16 mask operand. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)))
53750b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_mask with A and B as __v16sf, C as
 * __v16si, imm as int, (__mmask16)-1 as the mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
53820b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_mask with A and B as __v16sf, C as
 * __v16si, imm as int, U as the __mmask16 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
53890b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_maskz with A and B as __v16sf, C as
 * __v16si, imm and R as int, and U as the __mmask16 mask operand. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)))
53960b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmps512_maskz with A and B as __v16sf, C as
 * __v16si, imm as int, U as the __mmask16 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
54030b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_mask with A and B as __v2df, C as
 * __v2di, imm and R as int, and (__mmask8)-1 as the mask operand. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)))
54090b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_mask with A and B as __v2df, C as
 * __v2di, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
54150b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_mask with A and B as __v2df, C as
 * __v2di, imm as int, (__mmask8)-1 as the mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
54220b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_mask with A and B as __v2df, C as
 * __v2di, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
54290b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_maskz with A and B as __v2df, C as
 * __v2di, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
54350b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmsd_maskz with A and B as __v2df, C as
 * __v2di, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
54420b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_mask with A and B as __v4sf, C as
 * __v4si, imm and R as int, and (__mmask8)-1 as the mask operand. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)))
54480b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_mask with A and B as __v4sf, C as
 * __v4si, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
54540b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_mask with A and B as __v4sf, C as
 * __v4si, imm as int, (__mmask8)-1 as the mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
54610b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_mask with A and B as __v4sf, C as
 * __v4si, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
54680b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_maskz with A and B as __v4sf, C as
 * __v4si, imm and R as int, and U as the __mmask8 mask operand. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)))
54740b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_fixupimmss_maskz with A and B as __v4sf, C as
 * __v4si, imm as int, U as the __mmask8 mask operand and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
54810b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpsd128_round_mask with A and B as __v2df,
 * a zero vector from _mm_setzero_pd() as the third operand, (__mmask8)-1 as
 * the mask operand and R as int. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)))
54870b57cec5SDimitry Andric 
54880b57cec5SDimitry Andric 
54890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
54900b57cec5SDimitry Andric _mm_getexp_sd (__m128d __A, __m128d __B)
54910b57cec5SDimitry Andric {
54920b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
54930b57cec5SDimitry Andric                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
54940b57cec5SDimitry Andric }
54950b57cec5SDimitry Andric 
54960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
54970b57cec5SDimitry Andric _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
54980b57cec5SDimitry Andric {
54990b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
55000b57cec5SDimitry Andric           (__v2df) __B,
55010b57cec5SDimitry Andric           (__v2df) __W,
55020b57cec5SDimitry Andric           (__mmask8) __U,
55030b57cec5SDimitry Andric           _MM_FROUND_CUR_DIRECTION);
55040b57cec5SDimitry Andric }
55050b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpsd128_round_mask with A, B and W as
 * __v2df, U as the __mmask8 mask operand and R as int. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)))
55110b57cec5SDimitry Andric 
55120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
55130b57cec5SDimitry Andric _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
55140b57cec5SDimitry Andric {
55150b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
55160b57cec5SDimitry Andric           (__v2df) __B,
55170b57cec5SDimitry Andric           (__v2df) _mm_setzero_pd (),
55180b57cec5SDimitry Andric           (__mmask8) __U,
55190b57cec5SDimitry Andric           _MM_FROUND_CUR_DIRECTION);
55200b57cec5SDimitry Andric }
55210b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpsd128_round_mask with A and B as __v2df,
 * a zero vector from _mm_setzero_pd() as the third operand, U as the
 * __mmask8 mask operand and R as int. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
55270b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpss128_round_mask with A and B as __v4sf,
 * a zero vector from _mm_setzero_ps() as the third operand, (__mmask8)-1 as
 * the mask operand and R as int. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)))
55330b57cec5SDimitry Andric 
55340b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
55350b57cec5SDimitry Andric _mm_getexp_ss (__m128 __A, __m128 __B)
55360b57cec5SDimitry Andric {
55370b57cec5SDimitry Andric   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
55380b57cec5SDimitry Andric                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
55390b57cec5SDimitry Andric }
55400b57cec5SDimitry Andric 
55410b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
55420b57cec5SDimitry Andric _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
55430b57cec5SDimitry Andric {
55440b57cec5SDimitry Andric  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
55450b57cec5SDimitry Andric           (__v4sf) __B,
55460b57cec5SDimitry Andric           (__v4sf) __W,
55470b57cec5SDimitry Andric           (__mmask8) __U,
55480b57cec5SDimitry Andric           _MM_FROUND_CUR_DIRECTION);
55490b57cec5SDimitry Andric }
55500b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpss128_round_mask with A, B and W as
 * __v4sf, U as the __mmask8 mask operand and R as int. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)))
55560b57cec5SDimitry Andric 
55570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
55580b57cec5SDimitry Andric _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
55590b57cec5SDimitry Andric {
55600b57cec5SDimitry Andric  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
55610b57cec5SDimitry Andric           (__v4sf) __B,
55620b57cec5SDimitry Andric           (__v4sf) _mm_setzero_ps (),
55630b57cec5SDimitry Andric           (__mmask8) __U,
55640b57cec5SDimitry Andric           _MM_FROUND_CUR_DIRECTION);
55650b57cec5SDimitry Andric }
55660b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getexpss128_round_mask with A and B as __v4sf,
 * a zero vector from _mm_setzero_ps() as the third operand, U as the
 * __mmask8 mask operand and R as int. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)))
55720b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_pd(), (__mmask8)-1 and R as int. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)))
55790b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_pd(), (__mmask8)-1 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
55870b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * W as __v2df, U as __mmask8 and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
55950b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * W as __v2df, U as __mmask8 and R as int. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)))
56020b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_pd(), U as __mmask8 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
56100b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantsd_round_mask with A and B as __v2df.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_pd(), U as __mmask8 and R as int. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
56170b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantss_round_mask with A and B as __v4sf.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_ps(), (__mmask8)-1 and R as int. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)))
56240b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_getmantss_round_mask with A and B as __v4sf.
 * C and D are packed into one int, (D << 2) | C. The remaining operands are
 * a zero vector from _mm_setzero_ps(), (__mmask8)-1 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
56320b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_getmantss_round_mask with A and B viewed as
/// __v4sf, the immediate ((D) << 2) | (C), W (as __v4sf) as the fourth operand,
/// U as the __mmask8 mask and _MM_FROUND_CUR_DIRECTION as the trailing
/// argument.
/// NOTE(review): W is presumably the value kept when the mask bit is clear --
/// confirm against the builtin's definition.
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D) << 2) | (C)), (__v4sf)(__m128)(W), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
56400b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_getmantss_round_mask with A and B viewed as
/// __v4sf, the immediate ((D) << 2) | (C), W (as __v4sf) as the fourth operand,
/// U as the __mmask8 mask and R as the trailing int argument.
/// NOTE(review): W is presumably the pass-through value and R the rounding
/// control -- confirm against the builtin's definition.
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D) << 2) | (C)), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
56470b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_getmantss_round_mask with A and B viewed as
/// __v4sf, the immediate ((D) << 2) | (C), _mm_setzero_ps() as the fourth
/// operand, U as the __mmask8 mask and _MM_FROUND_CUR_DIRECTION as the
/// trailing argument.
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D) << 2) | (C)), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
56550b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_getmantss_round_mask with A and B viewed as
/// __v4sf, the immediate ((D) << 2) | (C), _mm_setzero_ps() as the fourth
/// operand, U as the __mmask8 mask and R as the trailing int argument.
/// NOTE(review): R is presumably the rounding control -- confirm.
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D) << 2) | (C)), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
56620b57cec5SDimitry Andric 
56630b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
56640b57cec5SDimitry Andric _mm512_kmov (__mmask16 __A)
56650b57cec5SDimitry Andric {
56660b57cec5SDimitry Andric   return  __A;
56670b57cec5SDimitry Andric }
56680b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcomisd on A and B (both viewed as __v2df) with
/// P and R forwarded as int arguments; the result is cast to int.
/// NOTE(review): P is presumably the comparison predicate and R the
/// rounding/exception control -- confirm against the builtin's definition.
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(P), \
      (int)(R)))
56720b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcomiss on A and B (both viewed as __v4sf) with
/// P and R forwarded as int arguments; the result is cast to int.
/// NOTE(review): P is presumably the comparison predicate and R the
/// rounding/exception control -- confirm against the builtin's definition.
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(P), \
      (int)(R)))
56760b57cec5SDimitry Andric 
56770b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvtsd2si64 on A (viewed as __v2df) with R as the
/// int second argument; the result is cast to long long. Only defined when
/// __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
56800b57cec5SDimitry Andric #endif
56810b57cec5SDimitry Andric 
56820b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
56830b57cec5SDimitry Andric _mm512_sll_epi32(__m512i __A, __m128i __B)
56840b57cec5SDimitry Andric {
56850b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
56860b57cec5SDimitry Andric }
56870b57cec5SDimitry Andric 
56880b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
56890b57cec5SDimitry Andric _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
56900b57cec5SDimitry Andric {
56910b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
56920b57cec5SDimitry Andric                                           (__v16si)_mm512_sll_epi32(__A, __B),
56930b57cec5SDimitry Andric                                           (__v16si)__W);
56940b57cec5SDimitry Andric }
56950b57cec5SDimitry Andric 
56960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
56970b57cec5SDimitry Andric _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
56980b57cec5SDimitry Andric {
56990b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
57000b57cec5SDimitry Andric                                           (__v16si)_mm512_sll_epi32(__A, __B),
57010b57cec5SDimitry Andric                                           (__v16si)_mm512_setzero_si512());
57020b57cec5SDimitry Andric }
57030b57cec5SDimitry Andric 
57040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57050b57cec5SDimitry Andric _mm512_sll_epi64(__m512i __A, __m128i __B)
57060b57cec5SDimitry Andric {
57070b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
57080b57cec5SDimitry Andric }
57090b57cec5SDimitry Andric 
57100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57110b57cec5SDimitry Andric _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
57120b57cec5SDimitry Andric {
57130b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
57140b57cec5SDimitry Andric                                              (__v8di)_mm512_sll_epi64(__A, __B),
57150b57cec5SDimitry Andric                                              (__v8di)__W);
57160b57cec5SDimitry Andric }
57170b57cec5SDimitry Andric 
57180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57190b57cec5SDimitry Andric _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
57200b57cec5SDimitry Andric {
57210b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
57220b57cec5SDimitry Andric                                            (__v8di)_mm512_sll_epi64(__A, __B),
57230b57cec5SDimitry Andric                                            (__v8di)_mm512_setzero_si512());
57240b57cec5SDimitry Andric }
57250b57cec5SDimitry Andric 
57260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57270b57cec5SDimitry Andric _mm512_sllv_epi32(__m512i __X, __m512i __Y)
57280b57cec5SDimitry Andric {
57290b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
57300b57cec5SDimitry Andric }
57310b57cec5SDimitry Andric 
57320b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57330b57cec5SDimitry Andric _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
57340b57cec5SDimitry Andric {
57350b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
57360b57cec5SDimitry Andric                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
57370b57cec5SDimitry Andric                                            (__v16si)__W);
57380b57cec5SDimitry Andric }
57390b57cec5SDimitry Andric 
57400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57410b57cec5SDimitry Andric _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
57420b57cec5SDimitry Andric {
57430b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
57440b57cec5SDimitry Andric                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
57450b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
57460b57cec5SDimitry Andric }
57470b57cec5SDimitry Andric 
57480b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57490b57cec5SDimitry Andric _mm512_sllv_epi64(__m512i __X, __m512i __Y)
57500b57cec5SDimitry Andric {
57510b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
57520b57cec5SDimitry Andric }
57530b57cec5SDimitry Andric 
57540b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57550b57cec5SDimitry Andric _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
57560b57cec5SDimitry Andric {
57570b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
57580b57cec5SDimitry Andric                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
57590b57cec5SDimitry Andric                                             (__v8di)__W);
57600b57cec5SDimitry Andric }
57610b57cec5SDimitry Andric 
57620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57630b57cec5SDimitry Andric _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
57640b57cec5SDimitry Andric {
57650b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
57660b57cec5SDimitry Andric                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
57670b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
57680b57cec5SDimitry Andric }
57690b57cec5SDimitry Andric 
57700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57710b57cec5SDimitry Andric _mm512_sra_epi32(__m512i __A, __m128i __B)
57720b57cec5SDimitry Andric {
57730b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
57740b57cec5SDimitry Andric }
57750b57cec5SDimitry Andric 
57760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57770b57cec5SDimitry Andric _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
57780b57cec5SDimitry Andric {
57790b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
57800b57cec5SDimitry Andric                                           (__v16si)_mm512_sra_epi32(__A, __B),
57810b57cec5SDimitry Andric                                           (__v16si)__W);
57820b57cec5SDimitry Andric }
57830b57cec5SDimitry Andric 
57840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57850b57cec5SDimitry Andric _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
57860b57cec5SDimitry Andric {
57870b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
57880b57cec5SDimitry Andric                                           (__v16si)_mm512_sra_epi32(__A, __B),
57890b57cec5SDimitry Andric                                           (__v16si)_mm512_setzero_si512());
57900b57cec5SDimitry Andric }
57910b57cec5SDimitry Andric 
57920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57930b57cec5SDimitry Andric _mm512_sra_epi64(__m512i __A, __m128i __B)
57940b57cec5SDimitry Andric {
57950b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
57960b57cec5SDimitry Andric }
57970b57cec5SDimitry Andric 
57980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
57990b57cec5SDimitry Andric _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
58000b57cec5SDimitry Andric {
58010b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58020b57cec5SDimitry Andric                                            (__v8di)_mm512_sra_epi64(__A, __B),
58030b57cec5SDimitry Andric                                            (__v8di)__W);
58040b57cec5SDimitry Andric }
58050b57cec5SDimitry Andric 
58060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58070b57cec5SDimitry Andric _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
58080b57cec5SDimitry Andric {
58090b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58100b57cec5SDimitry Andric                                            (__v8di)_mm512_sra_epi64(__A, __B),
58110b57cec5SDimitry Andric                                            (__v8di)_mm512_setzero_si512());
58120b57cec5SDimitry Andric }
58130b57cec5SDimitry Andric 
58140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58150b57cec5SDimitry Andric _mm512_srav_epi32(__m512i __X, __m512i __Y)
58160b57cec5SDimitry Andric {
58170b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
58180b57cec5SDimitry Andric }
58190b57cec5SDimitry Andric 
58200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58210b57cec5SDimitry Andric _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
58220b57cec5SDimitry Andric {
58230b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
58240b57cec5SDimitry Andric                                            (__v16si)_mm512_srav_epi32(__X, __Y),
58250b57cec5SDimitry Andric                                            (__v16si)__W);
58260b57cec5SDimitry Andric }
58270b57cec5SDimitry Andric 
58280b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58290b57cec5SDimitry Andric _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
58300b57cec5SDimitry Andric {
58310b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
58320b57cec5SDimitry Andric                                            (__v16si)_mm512_srav_epi32(__X, __Y),
58330b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
58340b57cec5SDimitry Andric }
58350b57cec5SDimitry Andric 
58360b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58370b57cec5SDimitry Andric _mm512_srav_epi64(__m512i __X, __m512i __Y)
58380b57cec5SDimitry Andric {
58390b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
58400b57cec5SDimitry Andric }
58410b57cec5SDimitry Andric 
58420b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58430b57cec5SDimitry Andric _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
58440b57cec5SDimitry Andric {
58450b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58460b57cec5SDimitry Andric                                             (__v8di)_mm512_srav_epi64(__X, __Y),
58470b57cec5SDimitry Andric                                             (__v8di)__W);
58480b57cec5SDimitry Andric }
58490b57cec5SDimitry Andric 
58500b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58510b57cec5SDimitry Andric _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
58520b57cec5SDimitry Andric {
58530b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58540b57cec5SDimitry Andric                                             (__v8di)_mm512_srav_epi64(__X, __Y),
58550b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
58560b57cec5SDimitry Andric }
58570b57cec5SDimitry Andric 
58580b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58590b57cec5SDimitry Andric _mm512_srl_epi32(__m512i __A, __m128i __B)
58600b57cec5SDimitry Andric {
58610b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
58620b57cec5SDimitry Andric }
58630b57cec5SDimitry Andric 
58640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58650b57cec5SDimitry Andric _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
58660b57cec5SDimitry Andric {
58670b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
58680b57cec5SDimitry Andric                                           (__v16si)_mm512_srl_epi32(__A, __B),
58690b57cec5SDimitry Andric                                           (__v16si)__W);
58700b57cec5SDimitry Andric }
58710b57cec5SDimitry Andric 
58720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58730b57cec5SDimitry Andric _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
58740b57cec5SDimitry Andric {
58750b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
58760b57cec5SDimitry Andric                                           (__v16si)_mm512_srl_epi32(__A, __B),
58770b57cec5SDimitry Andric                                           (__v16si)_mm512_setzero_si512());
58780b57cec5SDimitry Andric }
58790b57cec5SDimitry Andric 
58800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58810b57cec5SDimitry Andric _mm512_srl_epi64(__m512i __A, __m128i __B)
58820b57cec5SDimitry Andric {
58830b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
58840b57cec5SDimitry Andric }
58850b57cec5SDimitry Andric 
58860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58870b57cec5SDimitry Andric _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
58880b57cec5SDimitry Andric {
58890b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58900b57cec5SDimitry Andric                                            (__v8di)_mm512_srl_epi64(__A, __B),
58910b57cec5SDimitry Andric                                            (__v8di)__W);
58920b57cec5SDimitry Andric }
58930b57cec5SDimitry Andric 
58940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
58950b57cec5SDimitry Andric _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
58960b57cec5SDimitry Andric {
58970b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
58980b57cec5SDimitry Andric                                            (__v8di)_mm512_srl_epi64(__A, __B),
58990b57cec5SDimitry Andric                                            (__v8di)_mm512_setzero_si512());
59000b57cec5SDimitry Andric }
59010b57cec5SDimitry Andric 
59020b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59030b57cec5SDimitry Andric _mm512_srlv_epi32(__m512i __X, __m512i __Y)
59040b57cec5SDimitry Andric {
59050b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
59060b57cec5SDimitry Andric }
59070b57cec5SDimitry Andric 
59080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59090b57cec5SDimitry Andric _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
59100b57cec5SDimitry Andric {
59110b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
59120b57cec5SDimitry Andric                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
59130b57cec5SDimitry Andric                                            (__v16si)__W);
59140b57cec5SDimitry Andric }
59150b57cec5SDimitry Andric 
59160b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59170b57cec5SDimitry Andric _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
59180b57cec5SDimitry Andric {
59190b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
59200b57cec5SDimitry Andric                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
59210b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
59220b57cec5SDimitry Andric }
59230b57cec5SDimitry Andric 
59240b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59250b57cec5SDimitry Andric _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
59260b57cec5SDimitry Andric {
59270b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
59280b57cec5SDimitry Andric }
59290b57cec5SDimitry Andric 
59300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59310b57cec5SDimitry Andric _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
59320b57cec5SDimitry Andric {
59330b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
59340b57cec5SDimitry Andric                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
59350b57cec5SDimitry Andric                                             (__v8di)__W);
59360b57cec5SDimitry Andric }
59370b57cec5SDimitry Andric 
59380b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
59390b57cec5SDimitry Andric _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
59400b57cec5SDimitry Andric {
59410b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
59420b57cec5SDimitry Andric                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
59430b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
59440b57cec5SDimitry Andric }
59450b57cec5SDimitry Andric 
/// \enum _MM_TERNLOG_ENUM
///    Building blocks for the \a imm operand of the ternary-logic macros.
///    Each constant is the 8-entry truth-table column of one operand: with
///    index i = (a << 2) | (b << 1) | c, bit i of the constant equals that
///    operand's bit. Combining the constants with C bitwise operators thus
///    yields the table of the same expression over \a A, \a B and \a C, e.g.
///    (_MM_TERNLOG_A & _MM_TERNLOG_B) | _MM_TERNLOG_C.
typedef enum {
  _MM_TERNLOG_A = 0xF0, // 0b11110000: set where a == 1
  _MM_TERNLOG_B = 0xCC, // 0b11001100: set where b == 1
  _MM_TERNLOG_C = 0xAA  // 0b10101010: set where c == 1
} _MM_TERNLOG_ENUM;
595481ad6265SDimitry Andric 
/// Expands to __builtin_ia32_pternlogd512_mask with A, B and C viewed as
/// __v16si, imm cast to unsigned char, and the constant mask (__mmask16)-1.
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask16)-1))
59590b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_pternlogd512_mask with A, B and C viewed as
/// __v16si, imm cast to unsigned char, and U as the __mmask16 mask.
/// NOTE(review): elements whose mask bit is clear presumably keep the value
/// from A -- confirm against the builtin's definition.
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask16)(U)))
59640b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_pternlogd512_maskz with A, B and C viewed as
/// __v16si, imm cast to unsigned char, and U as the __mmask16 mask.
/// NOTE(review): elements whose mask bit is clear are presumably zeroed --
/// confirm against the builtin's definition.
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask16)(U)))
59690b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_pternlogq512_mask with A, B and C viewed as
/// __v8di, imm cast to unsigned char, and the constant mask (__mmask8)-1.
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
59740b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_pternlogq512_mask with A, B and C viewed as
/// __v8di, imm cast to unsigned char, and U as the __mmask8 mask.
/// NOTE(review): elements whose mask bit is clear presumably keep the value
/// from A -- confirm against the builtin's definition.
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
59790b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_pternlogq512_maskz with A, B and C viewed as
/// __v8di, imm cast to unsigned char, and U as the __mmask8 mask.
/// NOTE(review): elements whose mask bit is clear are presumably zeroed --
/// confirm against the builtin's definition.
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
59840b57cec5SDimitry Andric 
59850b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvtsd2si64 on A (viewed as __v2df) with R as the
/// int second argument; the result is cast to long long. Only defined when
/// __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
59880b57cec5SDimitry Andric #endif
59890b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with R as the
/// int second argument; the result is cast to int.
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
59920b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with R as the
/// int second argument; the result is cast to int.
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
59950b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtsd2usi32 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to unsigned int.
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
59980b57cec5SDimitry Andric 
59990b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128
60000b57cec5SDimitry Andric _mm_cvtsd_u32 (__m128d __A)
60010b57cec5SDimitry Andric {
60020b57cec5SDimitry Andric   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
60030b57cec5SDimitry Andric              _MM_FROUND_CUR_DIRECTION);
60040b57cec5SDimitry Andric }
60050b57cec5SDimitry Andric 
60060b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvtsd2usi64 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to unsigned long long. Only
/// defined when __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60100b57cec5SDimitry Andric 
60110b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
60120b57cec5SDimitry Andric _mm_cvtsd_u64 (__m128d __A)
60130b57cec5SDimitry Andric {
60140b57cec5SDimitry Andric   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
60150b57cec5SDimitry Andric                  __A,
60160b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
60170b57cec5SDimitry Andric }
60180b57cec5SDimitry Andric #endif
60190b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with R as the
/// int second argument; the result is cast to int.
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60220b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with R as the
/// int second argument; the result is cast to int.
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60250b57cec5SDimitry Andric 
60260b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvtss2si64 on A (viewed as __v4sf) with R as the
/// int second argument; the result is cast to long long. Only defined when
/// __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60290b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtss2si64 on A (viewed as __v4sf) with R as the
/// int second argument; the result is cast to long long. Only defined when
/// __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60320b57cec5SDimitry Andric #endif
60330b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvtss2usi32 on A (viewed as __v4sf) with R as
/// the int second argument; the result is cast to unsigned int.
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60360b57cec5SDimitry Andric 
60370b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128
60380b57cec5SDimitry Andric _mm_cvtss_u32 (__m128 __A)
60390b57cec5SDimitry Andric {
60400b57cec5SDimitry Andric   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
60410b57cec5SDimitry Andric              _MM_FROUND_CUR_DIRECTION);
60420b57cec5SDimitry Andric }
60430b57cec5SDimitry Andric 
60440b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvtss2usi64 on A (viewed as __v4sf) with R as
/// the int second argument; the result is cast to unsigned long long. Only
/// defined when __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
60480b57cec5SDimitry Andric 
60490b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
60500b57cec5SDimitry Andric _mm_cvtss_u64 (__m128 __A)
60510b57cec5SDimitry Andric {
60520b57cec5SDimitry Andric   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
60530b57cec5SDimitry Andric                  __A,
60540b57cec5SDimitry Andric                  _MM_FROUND_CUR_DIRECTION);
60550b57cec5SDimitry Andric }
60560b57cec5SDimitry Andric #endif
60570b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttsd2si32 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to int.
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60600b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttsd2si32 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to int.
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60630b57cec5SDimitry Andric 
60640b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS128
60650b57cec5SDimitry Andric _mm_cvttsd_i32 (__m128d __A)
60660b57cec5SDimitry Andric {
60670b57cec5SDimitry Andric   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
60680b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
60690b57cec5SDimitry Andric }
60700b57cec5SDimitry Andric 
60710b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvttsd2si64 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to long long. Only defined
/// when __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60740b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttsd2si64 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to long long. Only defined
/// when __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60770b57cec5SDimitry Andric 
60780b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS128
60790b57cec5SDimitry Andric _mm_cvttsd_i64 (__m128d __A)
60800b57cec5SDimitry Andric {
60810b57cec5SDimitry Andric   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
60820b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
60830b57cec5SDimitry Andric }
60840b57cec5SDimitry Andric #endif
60850b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttsd2usi32 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to unsigned int.
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
60880b57cec5SDimitry Andric 
60890b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128
60900b57cec5SDimitry Andric _mm_cvttsd_u32 (__m128d __A)
60910b57cec5SDimitry Andric {
60920b57cec5SDimitry Andric   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
60930b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
60940b57cec5SDimitry Andric }
60950b57cec5SDimitry Andric 
60960b57cec5SDimitry Andric #ifdef __x86_64__
/// Expands to __builtin_ia32_vcvttsd2usi64 on A (viewed as __v2df) with R as
/// the int second argument; the result is cast to unsigned long long. Only
/// defined when __x86_64__ is defined (enclosing #ifdef).
#define _mm_cvtt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
61000b57cec5SDimitry Andric 
61010b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
61020b57cec5SDimitry Andric _mm_cvttsd_u64 (__m128d __A)
61030b57cec5SDimitry Andric {
61040b57cec5SDimitry Andric   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
61050b57cec5SDimitry Andric                   __A,
61060b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
61070b57cec5SDimitry Andric }
61080b57cec5SDimitry Andric #endif
61090b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttss2si32 on A (viewed as __v4sf) with R as
/// the int second argument; the result is cast to int.
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
61120b57cec5SDimitry Andric 
/// Expands to __builtin_ia32_vcvttss2si32 on A (viewed as __v4sf) with R as
/// the int second argument; the result is cast to int.
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
61150b57cec5SDimitry Andric 
61160b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS128
61170b57cec5SDimitry Andric _mm_cvttss_i32 (__m128 __A)
61180b57cec5SDimitry Andric {
61190b57cec5SDimitry Andric   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
61200b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
61210b57cec5SDimitry Andric }
61220b57cec5SDimitry Andric 
61230b57cec5SDimitry Andric #ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttss2si64 on A (cast via __m128 to __v4sf),
 * forwarding R as an int; the whole expression is cast to long long.
 * Sits inside an #ifdef __x86_64__ region. */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
61260b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_vcvttss2si64 on A (cast via __m128 to __v4sf),
 * forwarding R as an int; the whole expression is cast to long long. The
 * expansion is the same one _mm_cvtt_roundss_i64 uses. */
#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
61290b57cec5SDimitry Andric 
61300b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS128
61310b57cec5SDimitry Andric _mm_cvttss_i64 (__m128 __A)
61320b57cec5SDimitry Andric {
61330b57cec5SDimitry Andric   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
61340b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
61350b57cec5SDimitry Andric }
61360b57cec5SDimitry Andric #endif
61370b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_vcvttss2usi32 on A (cast via __m128 to __v4sf),
 * forwarding R as an int; the whole expression is cast to unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
61400b57cec5SDimitry Andric 
61410b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128
61420b57cec5SDimitry Andric _mm_cvttss_u32 (__m128 __A)
61430b57cec5SDimitry Andric {
61440b57cec5SDimitry Andric   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
61450b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
61460b57cec5SDimitry Andric }
61470b57cec5SDimitry Andric 
61480b57cec5SDimitry Andric #ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttss2usi64 on A (cast via __m128 to __v4sf),
 * forwarding R as an int; the whole expression is cast to
 * unsigned long long. Sits inside an #ifdef __x86_64__ region. */
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R)))
61520b57cec5SDimitry Andric 
61530b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
61540b57cec5SDimitry Andric _mm_cvttss_u64 (__m128 __A)
61550b57cec5SDimitry Andric {
61560b57cec5SDimitry Andric   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
61570b57cec5SDimitry Andric                   __A,
61580b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
61590b57cec5SDimitry Andric }
61600b57cec5SDimitry Andric #endif
61610b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_vpermilpd512 on X (cast via __m512d to __v8df)
 * with C cast to int; the result is cast to __m512d. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))
61640b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_selectpd_512 with mask U, the value of
 * _mm512_permute_pd(X, C) as the second operand and W as the third. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)))
61690b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_selectpd_512 with mask U, the value of
 * _mm512_permute_pd(X, C) as the second operand and _mm512_setzero_pd() as
 * the third. */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
61740b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_vpermilps512 on X (cast via __m512 to __v16sf)
 * with C cast to int; the result is cast to __m512. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))
61770b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_selectps_512 with mask U, the value of
 * _mm512_permute_ps(X, C) as the second operand and W as the third. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)))
61820b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_selectps_512 with mask U, the value of
 * _mm512_permute_ps(X, C) as the second operand and _mm512_setzero_ps() as
 * the third. */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()))
61870b57cec5SDimitry Andric 
61880b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
61890b57cec5SDimitry Andric _mm512_permutevar_pd(__m512d __A, __m512i __C)
61900b57cec5SDimitry Andric {
61910b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
61920b57cec5SDimitry Andric }
61930b57cec5SDimitry Andric 
61940b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
61950b57cec5SDimitry Andric _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
61960b57cec5SDimitry Andric {
61970b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
61980b57cec5SDimitry Andric                                          (__v8df)_mm512_permutevar_pd(__A, __C),
61990b57cec5SDimitry Andric                                          (__v8df)__W);
62000b57cec5SDimitry Andric }
62010b57cec5SDimitry Andric 
62020b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
62030b57cec5SDimitry Andric _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
62040b57cec5SDimitry Andric {
62050b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
62060b57cec5SDimitry Andric                                          (__v8df)_mm512_permutevar_pd(__A, __C),
62070b57cec5SDimitry Andric                                          (__v8df)_mm512_setzero_pd());
62080b57cec5SDimitry Andric }
62090b57cec5SDimitry Andric 
62100b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62110b57cec5SDimitry Andric _mm512_permutevar_ps(__m512 __A, __m512i __C)
62120b57cec5SDimitry Andric {
62130b57cec5SDimitry Andric   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
62140b57cec5SDimitry Andric }
62150b57cec5SDimitry Andric 
62160b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62170b57cec5SDimitry Andric _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
62180b57cec5SDimitry Andric {
62190b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
62200b57cec5SDimitry Andric                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
62210b57cec5SDimitry Andric                                         (__v16sf)__W);
62220b57cec5SDimitry Andric }
62230b57cec5SDimitry Andric 
62240b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62250b57cec5SDimitry Andric _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
62260b57cec5SDimitry Andric {
62270b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
62280b57cec5SDimitry Andric                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
62290b57cec5SDimitry Andric                                         (__v16sf)_mm512_setzero_ps());
62300b57cec5SDimitry Andric }
62310b57cec5SDimitry Andric 
62320b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512
62330b57cec5SDimitry Andric _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
62340b57cec5SDimitry Andric {
62350b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
62360b57cec5SDimitry Andric                                                  (__v8df)__B);
62370b57cec5SDimitry Andric }
62380b57cec5SDimitry Andric 
62390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
62400b57cec5SDimitry Andric _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
62410b57cec5SDimitry Andric {
62420b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
62430b57cec5SDimitry Andric                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
62440b57cec5SDimitry Andric                                   (__v8df)__A);
62450b57cec5SDimitry Andric }
62460b57cec5SDimitry Andric 
62470b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
62480b57cec5SDimitry Andric _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
62490b57cec5SDimitry Andric                              __m512d __B)
62500b57cec5SDimitry Andric {
62510b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
62520b57cec5SDimitry Andric                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
62530b57cec5SDimitry Andric                                   (__v8df)(__m512d)__I);
62540b57cec5SDimitry Andric }
62550b57cec5SDimitry Andric 
62560b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
62570b57cec5SDimitry Andric _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
62580b57cec5SDimitry Andric                              __m512d __B)
62590b57cec5SDimitry Andric {
62600b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__U,
62610b57cec5SDimitry Andric                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
62620b57cec5SDimitry Andric                                   (__v8df)_mm512_setzero_pd());
62630b57cec5SDimitry Andric }
62640b57cec5SDimitry Andric 
62650b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512
62660b57cec5SDimitry Andric _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
62670b57cec5SDimitry Andric {
62680b57cec5SDimitry Andric   return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
62690b57cec5SDimitry Andric                                                 (__v16sf) __B);
62700b57cec5SDimitry Andric }
62710b57cec5SDimitry Andric 
62720b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62730b57cec5SDimitry Andric _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
62740b57cec5SDimitry Andric {
62750b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
62760b57cec5SDimitry Andric                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
62770b57cec5SDimitry Andric                                  (__v16sf)__A);
62780b57cec5SDimitry Andric }
62790b57cec5SDimitry Andric 
62800b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62810b57cec5SDimitry Andric _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
62820b57cec5SDimitry Andric {
62830b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
62840b57cec5SDimitry Andric                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
62850b57cec5SDimitry Andric                                  (__v16sf)(__m512)__I);
62860b57cec5SDimitry Andric }
62870b57cec5SDimitry Andric 
62880b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
62890b57cec5SDimitry Andric _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
62900b57cec5SDimitry Andric {
62910b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__U,
62920b57cec5SDimitry Andric                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
62930b57cec5SDimitry Andric                                  (__v16sf)_mm512_setzero_ps());
62940b57cec5SDimitry Andric }
62950b57cec5SDimitry Andric 
62960b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvttpd2udq512_mask with A (as __v8df), an
 * undefined __v8si from _mm256_undefined_si256() as the second operand, an
 * all-ones mask ((__mmask8)-1) and R cast to int; the result is cast to
 * __m256i. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)-1, \
      (int)(R)))
63010b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvttpd2udq512_mask with A (as __v8df), W (as
 * __v8si) as the second operand, mask U and R cast to int; the result is
 * cast to __m256i. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))
63060b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvttpd2udq512_mask with A (as __v8df), a zero
 * __v8si from _mm256_setzero_si256() as the second operand, mask U and R
 * cast to int; the result is cast to __m256i. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
      (int)(R)))
63110b57cec5SDimitry Andric 
63120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
63130b57cec5SDimitry Andric _mm512_cvttpd_epu32 (__m512d __A)
63140b57cec5SDimitry Andric {
63150b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
63160b57cec5SDimitry Andric                   (__v8si)
63170b57cec5SDimitry Andric                   _mm256_undefined_si256 (),
63180b57cec5SDimitry Andric                   (__mmask8) -1,
63190b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
63200b57cec5SDimitry Andric }
63210b57cec5SDimitry Andric 
63220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
63230b57cec5SDimitry Andric _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
63240b57cec5SDimitry Andric {
63250b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
63260b57cec5SDimitry Andric                   (__v8si) __W,
63270b57cec5SDimitry Andric                   (__mmask8) __U,
63280b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
63290b57cec5SDimitry Andric }
63300b57cec5SDimitry Andric 
63310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
63320b57cec5SDimitry Andric _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
63330b57cec5SDimitry Andric {
63340b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
63350b57cec5SDimitry Andric                   (__v8si)
63360b57cec5SDimitry Andric                   _mm256_setzero_si256 (),
63370b57cec5SDimitry Andric                   (__mmask8) __U,
63380b57cec5SDimitry Andric                   _MM_FROUND_CUR_DIRECTION);
63390b57cec5SDimitry Andric }
63400b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B (both as __v2df), _mm_setzero_pd(), an all-ones mask ((__mmask8)-1),
 * imm cast to int and R cast to int. The result is cast to __m128d. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), (int)(R)))
63470b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B (both as __v2df), _mm_setzero_pd(), an all-ones mask ((__mmask8)-1),
 * imm cast to int and _MM_FROUND_CUR_DIRECTION. The result is cast to
 * __m128d. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
63540b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B, W (all as __v2df), mask U, imm cast to int and
 * _MM_FROUND_CUR_DIRECTION. The result is cast to __m128d. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(imm), _MM_FROUND_CUR_DIRECTION))
63610b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B, W (all as __v2df), mask U, I cast to int and R cast to int. The
 * result is cast to __m128d. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(I), (int)(R)))
63680b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B (both as __v2df), _mm_setzero_pd(), mask U, I cast to int and
 * _MM_FROUND_CUR_DIRECTION. The result is cast to __m128d. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
63750b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscalesd_round_mask with operands, in order:
 * A, B (both as __v2df), _mm_setzero_pd(), mask U, I cast to int and R cast
 * to int. The result is cast to __m128d. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), (int)(R)))
63820b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B (both as __v4sf), _mm_setzero_ps(), an all-ones mask ((__mmask8)-1),
 * imm cast to int and R cast to int. The result is cast to __m128. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), (int)(R)))
63890b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B (both as __v4sf), _mm_setzero_ps(), an all-ones mask ((__mmask8)-1),
 * imm cast to int and _MM_FROUND_CUR_DIRECTION. The result is cast to
 * __m128. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
63960b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B, W (all as __v4sf), mask U, I cast to int and
 * _MM_FROUND_CUR_DIRECTION. The result is cast to __m128. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
64030b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B, W (all as __v4sf), mask U, I cast to int and R cast to int. The
 * result is cast to __m128. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), (int)(R)))
64100b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B (both as __v4sf), _mm_setzero_ps(), mask U, I cast to int and
 * _MM_FROUND_CUR_DIRECTION. The result is cast to __m128. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
64170b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_rndscaless_round_mask with operands, in order:
 * A, B (both as __v4sf), _mm_setzero_ps(), mask U, I cast to int and R cast
 * to int. The result is cast to __m128. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), (int)(R)))
64240b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefpd512_mask with operands, in order: A, B
 * (both as __v8df), _mm512_undefined_pd(), an all-ones mask ((__mmask8)-1)
 * and R cast to int. The result is cast to __m512d. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)))
64300b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefpd512_mask with operands, in order: A, B,
 * W (all as __v8df), mask U and R cast to int. The result is cast to
 * __m512d. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))
64360b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefpd512_mask with operands, in order: A, B
 * (both as __v8df), _mm512_setzero_pd(), mask U and R cast to int. The
 * result is cast to __m512d. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
64420b57cec5SDimitry Andric 
64430b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
64440b57cec5SDimitry Andric _mm512_scalef_pd (__m512d __A, __m512d __B)
64450b57cec5SDimitry Andric {
64460b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
64470b57cec5SDimitry Andric                 (__v8df) __B,
64480b57cec5SDimitry Andric                 (__v8df)
64490b57cec5SDimitry Andric                 _mm512_undefined_pd (),
64500b57cec5SDimitry Andric                 (__mmask8) -1,
64510b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
64520b57cec5SDimitry Andric }
64530b57cec5SDimitry Andric 
64540b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
64550b57cec5SDimitry Andric _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
64560b57cec5SDimitry Andric {
64570b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
64580b57cec5SDimitry Andric                 (__v8df) __B,
64590b57cec5SDimitry Andric                 (__v8df) __W,
64600b57cec5SDimitry Andric                 (__mmask8) __U,
64610b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
64620b57cec5SDimitry Andric }
64630b57cec5SDimitry Andric 
64640b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
64650b57cec5SDimitry Andric _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
64660b57cec5SDimitry Andric {
64670b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
64680b57cec5SDimitry Andric                 (__v8df) __B,
64690b57cec5SDimitry Andric                 (__v8df)
64700b57cec5SDimitry Andric                 _mm512_setzero_pd (),
64710b57cec5SDimitry Andric                 (__mmask8) __U,
64720b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
64730b57cec5SDimitry Andric }
64740b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefps512_mask with operands, in order: A, B
 * (both as __v16sf), _mm512_undefined_ps(), an all-ones mask
 * ((__mmask16)-1) and R cast to int. The result is cast to __m512. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))
64800b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefps512_mask with operands, in order: A, B,
 * W (all as __v16sf), mask U and R cast to int. The result is cast to
 * __m512. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))
64860b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefps512_mask with operands, in order: A, B
 * (both as __v16sf), _mm512_setzero_ps(), mask U and R cast to int. The
 * result is cast to __m512. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
64920b57cec5SDimitry Andric 
64930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
64940b57cec5SDimitry Andric _mm512_scalef_ps (__m512 __A, __m512 __B)
64950b57cec5SDimitry Andric {
64960b57cec5SDimitry Andric   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
64970b57cec5SDimitry Andric                (__v16sf) __B,
64980b57cec5SDimitry Andric                (__v16sf)
64990b57cec5SDimitry Andric                _mm512_undefined_ps (),
65000b57cec5SDimitry Andric                (__mmask16) -1,
65010b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
65020b57cec5SDimitry Andric }
65030b57cec5SDimitry Andric 
65040b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
65050b57cec5SDimitry Andric _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
65060b57cec5SDimitry Andric {
65070b57cec5SDimitry Andric   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
65080b57cec5SDimitry Andric                (__v16sf) __B,
65090b57cec5SDimitry Andric                (__v16sf) __W,
65100b57cec5SDimitry Andric                (__mmask16) __U,
65110b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
65120b57cec5SDimitry Andric }
65130b57cec5SDimitry Andric 
65140b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
65150b57cec5SDimitry Andric _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
65160b57cec5SDimitry Andric {
65170b57cec5SDimitry Andric   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
65180b57cec5SDimitry Andric                (__v16sf) __B,
65190b57cec5SDimitry Andric                (__v16sf)
65200b57cec5SDimitry Andric                _mm512_setzero_ps (),
65210b57cec5SDimitry Andric                (__mmask16) __U,
65220b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
65230b57cec5SDimitry Andric }
65240b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefsd_round_mask with operands, in order: A,
 * B (both as __v2df), _mm_setzero_pd(), an all-ones mask ((__mmask8)-1) and
 * R cast to int. The result is cast to __m128d. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
65300b57cec5SDimitry Andric 
65310b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
65320b57cec5SDimitry Andric _mm_scalef_sd (__m128d __A, __m128d __B)
65330b57cec5SDimitry Andric {
65340b57cec5SDimitry Andric   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
65350b57cec5SDimitry Andric               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
65360b57cec5SDimitry Andric               (__mmask8) -1,
65370b57cec5SDimitry Andric               _MM_FROUND_CUR_DIRECTION);
65380b57cec5SDimitry Andric }
65390b57cec5SDimitry Andric 
65400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
65410b57cec5SDimitry Andric _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
65420b57cec5SDimitry Andric {
65430b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
65440b57cec5SDimitry Andric                  (__v2df) __B,
65450b57cec5SDimitry Andric                 (__v2df) __W,
65460b57cec5SDimitry Andric                 (__mmask8) __U,
65470b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
65480b57cec5SDimitry Andric }
65490b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefsd_round_mask with operands, in order: A,
 * B, W (all as __v2df), mask U and R cast to int. The result is cast to
 * __m128d. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
65550b57cec5SDimitry Andric 
65560b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
65570b57cec5SDimitry Andric _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
65580b57cec5SDimitry Andric {
65590b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
65600b57cec5SDimitry Andric                  (__v2df) __B,
65610b57cec5SDimitry Andric                 (__v2df) _mm_setzero_pd (),
65620b57cec5SDimitry Andric                 (__mmask8) __U,
65630b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
65640b57cec5SDimitry Andric }
65650b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_scalefsd_round_mask with operands, in order: A,
 * B (both as __v2df), _mm_setzero_pd(), mask U and R cast to int. The
 * result is cast to __m128d. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
65710b57cec5SDimitry Andric 
/* Unmasked scalar-float scalef with an explicit rounding argument: calls
 * __builtin_ia32_scalefss_round_mask with a zero passthrough vector and an
 * all-ones mask ((__mmask8)-1). */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
65770b57cec5SDimitry Andric 
65780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
65790b57cec5SDimitry Andric _mm_scalef_ss (__m128 __A, __m128 __B)
65800b57cec5SDimitry Andric {
65810b57cec5SDimitry Andric   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
65820b57cec5SDimitry Andric              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
65830b57cec5SDimitry Andric              (__mmask8) -1,
65840b57cec5SDimitry Andric              _MM_FROUND_CUR_DIRECTION);
65850b57cec5SDimitry Andric }
65860b57cec5SDimitry Andric 
65870b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
65880b57cec5SDimitry Andric _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
65890b57cec5SDimitry Andric {
65900b57cec5SDimitry Andric  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
65910b57cec5SDimitry Andric                 (__v4sf) __B,
65920b57cec5SDimitry Andric                 (__v4sf) __W,
65930b57cec5SDimitry Andric                 (__mmask8) __U,
65940b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
65950b57cec5SDimitry Andric }
65960b57cec5SDimitry Andric 
/* Masked scalar-float scalef with an explicit rounding argument: forwards A,
 * B, the passthrough vector W, the 8-bit mask U and R (cast to int) to
 * __builtin_ia32_scalefss_round_mask. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
66020b57cec5SDimitry Andric 
66030b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
66040b57cec5SDimitry Andric _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
66050b57cec5SDimitry Andric {
66060b57cec5SDimitry Andric  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
66070b57cec5SDimitry Andric                  (__v4sf) __B,
66080b57cec5SDimitry Andric                 (__v4sf) _mm_setzero_ps (),
66090b57cec5SDimitry Andric                 (__mmask8) __U,
66100b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
66110b57cec5SDimitry Andric }
66120b57cec5SDimitry Andric 
/* Zero-masked scalar-float scalef with an explicit rounding argument: same
 * builtin as the masked form, but the passthrough operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
66190b57cec5SDimitry Andric 
66200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66215ffd83dbSDimitry Andric _mm512_srai_epi32(__m512i __A, unsigned int __B)
66220b57cec5SDimitry Andric {
662381ad6265SDimitry Andric   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
66240b57cec5SDimitry Andric }
66250b57cec5SDimitry Andric 
66260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66275ffd83dbSDimitry Andric _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
66285ffd83dbSDimitry Andric                        unsigned int __B)
66290b57cec5SDimitry Andric {
66300b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
66310b57cec5SDimitry Andric                                          (__v16si)_mm512_srai_epi32(__A, __B),
66320b57cec5SDimitry Andric                                          (__v16si)__W);
66330b57cec5SDimitry Andric }
66340b57cec5SDimitry Andric 
66350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66365ffd83dbSDimitry Andric _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
66375ffd83dbSDimitry Andric                         unsigned int __B) {
66380b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
66390b57cec5SDimitry Andric                                          (__v16si)_mm512_srai_epi32(__A, __B),
66400b57cec5SDimitry Andric                                          (__v16si)_mm512_setzero_si512());
66410b57cec5SDimitry Andric }
66420b57cec5SDimitry Andric 
66430b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66445ffd83dbSDimitry Andric _mm512_srai_epi64(__m512i __A, unsigned int __B)
66450b57cec5SDimitry Andric {
664681ad6265SDimitry Andric   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
66470b57cec5SDimitry Andric }
66480b57cec5SDimitry Andric 
66490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66505ffd83dbSDimitry Andric _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
66510b57cec5SDimitry Andric {
66520b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
66530b57cec5SDimitry Andric                                           (__v8di)_mm512_srai_epi64(__A, __B),
66540b57cec5SDimitry Andric                                           (__v8di)__W);
66550b57cec5SDimitry Andric }
66560b57cec5SDimitry Andric 
66570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
66585ffd83dbSDimitry Andric _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
66590b57cec5SDimitry Andric {
66600b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
66610b57cec5SDimitry Andric                                           (__v8di)_mm512_srai_epi64(__A, __B),
66620b57cec5SDimitry Andric                                           (__v8di)_mm512_setzero_si512());
66630b57cec5SDimitry Andric }
66640b57cec5SDimitry Andric 
/* Forwards A and B (as 16 x float) and the selector imm (cast to int) to
 * __builtin_ia32_shuf_f32x4. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm)))
66680b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_f32x4: the shuffle result and W are passed to
 * __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))
66730b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_f32x4: the shuffle result and an all-zero vector
 * are passed to __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
66780b57cec5SDimitry Andric 
/* Forwards A and B (as 8 x double) and the selector imm (cast to int) to
 * __builtin_ia32_shuf_f64x2. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm)))
66820b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_f64x2: the shuffle result and W are passed to
 * __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))
66870b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_f64x2: the shuffle result and an all-zero vector
 * are passed to __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
66920b57cec5SDimitry Andric 
/* Forwards A and B (as 16 x int) and the selector imm (cast to int) to
 * __builtin_ia32_shuf_i32x4. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm)))
66960b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_i32x4: the shuffle result and W are passed to
 * __builtin_ia32_selectd_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))
67010b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_i32x4: the shuffle result and an all-zero vector
 * are passed to __builtin_ia32_selectd_512 under the 16-bit mask U. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
67060b57cec5SDimitry Andric 
/* Forwards A and B (as 8 x long long) and the selector imm (cast to int) to
 * __builtin_ia32_shuf_i64x2. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm)))
67100b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_i64x2: the shuffle result and W are passed to
 * __builtin_ia32_selectq_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))
67150b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_i64x2: the shuffle result and an all-zero vector
 * are passed to __builtin_ia32_selectq_512 under the 8-bit mask U. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
67200b57cec5SDimitry Andric 
/* Forwards A and B (as 8 x double) and the selector M (cast to int) to
 * __builtin_ia32_shufpd512. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(M)))
67240b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_pd: the shuffle result and W are passed to
 * __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)))
67290b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_pd: the shuffle result and an all-zero vector are
 * passed to __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()))
67340b57cec5SDimitry Andric 
/* Forwards A and B (as 16 x float) and the selector M (cast to int) to
 * __builtin_ia32_shufps512. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(M)))
67380b57cec5SDimitry Andric 
/* Masked _mm512_shuffle_ps: the shuffle result and W are passed to
 * __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)))
67430b57cec5SDimitry Andric 
/* Zero-masked _mm512_shuffle_ps: the shuffle result and an all-zero vector are
 * passed to __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()))
67480b57cec5SDimitry Andric 
/* Unmasked scalar-double sqrt with an explicit rounding argument: calls
 * __builtin_ia32_sqrtsd_round_mask with a zero passthrough vector and an
 * all-ones mask ((__mmask8)-1). */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
67540b57cec5SDimitry Andric 
67550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
67560b57cec5SDimitry Andric _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
67570b57cec5SDimitry Andric {
67580b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
67590b57cec5SDimitry Andric                  (__v2df) __B,
67600b57cec5SDimitry Andric                 (__v2df) __W,
67610b57cec5SDimitry Andric                 (__mmask8) __U,
67620b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
67630b57cec5SDimitry Andric }
67640b57cec5SDimitry Andric 
/* Masked scalar-double sqrt with an explicit rounding argument: forwards A, B,
 * the passthrough vector W, the 8-bit mask U and R (cast to int) to
 * __builtin_ia32_sqrtsd_round_mask. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
67700b57cec5SDimitry Andric 
67710b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
67720b57cec5SDimitry Andric _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
67730b57cec5SDimitry Andric {
67740b57cec5SDimitry Andric  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
67750b57cec5SDimitry Andric                  (__v2df) __B,
67760b57cec5SDimitry Andric                 (__v2df) _mm_setzero_pd (),
67770b57cec5SDimitry Andric                 (__mmask8) __U,
67780b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
67790b57cec5SDimitry Andric }
67800b57cec5SDimitry Andric 
/* Zero-masked scalar-double sqrt with an explicit rounding argument: same
 * builtin as the masked form, but the passthrough operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
67860b57cec5SDimitry Andric 
/* Unmasked scalar-float sqrt with an explicit rounding argument: calls
 * __builtin_ia32_sqrtss_round_mask with a zero passthrough vector and an
 * all-ones mask ((__mmask8)-1). */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
67920b57cec5SDimitry Andric 
67930b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
67940b57cec5SDimitry Andric _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
67950b57cec5SDimitry Andric {
67960b57cec5SDimitry Andric  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
67970b57cec5SDimitry Andric                  (__v4sf) __B,
67980b57cec5SDimitry Andric                 (__v4sf) __W,
67990b57cec5SDimitry Andric                 (__mmask8) __U,
68000b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
68010b57cec5SDimitry Andric }
68020b57cec5SDimitry Andric 
/* Masked scalar-float sqrt with an explicit rounding argument: forwards A, B,
 * the passthrough vector W, the 8-bit mask U and R (cast to int) to
 * __builtin_ia32_sqrtss_round_mask. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
68080b57cec5SDimitry Andric 
68090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
68100b57cec5SDimitry Andric _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
68110b57cec5SDimitry Andric {
68120b57cec5SDimitry Andric  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
68130b57cec5SDimitry Andric                  (__v4sf) __B,
68140b57cec5SDimitry Andric                 (__v4sf) _mm_setzero_ps (),
68150b57cec5SDimitry Andric                 (__mmask8) __U,
68160b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
68170b57cec5SDimitry Andric }
68180b57cec5SDimitry Andric 
/* Zero-masked scalar-float sqrt with an explicit rounding argument: same
 * builtin as the masked form, but the passthrough operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
68240b57cec5SDimitry Andric 
68250b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
68260b57cec5SDimitry Andric _mm512_broadcast_f32x4(__m128 __A)
68270b57cec5SDimitry Andric {
68280b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
68290b57cec5SDimitry Andric                                          0, 1, 2, 3, 0, 1, 2, 3,
68300b57cec5SDimitry Andric                                          0, 1, 2, 3, 0, 1, 2, 3);
68310b57cec5SDimitry Andric }
68320b57cec5SDimitry Andric 
68330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
68340b57cec5SDimitry Andric _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
68350b57cec5SDimitry Andric {
68360b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
68370b57cec5SDimitry Andric                                            (__v16sf)_mm512_broadcast_f32x4(__A),
68380b57cec5SDimitry Andric                                            (__v16sf)__O);
68390b57cec5SDimitry Andric }
68400b57cec5SDimitry Andric 
68410b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
68420b57cec5SDimitry Andric _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
68430b57cec5SDimitry Andric {
68440b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
68450b57cec5SDimitry Andric                                            (__v16sf)_mm512_broadcast_f32x4(__A),
68460b57cec5SDimitry Andric                                            (__v16sf)_mm512_setzero_ps());
68470b57cec5SDimitry Andric }
68480b57cec5SDimitry Andric 
68490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
68500b57cec5SDimitry Andric _mm512_broadcast_f64x4(__m256d __A)
68510b57cec5SDimitry Andric {
68520b57cec5SDimitry Andric   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
68530b57cec5SDimitry Andric                                           0, 1, 2, 3, 0, 1, 2, 3);
68540b57cec5SDimitry Andric }
68550b57cec5SDimitry Andric 
68560b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
68570b57cec5SDimitry Andric _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
68580b57cec5SDimitry Andric {
68590b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
68600b57cec5SDimitry Andric                                             (__v8df)_mm512_broadcast_f64x4(__A),
68610b57cec5SDimitry Andric                                             (__v8df)__O);
68620b57cec5SDimitry Andric }
68630b57cec5SDimitry Andric 
68640b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
68650b57cec5SDimitry Andric _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
68660b57cec5SDimitry Andric {
68670b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
68680b57cec5SDimitry Andric                                             (__v8df)_mm512_broadcast_f64x4(__A),
68690b57cec5SDimitry Andric                                             (__v8df)_mm512_setzero_pd());
68700b57cec5SDimitry Andric }
68710b57cec5SDimitry Andric 
68720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
68730b57cec5SDimitry Andric _mm512_broadcast_i32x4(__m128i __A)
68740b57cec5SDimitry Andric {
68750b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
68760b57cec5SDimitry Andric                                           0, 1, 2, 3, 0, 1, 2, 3,
68770b57cec5SDimitry Andric                                           0, 1, 2, 3, 0, 1, 2, 3);
68780b57cec5SDimitry Andric }
68790b57cec5SDimitry Andric 
68800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
68810b57cec5SDimitry Andric _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
68820b57cec5SDimitry Andric {
68830b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
68840b57cec5SDimitry Andric                                            (__v16si)_mm512_broadcast_i32x4(__A),
68850b57cec5SDimitry Andric                                            (__v16si)__O);
68860b57cec5SDimitry Andric }
68870b57cec5SDimitry Andric 
68880b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
68890b57cec5SDimitry Andric _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
68900b57cec5SDimitry Andric {
68910b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
68920b57cec5SDimitry Andric                                            (__v16si)_mm512_broadcast_i32x4(__A),
68930b57cec5SDimitry Andric                                            (__v16si)_mm512_setzero_si512());
68940b57cec5SDimitry Andric }
68950b57cec5SDimitry Andric 
68960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
68970b57cec5SDimitry Andric _mm512_broadcast_i64x4(__m256i __A)
68980b57cec5SDimitry Andric {
68990b57cec5SDimitry Andric   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
69000b57cec5SDimitry Andric                                           0, 1, 2, 3, 0, 1, 2, 3);
69010b57cec5SDimitry Andric }
69020b57cec5SDimitry Andric 
69030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
69040b57cec5SDimitry Andric _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
69050b57cec5SDimitry Andric {
69060b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
69070b57cec5SDimitry Andric                                             (__v8di)_mm512_broadcast_i64x4(__A),
69080b57cec5SDimitry Andric                                             (__v8di)__O);
69090b57cec5SDimitry Andric }
69100b57cec5SDimitry Andric 
69110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
69120b57cec5SDimitry Andric _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
69130b57cec5SDimitry Andric {
69140b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
69150b57cec5SDimitry Andric                                             (__v8di)_mm512_broadcast_i64x4(__A),
69160b57cec5SDimitry Andric                                             (__v8di)_mm512_setzero_si512());
69170b57cec5SDimitry Andric }
69180b57cec5SDimitry Andric 
69190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
69200b57cec5SDimitry Andric _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
69210b57cec5SDimitry Andric {
69220b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__M,
69230b57cec5SDimitry Andric                                               (__v8df) _mm512_broadcastsd_pd(__A),
69240b57cec5SDimitry Andric                                               (__v8df) __O);
69250b57cec5SDimitry Andric }
69260b57cec5SDimitry Andric 
69270b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
69280b57cec5SDimitry Andric _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
69290b57cec5SDimitry Andric {
69300b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512(__M,
69310b57cec5SDimitry Andric                                               (__v8df) _mm512_broadcastsd_pd(__A),
69320b57cec5SDimitry Andric                                               (__v8df) _mm512_setzero_pd());
69330b57cec5SDimitry Andric }
69340b57cec5SDimitry Andric 
69350b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
69360b57cec5SDimitry Andric _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
69370b57cec5SDimitry Andric {
69380b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__M,
69390b57cec5SDimitry Andric                                              (__v16sf) _mm512_broadcastss_ps(__A),
69400b57cec5SDimitry Andric                                              (__v16sf) __O);
69410b57cec5SDimitry Andric }
69420b57cec5SDimitry Andric 
69430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
69440b57cec5SDimitry Andric _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
69450b57cec5SDimitry Andric {
69460b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512(__M,
69470b57cec5SDimitry Andric                                              (__v16sf) _mm512_broadcastss_ps(__A),
69480b57cec5SDimitry Andric                                              (__v16sf) _mm512_setzero_ps());
69490b57cec5SDimitry Andric }
69500b57cec5SDimitry Andric 
69510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
69520b57cec5SDimitry Andric _mm512_cvtsepi32_epi8 (__m512i __A)
69530b57cec5SDimitry Andric {
69540b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
69550b57cec5SDimitry Andric                (__v16qi) _mm_undefined_si128 (),
69560b57cec5SDimitry Andric                (__mmask16) -1);
69570b57cec5SDimitry Andric }
69580b57cec5SDimitry Andric 
69590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
69600b57cec5SDimitry Andric _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
69610b57cec5SDimitry Andric {
69620b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
69630b57cec5SDimitry Andric                (__v16qi) __O, __M);
69640b57cec5SDimitry Andric }
69650b57cec5SDimitry Andric 
69660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
69670b57cec5SDimitry Andric _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
69680b57cec5SDimitry Andric {
69690b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
69700b57cec5SDimitry Andric                (__v16qi) _mm_setzero_si128 (),
69710b57cec5SDimitry Andric                __M);
69720b57cec5SDimitry Andric }
69730b57cec5SDimitry Andric 
69740b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
69750b57cec5SDimitry Andric _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
69760b57cec5SDimitry Andric {
69770b57cec5SDimitry Andric   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
69780b57cec5SDimitry Andric }
69790b57cec5SDimitry Andric 
69800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
69810b57cec5SDimitry Andric _mm512_cvtsepi32_epi16 (__m512i __A)
69820b57cec5SDimitry Andric {
69830b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
69840b57cec5SDimitry Andric                (__v16hi) _mm256_undefined_si256 (),
69850b57cec5SDimitry Andric                (__mmask16) -1);
69860b57cec5SDimitry Andric }
69870b57cec5SDimitry Andric 
69880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
69890b57cec5SDimitry Andric _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
69900b57cec5SDimitry Andric {
69910b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
69920b57cec5SDimitry Andric                (__v16hi) __O, __M);
69930b57cec5SDimitry Andric }
69940b57cec5SDimitry Andric 
69950b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
69960b57cec5SDimitry Andric _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
69970b57cec5SDimitry Andric {
69980b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
69990b57cec5SDimitry Andric                (__v16hi) _mm256_setzero_si256 (),
70000b57cec5SDimitry Andric                __M);
70010b57cec5SDimitry Andric }
70020b57cec5SDimitry Andric 
70030b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
70040b57cec5SDimitry Andric _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
70050b57cec5SDimitry Andric {
70060b57cec5SDimitry Andric   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
70070b57cec5SDimitry Andric }
70080b57cec5SDimitry Andric 
70090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70100b57cec5SDimitry Andric _mm512_cvtsepi64_epi8 (__m512i __A)
70110b57cec5SDimitry Andric {
70120b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
70130b57cec5SDimitry Andric                (__v16qi) _mm_undefined_si128 (),
70140b57cec5SDimitry Andric                (__mmask8) -1);
70150b57cec5SDimitry Andric }
70160b57cec5SDimitry Andric 
70170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70180b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
70190b57cec5SDimitry Andric {
70200b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
70210b57cec5SDimitry Andric                (__v16qi) __O, __M);
70220b57cec5SDimitry Andric }
70230b57cec5SDimitry Andric 
70240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70250b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
70260b57cec5SDimitry Andric {
70270b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
70280b57cec5SDimitry Andric                (__v16qi) _mm_setzero_si128 (),
70290b57cec5SDimitry Andric                __M);
70300b57cec5SDimitry Andric }
70310b57cec5SDimitry Andric 
70320b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
70330b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
70340b57cec5SDimitry Andric {
70350b57cec5SDimitry Andric   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
70360b57cec5SDimitry Andric }
70370b57cec5SDimitry Andric 
70380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
70390b57cec5SDimitry Andric _mm512_cvtsepi64_epi32 (__m512i __A)
70400b57cec5SDimitry Andric {
70410b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
70420b57cec5SDimitry Andric                (__v8si) _mm256_undefined_si256 (),
70430b57cec5SDimitry Andric                (__mmask8) -1);
70440b57cec5SDimitry Andric }
70450b57cec5SDimitry Andric 
70460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
70470b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
70480b57cec5SDimitry Andric {
70490b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
70500b57cec5SDimitry Andric                (__v8si) __O, __M);
70510b57cec5SDimitry Andric }
70520b57cec5SDimitry Andric 
70530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
70540b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
70550b57cec5SDimitry Andric {
70560b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
70570b57cec5SDimitry Andric                (__v8si) _mm256_setzero_si256 (),
70580b57cec5SDimitry Andric                __M);
70590b57cec5SDimitry Andric }
70600b57cec5SDimitry Andric 
70610b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
70620b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
70630b57cec5SDimitry Andric {
70640b57cec5SDimitry Andric   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
70650b57cec5SDimitry Andric }
70660b57cec5SDimitry Andric 
70670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70680b57cec5SDimitry Andric _mm512_cvtsepi64_epi16 (__m512i __A)
70690b57cec5SDimitry Andric {
70700b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
70710b57cec5SDimitry Andric                (__v8hi) _mm_undefined_si128 (),
70720b57cec5SDimitry Andric                (__mmask8) -1);
70730b57cec5SDimitry Andric }
70740b57cec5SDimitry Andric 
70750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70760b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
70770b57cec5SDimitry Andric {
70780b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
70790b57cec5SDimitry Andric                (__v8hi) __O, __M);
70800b57cec5SDimitry Andric }
70810b57cec5SDimitry Andric 
70820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70830b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
70840b57cec5SDimitry Andric {
70850b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
70860b57cec5SDimitry Andric                (__v8hi) _mm_setzero_si128 (),
70870b57cec5SDimitry Andric                __M);
70880b57cec5SDimitry Andric }
70890b57cec5SDimitry Andric 
70900b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
70910b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
70920b57cec5SDimitry Andric {
70930b57cec5SDimitry Andric   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
70940b57cec5SDimitry Andric }
70950b57cec5SDimitry Andric 
70960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
70970b57cec5SDimitry Andric _mm512_cvtusepi32_epi8 (__m512i __A)
70980b57cec5SDimitry Andric {
70990b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
71000b57cec5SDimitry Andric                 (__v16qi) _mm_undefined_si128 (),
71010b57cec5SDimitry Andric                 (__mmask16) -1);
71020b57cec5SDimitry Andric }
71030b57cec5SDimitry Andric 
71040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
71050b57cec5SDimitry Andric _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
71060b57cec5SDimitry Andric {
71070b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
71080b57cec5SDimitry Andric                 (__v16qi) __O,
71090b57cec5SDimitry Andric                 __M);
71100b57cec5SDimitry Andric }
71110b57cec5SDimitry Andric 
71120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
71130b57cec5SDimitry Andric _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
71140b57cec5SDimitry Andric {
71150b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
71160b57cec5SDimitry Andric                 (__v16qi) _mm_setzero_si128 (),
71170b57cec5SDimitry Andric                 __M);
71180b57cec5SDimitry Andric }
71190b57cec5SDimitry Andric 
71200b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
71210b57cec5SDimitry Andric _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
71220b57cec5SDimitry Andric {
71230b57cec5SDimitry Andric   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
71240b57cec5SDimitry Andric }
71250b57cec5SDimitry Andric 
71260b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
71270b57cec5SDimitry Andric _mm512_cvtusepi32_epi16 (__m512i __A)
71280b57cec5SDimitry Andric {
71290b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
71300b57cec5SDimitry Andric                 (__v16hi) _mm256_undefined_si256 (),
71310b57cec5SDimitry Andric                 (__mmask16) -1);
71320b57cec5SDimitry Andric }
71330b57cec5SDimitry Andric 
71340b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
71350b57cec5SDimitry Andric _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
71360b57cec5SDimitry Andric {
71370b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
71380b57cec5SDimitry Andric                 (__v16hi) __O,
71390b57cec5SDimitry Andric                 __M);
71400b57cec5SDimitry Andric }
71410b57cec5SDimitry Andric 
71420b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
71430b57cec5SDimitry Andric _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
71440b57cec5SDimitry Andric {
71450b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
71460b57cec5SDimitry Andric                 (__v16hi) _mm256_setzero_si256 (),
71470b57cec5SDimitry Andric                 __M);
71480b57cec5SDimitry Andric }
71490b57cec5SDimitry Andric 
71500b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
71510b57cec5SDimitry Andric _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
71520b57cec5SDimitry Andric {
71530b57cec5SDimitry Andric   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
71540b57cec5SDimitry Andric }
71550b57cec5SDimitry Andric 
71560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
71570b57cec5SDimitry Andric _mm512_cvtusepi64_epi8 (__m512i __A)
71580b57cec5SDimitry Andric {
71590b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
71600b57cec5SDimitry Andric                 (__v16qi) _mm_undefined_si128 (),
71610b57cec5SDimitry Andric                 (__mmask8) -1);
71620b57cec5SDimitry Andric }
71630b57cec5SDimitry Andric 
71640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
71650b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
71660b57cec5SDimitry Andric {
71670b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
71680b57cec5SDimitry Andric                 (__v16qi) __O,
71690b57cec5SDimitry Andric                 __M);
71700b57cec5SDimitry Andric }
71710b57cec5SDimitry Andric 
71720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
71730b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
71740b57cec5SDimitry Andric {
71750b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
71760b57cec5SDimitry Andric                 (__v16qi) _mm_setzero_si128 (),
71770b57cec5SDimitry Andric                 __M);
71780b57cec5SDimitry Andric }
71790b57cec5SDimitry Andric 
71800b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
71810b57cec5SDimitry Andric _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
71820b57cec5SDimitry Andric {
71830b57cec5SDimitry Andric   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
71840b57cec5SDimitry Andric }
71850b57cec5SDimitry Andric 
71860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
71870b57cec5SDimitry Andric _mm512_cvtusepi64_epi32 (__m512i __A)
71880b57cec5SDimitry Andric {
71890b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
71900b57cec5SDimitry Andric                 (__v8si) _mm256_undefined_si256 (),
71910b57cec5SDimitry Andric                 (__mmask8) -1);
71920b57cec5SDimitry Andric }
71930b57cec5SDimitry Andric 
71940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
71950b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
71960b57cec5SDimitry Andric {
71970b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
71980b57cec5SDimitry Andric                 (__v8si) __O, __M);
71990b57cec5SDimitry Andric }
72000b57cec5SDimitry Andric 
72010b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
72020b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
72030b57cec5SDimitry Andric {
72040b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
72050b57cec5SDimitry Andric                 (__v8si) _mm256_setzero_si256 (),
72060b57cec5SDimitry Andric                 __M);
72070b57cec5SDimitry Andric }
72080b57cec5SDimitry Andric 
72090b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
72100b57cec5SDimitry Andric _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
72110b57cec5SDimitry Andric {
72120b57cec5SDimitry Andric   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
72130b57cec5SDimitry Andric }
72140b57cec5SDimitry Andric 
72150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72160b57cec5SDimitry Andric _mm512_cvtusepi64_epi16 (__m512i __A)
72170b57cec5SDimitry Andric {
72180b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
72190b57cec5SDimitry Andric                 (__v8hi) _mm_undefined_si128 (),
72200b57cec5SDimitry Andric                 (__mmask8) -1);
72210b57cec5SDimitry Andric }
72220b57cec5SDimitry Andric 
72230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72240b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
72250b57cec5SDimitry Andric {
72260b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
72270b57cec5SDimitry Andric                 (__v8hi) __O, __M);
72280b57cec5SDimitry Andric }
72290b57cec5SDimitry Andric 
72300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72310b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
72320b57cec5SDimitry Andric {
72330b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
72340b57cec5SDimitry Andric                 (__v8hi) _mm_setzero_si128 (),
72350b57cec5SDimitry Andric                 __M);
72360b57cec5SDimitry Andric }
72370b57cec5SDimitry Andric 
72380b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
72390b57cec5SDimitry Andric _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
72400b57cec5SDimitry Andric {
72410b57cec5SDimitry Andric   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
72420b57cec5SDimitry Andric }
72430b57cec5SDimitry Andric 
72440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72450b57cec5SDimitry Andric _mm512_cvtepi32_epi8 (__m512i __A)
72460b57cec5SDimitry Andric {
72470b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
72480b57cec5SDimitry Andric               (__v16qi) _mm_undefined_si128 (),
72490b57cec5SDimitry Andric               (__mmask16) -1);
72500b57cec5SDimitry Andric }
72510b57cec5SDimitry Andric 
72520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72530b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
72540b57cec5SDimitry Andric {
72550b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
72560b57cec5SDimitry Andric               (__v16qi) __O, __M);
72570b57cec5SDimitry Andric }
72580b57cec5SDimitry Andric 
72590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
72600b57cec5SDimitry Andric _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
72610b57cec5SDimitry Andric {
72620b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
72630b57cec5SDimitry Andric               (__v16qi) _mm_setzero_si128 (),
72640b57cec5SDimitry Andric               __M);
72650b57cec5SDimitry Andric }
72660b57cec5SDimitry Andric 
72670b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
72680b57cec5SDimitry Andric _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
72690b57cec5SDimitry Andric {
72700b57cec5SDimitry Andric   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
72710b57cec5SDimitry Andric }
72720b57cec5SDimitry Andric 
72730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
72740b57cec5SDimitry Andric _mm512_cvtepi32_epi16 (__m512i __A)
72750b57cec5SDimitry Andric {
72760b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
72770b57cec5SDimitry Andric               (__v16hi) _mm256_undefined_si256 (),
72780b57cec5SDimitry Andric               (__mmask16) -1);
72790b57cec5SDimitry Andric }
72800b57cec5SDimitry Andric 
72810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
72820b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
72830b57cec5SDimitry Andric {
72840b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
72850b57cec5SDimitry Andric               (__v16hi) __O, __M);
72860b57cec5SDimitry Andric }
72870b57cec5SDimitry Andric 
72880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
72890b57cec5SDimitry Andric _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
72900b57cec5SDimitry Andric {
72910b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
72920b57cec5SDimitry Andric               (__v16hi) _mm256_setzero_si256 (),
72930b57cec5SDimitry Andric               __M);
72940b57cec5SDimitry Andric }
72950b57cec5SDimitry Andric 
72960b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
72970b57cec5SDimitry Andric _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
72980b57cec5SDimitry Andric {
72990b57cec5SDimitry Andric   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
73000b57cec5SDimitry Andric }
73010b57cec5SDimitry Andric 
73020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73030b57cec5SDimitry Andric _mm512_cvtepi64_epi8 (__m512i __A)
73040b57cec5SDimitry Andric {
73050b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
73060b57cec5SDimitry Andric               (__v16qi) _mm_undefined_si128 (),
73070b57cec5SDimitry Andric               (__mmask8) -1);
73080b57cec5SDimitry Andric }
73090b57cec5SDimitry Andric 
73100b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73110b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
73120b57cec5SDimitry Andric {
73130b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
73140b57cec5SDimitry Andric               (__v16qi) __O, __M);
73150b57cec5SDimitry Andric }
73160b57cec5SDimitry Andric 
73170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73180b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
73190b57cec5SDimitry Andric {
73200b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
73210b57cec5SDimitry Andric               (__v16qi) _mm_setzero_si128 (),
73220b57cec5SDimitry Andric               __M);
73230b57cec5SDimitry Andric }
73240b57cec5SDimitry Andric 
73250b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
73260b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
73270b57cec5SDimitry Andric {
73280b57cec5SDimitry Andric   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
73290b57cec5SDimitry Andric }
73300b57cec5SDimitry Andric 
73310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
73320b57cec5SDimitry Andric _mm512_cvtepi64_epi32 (__m512i __A)
73330b57cec5SDimitry Andric {
73340b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
73350b57cec5SDimitry Andric               (__v8si) _mm256_undefined_si256 (),
73360b57cec5SDimitry Andric               (__mmask8) -1);
73370b57cec5SDimitry Andric }
73380b57cec5SDimitry Andric 
73390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
73400b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
73410b57cec5SDimitry Andric {
73420b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
73430b57cec5SDimitry Andric               (__v8si) __O, __M);
73440b57cec5SDimitry Andric }
73450b57cec5SDimitry Andric 
73460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512
73470b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
73480b57cec5SDimitry Andric {
73490b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
73500b57cec5SDimitry Andric               (__v8si) _mm256_setzero_si256 (),
73510b57cec5SDimitry Andric               __M);
73520b57cec5SDimitry Andric }
73530b57cec5SDimitry Andric 
73540b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
73550b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
73560b57cec5SDimitry Andric {
73570b57cec5SDimitry Andric   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
73580b57cec5SDimitry Andric }
73590b57cec5SDimitry Andric 
73600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73610b57cec5SDimitry Andric _mm512_cvtepi64_epi16 (__m512i __A)
73620b57cec5SDimitry Andric {
73630b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
73640b57cec5SDimitry Andric               (__v8hi) _mm_undefined_si128 (),
73650b57cec5SDimitry Andric               (__mmask8) -1);
73660b57cec5SDimitry Andric }
73670b57cec5SDimitry Andric 
73680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73690b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
73700b57cec5SDimitry Andric {
73710b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
73720b57cec5SDimitry Andric               (__v8hi) __O, __M);
73730b57cec5SDimitry Andric }
73740b57cec5SDimitry Andric 
73750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512
73760b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
73770b57cec5SDimitry Andric {
73780b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
73790b57cec5SDimitry Andric               (__v8hi) _mm_setzero_si128 (),
73800b57cec5SDimitry Andric               __M);
73810b57cec5SDimitry Andric }
73820b57cec5SDimitry Andric 
73830b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
73840b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
73850b57cec5SDimitry Andric {
73860b57cec5SDimitry Andric   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
73870b57cec5SDimitry Andric }
73880b57cec5SDimitry Andric 
/* Unmasked form: expands to the extracti32x4 masked builtin applied to A (as
 * __v16si) with immediate imm, an undefined pass-through and an all-ones mask.
 * NOTE(review): imm presumably selects which 128-bit quarter of A is
 * returned -- confirm. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)_mm_undefined_si128(), (__mmask8)-1))
73930b57cec5SDimitry Andric 
/* Merge-masked form: expands to the extracti32x4 masked builtin applied to A
 * (as __v16si) with immediate imm, W as pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably keep W -- confirm. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))
73980b57cec5SDimitry Andric 
/* Zero-masked form: expands to the extracti32x4 masked builtin applied to A
 * (as __v16si) with immediate imm, a _mm_setzero_si128() pass-through and U
 * as the mask.
 * NOTE(review): lanes with a clear U bit presumably read as zero -- confirm. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
74030b57cec5SDimitry Andric 
/* Unmasked form: expands to the extracti64x4 masked builtin applied to A (as
 * __v8di) with immediate imm, an undefined pass-through and an all-ones mask.
 * NOTE(review): imm presumably selects which 256-bit half of A is returned --
 * confirm. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_undefined_si256(), (__mmask8)-1))
74080b57cec5SDimitry Andric 
/* Merge-masked form: expands to the extracti64x4 masked builtin applied to A
 * (as __v8di) with immediate imm, W as pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably keep W -- confirm. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))
74130b57cec5SDimitry Andric 
/* Zero-masked form: expands to the extracti64x4 masked builtin applied to A
 * (as __v8di) with immediate imm, a _mm256_setzero_si256() pass-through and U
 * as the mask.
 * NOTE(review): lanes with a clear U bit presumably read as zero -- confirm. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(U)))
74180b57cec5SDimitry Andric 
/* Expands to the insertf64x4 builtin with A viewed as __v8df, B viewed as
 * __v4df and imm converted to int.
 * NOTE(review): imm presumably selects which 256-bit half of A is replaced by
 * B -- confirm. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4( \
      (__v8df)(__m512d)(A), (__v4df)(__m256d)(B), (int)(imm)))
74220b57cec5SDimitry Andric 
/* Merge-masked form: feeds mask U, the result of _mm512_insertf64x4(A, B, imm)
 * and W to the selectpd_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or W (U bit clear) -- confirm. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))
74270b57cec5SDimitry Andric 
/* Zero-masked form: feeds mask U, the result of _mm512_insertf64x4(A, B, imm)
 * and _mm512_setzero_pd() to the selectpd_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or zero (U bit clear) -- confirm. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
74320b57cec5SDimitry Andric 
/* Expands to the inserti64x4 builtin with A viewed as __v8di, B viewed as
 * __v4di and imm converted to int.
 * NOTE(review): imm presumably selects which 256-bit half of A is replaced by
 * B -- confirm. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4( \
      (__v8di)(__m512i)(A), (__v4di)(__m256i)(B), (int)(imm)))
74360b57cec5SDimitry Andric 
/* Merge-masked form: feeds mask U, the result of _mm512_inserti64x4(A, B, imm)
 * and W to the selectq_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or W (U bit clear) -- confirm. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))
74410b57cec5SDimitry Andric 
/* Zero-masked form: feeds mask U, the result of _mm512_inserti64x4(A, B, imm)
 * and _mm512_setzero_si512() to the selectq_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or zero (U bit clear) -- confirm. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
74460b57cec5SDimitry Andric 
/* Expands to the insertf32x4 builtin with A viewed as __v16sf, B viewed as
 * __v4sf and imm converted to int.
 * NOTE(review): imm presumably selects which 128-bit quarter of A is replaced
 * by B -- confirm. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4( \
      (__v16sf)(__m512)(A), (__v4sf)(__m128)(B), (int)(imm)))
74500b57cec5SDimitry Andric 
/* Merge-masked form: feeds mask U, the result of _mm512_insertf32x4(A, B, imm)
 * and W to the selectps_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or W (U bit clear) -- confirm. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))
74550b57cec5SDimitry Andric 
/* Zero-masked form: feeds mask U, the result of _mm512_insertf32x4(A, B, imm)
 * and _mm512_setzero_ps() to the selectps_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or zero (U bit clear) -- confirm. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
74600b57cec5SDimitry Andric 
/* Expands to the inserti32x4 builtin with A viewed as __v16si, B viewed as
 * __v4si and imm converted to int.
 * NOTE(review): imm presumably selects which 128-bit quarter of A is replaced
 * by B -- confirm. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4( \
      (__v16si)(__m512i)(A), (__v4si)(__m128i)(B), (int)(imm)))
74640b57cec5SDimitry Andric 
/* Merge-masked form: feeds mask U, the result of _mm512_inserti32x4(A, B, imm)
 * and W to the selectd_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or W (U bit clear) -- confirm. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))
74690b57cec5SDimitry Andric 
/* Zero-masked form: feeds mask U, the result of _mm512_inserti32x4(A, B, imm)
 * and _mm512_setzero_si512() to the selectd_512 builtin.
 * NOTE(review): presumably a per-lane choice of the insert result (U bit set)
 * or zero (U bit clear) -- confirm. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
74740b57cec5SDimitry Andric 
/* Unmasked form with explicit rounding argument R: expands to the
 * getmantpd512 masked builtin on A (as __v8df). The immediate is built as
 * (C << 2) | B; the pass-through is undefined and the mask is all ones.
 * NOTE(review): B and C are presumably the normalization-interval and
 * sign-control selectors -- confirm against the Intel reference. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)))
74800b57cec5SDimitry Andric 
/* Merge-masked form with explicit rounding argument R: expands to the
 * getmantpd512 masked builtin on A (as __v8df) with immediate (C << 2) | B,
 * W as pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably keep W -- confirm. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
74860b57cec5SDimitry Andric 
/* Zero-masked form with explicit rounding argument R: expands to the
 * getmantpd512 masked builtin on A (as __v8df) with immediate (C << 2) | B,
 * a _mm512_setzero_pd() pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably read as zero -- confirm. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
74920b57cec5SDimitry Andric 
/* Unmasked form using _MM_FROUND_CUR_DIRECTION: expands to the getmantpd512
 * masked builtin on A (as __v8df) with immediate (C << 2) | B and an all-ones
 * mask. The pass-through operand is an undefined vector, matching the other
 * unmasked getmant macros in this header (_mm512_getmant_round_pd,
 * _mm512_getmant_ps), instead of a zero vector.
 * NOTE(review): with every mask bit set the pass-through operand is not
 * expected to contribute to the result -- confirm against the Intel
 * reference. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
74990b57cec5SDimitry Andric 
/* Merge-masked form using _MM_FROUND_CUR_DIRECTION: expands to the
 * getmantpd512 masked builtin on A (as __v8df) with immediate (C << 2) | B,
 * W as pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably keep W -- confirm. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
75060b57cec5SDimitry Andric 
/* Zero-masked form using _MM_FROUND_CUR_DIRECTION: expands to the
 * getmantpd512 masked builtin on A (as __v8df) with immediate (C << 2) | B,
 * a _mm512_setzero_pd() pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably read as zero -- confirm. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
75130b57cec5SDimitry Andric 
/* Unmasked form with explicit rounding argument R: expands to the
 * getmantps512 masked builtin on A (as __v16sf). The immediate is built as
 * (C << 2) | B; the pass-through is undefined and the mask is all ones.
 * NOTE(review): B and C are presumably the normalization-interval and
 * sign-control selectors -- confirm against the Intel reference. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))
75190b57cec5SDimitry Andric 
/* Merge-masked form with explicit rounding argument R: expands to the
 * getmantps512 masked builtin on A (as __v16sf) with immediate (C << 2) | B,
 * W as pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably keep W -- confirm. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
75250b57cec5SDimitry Andric 
/* Zero-masked form with explicit rounding argument R: expands to the
 * getmantps512 masked builtin on A (as __v16sf) with immediate (C << 2) | B,
 * a _mm512_setzero_ps() pass-through and U as the mask.
 * NOTE(review): lanes with a clear U bit presumably read as zero -- confirm. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
75310b57cec5SDimitry Andric 
/* Unmasked form using _MM_FROUND_CUR_DIRECTION: expands to the getmantps512
 * masked builtin on A (as __v16sf) with immediate (C << 2) | B, an undefined
 * pass-through and an all-ones mask.
 * NOTE(review): B and C are presumably the normalization-interval and
 * sign-control selectors -- confirm against the Intel reference. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
75380b57cec5SDimitry Andric 
75390b57cec5SDimitry Andric #define _mm512_mask_getmant_ps(W, U, A, B, C) \
7540349cc55cSDimitry Andric   ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
75410b57cec5SDimitry Andric                                             (int)(((C)<<2)|(B)), \
75420b57cec5SDimitry Andric                                             (__v16sf)(__m512)(W), \
75430b57cec5SDimitry Andric                                             (__mmask16)(U), \
7544349cc55cSDimitry Andric                                             _MM_FROUND_CUR_DIRECTION))
75450b57cec5SDimitry Andric 
75460b57cec5SDimitry Andric #define _mm512_maskz_getmant_ps(U, A, B, C) \
7547349cc55cSDimitry Andric   ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
75480b57cec5SDimitry Andric                                             (int)(((C)<<2)|(B)), \
75490b57cec5SDimitry Andric                                             (__v16sf)_mm512_setzero_ps(), \
75500b57cec5SDimitry Andric                                             (__mmask16)(U), \
7551349cc55cSDimitry Andric                                             _MM_FROUND_CUR_DIRECTION))
75520b57cec5SDimitry Andric 
/* Exponent extraction on a 512-bit double vector with an explicit rounding
 * argument R.  Each form expands to
 * __builtin_ia32_getexppd512_mask(src, passthrough, mask, R).
 *
 *   _mm512_getexp_round_pd        all-ones mask, undefined passthrough
 *   _mm512_mask_getexp_round_pd   mask U, passthrough W
 *   _mm512_maskz_getexp_round_pd  mask U, zero passthrough
 */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
75670b57cec5SDimitry Andric 
75680b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
75690b57cec5SDimitry Andric _mm512_getexp_pd (__m512d __A)
75700b57cec5SDimitry Andric {
75710b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
75720b57cec5SDimitry Andric                 (__v8df) _mm512_undefined_pd (),
75730b57cec5SDimitry Andric                 (__mmask8) -1,
75740b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
75750b57cec5SDimitry Andric }
75760b57cec5SDimitry Andric 
75770b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
75780b57cec5SDimitry Andric _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
75790b57cec5SDimitry Andric {
75800b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
75810b57cec5SDimitry Andric                 (__v8df) __W,
75820b57cec5SDimitry Andric                 (__mmask8) __U,
75830b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
75840b57cec5SDimitry Andric }
75850b57cec5SDimitry Andric 
75860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
75870b57cec5SDimitry Andric _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
75880b57cec5SDimitry Andric {
75890b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
75900b57cec5SDimitry Andric                 (__v8df) _mm512_setzero_pd (),
75910b57cec5SDimitry Andric                 (__mmask8) __U,
75920b57cec5SDimitry Andric                 _MM_FROUND_CUR_DIRECTION);
75930b57cec5SDimitry Andric }
75940b57cec5SDimitry Andric 
/* Exponent extraction on a 512-bit float vector with an explicit rounding
 * argument R.  Each form expands to
 * __builtin_ia32_getexpps512_mask(src, passthrough, mask, R).
 *
 *   _mm512_getexp_round_ps        all-ones mask, undefined passthrough
 *   _mm512_mask_getexp_round_ps   mask U, passthrough W
 *   _mm512_maskz_getexp_round_ps  mask U, zero passthrough
 */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
76090b57cec5SDimitry Andric 
76100b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
76110b57cec5SDimitry Andric _mm512_getexp_ps (__m512 __A)
76120b57cec5SDimitry Andric {
76130b57cec5SDimitry Andric   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
76140b57cec5SDimitry Andric                (__v16sf) _mm512_undefined_ps (),
76150b57cec5SDimitry Andric                (__mmask16) -1,
76160b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
76170b57cec5SDimitry Andric }
76180b57cec5SDimitry Andric 
76190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
76200b57cec5SDimitry Andric _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
76210b57cec5SDimitry Andric {
76220b57cec5SDimitry Andric   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
76230b57cec5SDimitry Andric                (__v16sf) __W,
76240b57cec5SDimitry Andric                (__mmask16) __U,
76250b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
76260b57cec5SDimitry Andric }
76270b57cec5SDimitry Andric 
76280b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
76290b57cec5SDimitry Andric _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
76300b57cec5SDimitry Andric {
76310b57cec5SDimitry Andric   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
76320b57cec5SDimitry Andric                (__v16sf) _mm512_setzero_ps (),
76330b57cec5SDimitry Andric                (__mmask16) __U,
76340b57cec5SDimitry Andric                _MM_FROUND_CUR_DIRECTION);
76350b57cec5SDimitry Andric }
76360b57cec5SDimitry Andric 
/* Gather of floats addressed by 64-bit indices.
 * Both forms expand to
 * __builtin_ia32_gatherdiv16sf(passthrough, addr, index, mask, scale).
 * index is a 512-bit vector viewed as eight 64-bit integers, so the result
 * is only 256 bits wide (__m256).
 *
 *   _mm512_i64gather_ps       all-ones mask, undefined passthrough
 *   _mm512_mask_i64gather_ps  mask `mask`, passthrough v1_old
 */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
76480b57cec5SDimitry Andric 
/* Gather of 32-bit integers addressed by 64-bit indices.
 * Both forms expand to
 * __builtin_ia32_gatherdiv16si(passthrough, addr, index, mask, scale).
 * index is viewed as eight 64-bit integers; the result is a 256-bit
 * integer vector (__m256i).
 *
 *   _mm512_i64gather_epi32       all-ones mask, undefined passthrough
 *   _mm512_mask_i64gather_epi32  mask `mask`, passthrough v1_old
 */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
76600b57cec5SDimitry Andric 
/* Gather of doubles addressed by 64-bit indices.
 * Both forms expand to
 * __builtin_ia32_gatherdiv8df(passthrough, addr, index, mask, scale),
 * with index viewed as eight 64-bit integers and a 512-bit double result.
 *
 *   _mm512_i64gather_pd       all-ones mask, undefined passthrough
 *   _mm512_mask_i64gather_pd  mask `mask`, passthrough v1_old
 */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
76720b57cec5SDimitry Andric 
/* Gather of 64-bit integers addressed by 64-bit indices.
 * Both forms expand to
 * __builtin_ia32_gatherdiv8di(passthrough, addr, index, mask, scale).
 * The unmasked form obtains its undefined passthrough from
 * _mm512_undefined_epi32() and reinterprets it as __v8di.
 *
 *   _mm512_i64gather_epi64       all-ones mask, undefined passthrough
 *   _mm512_mask_i64gather_epi64  mask `mask`, passthrough v1_old
 */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
76840b57cec5SDimitry Andric 
/* Gather of floats addressed by 32-bit indices.
 * Both forms expand to
 * __builtin_ia32_gathersiv16sf(passthrough, addr, index, mask, scale).
 * index is an integer vector viewed as sixteen 32-bit integers, so the
 * intermediate cast names __m512i (not the float type __m512), matching
 * the other i32 gather/scatter macros in this header.
 *
 *   _mm512_i32gather_ps       all-ones mask, undefined passthrough
 *   _mm512_mask_i32gather_ps  mask `mask`, passthrough v1_old
 */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)))
76960b57cec5SDimitry Andric 
/* Gather of 32-bit integers addressed by 32-bit indices.
 * Both forms expand to
 * __builtin_ia32_gathersiv16si(passthrough, addr, index, mask, scale),
 * with index viewed as sixteen 32-bit integers.
 *
 *   _mm512_i32gather_epi32       all-ones mask, undefined passthrough
 *   _mm512_mask_i32gather_epi32  mask `mask`, passthrough v1_old
 */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))
77080b57cec5SDimitry Andric 
/* Gather of doubles addressed by 32-bit indices.
 * Both forms expand to
 * __builtin_ia32_gathersiv8df(passthrough, addr, index, mask, scale).
 * index is a 256-bit vector viewed as eight 32-bit integers; the result
 * is a 512-bit double vector.
 *
 *   _mm512_i32gather_pd       all-ones mask, undefined passthrough
 *   _mm512_mask_i32gather_pd  mask `mask`, passthrough v1_old
 */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
77200b57cec5SDimitry Andric 
/* Gather of 64-bit integers addressed by 32-bit indices.
 * Both forms expand to
 * __builtin_ia32_gathersiv8di(passthrough, addr, index, mask, scale).
 * index is a 256-bit vector viewed as eight 32-bit integers; the unmasked
 * form reinterprets _mm512_undefined_epi32() as __v8di for its passthrough.
 *
 *   _mm512_i32gather_epi64       all-ones mask, undefined passthrough
 *   _mm512_mask_i32gather_epi64  mask `mask`, passthrough v1_old
 */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
77320b57cec5SDimitry Andric 
/* Scatter of floats addressed by 64-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scatterdiv16sf(addr, mask, index, v1, scale); the result
 * is not cast.  index is viewed as eight 64-bit integers and v1 as a
 * 256-bit float vector.  The unmasked form passes an all-ones mask.
 */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))
77420b57cec5SDimitry Andric 
/* Scatter of 32-bit integers addressed by 64-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scatterdiv16si(addr, mask, index, v1, scale).
 * index is viewed as eight 64-bit integers and v1 as a 256-bit integer
 * vector.  The unmasked form passes an all-ones mask.
 */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))
77520b57cec5SDimitry Andric 
/* Scatter of doubles addressed by 64-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scatterdiv8df(addr, mask, index, v1, scale), with index
 * viewed as eight 64-bit integers and v1 as eight doubles.
 * The unmasked form passes an all-ones mask.
 */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))
77620b57cec5SDimitry Andric 
/* Scatter of 64-bit integers addressed by 64-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scatterdiv8di(addr, mask, index, v1, scale), with both
 * index and v1 viewed as eight 64-bit integers.
 * The unmasked form passes an all-ones mask.
 */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
77720b57cec5SDimitry Andric 
/* Scatter of floats addressed by 32-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scattersiv16sf(addr, mask, index, v1, scale), with index
 * viewed as sixteen 32-bit integers and v1 as sixteen floats.
 * The unmasked form passes an all-ones 16-bit mask.
 */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))
77820b57cec5SDimitry Andric 
/* Scatter of 32-bit integers addressed by 32-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scattersiv16si(addr, mask, index, v1, scale), with both
 * index and v1 viewed as sixteen 32-bit integers.
 * The unmasked form passes an all-ones 16-bit mask.
 */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))
77920b57cec5SDimitry Andric 
/* Scatter of doubles addressed by 32-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scattersiv8df(addr, mask, index, v1, scale).
 * index is a 256-bit vector viewed as eight 32-bit integers; v1 is viewed
 * as eight doubles.  The unmasked form passes an all-ones mask.
 */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))
78020b57cec5SDimitry Andric 
/* Scatter of 64-bit integers addressed by 32-bit indices.
 * Both forms expand to a bare call of
 * __builtin_ia32_scattersiv8di(addr, mask, index, v1, scale).
 * index is a 256-bit vector viewed as eight 32-bit integers; v1 is viewed
 * as eight 64-bit integers.  The unmasked form passes an all-ones mask.
 */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
78120b57cec5SDimitry Andric 
78130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
78140b57cec5SDimitry Andric _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
78150b57cec5SDimitry Andric {
78160b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
78170b57cec5SDimitry Andric                                        (__v4sf)__A,
78180b57cec5SDimitry Andric                                        (__v4sf)__B,
78190b57cec5SDimitry Andric                                        (__mmask8)__U,
78200b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
78210b57cec5SDimitry Andric }
78220b57cec5SDimitry Andric 
78230b57cec5SDimitry Andric #define _mm_fmadd_round_ss(A, B, C, R) \
7824349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
78250b57cec5SDimitry Andric                                          (__v4sf)(__m128)(B), \
78260b57cec5SDimitry Andric                                          (__v4sf)(__m128)(C), (__mmask8)-1, \
7827349cc55cSDimitry Andric                                          (int)(R)))
78280b57cec5SDimitry Andric 
78290b57cec5SDimitry Andric #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7830349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
78310b57cec5SDimitry Andric                                          (__v4sf)(__m128)(A), \
78320b57cec5SDimitry Andric                                          (__v4sf)(__m128)(B), (__mmask8)(U), \
7833349cc55cSDimitry Andric                                          (int)(R)))
78340b57cec5SDimitry Andric 
78350b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
78360b57cec5SDimitry Andric _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
78370b57cec5SDimitry Andric {
78380b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
78390b57cec5SDimitry Andric                                         (__v4sf)__B,
78400b57cec5SDimitry Andric                                         (__v4sf)__C,
78410b57cec5SDimitry Andric                                         (__mmask8)__U,
78420b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
78430b57cec5SDimitry Andric }
78440b57cec5SDimitry Andric 
78450b57cec5SDimitry Andric #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7846349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
78470b57cec5SDimitry Andric                                           (__v4sf)(__m128)(B), \
78480b57cec5SDimitry Andric                                           (__v4sf)(__m128)(C), (__mmask8)(U), \
7849349cc55cSDimitry Andric                                           (int)(R)))
78500b57cec5SDimitry Andric 
78510b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
78520b57cec5SDimitry Andric _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
78530b57cec5SDimitry Andric {
78540b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
78550b57cec5SDimitry Andric                                         (__v4sf)__X,
78560b57cec5SDimitry Andric                                         (__v4sf)__Y,
78570b57cec5SDimitry Andric                                         (__mmask8)__U,
78580b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
78590b57cec5SDimitry Andric }
78600b57cec5SDimitry Andric 
78610b57cec5SDimitry Andric #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7862349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
78630b57cec5SDimitry Andric                                           (__v4sf)(__m128)(X), \
78640b57cec5SDimitry Andric                                           (__v4sf)(__m128)(Y), (__mmask8)(U), \
7865349cc55cSDimitry Andric                                           (int)(R)))
78660b57cec5SDimitry Andric 
78670b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
78680b57cec5SDimitry Andric _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
78690b57cec5SDimitry Andric {
78700b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
78710b57cec5SDimitry Andric                                        (__v4sf)__A,
78720b57cec5SDimitry Andric                                        -(__v4sf)__B,
78730b57cec5SDimitry Andric                                        (__mmask8)__U,
78740b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
78750b57cec5SDimitry Andric }
78760b57cec5SDimitry Andric 
78770b57cec5SDimitry Andric #define _mm_fmsub_round_ss(A, B, C, R) \
7878349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
78790b57cec5SDimitry Andric                                          (__v4sf)(__m128)(B), \
78800b57cec5SDimitry Andric                                          -(__v4sf)(__m128)(C), (__mmask8)-1, \
7881349cc55cSDimitry Andric                                          (int)(R)))
78820b57cec5SDimitry Andric 
78830b57cec5SDimitry Andric #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7884349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
78850b57cec5SDimitry Andric                                          (__v4sf)(__m128)(A), \
78860b57cec5SDimitry Andric                                          -(__v4sf)(__m128)(B), (__mmask8)(U), \
7887349cc55cSDimitry Andric                                          (int)(R)))
78880b57cec5SDimitry Andric 
78890b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
78900b57cec5SDimitry Andric _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
78910b57cec5SDimitry Andric {
78920b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
78930b57cec5SDimitry Andric                                         (__v4sf)__B,
78940b57cec5SDimitry Andric                                         -(__v4sf)__C,
78950b57cec5SDimitry Andric                                         (__mmask8)__U,
78960b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
78970b57cec5SDimitry Andric }
78980b57cec5SDimitry Andric 
78990b57cec5SDimitry Andric #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7900349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
79010b57cec5SDimitry Andric                                           (__v4sf)(__m128)(B), \
79020b57cec5SDimitry Andric                                           -(__v4sf)(__m128)(C), (__mmask8)(U), \
7903349cc55cSDimitry Andric                                           (int)(R)))
79040b57cec5SDimitry Andric 
79050b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79060b57cec5SDimitry Andric _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
79070b57cec5SDimitry Andric {
79080b57cec5SDimitry Andric   return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
79090b57cec5SDimitry Andric                                         (__v4sf)__X,
79100b57cec5SDimitry Andric                                         (__v4sf)__Y,
79110b57cec5SDimitry Andric                                         (__mmask8)__U,
79120b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
79130b57cec5SDimitry Andric }
79140b57cec5SDimitry Andric 
79150b57cec5SDimitry Andric #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7916349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
79170b57cec5SDimitry Andric                                           (__v4sf)(__m128)(X), \
79180b57cec5SDimitry Andric                                           (__v4sf)(__m128)(Y), (__mmask8)(U), \
7919349cc55cSDimitry Andric                                           (int)(R)))
79200b57cec5SDimitry Andric 
79210b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79220b57cec5SDimitry Andric _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
79230b57cec5SDimitry Andric {
79240b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
79250b57cec5SDimitry Andric                                        -(__v4sf)__A,
79260b57cec5SDimitry Andric                                        (__v4sf)__B,
79270b57cec5SDimitry Andric                                        (__mmask8)__U,
79280b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
79290b57cec5SDimitry Andric }
79300b57cec5SDimitry Andric 
79310b57cec5SDimitry Andric #define _mm_fnmadd_round_ss(A, B, C, R) \
7932349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
79330b57cec5SDimitry Andric                                          -(__v4sf)(__m128)(B), \
79340b57cec5SDimitry Andric                                          (__v4sf)(__m128)(C), (__mmask8)-1, \
7935349cc55cSDimitry Andric                                          (int)(R)))
79360b57cec5SDimitry Andric 
79370b57cec5SDimitry Andric #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7938349cc55cSDimitry Andric   ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
79390b57cec5SDimitry Andric                                          -(__v4sf)(__m128)(A), \
79400b57cec5SDimitry Andric                                          (__v4sf)(__m128)(B), (__mmask8)(U), \
7941349cc55cSDimitry Andric                                          (int)(R)))
79420b57cec5SDimitry Andric 
79430b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79440b57cec5SDimitry Andric _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
79450b57cec5SDimitry Andric {
79460b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
79470b57cec5SDimitry Andric                                         -(__v4sf)__B,
79480b57cec5SDimitry Andric                                         (__v4sf)__C,
79490b57cec5SDimitry Andric                                         (__mmask8)__U,
79500b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
79510b57cec5SDimitry Andric }
79520b57cec5SDimitry Andric 
/* Explicit-rounding maskz fnmadd for scalar float: operands (A, -B, C) and
 * mask U go to __builtin_ia32_vfmaddss3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), (__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
79580b57cec5SDimitry Andric 
79590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79600b57cec5SDimitry Andric _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
79610b57cec5SDimitry Andric {
79620b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
79630b57cec5SDimitry Andric                                         -(__v4sf)__X,
79640b57cec5SDimitry Andric                                         (__v4sf)__Y,
79650b57cec5SDimitry Andric                                         (__mmask8)__U,
79660b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
79670b57cec5SDimitry Andric }
79680b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fnmadd for scalar float: operands (W, -X, Y) and
 * mask U go to __builtin_ia32_vfmaddss3_mask3, with R (cast to int) as the
 * final rounding argument. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
79740b57cec5SDimitry Andric 
79750b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79760b57cec5SDimitry Andric _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
79770b57cec5SDimitry Andric {
79780b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
79790b57cec5SDimitry Andric                                        -(__v4sf)__A,
79800b57cec5SDimitry Andric                                        -(__v4sf)__B,
79810b57cec5SDimitry Andric                                        (__mmask8)__U,
79820b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
79830b57cec5SDimitry Andric }
79840b57cec5SDimitry Andric 
/* Explicit-rounding scalar-float fnmsub macros.  Both expand to
 * __builtin_ia32_vfmaddss3_mask with the second and third operands negated;
 * R (cast to int) is the final rounding argument.
 *   _mm_fnmsub_round_ss:      operands (A, -B, -C), mask fixed at (__mmask8)-1.
 *   _mm_mask_fnmsub_round_ss: operands (W, -A, -B), caller-supplied mask U. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
79960b57cec5SDimitry Andric 
79970b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
79980b57cec5SDimitry Andric _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
79990b57cec5SDimitry Andric {
80000b57cec5SDimitry Andric   return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
80010b57cec5SDimitry Andric                                         -(__v4sf)__B,
80020b57cec5SDimitry Andric                                         -(__v4sf)__C,
80030b57cec5SDimitry Andric                                         (__mmask8)__U,
80040b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
80050b57cec5SDimitry Andric }
80060b57cec5SDimitry Andric 
/* Explicit-rounding maskz fnmsub for scalar float: operands (A, -B, -C) and
 * mask U go to __builtin_ia32_vfmaddss3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
80120b57cec5SDimitry Andric 
80130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
80140b57cec5SDimitry Andric _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
80150b57cec5SDimitry Andric {
80160b57cec5SDimitry Andric   return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
80170b57cec5SDimitry Andric                                         -(__v4sf)__X,
80180b57cec5SDimitry Andric                                         (__v4sf)__Y,
80190b57cec5SDimitry Andric                                         (__mmask8)__U,
80200b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
80210b57cec5SDimitry Andric }
80220b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fnmsub for scalar float: operands (W, -X, Y) and
 * mask U go to __builtin_ia32_vfmsubss3_mask3, with R (cast to int) as the
 * final rounding argument.  Only X is negated; Y is passed as-is. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
80280b57cec5SDimitry Andric 
80290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
80300b57cec5SDimitry Andric _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
80310b57cec5SDimitry Andric {
80320b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
80330b57cec5SDimitry Andric                                        (__v2df)__A,
80340b57cec5SDimitry Andric                                        (__v2df)__B,
80350b57cec5SDimitry Andric                                        (__mmask8)__U,
80360b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
80370b57cec5SDimitry Andric }
80380b57cec5SDimitry Andric 
/* Explicit-rounding scalar-double fmadd macros.  Both expand to
 * __builtin_ia32_vfmaddsd3_mask with no operand negated; R (cast to int) is
 * the final rounding argument.
 *   _mm_fmadd_round_sd:      operands (A, B, C), mask fixed at (__mmask8)-1.
 *   _mm_mask_fmadd_round_sd: operands (W, A, B), caller-supplied mask U. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
80500b57cec5SDimitry Andric 
80510b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
80520b57cec5SDimitry Andric _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
80530b57cec5SDimitry Andric {
80540b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
80550b57cec5SDimitry Andric                                         (__v2df)__B,
80560b57cec5SDimitry Andric                                         (__v2df)__C,
80570b57cec5SDimitry Andric                                         (__mmask8)__U,
80580b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
80590b57cec5SDimitry Andric }
80600b57cec5SDimitry Andric 
/* Explicit-rounding maskz fmadd for scalar double: operands (A, B, C) and
 * mask U go to __builtin_ia32_vfmaddsd3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
80660b57cec5SDimitry Andric 
80670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
80680b57cec5SDimitry Andric _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
80690b57cec5SDimitry Andric {
80700b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
80710b57cec5SDimitry Andric                                         (__v2df)__X,
80720b57cec5SDimitry Andric                                         (__v2df)__Y,
80730b57cec5SDimitry Andric                                         (__mmask8)__U,
80740b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
80750b57cec5SDimitry Andric }
80760b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fmadd for scalar double: operands (W, X, Y) and
 * mask U go to __builtin_ia32_vfmaddsd3_mask3, with R (cast to int) as the
 * final rounding argument. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
80820b57cec5SDimitry Andric 
80830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
80840b57cec5SDimitry Andric _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
80850b57cec5SDimitry Andric {
80860b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
80870b57cec5SDimitry Andric                                        (__v2df)__A,
80880b57cec5SDimitry Andric                                        -(__v2df)__B,
80890b57cec5SDimitry Andric                                        (__mmask8)__U,
80900b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
80910b57cec5SDimitry Andric }
80920b57cec5SDimitry Andric 
/* Explicit-rounding scalar-double fmsub macros.  Both expand to
 * __builtin_ia32_vfmaddsd3_mask with the third operand negated; R (cast to
 * int) is the final rounding argument.
 *   _mm_fmsub_round_sd:      operands (A, B, -C), mask fixed at (__mmask8)-1.
 *   _mm_mask_fmsub_round_sd: operands (W, A, -B), caller-supplied mask U. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
81040b57cec5SDimitry Andric 
81050b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81060b57cec5SDimitry Andric _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
81070b57cec5SDimitry Andric {
81080b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
81090b57cec5SDimitry Andric                                         (__v2df)__B,
81100b57cec5SDimitry Andric                                         -(__v2df)__C,
81110b57cec5SDimitry Andric                                         (__mmask8)__U,
81120b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
81130b57cec5SDimitry Andric }
81140b57cec5SDimitry Andric 
/* Explicit-rounding maskz fmsub for scalar double: operands (A, B, -C) and
 * mask U go to __builtin_ia32_vfmaddsd3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
81200b57cec5SDimitry Andric 
81210b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81220b57cec5SDimitry Andric _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
81230b57cec5SDimitry Andric {
81240b57cec5SDimitry Andric   return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
81250b57cec5SDimitry Andric                                         (__v2df)__X,
81260b57cec5SDimitry Andric                                         (__v2df)__Y,
81270b57cec5SDimitry Andric                                         (__mmask8)__U,
81280b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
81290b57cec5SDimitry Andric }
81300b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fmsub for scalar double: operands (W, X, Y) and
 * mask U go to __builtin_ia32_vfmsubsd3_mask3 without negation, with R (cast
 * to int) as the final rounding argument. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
81360b57cec5SDimitry Andric 
81370b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81380b57cec5SDimitry Andric _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
81390b57cec5SDimitry Andric {
81400b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
81410b57cec5SDimitry Andric                                        -(__v2df)__A,
81420b57cec5SDimitry Andric                                        (__v2df)__B,
81430b57cec5SDimitry Andric                                        (__mmask8)__U,
81440b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
81450b57cec5SDimitry Andric }
81460b57cec5SDimitry Andric 
/* Explicit-rounding scalar-double fnmadd macros.  Both expand to
 * __builtin_ia32_vfmaddsd3_mask with the second operand negated; R (cast to
 * int) is the final rounding argument.
 *   _mm_fnmadd_round_sd:      operands (A, -B, C), mask fixed at (__mmask8)-1.
 *   _mm_mask_fnmadd_round_sd: operands (W, -A, B), caller-supplied mask U. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
81580b57cec5SDimitry Andric 
81590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81600b57cec5SDimitry Andric _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
81610b57cec5SDimitry Andric {
81620b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
81630b57cec5SDimitry Andric                                         -(__v2df)__B,
81640b57cec5SDimitry Andric                                         (__v2df)__C,
81650b57cec5SDimitry Andric                                         (__mmask8)__U,
81660b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
81670b57cec5SDimitry Andric }
81680b57cec5SDimitry Andric 
/* Explicit-rounding maskz fnmadd for scalar double: operands (A, -B, C) and
 * mask U go to __builtin_ia32_vfmaddsd3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
81740b57cec5SDimitry Andric 
81750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81760b57cec5SDimitry Andric _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
81770b57cec5SDimitry Andric {
81780b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
81790b57cec5SDimitry Andric                                         -(__v2df)__X,
81800b57cec5SDimitry Andric                                         (__v2df)__Y,
81810b57cec5SDimitry Andric                                         (__mmask8)__U,
81820b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
81830b57cec5SDimitry Andric }
81840b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fnmadd for scalar double: operands (W, -X, Y) and
 * mask U go to __builtin_ia32_vfmaddsd3_mask3, with R (cast to int) as the
 * final rounding argument. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
81900b57cec5SDimitry Andric 
81910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
81920b57cec5SDimitry Andric _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
81930b57cec5SDimitry Andric {
81940b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
81950b57cec5SDimitry Andric                                        -(__v2df)__A,
81960b57cec5SDimitry Andric                                        -(__v2df)__B,
81970b57cec5SDimitry Andric                                        (__mmask8)__U,
81980b57cec5SDimitry Andric                                        _MM_FROUND_CUR_DIRECTION);
81990b57cec5SDimitry Andric }
82000b57cec5SDimitry Andric 
/* Explicit-rounding scalar-double fnmsub macros.  Both expand to
 * __builtin_ia32_vfmaddsd3_mask with the second and third operands negated;
 * R (cast to int) is the final rounding argument.
 *   _mm_fnmsub_round_sd:      operands (A, -B, -C), mask fixed at (__mmask8)-1.
 *   _mm_mask_fnmsub_round_sd: operands (W, -A, -B), caller-supplied mask U. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
82120b57cec5SDimitry Andric 
82130b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
82140b57cec5SDimitry Andric _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
82150b57cec5SDimitry Andric {
82160b57cec5SDimitry Andric   return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
82170b57cec5SDimitry Andric                                         -(__v2df)__B,
82180b57cec5SDimitry Andric                                         -(__v2df)__C,
82190b57cec5SDimitry Andric                                         (__mmask8)__U,
82200b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
82210b57cec5SDimitry Andric }
82220b57cec5SDimitry Andric 
/* Explicit-rounding maskz fnmsub for scalar double: operands (A, -B, -C) and
 * mask U go to __builtin_ia32_vfmaddsd3_maskz, with R (cast to int) as the
 * final rounding argument. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
82290b57cec5SDimitry Andric 
82300b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
82310b57cec5SDimitry Andric _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
82320b57cec5SDimitry Andric {
82330b57cec5SDimitry Andric   return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
82340b57cec5SDimitry Andric                                         -(__v2df)__X,
82350b57cec5SDimitry Andric                                         (__v2df)__Y,
82360b57cec5SDimitry Andric                                         (__mmask8)__U,
82370b57cec5SDimitry Andric                                         _MM_FROUND_CUR_DIRECTION);
82380b57cec5SDimitry Andric }
82390b57cec5SDimitry Andric 
/* Explicit-rounding mask3 fnmsub for scalar double: operands (W, -X, Y) and
 * mask U go to __builtin_ia32_vfmsubsd3_mask3, with R (cast to int) as the
 * final rounding argument.  Only X is negated; Y is passed as-is. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
82450b57cec5SDimitry Andric 
/* Immediate-controlled permute of a 512-bit double vector.
 *   _mm512_permutex_pd:       forwards X and control C (cast to int) to
 *                             __builtin_ia32_permdf512.
 *   _mm512_mask_permutex_pd:  feeds that result and W to
 *                             __builtin_ia32_selectpd_512 under mask U.
 *   _mm512_maskz_permutex_pd: same select, with _mm512_setzero_pd() in place
 *                             of W. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512( \
      (__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
82580b57cec5SDimitry Andric 
/* Immediate-controlled permute of a 512-bit vector of 64-bit integers.
 *   _mm512_permutex_epi64:       forwards X and control C (cast to int) to
 *                                __builtin_ia32_permdi512.
 *   _mm512_mask_permutex_epi64:  feeds that result and W to
 *                                __builtin_ia32_selectq_512 under mask U.
 *   _mm512_maskz_permutex_epi64: same select, with _mm512_setzero_si512() in
 *                                place of W. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512( \
      (__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()))
82710b57cec5SDimitry Andric 
82720b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
82730b57cec5SDimitry Andric _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
82740b57cec5SDimitry Andric {
82750b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
82760b57cec5SDimitry Andric }
82770b57cec5SDimitry Andric 
82780b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
82790b57cec5SDimitry Andric _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
82800b57cec5SDimitry Andric {
82810b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
82820b57cec5SDimitry Andric                                         (__v8df)_mm512_permutexvar_pd(__X, __Y),
82830b57cec5SDimitry Andric                                         (__v8df)__W);
82840b57cec5SDimitry Andric }
82850b57cec5SDimitry Andric 
82860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
82870b57cec5SDimitry Andric _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
82880b57cec5SDimitry Andric {
82890b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
82900b57cec5SDimitry Andric                                         (__v8df)_mm512_permutexvar_pd(__X, __Y),
82910b57cec5SDimitry Andric                                         (__v8df)_mm512_setzero_pd());
82920b57cec5SDimitry Andric }
82930b57cec5SDimitry Andric 
82940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
82950b57cec5SDimitry Andric _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
82960b57cec5SDimitry Andric {
82970b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
82980b57cec5SDimitry Andric }
82990b57cec5SDimitry Andric 
83000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
83010b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
83020b57cec5SDimitry Andric {
83030b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
83040b57cec5SDimitry Andric                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
83050b57cec5SDimitry Andric                                      (__v8di)_mm512_setzero_si512());
83060b57cec5SDimitry Andric }
83070b57cec5SDimitry Andric 
83080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
83090b57cec5SDimitry Andric _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
83100b57cec5SDimitry Andric              __m512i __Y)
83110b57cec5SDimitry Andric {
83120b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
83130b57cec5SDimitry Andric                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
83140b57cec5SDimitry Andric                                      (__v8di)__W);
83150b57cec5SDimitry Andric }
83160b57cec5SDimitry Andric 
83170b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
83180b57cec5SDimitry Andric _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
83190b57cec5SDimitry Andric {
83200b57cec5SDimitry Andric   return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
83210b57cec5SDimitry Andric }
83220b57cec5SDimitry Andric 
83230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
83240b57cec5SDimitry Andric _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
83250b57cec5SDimitry Andric {
83260b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
83270b57cec5SDimitry Andric                                        (__v16sf)_mm512_permutexvar_ps(__X, __Y),
83280b57cec5SDimitry Andric                                        (__v16sf)__W);
83290b57cec5SDimitry Andric }
83300b57cec5SDimitry Andric 
83310b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
83320b57cec5SDimitry Andric _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
83330b57cec5SDimitry Andric {
83340b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
83350b57cec5SDimitry Andric                                        (__v16sf)_mm512_permutexvar_ps(__X, __Y),
83360b57cec5SDimitry Andric                                        (__v16sf)_mm512_setzero_ps());
83370b57cec5SDimitry Andric }
83380b57cec5SDimitry Andric 
83390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
83400b57cec5SDimitry Andric _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
83410b57cec5SDimitry Andric {
83420b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
83430b57cec5SDimitry Andric }
83440b57cec5SDimitry Andric 
/* Alternate spelling: expands to _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
83460b57cec5SDimitry Andric 
83470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
83480b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
83490b57cec5SDimitry Andric {
83500b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
83510b57cec5SDimitry Andric                                     (__v16si)_mm512_permutexvar_epi32(__X, __Y),
83520b57cec5SDimitry Andric                                     (__v16si)_mm512_setzero_si512());
83530b57cec5SDimitry Andric }
83540b57cec5SDimitry Andric 
83550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
83560b57cec5SDimitry Andric _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
83570b57cec5SDimitry Andric              __m512i __Y)
83580b57cec5SDimitry Andric {
83590b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
83600b57cec5SDimitry Andric                                     (__v16si)_mm512_permutexvar_epi32(__X, __Y),
83610b57cec5SDimitry Andric                                     (__v16si)__W);
83620b57cec5SDimitry Andric }
83630b57cec5SDimitry Andric 
/* Alternate spelling: expands to _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
83650b57cec5SDimitry Andric 
83660b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
83670b57cec5SDimitry Andric _mm512_kand (__mmask16 __A, __mmask16 __B)
83680b57cec5SDimitry Andric {
83690b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
83700b57cec5SDimitry Andric }
83710b57cec5SDimitry Andric 
83720b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
83730b57cec5SDimitry Andric _mm512_kandn (__mmask16 __A, __mmask16 __B)
83740b57cec5SDimitry Andric {
83750b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
83760b57cec5SDimitry Andric }
83770b57cec5SDimitry Andric 
83780b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
83790b57cec5SDimitry Andric _mm512_kor (__mmask16 __A, __mmask16 __B)
83800b57cec5SDimitry Andric {
83810b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
83820b57cec5SDimitry Andric }
83830b57cec5SDimitry Andric 
83840b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
83850b57cec5SDimitry Andric _mm512_kortestc (__mmask16 __A, __mmask16 __B)
83860b57cec5SDimitry Andric {
83870b57cec5SDimitry Andric   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
83880b57cec5SDimitry Andric }
83890b57cec5SDimitry Andric 
83900b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
83910b57cec5SDimitry Andric _mm512_kortestz (__mmask16 __A, __mmask16 __B)
83920b57cec5SDimitry Andric {
83930b57cec5SDimitry Andric   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
83940b57cec5SDimitry Andric }
83950b57cec5SDimitry Andric 
83960b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS
83970b57cec5SDimitry Andric _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
83980b57cec5SDimitry Andric {
83990b57cec5SDimitry Andric   return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
84000b57cec5SDimitry Andric }
84010b57cec5SDimitry Andric 
84020b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS
84030b57cec5SDimitry Andric _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
84040b57cec5SDimitry Andric {
84050b57cec5SDimitry Andric   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
84060b57cec5SDimitry Andric }
84070b57cec5SDimitry Andric 
84080b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS
84090b57cec5SDimitry Andric _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
84100b57cec5SDimitry Andric   *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
84110b57cec5SDimitry Andric   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
84120b57cec5SDimitry Andric }
84130b57cec5SDimitry Andric 
84140b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
84150b57cec5SDimitry Andric _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
84160b57cec5SDimitry Andric {
84170b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
84180b57cec5SDimitry Andric }
84190b57cec5SDimitry Andric 
84200b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
84210b57cec5SDimitry Andric _mm512_kxnor (__mmask16 __A, __mmask16 __B)
84220b57cec5SDimitry Andric {
84230b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
84240b57cec5SDimitry Andric }
84250b57cec5SDimitry Andric 
84260b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
84270b57cec5SDimitry Andric _mm512_kxor (__mmask16 __A, __mmask16 __B)
84280b57cec5SDimitry Andric {
84290b57cec5SDimitry Andric   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
84300b57cec5SDimitry Andric }
84310b57cec5SDimitry Andric 
/* _k*_mask16 spellings: plain aliases for the _mm512_k* mask functions. */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
84380b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_kshiftlihi on mask A with count I; a macro so
 * that I reaches the builtin as written by the caller. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi( \
      (__mmask16)(A), (unsigned int)(I)))
84410b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_kshiftrihi on mask A with count I; a macro so
 * that I reaches the builtin as written by the caller. */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi( \
      (__mmask16)(A), (unsigned int)(I)))
84440b57cec5SDimitry Andric 
84450b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS
84460b57cec5SDimitry Andric _cvtmask16_u32(__mmask16 __A) {
84470b57cec5SDimitry Andric   return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
84480b57cec5SDimitry Andric }
84490b57cec5SDimitry Andric 
84500b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
84510b57cec5SDimitry Andric _cvtu32_mask16(unsigned int __A) {
84520b57cec5SDimitry Andric   return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
84530b57cec5SDimitry Andric }
84540b57cec5SDimitry Andric 
84550b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS
84560b57cec5SDimitry Andric _load_mask16(__mmask16 *__A) {
84570b57cec5SDimitry Andric   return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
84580b57cec5SDimitry Andric }
84590b57cec5SDimitry Andric 
84600b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
84610b57cec5SDimitry Andric _store_mask16(__mmask16 *__A, __mmask16 __B) {
84620b57cec5SDimitry Andric   *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
84630b57cec5SDimitry Andric }
84640b57cec5SDimitry Andric 
84650b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
8466a7dea167SDimitry Andric _mm512_stream_si512 (void * __P, __m512i __A)
84670b57cec5SDimitry Andric {
84680b57cec5SDimitry Andric   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
84690b57cec5SDimitry Andric   __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
84700b57cec5SDimitry Andric }
84710b57cec5SDimitry Andric 
84720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
84730b57cec5SDimitry Andric _mm512_stream_load_si512 (void const *__P)
84740b57cec5SDimitry Andric {
84750b57cec5SDimitry Andric   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
84760b57cec5SDimitry Andric   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
84770b57cec5SDimitry Andric }
84780b57cec5SDimitry Andric 
84790b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
8480a7dea167SDimitry Andric _mm512_stream_pd (void *__P, __m512d __A)
84810b57cec5SDimitry Andric {
84820b57cec5SDimitry Andric   typedef __v8df __v8df_aligned __attribute__((aligned(64)));
84830b57cec5SDimitry Andric   __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
84840b57cec5SDimitry Andric }
84850b57cec5SDimitry Andric 
84860b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
8487a7dea167SDimitry Andric _mm512_stream_ps (void *__P, __m512 __A)
84880b57cec5SDimitry Andric {
84890b57cec5SDimitry Andric   typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
84900b57cec5SDimitry Andric   __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
84910b57cec5SDimitry Andric }
84920b57cec5SDimitry Andric 
84930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
84940b57cec5SDimitry Andric _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
84950b57cec5SDimitry Andric {
84960b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
84970b57cec5SDimitry Andric                   (__v8df) __W,
84980b57cec5SDimitry Andric                   (__mmask8) __U);
84990b57cec5SDimitry Andric }
85000b57cec5SDimitry Andric 
85010b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
85020b57cec5SDimitry Andric _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
85030b57cec5SDimitry Andric {
85040b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
85050b57cec5SDimitry Andric                   (__v8df)
85060b57cec5SDimitry Andric                   _mm512_setzero_pd (),
85070b57cec5SDimitry Andric                   (__mmask8) __U);
85080b57cec5SDimitry Andric }
85090b57cec5SDimitry Andric 
85100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
85110b57cec5SDimitry Andric _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
85120b57cec5SDimitry Andric {
85130b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
85140b57cec5SDimitry Andric                   (__v8di) __W,
85150b57cec5SDimitry Andric                   (__mmask8) __U);
85160b57cec5SDimitry Andric }
85170b57cec5SDimitry Andric 
85180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
85190b57cec5SDimitry Andric _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
85200b57cec5SDimitry Andric {
85210b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
85220b57cec5SDimitry Andric                   (__v8di)
85230b57cec5SDimitry Andric                   _mm512_setzero_si512 (),
85240b57cec5SDimitry Andric                   (__mmask8) __U);
85250b57cec5SDimitry Andric }
85260b57cec5SDimitry Andric 
85270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
85280b57cec5SDimitry Andric _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
85290b57cec5SDimitry Andric {
85300b57cec5SDimitry Andric   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
85310b57cec5SDimitry Andric                  (__v16sf) __W,
85320b57cec5SDimitry Andric                  (__mmask16) __U);
85330b57cec5SDimitry Andric }
85340b57cec5SDimitry Andric 
85350b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
85360b57cec5SDimitry Andric _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
85370b57cec5SDimitry Andric {
85380b57cec5SDimitry Andric   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
85390b57cec5SDimitry Andric                  (__v16sf)
85400b57cec5SDimitry Andric                  _mm512_setzero_ps (),
85410b57cec5SDimitry Andric                  (__mmask16) __U);
85420b57cec5SDimitry Andric }
85430b57cec5SDimitry Andric 
85440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
85450b57cec5SDimitry Andric _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
85460b57cec5SDimitry Andric {
85470b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
85480b57cec5SDimitry Andric                   (__v16si) __W,
85490b57cec5SDimitry Andric                   (__mmask16) __U);
85500b57cec5SDimitry Andric }
85510b57cec5SDimitry Andric 
85520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
85530b57cec5SDimitry Andric _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
85540b57cec5SDimitry Andric {
85550b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
85560b57cec5SDimitry Andric                   (__v16si)
85570b57cec5SDimitry Andric                   _mm512_setzero_si512 (),
85580b57cec5SDimitry Andric                   (__mmask16) __U);
85590b57cec5SDimitry Andric }
85600b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cmpss_mask on X and Y (viewed as __v4sf) with P
 * and R as int arguments and (__mmask8)-1 as the mask operand. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)))
85650b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cmpss_mask on X and Y (viewed as __v4sf) with P
 * and R as int arguments and the caller's M as the mask operand. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)))
85700b57cec5SDimitry Andric 
/* Like _mm_cmp_round_ss_mask, but the last builtin argument is fixed to
 * _MM_FROUND_CUR_DIRECTION; the mask operand is (__mmask8)-1. */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
85760b57cec5SDimitry Andric 
/* Like _mm_mask_cmp_round_ss_mask, but the last builtin argument is fixed to
 * _MM_FROUND_CUR_DIRECTION; M is the mask operand. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
85820b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cmpsd_mask on X and Y (viewed as __v2df) with P
 * and R as int arguments and (__mmask8)-1 as the mask operand. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)))
85870b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cmpsd_mask on X and Y (viewed as __v2df) with P
 * and R as int arguments and the caller's M as the mask operand. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)))
85920b57cec5SDimitry Andric 
/* Like _mm_cmp_round_sd_mask, but the last builtin argument is fixed to
 * _MM_FROUND_CUR_DIRECTION; the mask operand is (__mmask8)-1. */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
85980b57cec5SDimitry Andric 
/* Like _mm_mask_cmp_round_sd_mask, but the last builtin argument is fixed to
 * _MM_FROUND_CUR_DIRECTION; M is the mask operand. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
86040b57cec5SDimitry Andric 
86050b57cec5SDimitry Andric /* Bit Test */
86060b57cec5SDimitry Andric 
86070b57cec5SDimitry Andric static __inline __mmask16 __DEFAULT_FN_ATTRS512
86080b57cec5SDimitry Andric _mm512_test_epi32_mask (__m512i __A, __m512i __B)
86090b57cec5SDimitry Andric {
86100b57cec5SDimitry Andric   return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
86110b57cec5SDimitry Andric                                    _mm512_setzero_si512());
86120b57cec5SDimitry Andric }
86130b57cec5SDimitry Andric 
86140b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
86150b57cec5SDimitry Andric _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
86160b57cec5SDimitry Andric {
86170b57cec5SDimitry Andric   return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
86180b57cec5SDimitry Andric                                         _mm512_setzero_si512());
86190b57cec5SDimitry Andric }
86200b57cec5SDimitry Andric 
86210b57cec5SDimitry Andric static __inline __mmask8 __DEFAULT_FN_ATTRS512
86220b57cec5SDimitry Andric _mm512_test_epi64_mask (__m512i __A, __m512i __B)
86230b57cec5SDimitry Andric {
86240b57cec5SDimitry Andric   return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
86250b57cec5SDimitry Andric                                    _mm512_setzero_si512());
86260b57cec5SDimitry Andric }
86270b57cec5SDimitry Andric 
86280b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
86290b57cec5SDimitry Andric _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
86300b57cec5SDimitry Andric {
86310b57cec5SDimitry Andric   return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
86320b57cec5SDimitry Andric                                         _mm512_setzero_si512());
86330b57cec5SDimitry Andric }
86340b57cec5SDimitry Andric 
86350b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
86360b57cec5SDimitry Andric _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
86370b57cec5SDimitry Andric {
86380b57cec5SDimitry Andric   return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
86390b57cec5SDimitry Andric                                   _mm512_setzero_si512());
86400b57cec5SDimitry Andric }
86410b57cec5SDimitry Andric 
86420b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
86430b57cec5SDimitry Andric _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
86440b57cec5SDimitry Andric {
86450b57cec5SDimitry Andric   return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
86460b57cec5SDimitry Andric                                        _mm512_setzero_si512());
86470b57cec5SDimitry Andric }
86480b57cec5SDimitry Andric 
86490b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
86500b57cec5SDimitry Andric _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
86510b57cec5SDimitry Andric {
86520b57cec5SDimitry Andric   return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
86530b57cec5SDimitry Andric                                   _mm512_setzero_si512());
86540b57cec5SDimitry Andric }
86550b57cec5SDimitry Andric 
86560b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
86570b57cec5SDimitry Andric _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
86580b57cec5SDimitry Andric {
86590b57cec5SDimitry Andric   return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
86600b57cec5SDimitry Andric                                        _mm512_setzero_si512());
86610b57cec5SDimitry Andric }
86620b57cec5SDimitry Andric 
86630b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
86640b57cec5SDimitry Andric _mm512_movehdup_ps (__m512 __A)
86650b57cec5SDimitry Andric {
86660b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
86670b57cec5SDimitry Andric                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
86680b57cec5SDimitry Andric }
86690b57cec5SDimitry Andric 
86700b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
86710b57cec5SDimitry Andric _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
86720b57cec5SDimitry Andric {
86730b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
86740b57cec5SDimitry Andric                                              (__v16sf)_mm512_movehdup_ps(__A),
86750b57cec5SDimitry Andric                                              (__v16sf)__W);
86760b57cec5SDimitry Andric }
86770b57cec5SDimitry Andric 
86780b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
86790b57cec5SDimitry Andric _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
86800b57cec5SDimitry Andric {
86810b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
86820b57cec5SDimitry Andric                                              (__v16sf)_mm512_movehdup_ps(__A),
86830b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
86840b57cec5SDimitry Andric }
86850b57cec5SDimitry Andric 
86860b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
86870b57cec5SDimitry Andric _mm512_moveldup_ps (__m512 __A)
86880b57cec5SDimitry Andric {
86890b57cec5SDimitry Andric   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
86900b57cec5SDimitry Andric                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
86910b57cec5SDimitry Andric }
86920b57cec5SDimitry Andric 
86930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
86940b57cec5SDimitry Andric _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
86950b57cec5SDimitry Andric {
86960b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
86970b57cec5SDimitry Andric                                              (__v16sf)_mm512_moveldup_ps(__A),
86980b57cec5SDimitry Andric                                              (__v16sf)__W);
86990b57cec5SDimitry Andric }
87000b57cec5SDimitry Andric 
87010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
87020b57cec5SDimitry Andric _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
87030b57cec5SDimitry Andric {
87040b57cec5SDimitry Andric   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
87050b57cec5SDimitry Andric                                              (__v16sf)_mm512_moveldup_ps(__A),
87060b57cec5SDimitry Andric                                              (__v16sf)_mm512_setzero_ps());
87070b57cec5SDimitry Andric }
87080b57cec5SDimitry Andric 
87090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
87100b57cec5SDimitry Andric _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
87110b57cec5SDimitry Andric {
87120b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
87130b57cec5SDimitry Andric }
87140b57cec5SDimitry Andric 
87150b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
87160b57cec5SDimitry Andric _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
87170b57cec5SDimitry Andric {
87180b57cec5SDimitry Andric   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
87190b57cec5SDimitry Andric                                      _mm_setzero_ps());
87200b57cec5SDimitry Andric }
87210b57cec5SDimitry Andric 
87220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
87230b57cec5SDimitry Andric _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
87240b57cec5SDimitry Andric {
87250b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
87260b57cec5SDimitry Andric }
87270b57cec5SDimitry Andric 
87280b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
87290b57cec5SDimitry Andric _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
87300b57cec5SDimitry Andric {
87310b57cec5SDimitry Andric   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
87320b57cec5SDimitry Andric                                      _mm_setzero_pd());
87330b57cec5SDimitry Andric }
87340b57cec5SDimitry Andric 
87350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128
87360b57cec5SDimitry Andric _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
87370b57cec5SDimitry Andric {
87380b57cec5SDimitry Andric   __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
87390b57cec5SDimitry Andric }
87400b57cec5SDimitry Andric 
87410b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128
87420b57cec5SDimitry Andric _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
87430b57cec5SDimitry Andric {
87440b57cec5SDimitry Andric   __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
87450b57cec5SDimitry Andric }
87460b57cec5SDimitry Andric 
87470b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
87480b57cec5SDimitry Andric _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
87490b57cec5SDimitry Andric {
87500b57cec5SDimitry Andric   __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
87510b57cec5SDimitry Andric                                                 (__v4sf)_mm_setzero_ps(),
87520b57cec5SDimitry Andric                                                 0, 4, 4, 4);
87530b57cec5SDimitry Andric 
8754480093f4SDimitry Andric   return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
87550b57cec5SDimitry Andric }
87560b57cec5SDimitry Andric 
87570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
87580b57cec5SDimitry Andric _mm_maskz_load_ss (__mmask8 __U, const float* __A)
87590b57cec5SDimitry Andric {
8760480093f4SDimitry Andric   return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
87610b57cec5SDimitry Andric                                                 (__v4sf) _mm_setzero_ps(),
87620b57cec5SDimitry Andric                                                 __U & 1);
87630b57cec5SDimitry Andric }
87640b57cec5SDimitry Andric 
87650b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
87660b57cec5SDimitry Andric _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
87670b57cec5SDimitry Andric {
87680b57cec5SDimitry Andric   __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
87690b57cec5SDimitry Andric                                                  (__v2df)_mm_setzero_pd(),
87700b57cec5SDimitry Andric                                                  0, 2);
87710b57cec5SDimitry Andric 
8772480093f4SDimitry Andric   return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
87730b57cec5SDimitry Andric }
87740b57cec5SDimitry Andric 
87750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
87760b57cec5SDimitry Andric _mm_maskz_load_sd (__mmask8 __U, const double* __A)
87770b57cec5SDimitry Andric {
8778480093f4SDimitry Andric   return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
87790b57cec5SDimitry Andric                                                   (__v2df) _mm_setzero_pd(),
87800b57cec5SDimitry Andric                                                   __U & 1);
87810b57cec5SDimitry Andric }
87820b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_pshufd512 on A (viewed as __v16si) with I passed
 * as the int control argument. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512( \
      (__v16si)(__m512i)(A), (int)(I)))
87850b57cec5SDimitry Andric 
/* Selects per lane, under mask U, between _mm512_shuffle_epi32(A, I) and W
 * using __builtin_ia32_selectd_512. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)))
87900b57cec5SDimitry Andric 
/* Selects per lane, under mask U, between _mm512_shuffle_epi32(A, I) and the
 * vector from _mm512_setzero_si512() using __builtin_ia32_selectd_512. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()))
87950b57cec5SDimitry Andric 
87960b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
87970b57cec5SDimitry Andric _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
87980b57cec5SDimitry Andric {
87990b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
88000b57cec5SDimitry Andric                 (__v8df) __W,
88010b57cec5SDimitry Andric                 (__mmask8) __U);
88020b57cec5SDimitry Andric }
88030b57cec5SDimitry Andric 
88040b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
88050b57cec5SDimitry Andric _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
88060b57cec5SDimitry Andric {
88070b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
88080b57cec5SDimitry Andric                 (__v8df) _mm512_setzero_pd (),
88090b57cec5SDimitry Andric                 (__mmask8) __U);
88100b57cec5SDimitry Andric }
88110b57cec5SDimitry Andric 
88120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88130b57cec5SDimitry Andric _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
88140b57cec5SDimitry Andric {
88150b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
88160b57cec5SDimitry Andric                 (__v8di) __W,
88170b57cec5SDimitry Andric                 (__mmask8) __U);
88180b57cec5SDimitry Andric }
88190b57cec5SDimitry Andric 
88200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88210b57cec5SDimitry Andric _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
88220b57cec5SDimitry Andric {
88230b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
88240b57cec5SDimitry Andric                 (__v8di) _mm512_setzero_si512 (),
88250b57cec5SDimitry Andric                 (__mmask8) __U);
88260b57cec5SDimitry Andric }
88270b57cec5SDimitry Andric 
88280b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
88290b57cec5SDimitry Andric _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
88300b57cec5SDimitry Andric {
88310b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
88320b57cec5SDimitry Andric               (__v8df) __W,
88330b57cec5SDimitry Andric               (__mmask8) __U);
88340b57cec5SDimitry Andric }
88350b57cec5SDimitry Andric 
88360b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
88370b57cec5SDimitry Andric _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
88380b57cec5SDimitry Andric {
88390b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
88400b57cec5SDimitry Andric               (__v8df) _mm512_setzero_pd(),
88410b57cec5SDimitry Andric               (__mmask8) __U);
88420b57cec5SDimitry Andric }
88430b57cec5SDimitry Andric 
88440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88450b57cec5SDimitry Andric _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
88460b57cec5SDimitry Andric {
88470b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
88480b57cec5SDimitry Andric               (__v8di) __W,
88490b57cec5SDimitry Andric               (__mmask8) __U);
88500b57cec5SDimitry Andric }
88510b57cec5SDimitry Andric 
88520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88530b57cec5SDimitry Andric _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
88540b57cec5SDimitry Andric {
88550b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
88560b57cec5SDimitry Andric               (__v8di) _mm512_setzero_si512(),
88570b57cec5SDimitry Andric               (__mmask8) __U);
88580b57cec5SDimitry Andric }
88590b57cec5SDimitry Andric 
88600b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
88610b57cec5SDimitry Andric _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
88620b57cec5SDimitry Andric {
88630b57cec5SDimitry Andric   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
88640b57cec5SDimitry Andric                    (__v16sf) __W,
88650b57cec5SDimitry Andric                    (__mmask16) __U);
88660b57cec5SDimitry Andric }
88670b57cec5SDimitry Andric 
88680b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
88690b57cec5SDimitry Andric _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
88700b57cec5SDimitry Andric {
88710b57cec5SDimitry Andric   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
88720b57cec5SDimitry Andric                    (__v16sf) _mm512_setzero_ps(),
88730b57cec5SDimitry Andric                    (__mmask16) __U);
88740b57cec5SDimitry Andric }
88750b57cec5SDimitry Andric 
88760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88770b57cec5SDimitry Andric _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
88780b57cec5SDimitry Andric {
88790b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
88800b57cec5SDimitry Andric               (__v16si) __W,
88810b57cec5SDimitry Andric               (__mmask16) __U);
88820b57cec5SDimitry Andric }
88830b57cec5SDimitry Andric 
88840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
88850b57cec5SDimitry Andric _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
88860b57cec5SDimitry Andric {
88870b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
88880b57cec5SDimitry Andric               (__v16si) _mm512_setzero_si512(),
88890b57cec5SDimitry Andric               (__mmask16) __U);
88900b57cec5SDimitry Andric }
88910b57cec5SDimitry Andric 
88920b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
88930b57cec5SDimitry Andric _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
88940b57cec5SDimitry Andric {
88950b57cec5SDimitry Andric   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
88960b57cec5SDimitry Andric                (__v16sf) __W,
88970b57cec5SDimitry Andric                (__mmask16) __U);
88980b57cec5SDimitry Andric }
88990b57cec5SDimitry Andric 
89000b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
89010b57cec5SDimitry Andric _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
89020b57cec5SDimitry Andric {
89030b57cec5SDimitry Andric   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
89040b57cec5SDimitry Andric                (__v16sf) _mm512_setzero_ps(),
89050b57cec5SDimitry Andric                (__mmask16) __U);
89060b57cec5SDimitry Andric }
89070b57cec5SDimitry Andric 
89080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
89090b57cec5SDimitry Andric _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
89100b57cec5SDimitry Andric {
89110b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
89120b57cec5SDimitry Andric                 (__v16si) __W,
89130b57cec5SDimitry Andric                 (__mmask16) __U);
89140b57cec5SDimitry Andric }
89150b57cec5SDimitry Andric 
89160b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
89170b57cec5SDimitry Andric _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
89180b57cec5SDimitry Andric {
89190b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
89200b57cec5SDimitry Andric                 (__v16si) _mm512_setzero_si512(),
89210b57cec5SDimitry Andric                 (__mmask16) __U);
89220b57cec5SDimitry Andric }
89230b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtps2pd512_mask on A (as __v8sf) with
 * _mm512_undefined_pd() as the second operand, an all-ones mask and R cast to
 * int as the final operand. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))
89280b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtps2pd512_mask on A (as __v8sf) with W (as
 * __v8df) as the second operand, U as the mask and R cast to int as the final
 * operand. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))
89330b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtps2pd512_mask on A (as __v8sf) with
 * _mm512_setzero_pd() as the second operand, U as the mask and R cast to int
 * as the final operand. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
89380b57cec5SDimitry Andric 
89390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89400b57cec5SDimitry Andric _mm512_cvtps_pd (__m256 __A)
89410b57cec5SDimitry Andric {
89420b57cec5SDimitry Andric   return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
89430b57cec5SDimitry Andric }
89440b57cec5SDimitry Andric 
89450b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89460b57cec5SDimitry Andric _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
89470b57cec5SDimitry Andric {
89480b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
89490b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtps_pd(__A),
89500b57cec5SDimitry Andric                                               (__v8df)__W);
89510b57cec5SDimitry Andric }
89520b57cec5SDimitry Andric 
89530b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89540b57cec5SDimitry Andric _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
89550b57cec5SDimitry Andric {
89560b57cec5SDimitry Andric   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
89570b57cec5SDimitry Andric                                               (__v8df)_mm512_cvtps_pd(__A),
89580b57cec5SDimitry Andric                                               (__v8df)_mm512_setzero_pd());
89590b57cec5SDimitry Andric }
89600b57cec5SDimitry Andric 
89610b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89620b57cec5SDimitry Andric _mm512_cvtpslo_pd (__m512 __A)
89630b57cec5SDimitry Andric {
89640b57cec5SDimitry Andric   return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
89650b57cec5SDimitry Andric }
89660b57cec5SDimitry Andric 
89670b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89680b57cec5SDimitry Andric _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
89690b57cec5SDimitry Andric {
89700b57cec5SDimitry Andric   return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
89710b57cec5SDimitry Andric }
89720b57cec5SDimitry Andric 
89730b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89740b57cec5SDimitry Andric _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
89750b57cec5SDimitry Andric {
89760b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
89770b57cec5SDimitry Andric               (__v8df) __A,
89780b57cec5SDimitry Andric               (__v8df) __W);
89790b57cec5SDimitry Andric }
89800b57cec5SDimitry Andric 
89810b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
89820b57cec5SDimitry Andric _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
89830b57cec5SDimitry Andric {
89840b57cec5SDimitry Andric   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
89850b57cec5SDimitry Andric               (__v8df) __A,
89860b57cec5SDimitry Andric               (__v8df) _mm512_setzero_pd ());
89870b57cec5SDimitry Andric }
89880b57cec5SDimitry Andric 
89890b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
89900b57cec5SDimitry Andric _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
89910b57cec5SDimitry Andric {
89920b57cec5SDimitry Andric   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
89930b57cec5SDimitry Andric              (__v16sf) __A,
89940b57cec5SDimitry Andric              (__v16sf) __W);
89950b57cec5SDimitry Andric }
89960b57cec5SDimitry Andric 
89970b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
89980b57cec5SDimitry Andric _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
89990b57cec5SDimitry Andric {
90000b57cec5SDimitry Andric   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
90010b57cec5SDimitry Andric              (__v16sf) __A,
90020b57cec5SDimitry Andric              (__v16sf) _mm512_setzero_ps ());
90030b57cec5SDimitry Andric }
90040b57cec5SDimitry Andric 
90050b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
90060b57cec5SDimitry Andric _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
90070b57cec5SDimitry Andric {
90080b57cec5SDimitry Andric   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
90090b57cec5SDimitry Andric             (__mmask8) __U);
90100b57cec5SDimitry Andric }
90110b57cec5SDimitry Andric 
90120b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
90130b57cec5SDimitry Andric _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
90140b57cec5SDimitry Andric {
90150b57cec5SDimitry Andric   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
90160b57cec5SDimitry Andric             (__mmask8) __U);
90170b57cec5SDimitry Andric }
90180b57cec5SDimitry Andric 
90190b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
90200b57cec5SDimitry Andric _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
90210b57cec5SDimitry Andric {
90220b57cec5SDimitry Andric   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
90230b57cec5SDimitry Andric             (__mmask16) __U);
90240b57cec5SDimitry Andric }
90250b57cec5SDimitry Andric 
90260b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512
90270b57cec5SDimitry Andric _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
90280b57cec5SDimitry Andric {
90290b57cec5SDimitry Andric   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
90300b57cec5SDimitry Andric             (__mmask16) __U);
90310b57cec5SDimitry Andric }
90320b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on A (as __v4sf) and B (as
 * __v2df) with _mm_undefined_ps() as the third operand, an all-ones mask and
 * R cast to int as the final operand. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)))
90380b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on A (as __v4sf) and B (as
 * __v2df) with W as the third operand, U as the mask and R cast to int as the
 * final operand. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
90440b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on A (as __v4sf) and B (as
 * __v2df) with _mm_setzero_ps() as the third operand, U as the mask and R
 * cast to int as the final operand. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
90500b57cec5SDimitry Andric 
90510b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
90520b57cec5SDimitry Andric _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
90530b57cec5SDimitry Andric {
90540b57cec5SDimitry Andric   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
90550b57cec5SDimitry Andric                                              (__v2df)__B,
90560b57cec5SDimitry Andric                                              (__v4sf)__W,
90570b57cec5SDimitry Andric                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
90580b57cec5SDimitry Andric }
90590b57cec5SDimitry Andric 
90600b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
90610b57cec5SDimitry Andric _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
90620b57cec5SDimitry Andric {
90630b57cec5SDimitry Andric   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
90640b57cec5SDimitry Andric                                              (__v2df)__B,
90650b57cec5SDimitry Andric                                              (__v4sf)_mm_setzero_ps(),
90660b57cec5SDimitry Andric                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
90670b57cec5SDimitry Andric }
90680b57cec5SDimitry Andric 
/* The _i32 / _i64 spellings are plain aliases for the corresponding
 * _si32 / _si64 names. The 64-bit aliases are only defined when __x86_64__
 * is defined. */
#define _mm_cvtss_i32   _mm_cvtss_si32
#define _mm_cvtsd_i32   _mm_cvtsd_si32
#define _mm_cvti32_sd   _mm_cvtsi32_sd
#define _mm_cvti32_ss   _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64   _mm_cvtss_si64
#define _mm_cvtsd_i64   _mm_cvtsd_si64
#define _mm_cvti64_sd   _mm_cvtsi64_sd
#define _mm_cvti64_ss   _mm_cvtsi64_ss
#endif
90790b57cec5SDimitry Andric 
#ifdef __x86_64__
/* Both spellings expand to the same call: __builtin_ia32_cvtsi2sd64 on A (as
 * __v2df), B cast to long long and R cast to int. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))
#endif
90890b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtsi2ss32 on A (as __v4sf), B cast to int and R
 * cast to int. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)))
90920b57cec5SDimitry Andric 
/* Alternate spelling with the same expansion as _mm_cvt_roundsi32_ss:
 * __builtin_ia32_cvtsi2ss32 on A (as __v4sf), B cast to int and R cast to
 * int. */
#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)))
90950b57cec5SDimitry Andric 
#ifdef __x86_64__
/* Both spellings expand to the same call: __builtin_ia32_cvtsi2ss64 on A (as
 * __v4sf), B cast to long long and R cast to int. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))
#endif
91050b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtss2sd_round_mask on A (as __v2df) and B (as
 * __v4sf) with _mm_undefined_pd() as the third operand, an all-ones mask and
 * R cast to int as the final operand. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)))
91110b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtss2sd_round_mask on A (as __v2df) and B (as
 * __v4sf) with W as the third operand, U as the mask and R cast to int as the
 * final operand. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
91170b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtss2sd_round_mask on A (as __v2df) and B (as
 * __v4sf) with _mm_setzero_pd() as the third operand, U as the mask and R
 * cast to int as the final operand. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
91230b57cec5SDimitry Andric 
91240b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
91250b57cec5SDimitry Andric _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
91260b57cec5SDimitry Andric {
91270b57cec5SDimitry Andric   return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
91280b57cec5SDimitry Andric                                             (__v4sf)__B,
91290b57cec5SDimitry Andric                                             (__v2df)__W,
91300b57cec5SDimitry Andric                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
91310b57cec5SDimitry Andric }
91320b57cec5SDimitry Andric 
91330b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
91340b57cec5SDimitry Andric _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
91350b57cec5SDimitry Andric {
91360b57cec5SDimitry Andric   return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
91370b57cec5SDimitry Andric                                             (__v4sf)__B,
91380b57cec5SDimitry Andric                                             (__v2df)_mm_setzero_pd(),
91390b57cec5SDimitry Andric                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
91400b57cec5SDimitry Andric }
91410b57cec5SDimitry Andric 
91420b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
91430b57cec5SDimitry Andric _mm_cvtu32_sd (__m128d __A, unsigned __B)
91440b57cec5SDimitry Andric {
91450b57cec5SDimitry Andric   __A[0] = __B;
91460b57cec5SDimitry Andric   return __A;
91470b57cec5SDimitry Andric }
91480b57cec5SDimitry Andric 
91490b57cec5SDimitry Andric #ifdef __x86_64__
91500b57cec5SDimitry Andric #define _mm_cvt_roundu64_sd(A, B, R) \
9151349cc55cSDimitry Andric   ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9152349cc55cSDimitry Andric                                        (unsigned long long)(B), (int)(R)))
91530b57cec5SDimitry Andric 
91540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
91550b57cec5SDimitry Andric _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
91560b57cec5SDimitry Andric {
91570b57cec5SDimitry Andric   __A[0] = __B;
91580b57cec5SDimitry Andric   return __A;
91590b57cec5SDimitry Andric }
91600b57cec5SDimitry Andric #endif
91610b57cec5SDimitry Andric 
/* Expands to __builtin_ia32_cvtusi2ss32 on A (as __v4sf), B cast to unsigned
 * int and R cast to int. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32( \
      (__v4sf)(__m128)(A), (unsigned int)(B), (int)(R)))
91650b57cec5SDimitry Andric 
91660b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
91670b57cec5SDimitry Andric _mm_cvtu32_ss (__m128 __A, unsigned __B)
91680b57cec5SDimitry Andric {
91690b57cec5SDimitry Andric   __A[0] = __B;
91700b57cec5SDimitry Andric   return __A;
91710b57cec5SDimitry Andric }
91720b57cec5SDimitry Andric 
91730b57cec5SDimitry Andric #ifdef __x86_64__
91740b57cec5SDimitry Andric #define _mm_cvt_roundu64_ss(A, B, R) \
9175349cc55cSDimitry Andric   ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9176349cc55cSDimitry Andric                                       (unsigned long long)(B), (int)(R)))
91770b57cec5SDimitry Andric 
91780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
91790b57cec5SDimitry Andric _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
91800b57cec5SDimitry Andric {
91810b57cec5SDimitry Andric   __A[0] = __B;
91820b57cec5SDimitry Andric   return __A;
91830b57cec5SDimitry Andric }
91840b57cec5SDimitry Andric #endif
91850b57cec5SDimitry Andric 
91860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
91870b57cec5SDimitry Andric _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
91880b57cec5SDimitry Andric {
91890b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectd_512(__M,
91900b57cec5SDimitry Andric                                               (__v16si) _mm512_set1_epi32(__A),
91910b57cec5SDimitry Andric                                               (__v16si) __O);
91920b57cec5SDimitry Andric }
91930b57cec5SDimitry Andric 
91940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
91950b57cec5SDimitry Andric _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
91960b57cec5SDimitry Andric {
91970b57cec5SDimitry Andric   return (__m512i) __builtin_ia32_selectq_512(__M,
91980b57cec5SDimitry Andric                                               (__v8di) _mm512_set1_epi64(__A),
91990b57cec5SDimitry Andric                                               (__v8di) __O);
92000b57cec5SDimitry Andric }
92010b57cec5SDimitry Andric 
92020b57cec5SDimitry Andric static  __inline __m512i __DEFAULT_FN_ATTRS512
92030b57cec5SDimitry Andric _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
92040b57cec5SDimitry Andric     char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
92050b57cec5SDimitry Andric     char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
92060b57cec5SDimitry Andric     char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
92070b57cec5SDimitry Andric     char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
92080b57cec5SDimitry Andric     char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
92090b57cec5SDimitry Andric     char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
92100b57cec5SDimitry Andric     char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
92110b57cec5SDimitry Andric     char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
92120b57cec5SDimitry Andric     char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
92130b57cec5SDimitry Andric     char __e4, char __e3, char __e2, char __e1, char __e0) {
92140b57cec5SDimitry Andric 
92150b57cec5SDimitry Andric   return __extension__ (__m512i)(__v64qi)
92160b57cec5SDimitry Andric     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
92170b57cec5SDimitry Andric      __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
92180b57cec5SDimitry Andric      __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
92190b57cec5SDimitry Andric      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
92200b57cec5SDimitry Andric      __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
92210b57cec5SDimitry Andric      __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
92220b57cec5SDimitry Andric      __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
92230b57cec5SDimitry Andric      __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
92240b57cec5SDimitry Andric }
92250b57cec5SDimitry Andric 
92260b57cec5SDimitry Andric static  __inline __m512i __DEFAULT_FN_ATTRS512
92270b57cec5SDimitry Andric _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
92280b57cec5SDimitry Andric     short __e27, short __e26, short __e25, short __e24, short __e23,
92290b57cec5SDimitry Andric     short __e22, short __e21, short __e20, short __e19, short __e18,
92300b57cec5SDimitry Andric     short __e17, short __e16, short __e15, short __e14, short __e13,
92310b57cec5SDimitry Andric     short __e12, short __e11, short __e10, short __e9, short __e8,
92320b57cec5SDimitry Andric     short __e7, short __e6, short __e5, short __e4, short __e3,
92330b57cec5SDimitry Andric     short __e2, short __e1, short __e0) {
92340b57cec5SDimitry Andric   return __extension__ (__m512i)(__v32hi)
92350b57cec5SDimitry Andric     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
92360b57cec5SDimitry Andric      __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
92370b57cec5SDimitry Andric      __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
92380b57cec5SDimitry Andric      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
92390b57cec5SDimitry Andric }
92400b57cec5SDimitry Andric 
92410b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512
92420b57cec5SDimitry Andric _mm512_set_epi32 (int __A, int __B, int __C, int __D,
92430b57cec5SDimitry Andric      int __E, int __F, int __G, int __H,
92440b57cec5SDimitry Andric      int __I, int __J, int __K, int __L,
92450b57cec5SDimitry Andric      int __M, int __N, int __O, int __P)
92460b57cec5SDimitry Andric {
92470b57cec5SDimitry Andric   return __extension__ (__m512i)(__v16si)
92480b57cec5SDimitry Andric   { __P, __O, __N, __M, __L, __K, __J, __I,
92490b57cec5SDimitry Andric     __H, __G, __F, __E, __D, __C, __B, __A };
92500b57cec5SDimitry Andric }
92510b57cec5SDimitry Andric 
/* Reversed-argument form of _mm512_set_epi32: the sixteen arguments are
 * handed to it in the opposite order (e15 first, e0 last). */
#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                   (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
92560b57cec5SDimitry Andric 
92570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512
92580b57cec5SDimitry Andric _mm512_set_epi64 (long long __A, long long __B, long long __C,
92590b57cec5SDimitry Andric      long long __D, long long __E, long long __F,
92600b57cec5SDimitry Andric      long long __G, long long __H)
92610b57cec5SDimitry Andric {
92620b57cec5SDimitry Andric   return __extension__ (__m512i) (__v8di)
92630b57cec5SDimitry Andric   { __H, __G, __F, __E, __D, __C, __B, __A };
92640b57cec5SDimitry Andric }
92650b57cec5SDimitry Andric 
/* Reversed-argument form of _mm512_set_epi64: the eight arguments are handed
 * to it in the opposite order (e7 first, e0 last). */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
92680b57cec5SDimitry Andric 
92690b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
92700b57cec5SDimitry Andric _mm512_set_pd (double __A, double __B, double __C, double __D,
92710b57cec5SDimitry Andric         double __E, double __F, double __G, double __H)
92720b57cec5SDimitry Andric {
92730b57cec5SDimitry Andric   return __extension__ (__m512d)
92740b57cec5SDimitry Andric   { __H, __G, __F, __E, __D, __C, __B, __A };
92750b57cec5SDimitry Andric }
92760b57cec5SDimitry Andric 
/* Reversed-argument form of _mm512_set_pd: the eight arguments are handed to
 * it in the opposite order (e7 first, e0 last). */
#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
92790b57cec5SDimitry Andric 
92800b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
92810b57cec5SDimitry Andric _mm512_set_ps (float __A, float __B, float __C, float __D,
92820b57cec5SDimitry Andric         float __E, float __F, float __G, float __H,
92830b57cec5SDimitry Andric         float __I, float __J, float __K, float __L,
92840b57cec5SDimitry Andric         float __M, float __N, float __O, float __P)
92850b57cec5SDimitry Andric {
92860b57cec5SDimitry Andric   return __extension__ (__m512)
92870b57cec5SDimitry Andric   { __P, __O, __N, __M, __L, __K, __J, __I,
92880b57cec5SDimitry Andric     __H, __G, __F, __E, __D, __C, __B, __A };
92890b57cec5SDimitry Andric }
92900b57cec5SDimitry Andric 
/* Reversed-argument form of _mm512_set_ps: the sixteen arguments are handed
 * to it in the opposite order (e15 first, e0 last). */
#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
92940b57cec5SDimitry Andric 
92950b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
92960b57cec5SDimitry Andric _mm512_abs_ps(__m512 __A)
92970b57cec5SDimitry Andric {
92980b57cec5SDimitry Andric   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
92990b57cec5SDimitry Andric }
93000b57cec5SDimitry Andric 
93010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512
93020b57cec5SDimitry Andric _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
93030b57cec5SDimitry Andric {
93040b57cec5SDimitry Andric   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
93050b57cec5SDimitry Andric }
93060b57cec5SDimitry Andric 
93070b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
93080b57cec5SDimitry Andric _mm512_abs_pd(__m512d __A)
93090b57cec5SDimitry Andric {
93100b57cec5SDimitry Andric   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
93110b57cec5SDimitry Andric }
93120b57cec5SDimitry Andric 
93130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512
93140b57cec5SDimitry Andric _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
93150b57cec5SDimitry Andric {
93160b57cec5SDimitry Andric   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
93170b57cec5SDimitry Andric }
93180b57cec5SDimitry Andric 
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the elements of the input vector.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 *    vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 *    produce unspecified results.
 *
 * The reductions follow a bisection method: at each step, the vector produced
 * by the previous step is split in half and the operation is performed on its
 * two halves. This takes log2(n) steps, where n is the number of elements in
 * the vector.
 */
93340b57cec5SDimitry Andric 
93350b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
933681ad6265SDimitry Andric   return __builtin_reduce_add((__v8di)__W);
93370b57cec5SDimitry Andric }
93380b57cec5SDimitry Andric 
93390b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
934081ad6265SDimitry Andric   return __builtin_reduce_mul((__v8di)__W);
93410b57cec5SDimitry Andric }
93420b57cec5SDimitry Andric 
93430b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
934404eeddc0SDimitry Andric   return __builtin_reduce_and((__v8di)__W);
93450b57cec5SDimitry Andric }
93460b57cec5SDimitry Andric 
93470b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
934804eeddc0SDimitry Andric   return __builtin_reduce_or((__v8di)__W);
93490b57cec5SDimitry Andric }
93500b57cec5SDimitry Andric 
93510b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
93520b57cec5SDimitry Andric _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
93530b57cec5SDimitry Andric   __W = _mm512_maskz_mov_epi64(__M, __W);
935481ad6265SDimitry Andric   return __builtin_reduce_add((__v8di)__W);
93550b57cec5SDimitry Andric }
93560b57cec5SDimitry Andric 
93570b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
93580b57cec5SDimitry Andric _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
93590b57cec5SDimitry Andric   __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
936081ad6265SDimitry Andric   return __builtin_reduce_mul((__v8di)__W);
93610b57cec5SDimitry Andric }
93620b57cec5SDimitry Andric 
93630b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
93640b57cec5SDimitry Andric _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
936581ad6265SDimitry Andric   __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
936604eeddc0SDimitry Andric   return __builtin_reduce_and((__v8di)__W);
93670b57cec5SDimitry Andric }
93680b57cec5SDimitry Andric 
93690b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
93700b57cec5SDimitry Andric _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
93710b57cec5SDimitry Andric   __W = _mm512_maskz_mov_epi64(__M, __W);
937204eeddc0SDimitry Andric   return __builtin_reduce_or((__v8di)__W);
93730b57cec5SDimitry Andric }
93740b57cec5SDimitry Andric 
9375d409305fSDimitry Andric // -0.0 is used to ignore the start value since it is the neutral value of
9376d409305fSDimitry Andric // floating point addition. For more information, please refer to
9377d409305fSDimitry Andric // https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
93780b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9379d409305fSDimitry Andric   return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
93800b57cec5SDimitry Andric }
93810b57cec5SDimitry Andric 
93820b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9383e8d8bef9SDimitry Andric   return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
93840b57cec5SDimitry Andric }
93850b57cec5SDimitry Andric 
93860b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
93870b57cec5SDimitry Andric _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
93880b57cec5SDimitry Andric   __W = _mm512_maskz_mov_pd(__M, __W);
9389d409305fSDimitry Andric   return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
93900b57cec5SDimitry Andric }
93910b57cec5SDimitry Andric 
93920b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
93930b57cec5SDimitry Andric _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
93940b57cec5SDimitry Andric   __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9395e8d8bef9SDimitry Andric   return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
93960b57cec5SDimitry Andric }
93970b57cec5SDimitry Andric 
93980b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
93990b57cec5SDimitry Andric _mm512_reduce_add_epi32(__m512i __W) {
940081ad6265SDimitry Andric   return __builtin_reduce_add((__v16si)__W);
94010b57cec5SDimitry Andric }
94020b57cec5SDimitry Andric 
94030b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94040b57cec5SDimitry Andric _mm512_reduce_mul_epi32(__m512i __W) {
940581ad6265SDimitry Andric   return __builtin_reduce_mul((__v16si)__W);
94060b57cec5SDimitry Andric }
94070b57cec5SDimitry Andric 
94080b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94090b57cec5SDimitry Andric _mm512_reduce_and_epi32(__m512i __W) {
941004eeddc0SDimitry Andric   return __builtin_reduce_and((__v16si)__W);
94110b57cec5SDimitry Andric }
94120b57cec5SDimitry Andric 
94130b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94140b57cec5SDimitry Andric _mm512_reduce_or_epi32(__m512i __W) {
941504eeddc0SDimitry Andric   return __builtin_reduce_or((__v16si)__W);
94160b57cec5SDimitry Andric }
94170b57cec5SDimitry Andric 
94180b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94190b57cec5SDimitry Andric _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
94200b57cec5SDimitry Andric   __W = _mm512_maskz_mov_epi32(__M, __W);
942181ad6265SDimitry Andric   return __builtin_reduce_add((__v16si)__W);
94220b57cec5SDimitry Andric }
94230b57cec5SDimitry Andric 
94240b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94250b57cec5SDimitry Andric _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
94260b57cec5SDimitry Andric   __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
942781ad6265SDimitry Andric   return __builtin_reduce_mul((__v16si)__W);
94280b57cec5SDimitry Andric }
94290b57cec5SDimitry Andric 
94300b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94310b57cec5SDimitry Andric _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
943281ad6265SDimitry Andric   __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
943304eeddc0SDimitry Andric   return __builtin_reduce_and((__v16si)__W);
94340b57cec5SDimitry Andric }
94350b57cec5SDimitry Andric 
94360b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
94370b57cec5SDimitry Andric _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
94380b57cec5SDimitry Andric   __W = _mm512_maskz_mov_epi32(__M, __W);
943904eeddc0SDimitry Andric   return __builtin_reduce_or((__v16si)__W);
94400b57cec5SDimitry Andric }
94410b57cec5SDimitry Andric 
94420b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
94430b57cec5SDimitry Andric _mm512_reduce_add_ps(__m512 __W) {
9444d409305fSDimitry Andric   return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
94450b57cec5SDimitry Andric }
94460b57cec5SDimitry Andric 
94470b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
94480b57cec5SDimitry Andric _mm512_reduce_mul_ps(__m512 __W) {
9449e8d8bef9SDimitry Andric   return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
94500b57cec5SDimitry Andric }
94510b57cec5SDimitry Andric 
94520b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
94530b57cec5SDimitry Andric _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
94540b57cec5SDimitry Andric   __W = _mm512_maskz_mov_ps(__M, __W);
9455d409305fSDimitry Andric   return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
94560b57cec5SDimitry Andric }
94570b57cec5SDimitry Andric 
94580b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
94590b57cec5SDimitry Andric _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
94600b57cec5SDimitry Andric   __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9461e8d8bef9SDimitry Andric   return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
94620b57cec5SDimitry Andric }
94630b57cec5SDimitry Andric 
94640b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
94650b57cec5SDimitry Andric _mm512_reduce_max_epi64(__m512i __V) {
946604eeddc0SDimitry Andric   return __builtin_reduce_max((__v8di)__V);
94670b57cec5SDimitry Andric }
94680b57cec5SDimitry Andric 
94690b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
94700b57cec5SDimitry Andric _mm512_reduce_max_epu64(__m512i __V) {
947104eeddc0SDimitry Andric   return __builtin_reduce_max((__v8du)__V);
94720b57cec5SDimitry Andric }
94730b57cec5SDimitry Andric 
94740b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
94750b57cec5SDimitry Andric _mm512_reduce_min_epi64(__m512i __V) {
947604eeddc0SDimitry Andric   return __builtin_reduce_min((__v8di)__V);
94770b57cec5SDimitry Andric }
94780b57cec5SDimitry Andric 
94790b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
94800b57cec5SDimitry Andric _mm512_reduce_min_epu64(__m512i __V) {
948104eeddc0SDimitry Andric   return __builtin_reduce_min((__v8du)__V);
94820b57cec5SDimitry Andric }
94830b57cec5SDimitry Andric 
94840b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
94850b57cec5SDimitry Andric _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
94860b57cec5SDimitry Andric   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
948704eeddc0SDimitry Andric   return __builtin_reduce_max((__v8di)__V);
94880b57cec5SDimitry Andric }
94890b57cec5SDimitry Andric 
94900b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
94910b57cec5SDimitry Andric _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
94920b57cec5SDimitry Andric   __V = _mm512_maskz_mov_epi64(__M, __V);
949304eeddc0SDimitry Andric   return __builtin_reduce_max((__v8du)__V);
94940b57cec5SDimitry Andric }
94950b57cec5SDimitry Andric 
94960b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512
94970b57cec5SDimitry Andric _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
94980b57cec5SDimitry Andric   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
949904eeddc0SDimitry Andric   return __builtin_reduce_min((__v8di)__V);
95000b57cec5SDimitry Andric }
95010b57cec5SDimitry Andric 
95020b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
95030b57cec5SDimitry Andric _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
950481ad6265SDimitry Andric   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
950504eeddc0SDimitry Andric   return __builtin_reduce_min((__v8du)__V);
95060b57cec5SDimitry Andric }
95070b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
95080b57cec5SDimitry Andric _mm512_reduce_max_epi32(__m512i __V) {
950904eeddc0SDimitry Andric   return __builtin_reduce_max((__v16si)__V);
95100b57cec5SDimitry Andric }
95110b57cec5SDimitry Andric 
95120b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512
95130b57cec5SDimitry Andric _mm512_reduce_max_epu32(__m512i __V) {
951404eeddc0SDimitry Andric   return __builtin_reduce_max((__v16su)__V);
95150b57cec5SDimitry Andric }
95160b57cec5SDimitry Andric 
95170b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
95180b57cec5SDimitry Andric _mm512_reduce_min_epi32(__m512i __V) {
951904eeddc0SDimitry Andric   return __builtin_reduce_min((__v16si)__V);
95200b57cec5SDimitry Andric }
95210b57cec5SDimitry Andric 
95220b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512
95230b57cec5SDimitry Andric _mm512_reduce_min_epu32(__m512i __V) {
952404eeddc0SDimitry Andric   return __builtin_reduce_min((__v16su)__V);
95250b57cec5SDimitry Andric }
95260b57cec5SDimitry Andric 
95270b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
95280b57cec5SDimitry Andric _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
95290b57cec5SDimitry Andric   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
953004eeddc0SDimitry Andric   return __builtin_reduce_max((__v16si)__V);
95310b57cec5SDimitry Andric }
95320b57cec5SDimitry Andric 
95330b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512
95340b57cec5SDimitry Andric _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
95350b57cec5SDimitry Andric   __V = _mm512_maskz_mov_epi32(__M, __V);
953604eeddc0SDimitry Andric   return __builtin_reduce_max((__v16su)__V);
95370b57cec5SDimitry Andric }
95380b57cec5SDimitry Andric 
95390b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
95400b57cec5SDimitry Andric _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
95410b57cec5SDimitry Andric   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
954204eeddc0SDimitry Andric   return __builtin_reduce_min((__v16si)__V);
95430b57cec5SDimitry Andric }
95440b57cec5SDimitry Andric 
95450b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512
95460b57cec5SDimitry Andric _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
954781ad6265SDimitry Andric   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
954804eeddc0SDimitry Andric   return __builtin_reduce_min((__v16su)__V);
95490b57cec5SDimitry Andric }
95500b57cec5SDimitry Andric 
95510b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
95520b57cec5SDimitry Andric _mm512_reduce_max_pd(__m512d __V) {
9553fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmax_pd512(__V);
95540b57cec5SDimitry Andric }
95550b57cec5SDimitry Andric 
95560b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
95570b57cec5SDimitry Andric _mm512_reduce_min_pd(__m512d __V) {
9558fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmin_pd512(__V);
95590b57cec5SDimitry Andric }
95600b57cec5SDimitry Andric 
95610b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
95620b57cec5SDimitry Andric _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
95630b57cec5SDimitry Andric   __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9564fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmax_pd512(__V);
95650b57cec5SDimitry Andric }
95660b57cec5SDimitry Andric 
95670b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512
95680b57cec5SDimitry Andric _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
95690b57cec5SDimitry Andric   __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9570fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmin_pd512(__V);
95710b57cec5SDimitry Andric }
95720b57cec5SDimitry Andric 
95730b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
95740b57cec5SDimitry Andric _mm512_reduce_max_ps(__m512 __V) {
9575fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmax_ps512(__V);
95760b57cec5SDimitry Andric }
95770b57cec5SDimitry Andric 
95780b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
95790b57cec5SDimitry Andric _mm512_reduce_min_ps(__m512 __V) {
9580fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmin_ps512(__V);
95810b57cec5SDimitry Andric }
95820b57cec5SDimitry Andric 
95830b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
95840b57cec5SDimitry Andric _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
95850b57cec5SDimitry Andric   __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9586fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmax_ps512(__V);
95870b57cec5SDimitry Andric }
95880b57cec5SDimitry Andric 
95890b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512
95900b57cec5SDimitry Andric _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
95910b57cec5SDimitry Andric   __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9592fe6060f1SDimitry Andric   return __builtin_ia32_reduce_fmin_ps512(__V);
95930b57cec5SDimitry Andric }
95940b57cec5SDimitry Andric 
9595a7dea167SDimitry Andric /// Moves the least significant 32 bits of a vector of [16 x i32] to a
9596a7dea167SDimitry Andric ///    32-bit signed integer value.
9597a7dea167SDimitry Andric ///
9598a7dea167SDimitry Andric /// \headerfile <x86intrin.h>
9599a7dea167SDimitry Andric ///
9600a7dea167SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9601a7dea167SDimitry Andric ///
9602a7dea167SDimitry Andric /// \param __A
9603a7dea167SDimitry Andric ///    A vector of [16 x i32]. The least significant 32 bits are moved to the
9604a7dea167SDimitry Andric ///    destination.
9605a7dea167SDimitry Andric /// \returns A 32-bit signed integer containing the moved value.
9606a7dea167SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512
9607a7dea167SDimitry Andric _mm512_cvtsi512_si32(__m512i __A) {
9608a7dea167SDimitry Andric   __v16si __b = (__v16si)__A;
9609a7dea167SDimitry Andric   return __b[0];
9610a7dea167SDimitry Andric }
9611a7dea167SDimitry Andric 
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// Implemented by forwarding to _mm512_i32gather_pd with \a vindex narrowed
/// by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale)                        \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9629fe6060f1SDimitry Andric 
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale into dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit is
/// not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// Implemented by forwarding to _mm512_mask_i32gather_pd with \a vindex
/// narrowed by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
                           (base_addr), (scale))
9654fe6060f1SDimitry Andric 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// Implemented by forwarding to _mm512_i32gather_epi64 with \a vindex narrowed
/// by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9672fe6060f1SDimitry Andric 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst using writemask \a mask (elements
/// are copied from \a src when the corresponding mask bit is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// Implemented by forwarding to _mm512_mask_i32gather_epi64 with \a vindex
/// narrowed by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
                              (base_addr), (scale))
9696fe6060f1SDimitry Andric 
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// Implemented by forwarding to _mm512_i32scatter_pd with \a vindex narrowed
/// by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9713fe6060f1SDimitry Andric 
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
/// Only those elements whose corresponding mask bit is set in writemask
/// \a mask are written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// Implemented by forwarding to _mm512_mask_i32scatter_pd with \a vindex
/// narrowed by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9735fe6060f1SDimitry Andric 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// Implemented by forwarding to _mm512_i32scatter_epi64 with \a vindex
/// narrowed by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9753fe6060f1SDimitry Andric 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale using
/// writemask \a mask (elements whose corresponding mask bit is not set are not
/// written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// Implemented by forwarding to _mm512_mask_i32scatter_epi64 with \a vindex
/// narrowed by _mm512_castsi512_si256.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9774fe6060f1SDimitry Andric 
97750b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS512
97760b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
97770b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
97780b57cec5SDimitry Andric 
97790b57cec5SDimitry Andric #endif /* __AVX512FINTRIN_H */
9780