xref: /freebsd/contrib/llvm-project/clang/lib/Headers/emmintrin.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9*0b57cec5SDimitry Andric 
10*0b57cec5SDimitry Andric #ifndef __EMMINTRIN_H
11*0b57cec5SDimitry Andric #define __EMMINTRIN_H
12*0b57cec5SDimitry Andric 
13*0b57cec5SDimitry Andric #include <xmmintrin.h>
14*0b57cec5SDimitry Andric 
/* Public 16-byte vector types, declared with 16-byte alignment. */
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));

/* Same element types and size as above, but with the alignment requirement
 * lowered to 1 (the `_u` suffix presumably stands for "unaligned"). */
typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));

/* Type defines.  Internal 16-byte vector views with specific element types. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Unsigned types */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));

/* Define the default attributes for the functions in this file.
 * __DEFAULT_FN_ATTRS targets "sse2" with a minimum vector width of 128;
 * __DEFAULT_FN_ATTRS_MMX targets "mmx,sse2" with a minimum vector width
 * of 64. Both force inlining and suppress debug info. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64)))
39*0b57cec5SDimitry Andric 
40*0b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the
41*0b57cec5SDimitry Andric ///    sum in the lower 64 bits of the result. The upper 64 bits of the result
42*0b57cec5SDimitry Andric ///    are copied from the upper double-precision value of the first operand.
43*0b57cec5SDimitry Andric ///
44*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
45*0b57cec5SDimitry Andric ///
46*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.
47*0b57cec5SDimitry Andric ///
48*0b57cec5SDimitry Andric /// \param __a
49*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
50*0b57cec5SDimitry Andric /// \param __b
51*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
52*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
53*0b57cec5SDimitry Andric ///    sum of the lower 64 bits of both operands. The upper 64 bits are copied
54*0b57cec5SDimitry Andric ///    from the upper 64 bits of the first source operand.
55*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
56*0b57cec5SDimitry Andric _mm_add_sd(__m128d __a, __m128d __b)
57*0b57cec5SDimitry Andric {
58*0b57cec5SDimitry Andric   __a[0] += __b[0];
59*0b57cec5SDimitry Andric   return __a;
60*0b57cec5SDimitry Andric }
61*0b57cec5SDimitry Andric 
62*0b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double].
63*0b57cec5SDimitry Andric ///
64*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
65*0b57cec5SDimitry Andric ///
66*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.
67*0b57cec5SDimitry Andric ///
68*0b57cec5SDimitry Andric /// \param __a
69*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
70*0b57cec5SDimitry Andric /// \param __b
71*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
72*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both
73*0b57cec5SDimitry Andric ///    operands.
74*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
75*0b57cec5SDimitry Andric _mm_add_pd(__m128d __a, __m128d __b)
76*0b57cec5SDimitry Andric {
77*0b57cec5SDimitry Andric   return (__m128d)((__v2df)__a + (__v2df)__b);
78*0b57cec5SDimitry Andric }
79*0b57cec5SDimitry Andric 
80*0b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand
81*0b57cec5SDimitry Andric ///    from the lower double-precision value of the first operand and returns
82*0b57cec5SDimitry Andric ///    the difference in the lower 64 bits of the result. The upper 64 bits of
83*0b57cec5SDimitry Andric ///    the result are copied from the upper double-precision value of the first
84*0b57cec5SDimitry Andric ///    operand.
85*0b57cec5SDimitry Andric ///
86*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
87*0b57cec5SDimitry Andric ///
88*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.
89*0b57cec5SDimitry Andric ///
90*0b57cec5SDimitry Andric /// \param __a
91*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the minuend.
92*0b57cec5SDimitry Andric /// \param __b
93*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the subtrahend.
94*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
95*0b57cec5SDimitry Andric ///    difference of the lower 64 bits of both operands. The upper 64 bits are
96*0b57cec5SDimitry Andric ///    copied from the upper 64 bits of the first source operand.
97*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
98*0b57cec5SDimitry Andric _mm_sub_sd(__m128d __a, __m128d __b)
99*0b57cec5SDimitry Andric {
100*0b57cec5SDimitry Andric   __a[0] -= __b[0];
101*0b57cec5SDimitry Andric   return __a;
102*0b57cec5SDimitry Andric }
103*0b57cec5SDimitry Andric 
104*0b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double].
105*0b57cec5SDimitry Andric ///
106*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
107*0b57cec5SDimitry Andric ///
108*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.
109*0b57cec5SDimitry Andric ///
110*0b57cec5SDimitry Andric /// \param __a
111*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the minuend.
112*0b57cec5SDimitry Andric /// \param __b
113*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the subtrahend.
114*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between
115*0b57cec5SDimitry Andric ///    both operands.
116*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
117*0b57cec5SDimitry Andric _mm_sub_pd(__m128d __a, __m128d __b)
118*0b57cec5SDimitry Andric {
119*0b57cec5SDimitry Andric   return (__m128d)((__v2df)__a - (__v2df)__b);
120*0b57cec5SDimitry Andric }
121*0b57cec5SDimitry Andric 
122*0b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns
123*0b57cec5SDimitry Andric ///    the product in the lower 64 bits of the result. The upper 64 bits of the
124*0b57cec5SDimitry Andric ///    result are copied from the upper double-precision value of the first
125*0b57cec5SDimitry Andric ///    operand.
126*0b57cec5SDimitry Andric ///
127*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
128*0b57cec5SDimitry Andric ///
129*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.
130*0b57cec5SDimitry Andric ///
131*0b57cec5SDimitry Andric /// \param __a
132*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
133*0b57cec5SDimitry Andric /// \param __b
134*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
135*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
136*0b57cec5SDimitry Andric ///    product of the lower 64 bits of both operands. The upper 64 bits are
137*0b57cec5SDimitry Andric ///    copied from the upper 64 bits of the first source operand.
138*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
139*0b57cec5SDimitry Andric _mm_mul_sd(__m128d __a, __m128d __b)
140*0b57cec5SDimitry Andric {
141*0b57cec5SDimitry Andric   __a[0] *= __b[0];
142*0b57cec5SDimitry Andric   return __a;
143*0b57cec5SDimitry Andric }
144*0b57cec5SDimitry Andric 
145*0b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double].
146*0b57cec5SDimitry Andric ///
147*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
148*0b57cec5SDimitry Andric ///
149*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.
150*0b57cec5SDimitry Andric ///
151*0b57cec5SDimitry Andric /// \param __a
152*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
153*0b57cec5SDimitry Andric /// \param __b
154*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
155*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both
156*0b57cec5SDimitry Andric ///    operands.
157*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
158*0b57cec5SDimitry Andric _mm_mul_pd(__m128d __a, __m128d __b)
159*0b57cec5SDimitry Andric {
160*0b57cec5SDimitry Andric   return (__m128d)((__v2df)__a * (__v2df)__b);
161*0b57cec5SDimitry Andric }
162*0b57cec5SDimitry Andric 
163*0b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the
164*0b57cec5SDimitry Andric ///    lower double-precision value of the second operand and returns the
165*0b57cec5SDimitry Andric ///    quotient in the lower 64 bits of the result. The upper 64 bits of the
166*0b57cec5SDimitry Andric ///    result are copied from the upper double-precision value of the first
167*0b57cec5SDimitry Andric ///    operand.
168*0b57cec5SDimitry Andric ///
169*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
170*0b57cec5SDimitry Andric ///
171*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.
172*0b57cec5SDimitry Andric ///
173*0b57cec5SDimitry Andric /// \param __a
174*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the dividend.
175*0b57cec5SDimitry Andric /// \param __b
176*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing divisor.
177*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
178*0b57cec5SDimitry Andric ///    quotient of the lower 64 bits of both operands. The upper 64 bits are
179*0b57cec5SDimitry Andric ///    copied from the upper 64 bits of the first source operand.
180*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
181*0b57cec5SDimitry Andric _mm_div_sd(__m128d __a, __m128d __b)
182*0b57cec5SDimitry Andric {
183*0b57cec5SDimitry Andric   __a[0] /= __b[0];
184*0b57cec5SDimitry Andric   return __a;
185*0b57cec5SDimitry Andric }
186*0b57cec5SDimitry Andric 
187*0b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of
188*0b57cec5SDimitry Andric ///    [2 x double].
189*0b57cec5SDimitry Andric ///
190*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
191*0b57cec5SDimitry Andric ///
192*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.
193*0b57cec5SDimitry Andric ///
194*0b57cec5SDimitry Andric /// \param __a
195*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the dividend.
196*0b57cec5SDimitry Andric /// \param __b
197*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the divisor.
198*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both
199*0b57cec5SDimitry Andric ///    operands.
200*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
201*0b57cec5SDimitry Andric _mm_div_pd(__m128d __a, __m128d __b)
202*0b57cec5SDimitry Andric {
203*0b57cec5SDimitry Andric   return (__m128d)((__v2df)__a / (__v2df)__b);
204*0b57cec5SDimitry Andric }
205*0b57cec5SDimitry Andric 
206*0b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of
207*0b57cec5SDimitry Andric ///    the second operand and returns it in the lower 64 bits of the result.
208*0b57cec5SDimitry Andric ///    The upper 64 bits of the result are copied from the upper
209*0b57cec5SDimitry Andric ///    double-precision value of the first operand.
210*0b57cec5SDimitry Andric ///
211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
212*0b57cec5SDimitry Andric ///
213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.
214*0b57cec5SDimitry Andric ///
215*0b57cec5SDimitry Andric /// \param __a
216*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
217*0b57cec5SDimitry Andric ///    upper 64 bits of this operand are copied to the upper 64 bits of the
218*0b57cec5SDimitry Andric ///    result.
219*0b57cec5SDimitry Andric /// \param __b
220*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
221*0b57cec5SDimitry Andric ///    square root is calculated using the lower 64 bits of this operand.
222*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
223*0b57cec5SDimitry Andric ///    square root of the lower 64 bits of operand \a __b, and whose upper 64
224*0b57cec5SDimitry Andric ///    bits are copied from the upper 64 bits of operand \a __a.
225*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
226*0b57cec5SDimitry Andric _mm_sqrt_sd(__m128d __a, __m128d __b)
227*0b57cec5SDimitry Andric {
228*0b57cec5SDimitry Andric   __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
229*0b57cec5SDimitry Andric   return __extension__ (__m128d) { __c[0], __a[1] };
230*0b57cec5SDimitry Andric }
231*0b57cec5SDimitry Andric 
232*0b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a
233*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double].
234*0b57cec5SDimitry Andric ///
235*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
236*0b57cec5SDimitry Andric ///
237*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.
238*0b57cec5SDimitry Andric ///
239*0b57cec5SDimitry Andric /// \param __a
240*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
241*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the
242*0b57cec5SDimitry Andric ///    values in the operand.
243*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
244*0b57cec5SDimitry Andric _mm_sqrt_pd(__m128d __a)
245*0b57cec5SDimitry Andric {
246*0b57cec5SDimitry Andric   return __builtin_ia32_sqrtpd((__v2df)__a);
247*0b57cec5SDimitry Andric }
248*0b57cec5SDimitry Andric 
249*0b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and
250*0b57cec5SDimitry Andric ///    returns the lesser of the pair of values in the lower 64-bits of the
251*0b57cec5SDimitry Andric ///    result. The upper 64 bits of the result are copied from the upper
252*0b57cec5SDimitry Andric ///    double-precision value of the first operand.
253*0b57cec5SDimitry Andric ///
254*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
255*0b57cec5SDimitry Andric ///
256*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.
257*0b57cec5SDimitry Andric ///
258*0b57cec5SDimitry Andric /// \param __a
259*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
260*0b57cec5SDimitry Andric ///    lower 64 bits of this operand are used in the comparison.
261*0b57cec5SDimitry Andric /// \param __b
262*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
263*0b57cec5SDimitry Andric ///    lower 64 bits of this operand are used in the comparison.
264*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
265*0b57cec5SDimitry Andric ///    minimum value between both operands. The upper 64 bits are copied from
266*0b57cec5SDimitry Andric ///    the upper 64 bits of the first source operand.
267*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
268*0b57cec5SDimitry Andric _mm_min_sd(__m128d __a, __m128d __b)
269*0b57cec5SDimitry Andric {
270*0b57cec5SDimitry Andric   return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
271*0b57cec5SDimitry Andric }
272*0b57cec5SDimitry Andric 
273*0b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of
274*0b57cec5SDimitry Andric ///    [2 x double] and returns the vector containing the lesser of each pair of
275*0b57cec5SDimitry Andric ///    values.
276*0b57cec5SDimitry Andric ///
277*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
278*0b57cec5SDimitry Andric ///
279*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.
280*0b57cec5SDimitry Andric ///
281*0b57cec5SDimitry Andric /// \param __a
282*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
283*0b57cec5SDimitry Andric /// \param __b
284*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
285*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values
286*0b57cec5SDimitry Andric ///    between both operands.
287*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
288*0b57cec5SDimitry Andric _mm_min_pd(__m128d __a, __m128d __b)
289*0b57cec5SDimitry Andric {
290*0b57cec5SDimitry Andric   return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
291*0b57cec5SDimitry Andric }
292*0b57cec5SDimitry Andric 
293*0b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and
294*0b57cec5SDimitry Andric ///    returns the greater of the pair of values in the lower 64-bits of the
295*0b57cec5SDimitry Andric ///    result. The upper 64 bits of the result are copied from the upper
296*0b57cec5SDimitry Andric ///    double-precision value of the first operand.
297*0b57cec5SDimitry Andric ///
298*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
299*0b57cec5SDimitry Andric ///
300*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.
301*0b57cec5SDimitry Andric ///
302*0b57cec5SDimitry Andric /// \param __a
303*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
304*0b57cec5SDimitry Andric ///    lower 64 bits of this operand are used in the comparison.
305*0b57cec5SDimitry Andric /// \param __b
306*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands. The
307*0b57cec5SDimitry Andric ///    lower 64 bits of this operand are used in the comparison.
308*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
309*0b57cec5SDimitry Andric ///    maximum value between both operands. The upper 64 bits are copied from
310*0b57cec5SDimitry Andric ///    the upper 64 bits of the first source operand.
311*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
312*0b57cec5SDimitry Andric _mm_max_sd(__m128d __a, __m128d __b)
313*0b57cec5SDimitry Andric {
314*0b57cec5SDimitry Andric   return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
315*0b57cec5SDimitry Andric }
316*0b57cec5SDimitry Andric 
317*0b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of
318*0b57cec5SDimitry Andric ///    [2 x double] and returns the vector containing the greater of each pair
319*0b57cec5SDimitry Andric ///    of values.
320*0b57cec5SDimitry Andric ///
321*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
322*0b57cec5SDimitry Andric ///
323*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.
324*0b57cec5SDimitry Andric ///
325*0b57cec5SDimitry Andric /// \param __a
326*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
327*0b57cec5SDimitry Andric /// \param __b
328*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the operands.
329*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values
330*0b57cec5SDimitry Andric ///    between both operands.
331*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
332*0b57cec5SDimitry Andric _mm_max_pd(__m128d __a, __m128d __b)
333*0b57cec5SDimitry Andric {
334*0b57cec5SDimitry Andric   return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
335*0b57cec5SDimitry Andric }
336*0b57cec5SDimitry Andric 
337*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double].
338*0b57cec5SDimitry Andric ///
339*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
340*0b57cec5SDimitry Andric ///
341*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
342*0b57cec5SDimitry Andric ///
343*0b57cec5SDimitry Andric /// \param __a
344*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
345*0b57cec5SDimitry Andric /// \param __b
346*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
347*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
348*0b57cec5SDimitry Andric ///    values between both operands.
349*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
350*0b57cec5SDimitry Andric _mm_and_pd(__m128d __a, __m128d __b)
351*0b57cec5SDimitry Andric {
352*0b57cec5SDimitry Andric   return (__m128d)((__v2du)__a & (__v2du)__b);
353*0b57cec5SDimitry Andric }
354*0b57cec5SDimitry Andric 
355*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using
356*0b57cec5SDimitry Andric ///    the one's complement of the values contained in the first source operand.
357*0b57cec5SDimitry Andric ///
358*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
359*0b57cec5SDimitry Andric ///
360*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
361*0b57cec5SDimitry Andric ///
362*0b57cec5SDimitry Andric /// \param __a
363*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the left source operand. The
364*0b57cec5SDimitry Andric ///    one's complement of this value is used in the bitwise AND.
365*0b57cec5SDimitry Andric /// \param __b
366*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the right source operand.
367*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
368*0b57cec5SDimitry Andric ///    values in the second operand and the one's complement of the first
369*0b57cec5SDimitry Andric ///    operand.
370*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
371*0b57cec5SDimitry Andric _mm_andnot_pd(__m128d __a, __m128d __b)
372*0b57cec5SDimitry Andric {
373*0b57cec5SDimitry Andric   return (__m128d)(~(__v2du)__a & (__v2du)__b);
374*0b57cec5SDimitry Andric }
375*0b57cec5SDimitry Andric 
376*0b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double].
377*0b57cec5SDimitry Andric ///
378*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
379*0b57cec5SDimitry Andric ///
380*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
381*0b57cec5SDimitry Andric ///
382*0b57cec5SDimitry Andric /// \param __a
383*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
384*0b57cec5SDimitry Andric /// \param __b
385*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
386*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
387*0b57cec5SDimitry Andric ///    values between both operands.
388*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
389*0b57cec5SDimitry Andric _mm_or_pd(__m128d __a, __m128d __b)
390*0b57cec5SDimitry Andric {
391*0b57cec5SDimitry Andric   return (__m128d)((__v2du)__a | (__v2du)__b);
392*0b57cec5SDimitry Andric }
393*0b57cec5SDimitry Andric 
394*0b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double].
395*0b57cec5SDimitry Andric ///
396*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
397*0b57cec5SDimitry Andric ///
398*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
399*0b57cec5SDimitry Andric ///
400*0b57cec5SDimitry Andric /// \param __a
401*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
402*0b57cec5SDimitry Andric /// \param __b
403*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
404*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
405*0b57cec5SDimitry Andric ///    values between both operands.
406*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
407*0b57cec5SDimitry Andric _mm_xor_pd(__m128d __a, __m128d __b)
408*0b57cec5SDimitry Andric {
409*0b57cec5SDimitry Andric   return (__m128d)((__v2du)__a ^ (__v2du)__b);
410*0b57cec5SDimitry Andric }
411*0b57cec5SDimitry Andric 
412*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
413*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] for equality. Each comparison yields 0x0
414*0b57cec5SDimitry Andric ///    for false, 0xFFFFFFFFFFFFFFFF for true.
415*0b57cec5SDimitry Andric ///
416*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
417*0b57cec5SDimitry Andric ///
418*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.
419*0b57cec5SDimitry Andric ///
420*0b57cec5SDimitry Andric /// \param __a
421*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
422*0b57cec5SDimitry Andric /// \param __b
423*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
424*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
425*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
426*0b57cec5SDimitry Andric _mm_cmpeq_pd(__m128d __a, __m128d __b)
427*0b57cec5SDimitry Andric {
428*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
429*0b57cec5SDimitry Andric }
430*0b57cec5SDimitry Andric 
431*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
432*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
433*0b57cec5SDimitry Andric ///    operand are less than those in the second operand. Each comparison
434*0b57cec5SDimitry Andric ///    yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
435*0b57cec5SDimitry Andric ///
436*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
437*0b57cec5SDimitry Andric ///
438*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
439*0b57cec5SDimitry Andric ///
440*0b57cec5SDimitry Andric /// \param __a
441*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
442*0b57cec5SDimitry Andric /// \param __b
443*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
444*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
445*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
446*0b57cec5SDimitry Andric _mm_cmplt_pd(__m128d __a, __m128d __b)
447*0b57cec5SDimitry Andric {
448*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
449*0b57cec5SDimitry Andric }
450*0b57cec5SDimitry Andric 
451*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
452*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
453*0b57cec5SDimitry Andric ///    operand are less than or equal to those in the second operand.
454*0b57cec5SDimitry Andric ///
455*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
456*0b57cec5SDimitry Andric ///
457*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
458*0b57cec5SDimitry Andric ///
459*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
460*0b57cec5SDimitry Andric ///
461*0b57cec5SDimitry Andric /// \param __a
462*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
463*0b57cec5SDimitry Andric /// \param __b
464*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
465*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
466*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
467*0b57cec5SDimitry Andric _mm_cmple_pd(__m128d __a, __m128d __b)
468*0b57cec5SDimitry Andric {
469*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
470*0b57cec5SDimitry Andric }
471*0b57cec5SDimitry Andric 
472*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
473*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
474*0b57cec5SDimitry Andric ///    operand are greater than those in the second operand.
475*0b57cec5SDimitry Andric ///
476*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
477*0b57cec5SDimitry Andric ///
478*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
479*0b57cec5SDimitry Andric ///
480*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
///    The greater-than test is expressed as a less-than test with the
///    operands swapped (\a __b less than \a __a).
481*0b57cec5SDimitry Andric ///
482*0b57cec5SDimitry Andric /// \param __a
483*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
484*0b57cec5SDimitry Andric /// \param __b
485*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
486*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
487*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
488*0b57cec5SDimitry Andric _mm_cmpgt_pd(__m128d __a, __m128d __b)
489*0b57cec5SDimitry Andric {
  /* Note the argument order: __b is passed first, __a second. */
490*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
491*0b57cec5SDimitry Andric }
492*0b57cec5SDimitry Andric 
493*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
494*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
495*0b57cec5SDimitry Andric ///    operand are greater than or equal to those in the second operand.
496*0b57cec5SDimitry Andric ///
497*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
498*0b57cec5SDimitry Andric ///
499*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
500*0b57cec5SDimitry Andric ///
501*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
///    The greater-than-or-equal test is expressed as a less-than-or-equal
///    test with the operands swapped (\a __b less than or equal to \a __a).
502*0b57cec5SDimitry Andric ///
503*0b57cec5SDimitry Andric /// \param __a
504*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
505*0b57cec5SDimitry Andric /// \param __b
506*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
507*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
508*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
509*0b57cec5SDimitry Andric _mm_cmpge_pd(__m128d __a, __m128d __b)
510*0b57cec5SDimitry Andric {
  /* Note the argument order: __b is passed first, __a second. */
511*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
512*0b57cec5SDimitry Andric }
513*0b57cec5SDimitry Andric 
514*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
515*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
516*0b57cec5SDimitry Andric ///    operand are ordered with respect to those in the second operand.
517*0b57cec5SDimitry Andric ///
518*0b57cec5SDimitry Andric ///    A pair of double-precision values are "ordered" with respect to each
519*0b57cec5SDimitry Andric ///    other if neither value is a NaN. Each comparison yields 0x0 for false,
520*0b57cec5SDimitry Andric ///    0xFFFFFFFFFFFFFFFF for true.
521*0b57cec5SDimitry Andric ///
522*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
523*0b57cec5SDimitry Andric ///
524*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.
525*0b57cec5SDimitry Andric ///
526*0b57cec5SDimitry Andric /// \param __a
527*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
528*0b57cec5SDimitry Andric /// \param __b
529*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
530*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
///    Each 64-bit element is a mask (all zeros or all ones), not a numeric
///    value.
531*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
532*0b57cec5SDimitry Andric _mm_cmpord_pd(__m128d __a, __m128d __b)
533*0b57cec5SDimitry Andric {
  /* Operands are forwarded in their declared order (__a, __b). */
534*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
535*0b57cec5SDimitry Andric }
536*0b57cec5SDimitry Andric 
537*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
538*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
539*0b57cec5SDimitry Andric ///    operand are unordered with respect to those in the second operand.
540*0b57cec5SDimitry Andric ///
541*0b57cec5SDimitry Andric ///    A pair of double-precision values are "unordered" with respect to each
542*0b57cec5SDimitry Andric ///    other if one or both values are NaN. Each comparison yields 0x0 for
543*0b57cec5SDimitry Andric ///    false, 0xFFFFFFFFFFFFFFFF for true.
544*0b57cec5SDimitry Andric ///
545*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
546*0b57cec5SDimitry Andric ///
547*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
548*0b57cec5SDimitry Andric ///   instruction.
549*0b57cec5SDimitry Andric ///
550*0b57cec5SDimitry Andric /// \param __a
551*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
552*0b57cec5SDimitry Andric /// \param __b
553*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
554*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
///    Each 64-bit element is a mask (all zeros or all ones), not a numeric
///    value.
555*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
556*0b57cec5SDimitry Andric _mm_cmpunord_pd(__m128d __a, __m128d __b)
557*0b57cec5SDimitry Andric {
  /* Operands are forwarded in their declared order (__a, __b). */
558*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
559*0b57cec5SDimitry Andric }
560*0b57cec5SDimitry Andric 
561*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
562*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
563*0b57cec5SDimitry Andric ///    operand are unequal to those in the second operand.
564*0b57cec5SDimitry Andric ///
565*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    NOTE(review): as a negated equality test this presumably yields true
///    when either value is a NaN; confirm against the instruction reference.
566*0b57cec5SDimitry Andric ///
567*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
568*0b57cec5SDimitry Andric ///
569*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.
570*0b57cec5SDimitry Andric ///
571*0b57cec5SDimitry Andric /// \param __a
572*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
573*0b57cec5SDimitry Andric /// \param __b
574*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
575*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
576*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
577*0b57cec5SDimitry Andric _mm_cmpneq_pd(__m128d __a, __m128d __b)
578*0b57cec5SDimitry Andric {
  /* Operands are forwarded in their declared order (__a, __b). */
579*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
580*0b57cec5SDimitry Andric }
581*0b57cec5SDimitry Andric 
582*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
583*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
584*0b57cec5SDimitry Andric ///    operand are not less than those in the second operand.
585*0b57cec5SDimitry Andric ///
586*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    NOTE(review): this is a negated less-than test, which presumably
///    yields true when either value is a NaN and so is not interchangeable
///    with _mm_cmpge_pd; confirm against the instruction reference.
587*0b57cec5SDimitry Andric ///
588*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
589*0b57cec5SDimitry Andric ///
590*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
591*0b57cec5SDimitry Andric ///
592*0b57cec5SDimitry Andric /// \param __a
593*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
594*0b57cec5SDimitry Andric /// \param __b
595*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
596*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
597*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
598*0b57cec5SDimitry Andric _mm_cmpnlt_pd(__m128d __a, __m128d __b)
599*0b57cec5SDimitry Andric {
  /* Operands are forwarded in their declared order (__a, __b). */
600*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
601*0b57cec5SDimitry Andric }
602*0b57cec5SDimitry Andric 
603*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
604*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
605*0b57cec5SDimitry Andric ///    operand are not less than or equal to those in the second operand.
606*0b57cec5SDimitry Andric ///
607*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    NOTE(review): this is a negated less-than-or-equal test, which
///    presumably yields true when either value is a NaN and so is not
///    interchangeable with _mm_cmpgt_pd; confirm against the instruction
///    reference.
608*0b57cec5SDimitry Andric ///
609*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
610*0b57cec5SDimitry Andric ///
611*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
612*0b57cec5SDimitry Andric ///
613*0b57cec5SDimitry Andric /// \param __a
614*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
615*0b57cec5SDimitry Andric /// \param __b
616*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
617*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
618*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
619*0b57cec5SDimitry Andric _mm_cmpnle_pd(__m128d __a, __m128d __b)
620*0b57cec5SDimitry Andric {
  /* Operands are forwarded in their declared order (__a, __b). */
621*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
622*0b57cec5SDimitry Andric }
623*0b57cec5SDimitry Andric 
624*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
625*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
626*0b57cec5SDimitry Andric ///    operand are not greater than those in the second operand.
627*0b57cec5SDimitry Andric ///
628*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    NOTE(review): as a negated predicate this presumably yields true when
///    either value is a NaN; confirm against the instruction reference.
629*0b57cec5SDimitry Andric ///
630*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
631*0b57cec5SDimitry Andric ///
632*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
///    The not-greater-than test is expressed as a not-less-than test with
///    the operands swapped (\a __b not less than \a __a).
633*0b57cec5SDimitry Andric ///
634*0b57cec5SDimitry Andric /// \param __a
635*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
636*0b57cec5SDimitry Andric /// \param __b
637*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
638*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
639*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
640*0b57cec5SDimitry Andric _mm_cmpngt_pd(__m128d __a, __m128d __b)
641*0b57cec5SDimitry Andric {
  /* Note the argument order: __b is passed first, __a second. */
642*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
643*0b57cec5SDimitry Andric }
644*0b57cec5SDimitry Andric 
645*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the
646*0b57cec5SDimitry Andric ///    128-bit vectors of [2 x double] to determine if the values in the first
647*0b57cec5SDimitry Andric ///    operand are not greater than or equal to those in the second operand.
648*0b57cec5SDimitry Andric ///
649*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    NOTE(review): as a negated predicate this presumably yields true when
///    either value is a NaN; confirm against the instruction reference.
650*0b57cec5SDimitry Andric ///
651*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
652*0b57cec5SDimitry Andric ///
653*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
///    The not-greater-than-or-equal test is expressed as a
///    not-less-than-or-equal test with the operands swapped (\a __b not less
///    than or equal to \a __a).
654*0b57cec5SDimitry Andric ///
655*0b57cec5SDimitry Andric /// \param __a
656*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
657*0b57cec5SDimitry Andric /// \param __b
658*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
659*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results.
660*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
661*0b57cec5SDimitry Andric _mm_cmpnge_pd(__m128d __a, __m128d __b)
662*0b57cec5SDimitry Andric {
  /* Note the argument order: __b is passed first, __a second. */
663*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
664*0b57cec5SDimitry Andric }
665*0b57cec5SDimitry Andric 
666*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
667*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] for equality.
668*0b57cec5SDimitry Andric ///
669*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
670*0b57cec5SDimitry Andric ///
671*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
672*0b57cec5SDimitry Andric ///
673*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.
674*0b57cec5SDimitry Andric ///
675*0b57cec5SDimitry Andric /// \param __a
676*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
677*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
678*0b57cec5SDimitry Andric /// \param __b
679*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
680*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
681*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
682*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
683*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
684*0b57cec5SDimitry Andric _mm_cmpeq_sd(__m128d __a, __m128d __b)
685*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
686*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
687*0b57cec5SDimitry Andric }
688*0b57cec5SDimitry Andric 
689*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
690*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
691*0b57cec5SDimitry Andric ///    the value in the first parameter is less than the corresponding value in
692*0b57cec5SDimitry Andric ///    the second parameter.
693*0b57cec5SDimitry Andric ///
694*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
695*0b57cec5SDimitry Andric ///
696*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
697*0b57cec5SDimitry Andric ///
698*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
699*0b57cec5SDimitry Andric ///
700*0b57cec5SDimitry Andric /// \param __a
701*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
702*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
703*0b57cec5SDimitry Andric /// \param __b
704*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
705*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
706*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
707*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
708*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
709*0b57cec5SDimitry Andric _mm_cmplt_sd(__m128d __a, __m128d __b)
710*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
711*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
712*0b57cec5SDimitry Andric }
713*0b57cec5SDimitry Andric 
714*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
715*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
716*0b57cec5SDimitry Andric ///    the value in the first parameter is less than or equal to the
717*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
718*0b57cec5SDimitry Andric ///
719*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
720*0b57cec5SDimitry Andric ///
721*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
722*0b57cec5SDimitry Andric ///
723*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
724*0b57cec5SDimitry Andric ///
725*0b57cec5SDimitry Andric /// \param __a
726*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
727*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
728*0b57cec5SDimitry Andric /// \param __b
729*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
730*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
731*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
732*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
733*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
734*0b57cec5SDimitry Andric _mm_cmple_sd(__m128d __a, __m128d __b)
735*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
736*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
737*0b57cec5SDimitry Andric }
738*0b57cec5SDimitry Andric 
739*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
740*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
741*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than the corresponding value
742*0b57cec5SDimitry Andric ///    in the second parameter.
743*0b57cec5SDimitry Andric ///
744*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
745*0b57cec5SDimitry Andric ///
746*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
747*0b57cec5SDimitry Andric ///
748*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
///    The greater-than test is expressed as a less-than test with the
///    operands swapped (\a __b less than \a __a).
749*0b57cec5SDimitry Andric ///
750*0b57cec5SDimitry Andric /// \param __a
751*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
752*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
753*0b57cec5SDimitry Andric /// \param __b
754*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
755*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
756*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
757*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
758*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
759*0b57cec5SDimitry Andric _mm_cmpgt_sd(__m128d __a, __m128d __b)
760*0b57cec5SDimitry Andric {
  /* Operands are swapped here, so the first operand of the builtin is __b. */
761*0b57cec5SDimitry Andric   __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
  /* Keep only the comparison mask from __c; the upper element is taken from
     __a explicitly so the documented return value holds. */
762*0b57cec5SDimitry Andric   return __extension__ (__m128d) { __c[0], __a[1] };
763*0b57cec5SDimitry Andric }
764*0b57cec5SDimitry Andric 
765*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
766*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
767*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than or equal to the
768*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
769*0b57cec5SDimitry Andric ///
770*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
771*0b57cec5SDimitry Andric ///
772*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
773*0b57cec5SDimitry Andric ///
774*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
///    The greater-than-or-equal test is expressed as a less-than-or-equal
///    test with the operands swapped (\a __b less than or equal to \a __a).
775*0b57cec5SDimitry Andric ///
776*0b57cec5SDimitry Andric /// \param __a
777*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
778*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
779*0b57cec5SDimitry Andric /// \param __b
780*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
781*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
782*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
783*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
784*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
785*0b57cec5SDimitry Andric _mm_cmpge_sd(__m128d __a, __m128d __b)
786*0b57cec5SDimitry Andric {
  /* Operands are swapped here, so the first operand of the builtin is __b. */
787*0b57cec5SDimitry Andric   __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
  /* Keep only the comparison mask from __c; the upper element is taken from
     __a explicitly so the documented return value holds. */
788*0b57cec5SDimitry Andric   return __extension__ (__m128d) { __c[0], __a[1] };
789*0b57cec5SDimitry Andric }
790*0b57cec5SDimitry Andric 
791*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
792*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
793*0b57cec5SDimitry Andric ///    the value in the first parameter is "ordered" with respect to the
794*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
795*0b57cec5SDimitry Andric ///
796*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
797*0b57cec5SDimitry Andric ///    of double-precision values are "ordered" with respect to each other if
798*0b57cec5SDimitry Andric ///    neither value is a NaN.
799*0b57cec5SDimitry Andric ///
800*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
801*0b57cec5SDimitry Andric ///
802*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.
803*0b57cec5SDimitry Andric ///
804*0b57cec5SDimitry Andric /// \param __a
805*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
806*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
807*0b57cec5SDimitry Andric /// \param __b
808*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
809*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
810*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
811*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
812*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
813*0b57cec5SDimitry Andric _mm_cmpord_sd(__m128d __a, __m128d __b)
814*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
815*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
816*0b57cec5SDimitry Andric }
817*0b57cec5SDimitry Andric 
818*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
819*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
820*0b57cec5SDimitry Andric ///    the value in the first parameter is "unordered" with respect to the
821*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
822*0b57cec5SDimitry Andric ///
823*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
824*0b57cec5SDimitry Andric ///    of double-precision values are "unordered" with respect to each other if
825*0b57cec5SDimitry Andric ///    one or both values are NaN.
826*0b57cec5SDimitry Andric ///
827*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
828*0b57cec5SDimitry Andric ///
829*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
830*0b57cec5SDimitry Andric ///   instruction.
831*0b57cec5SDimitry Andric ///
832*0b57cec5SDimitry Andric /// \param __a
833*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
834*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
835*0b57cec5SDimitry Andric /// \param __b
836*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
837*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
838*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
839*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
840*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
841*0b57cec5SDimitry Andric _mm_cmpunord_sd(__m128d __a, __m128d __b)
842*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
843*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
844*0b57cec5SDimitry Andric }
845*0b57cec5SDimitry Andric 
846*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
847*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
848*0b57cec5SDimitry Andric ///    the value in the first parameter is unequal to the corresponding value in
849*0b57cec5SDimitry Andric ///    the second parameter.
850*0b57cec5SDimitry Andric ///
851*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
852*0b57cec5SDimitry Andric ///
853*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
854*0b57cec5SDimitry Andric ///
855*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.
856*0b57cec5SDimitry Andric ///
857*0b57cec5SDimitry Andric /// \param __a
858*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
859*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
860*0b57cec5SDimitry Andric /// \param __b
861*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
862*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
863*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
864*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
865*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
866*0b57cec5SDimitry Andric _mm_cmpneq_sd(__m128d __a, __m128d __b)
867*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
868*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
869*0b57cec5SDimitry Andric }
870*0b57cec5SDimitry Andric 
871*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
872*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
873*0b57cec5SDimitry Andric ///    the value in the first parameter is not less than the corresponding
874*0b57cec5SDimitry Andric ///    value in the second parameter.
875*0b57cec5SDimitry Andric ///
876*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
877*0b57cec5SDimitry Andric ///
878*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
879*0b57cec5SDimitry Andric ///
880*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
881*0b57cec5SDimitry Andric ///
882*0b57cec5SDimitry Andric /// \param __a
883*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
884*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
885*0b57cec5SDimitry Andric /// \param __b
886*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
887*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
888*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
889*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
890*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
891*0b57cec5SDimitry Andric _mm_cmpnlt_sd(__m128d __a, __m128d __b)
892*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
893*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
894*0b57cec5SDimitry Andric }
895*0b57cec5SDimitry Andric 
896*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
897*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
898*0b57cec5SDimitry Andric ///    the value in the first parameter is not less than or equal to the
899*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
900*0b57cec5SDimitry Andric ///
901*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
902*0b57cec5SDimitry Andric ///
903*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
904*0b57cec5SDimitry Andric ///
905*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
906*0b57cec5SDimitry Andric ///
907*0b57cec5SDimitry Andric /// \param __a
908*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
909*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
910*0b57cec5SDimitry Andric /// \param __b
911*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
912*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
913*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
914*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
915*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
916*0b57cec5SDimitry Andric _mm_cmpnle_sd(__m128d __a, __m128d __b)
917*0b57cec5SDimitry Andric {
  /* The builtin result is returned as is, with __a as its first operand. */
918*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
919*0b57cec5SDimitry Andric }
920*0b57cec5SDimitry Andric 
921*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
922*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
923*0b57cec5SDimitry Andric ///    the value in the first parameter is not greater than the corresponding
924*0b57cec5SDimitry Andric ///    value in the second parameter.
925*0b57cec5SDimitry Andric ///
926*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
927*0b57cec5SDimitry Andric ///
928*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
929*0b57cec5SDimitry Andric ///
930*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
///    The not-greater-than test is expressed as a not-less-than test with
///    the operands swapped (\a __b not less than \a __a).
931*0b57cec5SDimitry Andric ///
932*0b57cec5SDimitry Andric /// \param __a
933*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
934*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
935*0b57cec5SDimitry Andric /// \param __b
936*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
937*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
938*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
939*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
940*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
941*0b57cec5SDimitry Andric _mm_cmpngt_sd(__m128d __a, __m128d __b)
942*0b57cec5SDimitry Andric {
  /* Operands are swapped here, so the first operand of the builtin is __b. */
943*0b57cec5SDimitry Andric   __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
  /* Keep only the comparison mask from __c; the upper element is taken from
     __a explicitly so the documented return value holds. */
944*0b57cec5SDimitry Andric   return __extension__ (__m128d) { __c[0], __a[1] };
945*0b57cec5SDimitry Andric }
946*0b57cec5SDimitry Andric 
947*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
948*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
949*0b57cec5SDimitry Andric ///    the value in the first parameter is not greater than or equal to the
950*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
951*0b57cec5SDimitry Andric ///
952*0b57cec5SDimitry Andric ///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
953*0b57cec5SDimitry Andric ///
954*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
955*0b57cec5SDimitry Andric ///
956*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
///    The not-greater-than-or-equal test is expressed as a
///    not-less-than-or-equal test with the operands swapped (\a __b not less
///    than or equal to \a __a).
957*0b57cec5SDimitry Andric ///
958*0b57cec5SDimitry Andric /// \param __a
959*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
960*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
961*0b57cec5SDimitry Andric /// \param __b
962*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
963*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
964*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contain the comparison
965*0b57cec5SDimitry Andric ///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
966*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
967*0b57cec5SDimitry Andric _mm_cmpnge_sd(__m128d __a, __m128d __b)
968*0b57cec5SDimitry Andric {
  /* Operands are swapped here, so the first operand of the builtin is __b. */
969*0b57cec5SDimitry Andric   __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
  /* Keep only the comparison mask from __c; the upper element is taken from
     __a explicitly so the documented return value holds. */
970*0b57cec5SDimitry Andric   return __extension__ (__m128d) { __c[0], __a[1] };
971*0b57cec5SDimitry Andric }
972*0b57cec5SDimitry Andric 
973*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
974*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] for equality.
975*0b57cec5SDimitry Andric ///
976*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
977*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
978*0b57cec5SDimitry Andric ///
979*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
980*0b57cec5SDimitry Andric ///
981*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
982*0b57cec5SDimitry Andric ///
983*0b57cec5SDimitry Andric /// \param __a
984*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
985*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
986*0b57cec5SDimitry Andric /// \param __b
987*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
988*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
989*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
990*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
991*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
992*0b57cec5SDimitry Andric _mm_comieq_sd(__m128d __a, __m128d __b)
993*0b57cec5SDimitry Andric {
994*0b57cec5SDimitry Andric   return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
995*0b57cec5SDimitry Andric }
996*0b57cec5SDimitry Andric 
997*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
998*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
999*0b57cec5SDimitry Andric ///    the value in the first parameter is less than the corresponding value in
1000*0b57cec5SDimitry Andric ///    the second parameter.
1001*0b57cec5SDimitry Andric ///
1002*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
1003*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1004*0b57cec5SDimitry Andric ///
1005*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1006*0b57cec5SDimitry Andric ///
1007*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1008*0b57cec5SDimitry Andric ///
1009*0b57cec5SDimitry Andric /// \param __a
1010*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1011*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1012*0b57cec5SDimitry Andric /// \param __b
1013*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1014*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1015*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1016*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 0 is returned.
1017*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1018*0b57cec5SDimitry Andric _mm_comilt_sd(__m128d __a, __m128d __b)
1019*0b57cec5SDimitry Andric {
1020*0b57cec5SDimitry Andric   return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
1021*0b57cec5SDimitry Andric }
1022*0b57cec5SDimitry Andric 
1023*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1024*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1025*0b57cec5SDimitry Andric ///    the value in the first parameter is less than or equal to the
1026*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
1027*0b57cec5SDimitry Andric ///
1028*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
1029*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1030*0b57cec5SDimitry Andric ///
1031*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1032*0b57cec5SDimitry Andric ///
1033*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1034*0b57cec5SDimitry Andric ///
1035*0b57cec5SDimitry Andric /// \param __a
1036*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1037*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1038*0b57cec5SDimitry Andric /// \param __b
1039*0b57cec5SDimitry Andric ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1040*0b57cec5SDimitry Andric ///     compared to the lower double-precision value of \a __a.
1041*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1042*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 0 is returned.
1043*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1044*0b57cec5SDimitry Andric _mm_comile_sd(__m128d __a, __m128d __b)
1045*0b57cec5SDimitry Andric {
1046*0b57cec5SDimitry Andric   return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
1047*0b57cec5SDimitry Andric }
1048*0b57cec5SDimitry Andric 
1049*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1050*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1051*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than the corresponding value
1052*0b57cec5SDimitry Andric ///    in the second parameter.
1053*0b57cec5SDimitry Andric ///
1054*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
1055*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1056*0b57cec5SDimitry Andric ///
1057*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1058*0b57cec5SDimitry Andric ///
1059*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1060*0b57cec5SDimitry Andric ///
1061*0b57cec5SDimitry Andric /// \param __a
1062*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1063*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1064*0b57cec5SDimitry Andric /// \param __b
1065*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1066*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1067*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1068*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 0 is returned.
1069*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1070*0b57cec5SDimitry Andric _mm_comigt_sd(__m128d __a, __m128d __b)
1071*0b57cec5SDimitry Andric {
1072*0b57cec5SDimitry Andric   return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
1073*0b57cec5SDimitry Andric }
1074*0b57cec5SDimitry Andric 
1075*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1076*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1077*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than or equal to the
1078*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
1079*0b57cec5SDimitry Andric ///
1080*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
1081*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1082*0b57cec5SDimitry Andric ///
1083*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1084*0b57cec5SDimitry Andric ///
1085*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1086*0b57cec5SDimitry Andric ///
1087*0b57cec5SDimitry Andric /// \param __a
1088*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1089*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1090*0b57cec5SDimitry Andric /// \param __b
1091*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1092*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1093*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1094*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1095*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1096*0b57cec5SDimitry Andric _mm_comige_sd(__m128d __a, __m128d __b)
1097*0b57cec5SDimitry Andric {
1098*0b57cec5SDimitry Andric   return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
1099*0b57cec5SDimitry Andric }
1100*0b57cec5SDimitry Andric 
1101*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1102*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1103*0b57cec5SDimitry Andric ///    the value in the first parameter is unequal to the corresponding value in
1104*0b57cec5SDimitry Andric ///    the second parameter.
1105*0b57cec5SDimitry Andric ///
1106*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two
1107*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 1 is returned.
1108*0b57cec5SDimitry Andric ///
1109*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1110*0b57cec5SDimitry Andric ///
1111*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1112*0b57cec5SDimitry Andric ///
1113*0b57cec5SDimitry Andric /// \param __a
1114*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1115*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1116*0b57cec5SDimitry Andric /// \param __b
1117*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1118*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1119*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1120*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 1 is returned.
1121*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1122*0b57cec5SDimitry Andric _mm_comineq_sd(__m128d __a, __m128d __b)
1123*0b57cec5SDimitry Andric {
1124*0b57cec5SDimitry Andric   return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
1125*0b57cec5SDimitry Andric }
1126*0b57cec5SDimitry Andric 
1127*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1128*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] for equality. The
1129*0b57cec5SDimitry Andric ///    comparison yields 0 for false, 1 for true.
1130*0b57cec5SDimitry Andric ///
1131*0b57cec5SDimitry Andric ///    If either of the two lower double-precision values is NaN, 0 is returned.
1132*0b57cec5SDimitry Andric ///
1133*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1134*0b57cec5SDimitry Andric ///
1135*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1136*0b57cec5SDimitry Andric ///
1137*0b57cec5SDimitry Andric /// \param __a
1138*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1139*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1140*0b57cec5SDimitry Andric /// \param __b
1141*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1142*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1143*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1144*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1145*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1146*0b57cec5SDimitry Andric _mm_ucomieq_sd(__m128d __a, __m128d __b)
1147*0b57cec5SDimitry Andric {
1148*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
1149*0b57cec5SDimitry Andric }
1150*0b57cec5SDimitry Andric 
1151*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1152*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1153*0b57cec5SDimitry Andric ///    the value in the first parameter is less than the corresponding value in
1154*0b57cec5SDimitry Andric ///    the second parameter.
1155*0b57cec5SDimitry Andric ///
1156*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two lower
1157*0b57cec5SDimitry Andric ///    double-precision values is NaN, 0 is returned.
1158*0b57cec5SDimitry Andric ///
1159*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1160*0b57cec5SDimitry Andric ///
1161*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1162*0b57cec5SDimitry Andric ///
1163*0b57cec5SDimitry Andric /// \param __a
1164*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1165*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1166*0b57cec5SDimitry Andric /// \param __b
1167*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1168*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1169*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1170*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1171*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1172*0b57cec5SDimitry Andric _mm_ucomilt_sd(__m128d __a, __m128d __b)
1173*0b57cec5SDimitry Andric {
1174*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
1175*0b57cec5SDimitry Andric }
1176*0b57cec5SDimitry Andric 
1177*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1178*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1179*0b57cec5SDimitry Andric ///    the value in the first parameter is less than or equal to the
1180*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
1181*0b57cec5SDimitry Andric ///
1182*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two lower
1183*0b57cec5SDimitry Andric ///    double-precision values is NaN, 0 is returned.
1184*0b57cec5SDimitry Andric ///
1185*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1186*0b57cec5SDimitry Andric ///
1187*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1188*0b57cec5SDimitry Andric ///
1189*0b57cec5SDimitry Andric /// \param __a
1190*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1191*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1192*0b57cec5SDimitry Andric /// \param __b
1193*0b57cec5SDimitry Andric ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1194*0b57cec5SDimitry Andric ///     compared to the lower double-precision value of \a __a.
1195*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1196*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 0 is returned.
1197*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1198*0b57cec5SDimitry Andric _mm_ucomile_sd(__m128d __a, __m128d __b)
1199*0b57cec5SDimitry Andric {
1200*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
1201*0b57cec5SDimitry Andric }
1202*0b57cec5SDimitry Andric 
1203*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1204*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1205*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than the corresponding value
1206*0b57cec5SDimitry Andric ///    in the second parameter.
1207*0b57cec5SDimitry Andric ///
1208*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two lower
1209*0b57cec5SDimitry Andric ///    double-precision values is NaN, 0 is returned.
1210*0b57cec5SDimitry Andric ///
1211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1212*0b57cec5SDimitry Andric ///
1213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1214*0b57cec5SDimitry Andric ///
1215*0b57cec5SDimitry Andric /// \param __a
1216*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1217*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1218*0b57cec5SDimitry Andric /// \param __b
1219*0b57cec5SDimitry Andric ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1220*0b57cec5SDimitry Andric ///     compared to the lower double-precision value of \a __a.
1221*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1222*0b57cec5SDimitry Andric ///     lower double-precision values is NaN, 0 is returned.
1223*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1224*0b57cec5SDimitry Andric _mm_ucomigt_sd(__m128d __a, __m128d __b)
1225*0b57cec5SDimitry Andric {
1226*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
1227*0b57cec5SDimitry Andric }
1228*0b57cec5SDimitry Andric 
1229*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1230*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1231*0b57cec5SDimitry Andric ///    the value in the first parameter is greater than or equal to the
1232*0b57cec5SDimitry Andric ///    corresponding value in the second parameter.
1233*0b57cec5SDimitry Andric ///
1234*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true.  If either of the two
1235*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1236*0b57cec5SDimitry Andric ///
1237*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1238*0b57cec5SDimitry Andric ///
1239*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1240*0b57cec5SDimitry Andric ///
1241*0b57cec5SDimitry Andric /// \param __a
1242*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1243*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1244*0b57cec5SDimitry Andric /// \param __b
1245*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1246*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1247*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two
1248*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 0 is returned.
1249*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1250*0b57cec5SDimitry Andric _mm_ucomige_sd(__m128d __a, __m128d __b)
1251*0b57cec5SDimitry Andric {
1252*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
1253*0b57cec5SDimitry Andric }
1254*0b57cec5SDimitry Andric 
1255*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of
1256*0b57cec5SDimitry Andric ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1257*0b57cec5SDimitry Andric ///    the value in the first parameter is unequal to the corresponding value in
1258*0b57cec5SDimitry Andric ///    the second parameter.
1259*0b57cec5SDimitry Andric ///
1260*0b57cec5SDimitry Andric ///    The comparison yields 0 for false, 1 for true. If either of the two lower
1261*0b57cec5SDimitry Andric ///    double-precision values is NaN, 1 is returned.
1262*0b57cec5SDimitry Andric ///
1263*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1264*0b57cec5SDimitry Andric ///
1265*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1266*0b57cec5SDimitry Andric ///
1267*0b57cec5SDimitry Andric /// \param __a
1268*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1269*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __b.
1270*0b57cec5SDimitry Andric /// \param __b
1271*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1272*0b57cec5SDimitry Andric ///    compared to the lower double-precision value of \a __a.
1273*0b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two
1274*0b57cec5SDimitry Andric ///    lower double-precision values is NaN, 1 is returned.
1275*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1276*0b57cec5SDimitry Andric _mm_ucomineq_sd(__m128d __a, __m128d __b)
1277*0b57cec5SDimitry Andric {
1278*0b57cec5SDimitry Andric   return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
1279*0b57cec5SDimitry Andric }
1280*0b57cec5SDimitry Andric 
1281*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a
1282*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double] into two single-precision floating-point
1283*0b57cec5SDimitry Andric ///    values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
1284*0b57cec5SDimitry Andric ///    The upper 64 bits of the result vector are set to zero.
1285*0b57cec5SDimitry Andric ///
1286*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1287*0b57cec5SDimitry Andric ///
1288*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.
1289*0b57cec5SDimitry Andric ///
1290*0b57cec5SDimitry Andric /// \param __a
1291*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
1292*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1293*0b57cec5SDimitry Andric ///    converted values. The upper 64 bits are set to zero.
1294*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
1295*0b57cec5SDimitry Andric _mm_cvtpd_ps(__m128d __a)
1296*0b57cec5SDimitry Andric {
1297*0b57cec5SDimitry Andric   return __builtin_ia32_cvtpd2ps((__v2df)__a);
1298*0b57cec5SDimitry Andric }
1299*0b57cec5SDimitry Andric 
1300*0b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a
1301*0b57cec5SDimitry Andric ///    128-bit vector of [4 x float] into two double-precision floating-point
1302*0b57cec5SDimitry Andric ///    values, returned in a 128-bit vector of [2 x double]. The upper two
1303*0b57cec5SDimitry Andric ///    elements of the input vector are unused.
1304*0b57cec5SDimitry Andric ///
1305*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1306*0b57cec5SDimitry Andric ///
1307*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.
1308*0b57cec5SDimitry Andric ///
1309*0b57cec5SDimitry Andric /// \param __a
1310*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float]. The lower two single-precision
1311*0b57cec5SDimitry Andric ///    floating-point elements are converted to double-precision values. The
1312*0b57cec5SDimitry Andric ///    upper two elements are unused.
1313*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values.
1314*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1315*0b57cec5SDimitry Andric _mm_cvtps_pd(__m128 __a)
1316*0b57cec5SDimitry Andric {
1317*0b57cec5SDimitry Andric   return (__m128d) __builtin_convertvector(
1318*0b57cec5SDimitry Andric       __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
1319*0b57cec5SDimitry Andric }
1320*0b57cec5SDimitry Andric 
1321*0b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of
1322*0b57cec5SDimitry Andric ///    [4 x i32] into two double-precision floating-point values, returned in a
1323*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double].
1324*0b57cec5SDimitry Andric ///
1325*0b57cec5SDimitry Andric ///    The upper two elements of the input vector are unused.
1326*0b57cec5SDimitry Andric ///
1327*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1328*0b57cec5SDimitry Andric ///
1329*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.
1330*0b57cec5SDimitry Andric ///
1331*0b57cec5SDimitry Andric /// \param __a
1332*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [4 x i32]. The lower two integer elements are
1333*0b57cec5SDimitry Andric ///    converted to double-precision values.
1334*0b57cec5SDimitry Andric ///
1335*0b57cec5SDimitry Andric ///    The upper two elements are unused.
1336*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values.
1337*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1338*0b57cec5SDimitry Andric _mm_cvtepi32_pd(__m128i __a)
1339*0b57cec5SDimitry Andric {
1340*0b57cec5SDimitry Andric   return (__m128d) __builtin_convertvector(
1341*0b57cec5SDimitry Andric       __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
1342*0b57cec5SDimitry Andric }
1343*0b57cec5SDimitry Andric 
1344*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a
1345*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1346*0b57cec5SDimitry Andric ///    returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
1347*0b57cec5SDimitry Andric ///    64 bits of the result vector are set to zero.
1348*0b57cec5SDimitry Andric ///
1349*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1350*0b57cec5SDimitry Andric ///
1351*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.
1352*0b57cec5SDimitry Andric ///
1353*0b57cec5SDimitry Andric /// \param __a
1354*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
1355*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1356*0b57cec5SDimitry Andric ///    converted values. The upper 64 bits are set to zero.
1357*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1358*0b57cec5SDimitry Andric _mm_cvtpd_epi32(__m128d __a)
1359*0b57cec5SDimitry Andric {
1360*0b57cec5SDimitry Andric   return __builtin_ia32_cvtpd2dq((__v2df)__a);
1361*0b57cec5SDimitry Andric }
1362*0b57cec5SDimitry Andric 
1363*0b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double]
1364*0b57cec5SDimitry Andric ///    into a 32-bit signed integer value.
1365*0b57cec5SDimitry Andric ///
1366*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1367*0b57cec5SDimitry Andric ///
1368*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
1369*0b57cec5SDimitry Andric ///
1370*0b57cec5SDimitry Andric /// \param __a
1371*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1372*0b57cec5SDimitry Andric ///    conversion.
1373*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value.
1374*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1375*0b57cec5SDimitry Andric _mm_cvtsd_si32(__m128d __a)
1376*0b57cec5SDimitry Andric {
1377*0b57cec5SDimitry Andric   return __builtin_ia32_cvtsd2si((__v2df)__a);
1378*0b57cec5SDimitry Andric }
1379*0b57cec5SDimitry Andric 
1380*0b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a
1381*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double], in the second parameter, into a
1382*0b57cec5SDimitry Andric ///    single-precision floating-point value, returned in the lower 32 bits of a
1383*0b57cec5SDimitry Andric ///    128-bit vector of [4 x float]. The upper 96 bits of the result vector are
1384*0b57cec5SDimitry Andric ///    copied from the upper 96 bits of the first parameter.
1385*0b57cec5SDimitry Andric ///
1386*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1387*0b57cec5SDimitry Andric ///
1388*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.
1389*0b57cec5SDimitry Andric ///
1390*0b57cec5SDimitry Andric /// \param __a
1391*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are
1392*0b57cec5SDimitry Andric ///    copied to the upper 96 bits of the result.
1393*0b57cec5SDimitry Andric /// \param __b
1394*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower double-precision
1395*0b57cec5SDimitry Andric ///    floating-point element is used in the conversion.
1396*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
1397*0b57cec5SDimitry Andric ///    converted value from the second parameter. The upper 96 bits are copied
1398*0b57cec5SDimitry Andric ///    from the upper 96 bits of the first parameter.
1399*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
1400*0b57cec5SDimitry Andric _mm_cvtsd_ss(__m128 __a, __m128d __b)
1401*0b57cec5SDimitry Andric {
1402*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
1403*0b57cec5SDimitry Andric }
1404*0b57cec5SDimitry Andric 
1405*0b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into
1406*0b57cec5SDimitry Andric ///    a double-precision floating-point value, returned in the lower 64 bits of
1407*0b57cec5SDimitry Andric ///    a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1408*0b57cec5SDimitry Andric ///    are copied from the upper 64 bits of the first parameter.
1409*0b57cec5SDimitry Andric ///
1410*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1411*0b57cec5SDimitry Andric ///
1412*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
1413*0b57cec5SDimitry Andric ///
1414*0b57cec5SDimitry Andric /// \param __a
1415*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1416*0b57cec5SDimitry Andric ///    copied to the upper 64 bits of the result.
1417*0b57cec5SDimitry Andric /// \param __b
1418*0b57cec5SDimitry Andric ///    A 32-bit signed integer containing the value to be converted.
1419*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1420*0b57cec5SDimitry Andric ///    converted value from the second parameter. The upper 64 bits are copied
1421*0b57cec5SDimitry Andric ///    from the upper 64 bits of the first parameter.
1422*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1423*0b57cec5SDimitry Andric _mm_cvtsi32_sd(__m128d __a, int __b)
1424*0b57cec5SDimitry Andric {
1425*0b57cec5SDimitry Andric   __a[0] = __b;
1426*0b57cec5SDimitry Andric   return __a;
1427*0b57cec5SDimitry Andric }
1428*0b57cec5SDimitry Andric 
1429*0b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a
1430*0b57cec5SDimitry Andric ///    128-bit vector of [4 x float], in the second parameter, into a
1431*0b57cec5SDimitry Andric ///    double-precision floating-point value, returned in the lower 64 bits of
1432*0b57cec5SDimitry Andric ///    a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1433*0b57cec5SDimitry Andric ///    are copied from the upper 64 bits of the first parameter.
1434*0b57cec5SDimitry Andric ///
1435*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1436*0b57cec5SDimitry Andric ///
1437*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.
1438*0b57cec5SDimitry Andric ///
1439*0b57cec5SDimitry Andric /// \param __a
1440*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1441*0b57cec5SDimitry Andric ///    copied to the upper 64 bits of the result.
1442*0b57cec5SDimitry Andric /// \param __b
1443*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float]. The lower single-precision
1444*0b57cec5SDimitry Andric ///    floating-point element is used in the conversion.
1445*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1446*0b57cec5SDimitry Andric ///    converted value from the second parameter. The upper 64 bits are copied
1447*0b57cec5SDimitry Andric ///    from the upper 64 bits of the first parameter.
1448*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1449*0b57cec5SDimitry Andric _mm_cvtss_sd(__m128d __a, __m128 __b)
1450*0b57cec5SDimitry Andric {
1451*0b57cec5SDimitry Andric   __a[0] = __b[0];
1452*0b57cec5SDimitry Andric   return __a;
1453*0b57cec5SDimitry Andric }
1454*0b57cec5SDimitry Andric 
1455*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a
1456*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1457*0b57cec5SDimitry Andric ///    returned in the lower 64 bits of a 128-bit vector of [4 x i32].
1458*0b57cec5SDimitry Andric ///
1459*0b57cec5SDimitry Andric ///    If the result of either conversion is inexact, the result is truncated
1460*0b57cec5SDimitry Andric ///    (rounded towards zero) regardless of the current MXCSR setting. The upper
1461*0b57cec5SDimitry Andric ///    64 bits of the result vector are set to zero.
1462*0b57cec5SDimitry Andric ///
1463*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1464*0b57cec5SDimitry Andric ///
1465*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
1466*0b57cec5SDimitry Andric ///   instruction.
1467*0b57cec5SDimitry Andric ///
1468*0b57cec5SDimitry Andric /// \param __a
1469*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
1470*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1471*0b57cec5SDimitry Andric ///    converted values. The upper 64 bits are set to zero.
1472*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1473*0b57cec5SDimitry Andric _mm_cvttpd_epi32(__m128d __a)
1474*0b57cec5SDimitry Andric {
1475*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
1476*0b57cec5SDimitry Andric }
1477*0b57cec5SDimitry Andric 
1478*0b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit
1479*0b57cec5SDimitry Andric ///    signed integer value, truncating the result when it is inexact.
1480*0b57cec5SDimitry Andric ///
1481*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1482*0b57cec5SDimitry Andric ///
1483*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
1484*0b57cec5SDimitry Andric ///   instruction.
1485*0b57cec5SDimitry Andric ///
1486*0b57cec5SDimitry Andric /// \param __a
1487*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1488*0b57cec5SDimitry Andric ///    conversion.
1489*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value.
1490*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
1491*0b57cec5SDimitry Andric _mm_cvttsd_si32(__m128d __a)
1492*0b57cec5SDimitry Andric {
1493*0b57cec5SDimitry Andric   return __builtin_ia32_cvttsd2si((__v2df)__a);
1494*0b57cec5SDimitry Andric }
1495*0b57cec5SDimitry Andric 
1496*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a
1497*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1498*0b57cec5SDimitry Andric ///    returned in a 64-bit vector of [2 x i32].
1499*0b57cec5SDimitry Andric ///
1500*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1501*0b57cec5SDimitry Andric ///
1502*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.
1503*0b57cec5SDimitry Andric ///
1504*0b57cec5SDimitry Andric /// \param __a
1505*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
1506*0b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values.
1507*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1508*0b57cec5SDimitry Andric _mm_cvtpd_pi32(__m128d __a)
1509*0b57cec5SDimitry Andric {
1510*0b57cec5SDimitry Andric   return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
1511*0b57cec5SDimitry Andric }
1512*0b57cec5SDimitry Andric 
1513*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a
1514*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1515*0b57cec5SDimitry Andric ///    returned in a 64-bit vector of [2 x i32].
1516*0b57cec5SDimitry Andric ///
1517*0b57cec5SDimitry Andric ///    If the result of either conversion is inexact, the result is truncated
1518*0b57cec5SDimitry Andric ///    (rounded towards zero) regardless of the current MXCSR setting.
1519*0b57cec5SDimitry Andric ///
1520*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1521*0b57cec5SDimitry Andric ///
1522*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.
1523*0b57cec5SDimitry Andric ///
1524*0b57cec5SDimitry Andric /// \param __a
1525*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double].
1526*0b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values.
1527*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1528*0b57cec5SDimitry Andric _mm_cvttpd_pi32(__m128d __a)
1529*0b57cec5SDimitry Andric {
1530*0b57cec5SDimitry Andric   return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
1531*0b57cec5SDimitry Andric }
1532*0b57cec5SDimitry Andric 
1533*0b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of
1534*0b57cec5SDimitry Andric ///    [2 x i32] into two double-precision floating-point values, returned in a
1535*0b57cec5SDimitry Andric ///    128-bit vector of [2 x double].
1536*0b57cec5SDimitry Andric ///
1537*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1538*0b57cec5SDimitry Andric ///
1539*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.
1540*0b57cec5SDimitry Andric ///
1541*0b57cec5SDimitry Andric /// \param __a
1542*0b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32].
1543*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values.
1544*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX
1545*0b57cec5SDimitry Andric _mm_cvtpi32_pd(__m64 __a)
1546*0b57cec5SDimitry Andric {
1547*0b57cec5SDimitry Andric   return __builtin_ia32_cvtpi2pd((__v2si)__a);
1548*0b57cec5SDimitry Andric }
1549*0b57cec5SDimitry Andric 
1550*0b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as
1551*0b57cec5SDimitry Andric ///    a double-precision floating-point value.
1552*0b57cec5SDimitry Andric ///
1553*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1554*0b57cec5SDimitry Andric ///
1555*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
1556*0b57cec5SDimitry Andric ///
1557*0b57cec5SDimitry Andric /// \param __a
1558*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are returned.
1559*0b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64
1560*0b57cec5SDimitry Andric ///    bits of \a __a.
1561*0b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS
1562*0b57cec5SDimitry Andric _mm_cvtsd_f64(__m128d __a)
1563*0b57cec5SDimitry Andric {
1564*0b57cec5SDimitry Andric   return __a[0];
1565*0b57cec5SDimitry Andric }
1566*0b57cec5SDimitry Andric 
1567*0b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned
1568*0b57cec5SDimitry Andric ///    memory location.
1569*0b57cec5SDimitry Andric ///
1570*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1571*0b57cec5SDimitry Andric ///
1572*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
1573*0b57cec5SDimitry Andric ///
1574*0b57cec5SDimitry Andric /// \param __dp
1575*0b57cec5SDimitry Andric ///    A pointer to a 128-bit memory location. The address of the memory
1576*0b57cec5SDimitry Andric ///    location has to be 16-byte aligned.
1577*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values.
1578*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1579*0b57cec5SDimitry Andric _mm_load_pd(double const *__dp)
1580*0b57cec5SDimitry Andric {
1581*0b57cec5SDimitry Andric   return *(__m128d*)__dp;
1582*0b57cec5SDimitry Andric }
1583*0b57cec5SDimitry Andric 
1584*0b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory
1585*0b57cec5SDimitry Andric ///    location and duplicates it to both vector elements of a 128-bit vector of
1586*0b57cec5SDimitry Andric ///    [2 x double].
1587*0b57cec5SDimitry Andric ///
1588*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1589*0b57cec5SDimitry Andric ///
1590*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.
1591*0b57cec5SDimitry Andric ///
1592*0b57cec5SDimitry Andric /// \param __dp
1593*0b57cec5SDimitry Andric ///    A pointer to a memory location containing a double-precision value.
1594*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and
1595*0b57cec5SDimitry Andric ///    duplicated values.
1596*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1597*0b57cec5SDimitry Andric _mm_load1_pd(double const *__dp)
1598*0b57cec5SDimitry Andric {
1599*0b57cec5SDimitry Andric   struct __mm_load1_pd_struct {
1600*0b57cec5SDimitry Andric     double __u;
1601*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1602*0b57cec5SDimitry Andric   double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
1603*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __u, __u };
1604*0b57cec5SDimitry Andric }
1605*0b57cec5SDimitry Andric 
/* Alternate spelling of _mm_load1_pd; forwards its argument unchanged. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
1607*0b57cec5SDimitry Andric 
1608*0b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned
1609*0b57cec5SDimitry Andric ///    memory location into a 128-bit vector of [2 x double].
1610*0b57cec5SDimitry Andric ///
1611*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1612*0b57cec5SDimitry Andric ///
1613*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
1614*0b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined
1615*0b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
1616*0b57cec5SDimitry Andric ///
1617*0b57cec5SDimitry Andric /// \param __dp
1618*0b57cec5SDimitry Andric ///    A 16-byte aligned pointer to an array of double-precision values to be
1619*0b57cec5SDimitry Andric ///    loaded in reverse order.
1620*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded
1621*0b57cec5SDimitry Andric ///    values.
1622*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1623*0b57cec5SDimitry Andric _mm_loadr_pd(double const *__dp)
1624*0b57cec5SDimitry Andric {
1625*0b57cec5SDimitry Andric   __m128d __u = *(__m128d*)__dp;
1626*0b57cec5SDimitry Andric   return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
1627*0b57cec5SDimitry Andric }
1628*0b57cec5SDimitry Andric 
1629*0b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an
1630*0b57cec5SDimitry Andric ///    unaligned memory location.
1631*0b57cec5SDimitry Andric ///
1632*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1633*0b57cec5SDimitry Andric ///
1634*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
1635*0b57cec5SDimitry Andric ///
1636*0b57cec5SDimitry Andric /// \param __dp
1637*0b57cec5SDimitry Andric ///    A pointer to a 128-bit memory location. The address of the memory
1638*0b57cec5SDimitry Andric ///    location does not have to be aligned.
1639*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values.
1640*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1641*0b57cec5SDimitry Andric _mm_loadu_pd(double const *__dp)
1642*0b57cec5SDimitry Andric {
1643*0b57cec5SDimitry Andric   struct __loadu_pd {
1644*0b57cec5SDimitry Andric     __m128d_u __v;
1645*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1646*0b57cec5SDimitry Andric   return ((struct __loadu_pd*)__dp)->__v;
1647*0b57cec5SDimitry Andric }
1648*0b57cec5SDimitry Andric 
1649*0b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer
1650*0b57cec5SDimitry Andric ///    vector and clears the upper element.
1651*0b57cec5SDimitry Andric ///
1652*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1653*0b57cec5SDimitry Andric ///
1654*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
1655*0b57cec5SDimitry Andric ///
1656*0b57cec5SDimitry Andric /// \param __a
1657*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location. The address of the memory
1658*0b57cec5SDimitry Andric ///    location does not have to be aligned.
1659*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value.
1660*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1661*0b57cec5SDimitry Andric _mm_loadu_si64(void const *__a)
1662*0b57cec5SDimitry Andric {
1663*0b57cec5SDimitry Andric   struct __loadu_si64 {
1664*0b57cec5SDimitry Andric     long long __v;
1665*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1666*0b57cec5SDimitry Andric   long long __u = ((struct __loadu_si64*)__a)->__v;
1667*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v2di){__u, 0LL};
1668*0b57cec5SDimitry Andric }
1669*0b57cec5SDimitry Andric 
/// Loads a 32-bit integer value to the low element of a 128-bit integer
///    vector and clears the upper elements.
1672*0b57cec5SDimitry Andric ///
1673*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1674*0b57cec5SDimitry Andric ///
1675*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
1676*0b57cec5SDimitry Andric ///
1677*0b57cec5SDimitry Andric /// \param __a
1678*0b57cec5SDimitry Andric ///    A pointer to a 32-bit memory location. The address of the memory
1679*0b57cec5SDimitry Andric ///    location does not have to be aligned.
1680*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value.
1681*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1682*0b57cec5SDimitry Andric _mm_loadu_si32(void const *__a)
1683*0b57cec5SDimitry Andric {
1684*0b57cec5SDimitry Andric   struct __loadu_si32 {
1685*0b57cec5SDimitry Andric     int __v;
1686*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1687*0b57cec5SDimitry Andric   int __u = ((struct __loadu_si32*)__a)->__v;
1688*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
1689*0b57cec5SDimitry Andric }
1690*0b57cec5SDimitry Andric 
/// Loads a 16-bit integer value to the low element of a 128-bit integer
///    vector and clears the upper elements.
1693*0b57cec5SDimitry Andric ///
1694*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1695*0b57cec5SDimitry Andric ///
1696*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction.
1697*0b57cec5SDimitry Andric ///
1698*0b57cec5SDimitry Andric /// \param __a
1699*0b57cec5SDimitry Andric ///    A pointer to a 16-bit memory location. The address of the memory
1700*0b57cec5SDimitry Andric ///    location does not have to be aligned.
1701*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value.
1702*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1703*0b57cec5SDimitry Andric _mm_loadu_si16(void const *__a)
1704*0b57cec5SDimitry Andric {
1705*0b57cec5SDimitry Andric   struct __loadu_si16 {
1706*0b57cec5SDimitry Andric     short __v;
1707*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1708*0b57cec5SDimitry Andric   short __u = ((struct __loadu_si16*)__a)->__v;
1709*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
1710*0b57cec5SDimitry Andric }
1711*0b57cec5SDimitry Andric 
/// Loads a 64-bit double-precision value to the low element of a
///    128-bit vector of [2 x double] and clears the upper element.
1714*0b57cec5SDimitry Andric ///
1715*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1716*0b57cec5SDimitry Andric ///
1717*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
1718*0b57cec5SDimitry Andric ///
1719*0b57cec5SDimitry Andric /// \param __dp
1720*0b57cec5SDimitry Andric ///    A pointer to a memory location containing a double-precision value.
1721*0b57cec5SDimitry Andric ///    The address of the memory location does not have to be aligned.
1722*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value.
1723*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1724*0b57cec5SDimitry Andric _mm_load_sd(double const *__dp)
1725*0b57cec5SDimitry Andric {
1726*0b57cec5SDimitry Andric   struct __mm_load_sd_struct {
1727*0b57cec5SDimitry Andric     double __u;
1728*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1729*0b57cec5SDimitry Andric   double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
1730*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __u, 0 };
1731*0b57cec5SDimitry Andric }
1732*0b57cec5SDimitry Andric 
1733*0b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit
1734*0b57cec5SDimitry Andric ///    vector of [2 x double]. The low-order bits are copied from the low-order
1735*0b57cec5SDimitry Andric ///    bits of the first operand.
1736*0b57cec5SDimitry Andric ///
1737*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1738*0b57cec5SDimitry Andric ///
1739*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
1740*0b57cec5SDimitry Andric ///
1741*0b57cec5SDimitry Andric /// \param __a
1742*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
1743*0b57cec5SDimitry Andric ///    Bits [63:0] are written to bits [63:0] of the result.
1744*0b57cec5SDimitry Andric /// \param __dp
1745*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location containing a double-precision
1746*0b57cec5SDimitry Andric ///    floating-point value that is loaded. The loaded value is written to bits
1747*0b57cec5SDimitry Andric ///    [127:64] of the result. The address of the memory location does not have
1748*0b57cec5SDimitry Andric ///    to be aligned.
1749*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values.
1750*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1751*0b57cec5SDimitry Andric _mm_loadh_pd(__m128d __a, double const *__dp)
1752*0b57cec5SDimitry Andric {
1753*0b57cec5SDimitry Andric   struct __mm_loadh_pd_struct {
1754*0b57cec5SDimitry Andric     double __u;
1755*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1756*0b57cec5SDimitry Andric   double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
1757*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __a[0], __u };
1758*0b57cec5SDimitry Andric }
1759*0b57cec5SDimitry Andric 
1760*0b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit
1761*0b57cec5SDimitry Andric ///    vector of [2 x double]. The high-order bits are copied from the
1762*0b57cec5SDimitry Andric ///    high-order bits of the first operand.
1763*0b57cec5SDimitry Andric ///
1764*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1765*0b57cec5SDimitry Andric ///
1766*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
1767*0b57cec5SDimitry Andric ///
1768*0b57cec5SDimitry Andric /// \param __a
1769*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
1770*0b57cec5SDimitry Andric ///    Bits [127:64] are written to bits [127:64] of the result.
1771*0b57cec5SDimitry Andric /// \param __dp
1772*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location containing a double-precision
1773*0b57cec5SDimitry Andric ///    floating-point value that is loaded. The loaded value is written to bits
1774*0b57cec5SDimitry Andric ///    [63:0] of the result. The address of the memory location does not have to
1775*0b57cec5SDimitry Andric ///    be aligned.
1776*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values.
1777*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1778*0b57cec5SDimitry Andric _mm_loadl_pd(__m128d __a, double const *__dp)
1779*0b57cec5SDimitry Andric {
1780*0b57cec5SDimitry Andric   struct __mm_loadl_pd_struct {
1781*0b57cec5SDimitry Andric     double __u;
1782*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1783*0b57cec5SDimitry Andric   double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
1784*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __u, __a[1] };
1785*0b57cec5SDimitry Andric }
1786*0b57cec5SDimitry Andric 
1787*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with
1788*0b57cec5SDimitry Andric ///    unspecified content. This could be used as an argument to another
1789*0b57cec5SDimitry Andric ///    intrinsic function where the argument is required but the value is not
1790*0b57cec5SDimitry Andric ///    actually used.
1791*0b57cec5SDimitry Andric ///
1792*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1793*0b57cec5SDimitry Andric ///
1794*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
1795*0b57cec5SDimitry Andric ///
1796*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified
1797*0b57cec5SDimitry Andric ///    content.
1798*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1799*0b57cec5SDimitry Andric _mm_undefined_pd(void)
1800*0b57cec5SDimitry Andric {
1801*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_undef128();
1802*0b57cec5SDimitry Andric }
1803*0b57cec5SDimitry Andric 
1804*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1805*0b57cec5SDimitry Andric ///    64 bits of the vector are initialized with the specified double-precision
1806*0b57cec5SDimitry Andric ///    floating-point value. The upper 64 bits are set to zero.
1807*0b57cec5SDimitry Andric ///
1808*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1809*0b57cec5SDimitry Andric ///
1810*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
1811*0b57cec5SDimitry Andric ///
1812*0b57cec5SDimitry Andric /// \param __w
1813*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize the lower 64
1814*0b57cec5SDimitry Andric ///    bits of the result.
1815*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The
1816*0b57cec5SDimitry Andric ///    lower 64 bits contain the value of the parameter. The upper 64 bits are
1817*0b57cec5SDimitry Andric ///    set to zero.
1818*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1819*0b57cec5SDimitry Andric _mm_set_sd(double __w)
1820*0b57cec5SDimitry Andric {
1821*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __w, 0 };
1822*0b57cec5SDimitry Andric }
1823*0b57cec5SDimitry Andric 
1824*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each
1825*0b57cec5SDimitry Andric ///    of the two double-precision floating-point vector elements set to the
1826*0b57cec5SDimitry Andric ///    specified double-precision floating-point value.
1827*0b57cec5SDimitry Andric ///
1828*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1829*0b57cec5SDimitry Andric ///
1830*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
1831*0b57cec5SDimitry Andric ///
1832*0b57cec5SDimitry Andric /// \param __w
1833*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize each vector
1834*0b57cec5SDimitry Andric ///    element of the result.
1835*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double].
1836*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1837*0b57cec5SDimitry Andric _mm_set1_pd(double __w)
1838*0b57cec5SDimitry Andric {
1839*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __w, __w };
1840*0b57cec5SDimitry Andric }
1841*0b57cec5SDimitry Andric 
1842*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each
1843*0b57cec5SDimitry Andric ///    of the two double-precision floating-point vector elements set to the
1844*0b57cec5SDimitry Andric ///    specified double-precision floating-point value.
1845*0b57cec5SDimitry Andric ///
1846*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1847*0b57cec5SDimitry Andric ///
1848*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
1849*0b57cec5SDimitry Andric ///
1850*0b57cec5SDimitry Andric /// \param __w
1851*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize each vector
1852*0b57cec5SDimitry Andric ///    element of the result.
1853*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double].
1854*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1855*0b57cec5SDimitry Andric _mm_set_pd1(double __w)
1856*0b57cec5SDimitry Andric {
1857*0b57cec5SDimitry Andric   return _mm_set1_pd(__w);
1858*0b57cec5SDimitry Andric }
1859*0b57cec5SDimitry Andric 
1860*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]
1861*0b57cec5SDimitry Andric ///    initialized with the specified double-precision floating-point values.
1862*0b57cec5SDimitry Andric ///
1863*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1864*0b57cec5SDimitry Andric ///
1865*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
1866*0b57cec5SDimitry Andric ///
1867*0b57cec5SDimitry Andric /// \param __w
1868*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize the upper 64
1869*0b57cec5SDimitry Andric ///    bits of the result.
1870*0b57cec5SDimitry Andric /// \param __x
1871*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize the lower 64
1872*0b57cec5SDimitry Andric ///    bits of the result.
1873*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double].
1874*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1875*0b57cec5SDimitry Andric _mm_set_pd(double __w, double __x)
1876*0b57cec5SDimitry Andric {
1877*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __x, __w };
1878*0b57cec5SDimitry Andric }
1879*0b57cec5SDimitry Andric 
1880*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double],
1881*0b57cec5SDimitry Andric ///    initialized in reverse order with the specified double-precision
1882*0b57cec5SDimitry Andric ///    floating-point values.
1883*0b57cec5SDimitry Andric ///
1884*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1885*0b57cec5SDimitry Andric ///
1886*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
1887*0b57cec5SDimitry Andric ///
1888*0b57cec5SDimitry Andric /// \param __w
1889*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize the lower 64
1890*0b57cec5SDimitry Andric ///    bits of the result.
1891*0b57cec5SDimitry Andric /// \param __x
1892*0b57cec5SDimitry Andric ///    A double-precision floating-point value used to initialize the upper 64
1893*0b57cec5SDimitry Andric ///    bits of the result.
1894*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double].
1895*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1896*0b57cec5SDimitry Andric _mm_setr_pd(double __w, double __x)
1897*0b57cec5SDimitry Andric {
1898*0b57cec5SDimitry Andric   return __extension__ (__m128d){ __w, __x };
1899*0b57cec5SDimitry Andric }
1900*0b57cec5SDimitry Andric 
1901*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]
1902*0b57cec5SDimitry Andric ///    initialized to zero.
1903*0b57cec5SDimitry Andric ///
1904*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1905*0b57cec5SDimitry Andric ///
1906*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
1907*0b57cec5SDimitry Andric ///
1908*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with
1909*0b57cec5SDimitry Andric ///    both elements set to +0.0.
1910*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1911*0b57cec5SDimitry Andric _mm_setzero_pd(void)
1912*0b57cec5SDimitry Andric {
1913*0b57cec5SDimitry Andric   return __extension__ (__m128d){ 0, 0 };
1914*0b57cec5SDimitry Andric }
1915*0b57cec5SDimitry Andric 
1916*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1917*0b57cec5SDimitry Andric ///    64 bits are set to the lower 64 bits of the second parameter. The upper
1918*0b57cec5SDimitry Andric ///    64 bits are set to the upper 64 bits of the first parameter.
1919*0b57cec5SDimitry Andric ///
1920*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1921*0b57cec5SDimitry Andric ///
1922*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
1923*0b57cec5SDimitry Andric ///
1924*0b57cec5SDimitry Andric /// \param __a
1925*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The upper 64 bits are written to the
1926*0b57cec5SDimitry Andric ///    upper 64 bits of the result.
1927*0b57cec5SDimitry Andric /// \param __b
1928*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are written to the
1929*0b57cec5SDimitry Andric ///    lower 64 bits of the result.
1930*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values.
1931*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1932*0b57cec5SDimitry Andric _mm_move_sd(__m128d __a, __m128d __b)
1933*0b57cec5SDimitry Andric {
1934*0b57cec5SDimitry Andric   __a[0] = __b[0]; /* modifies the by-value copy of __a only */
1935*0b57cec5SDimitry Andric   return __a;
1936*0b57cec5SDimitry Andric }
1937*0b57cec5SDimitry Andric 
1938*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
1939*0b57cec5SDimitry Andric ///    memory location.
1940*0b57cec5SDimitry Andric ///
1941*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1942*0b57cec5SDimitry Andric ///
1943*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
1944*0b57cec5SDimitry Andric ///
1945*0b57cec5SDimitry Andric /// \param __dp
1946*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location. It does not have to be aligned.
1947*0b57cec5SDimitry Andric /// \param __a
1948*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] whose lower 64 bits are stored.
1949*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
1950*0b57cec5SDimitry Andric _mm_store_sd(double *__dp, __m128d __a)
1951*0b57cec5SDimitry Andric {
1952*0b57cec5SDimitry Andric   struct __mm_store_sd_struct {
1953*0b57cec5SDimitry Andric     double __u;
1954*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
1955*0b57cec5SDimitry Andric   ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
1956*0b57cec5SDimitry Andric }
1957*0b57cec5SDimitry Andric 
1958*0b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of
1959*0b57cec5SDimitry Andric ///    [2 x double] to a memory location.
1960*0b57cec5SDimitry Andric ///
1961*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1962*0b57cec5SDimitry Andric ///
1963*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPS </c> instruction.
1964*0b57cec5SDimitry Andric ///
1965*0b57cec5SDimitry Andric /// \param __dp
1966*0b57cec5SDimitry Andric ///    A pointer to a 16-byte aligned memory location that can store two
1967*0b57cec5SDimitry Andric ///    double-precision values.
1968*0b57cec5SDimitry Andric /// \param __a
1969*0b57cec5SDimitry Andric ///    A packed 128-bit vector of [2 x double] containing the values to be
1970*0b57cec5SDimitry Andric ///    moved.
1971*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
1972*0b57cec5SDimitry Andric _mm_store_pd(double *__dp, __m128d __a)
1973*0b57cec5SDimitry Andric {
1974*0b57cec5SDimitry Andric   *(__m128d*)__dp = __a;
1975*0b57cec5SDimitry Andric }
1976*0b57cec5SDimitry Andric 
1977*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1978*0b57cec5SDimitry Andric ///    the upper and lower 64 bits of a memory location.
1979*0b57cec5SDimitry Andric ///
1980*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1981*0b57cec5SDimitry Andric ///
1982*0b57cec5SDimitry Andric /// This intrinsic corresponds to the
1983*0b57cec5SDimitry Andric ///   <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
1984*0b57cec5SDimitry Andric ///
1985*0b57cec5SDimitry Andric /// \param __dp
1986*0b57cec5SDimitry Andric ///    A pointer to a 16-byte aligned memory location that can store two
1987*0b57cec5SDimitry Andric ///    double-precision values.
1988*0b57cec5SDimitry Andric /// \param __a
1989*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
1990*0b57cec5SDimitry Andric ///    of the values in \a __dp.
1991*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
1992*0b57cec5SDimitry Andric _mm_store1_pd(double *__dp, __m128d __a)
1993*0b57cec5SDimitry Andric {
1994*0b57cec5SDimitry Andric   __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
1995*0b57cec5SDimitry Andric   _mm_store_pd(__dp, __a);
1996*0b57cec5SDimitry Andric }
1997*0b57cec5SDimitry Andric 
1998*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1999*0b57cec5SDimitry Andric ///    the upper and lower 64 bits of a memory location (see _mm_store1_pd).
2000*0b57cec5SDimitry Andric ///
2001*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2002*0b57cec5SDimitry Andric ///
2003*0b57cec5SDimitry Andric /// This intrinsic corresponds to the
2004*0b57cec5SDimitry Andric ///   <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
2005*0b57cec5SDimitry Andric ///
2006*0b57cec5SDimitry Andric /// \param __dp
2007*0b57cec5SDimitry Andric ///    A pointer to a 16-byte aligned memory location that can store two
2008*0b57cec5SDimitry Andric ///    double-precision values.
2009*0b57cec5SDimitry Andric /// \param __a
2010*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
2011*0b57cec5SDimitry Andric ///    of the values in \a __dp.
2012*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2013*0b57cec5SDimitry Andric _mm_store_pd1(double *__dp, __m128d __a)
2014*0b57cec5SDimitry Andric {
2015*0b57cec5SDimitry Andric   _mm_store1_pd(__dp, __a);
2016*0b57cec5SDimitry Andric }
2017*0b57cec5SDimitry Andric 
2018*0b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory
2019*0b57cec5SDimitry Andric ///    location.
2020*0b57cec5SDimitry Andric ///
2021*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2022*0b57cec5SDimitry Andric ///
2023*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
2024*0b57cec5SDimitry Andric ///
2025*0b57cec5SDimitry Andric /// \param __dp
2026*0b57cec5SDimitry Andric ///    A pointer to a 128-bit memory location. The address of the memory
2027*0b57cec5SDimitry Andric ///    location does not have to be aligned.
2028*0b57cec5SDimitry Andric /// \param __a
2029*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the values to be stored.
2030*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2031*0b57cec5SDimitry Andric _mm_storeu_pd(double *__dp, __m128d __a)
2032*0b57cec5SDimitry Andric {
2033*0b57cec5SDimitry Andric   struct __storeu_pd {
2034*0b57cec5SDimitry Andric     __m128d_u __v; /* 1-byte-aligned variant of __m128d */
2035*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
2036*0b57cec5SDimitry Andric   ((struct __storeu_pd*)__dp)->__v = __a;
2037*0b57cec5SDimitry Andric }
2038*0b57cec5SDimitry Andric 
2039*0b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit
2040*0b57cec5SDimitry Andric ///    vector of [2 x double] to a 16-byte aligned memory location.
2041*0b57cec5SDimitry Andric ///
2042*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2043*0b57cec5SDimitry Andric ///
2044*0b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a
2045*0b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction.
2046*0b57cec5SDimitry Andric ///
2047*0b57cec5SDimitry Andric /// \param __dp
2048*0b57cec5SDimitry Andric ///    A pointer to a 16-byte aligned memory location that can store two
2049*0b57cec5SDimitry Andric ///    double-precision values.
2050*0b57cec5SDimitry Andric /// \param __a
2051*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the values to be reversed and
2052*0b57cec5SDimitry Andric ///    stored.
2053*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2054*0b57cec5SDimitry Andric _mm_storer_pd(double *__dp, __m128d __a)
2055*0b57cec5SDimitry Andric {
2056*0b57cec5SDimitry Andric   __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); /* reverse */
2057*0b57cec5SDimitry Andric   *(__m128d *)__dp = __a;
2058*0b57cec5SDimitry Andric }
2059*0b57cec5SDimitry Andric 
2060*0b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
2061*0b57cec5SDimitry Andric ///    memory location.
2062*0b57cec5SDimitry Andric ///
2063*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2064*0b57cec5SDimitry Andric ///
2065*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
2066*0b57cec5SDimitry Andric ///
2067*0b57cec5SDimitry Andric /// \param __dp
2068*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location. It does not have to be aligned.
2069*0b57cec5SDimitry Andric /// \param __a
2070*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] whose upper 64 bits are stored.
2071*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2072*0b57cec5SDimitry Andric _mm_storeh_pd(double *__dp, __m128d __a)
2073*0b57cec5SDimitry Andric {
2074*0b57cec5SDimitry Andric   struct __mm_storeh_pd_struct {
2075*0b57cec5SDimitry Andric     double __u;
2076*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
2077*0b57cec5SDimitry Andric   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
2078*0b57cec5SDimitry Andric }
2079*0b57cec5SDimitry Andric 
2080*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
2081*0b57cec5SDimitry Andric ///    memory location.
2082*0b57cec5SDimitry Andric ///
2083*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2084*0b57cec5SDimitry Andric ///
2085*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
2086*0b57cec5SDimitry Andric ///
2087*0b57cec5SDimitry Andric /// \param __dp
2088*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location. It does not have to be aligned.
2089*0b57cec5SDimitry Andric /// \param __a
2090*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] whose lower 64 bits are stored.
2091*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2092*0b57cec5SDimitry Andric _mm_storel_pd(double *__dp, __m128d __a)
2093*0b57cec5SDimitry Andric {
2094*0b57cec5SDimitry Andric   struct __mm_storel_pd_struct {
2095*0b57cec5SDimitry Andric     double __u;
2096*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
2097*0b57cec5SDimitry Andric   ((struct __mm_storel_pd_struct*)__dp)->__u = __a[0];
2098*0b57cec5SDimitry Andric }
2099*0b57cec5SDimitry Andric 
2100*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8],
2101*0b57cec5SDimitry Andric ///    saving the lower 8 bits of each sum in the corresponding element of a
2102*0b57cec5SDimitry Andric ///    128-bit result vector of [16 x i8].
2103*0b57cec5SDimitry Andric ///
2104*0b57cec5SDimitry Andric ///    The integer elements can be signed or unsigned; overflow wraps around.
2105*0b57cec5SDimitry Andric ///
2106*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2107*0b57cec5SDimitry Andric ///
2108*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.
2109*0b57cec5SDimitry Andric ///
2110*0b57cec5SDimitry Andric /// \param __a
2111*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8].
2112*0b57cec5SDimitry Andric /// \param __b
2113*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8].
2114*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both
2115*0b57cec5SDimitry Andric ///    parameters.
2116*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2117*0b57cec5SDimitry Andric _mm_add_epi8(__m128i __a, __m128i __b)
2118*0b57cec5SDimitry Andric {
2119*0b57cec5SDimitry Andric   return (__m128i)((__v16qu)__a + (__v16qu)__b);
2120*0b57cec5SDimitry Andric }
2121*0b57cec5SDimitry Andric 
2122*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16],
2123*0b57cec5SDimitry Andric ///    saving the lower 16 bits of each sum in the corresponding element of a
2124*0b57cec5SDimitry Andric ///    128-bit result vector of [8 x i16].
2125*0b57cec5SDimitry Andric ///
2126*0b57cec5SDimitry Andric ///    The integer elements can be signed or unsigned; overflow wraps around.
2127*0b57cec5SDimitry Andric ///
2128*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2129*0b57cec5SDimitry Andric ///
2130*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.
2131*0b57cec5SDimitry Andric ///
2132*0b57cec5SDimitry Andric /// \param __a
2133*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16].
2134*0b57cec5SDimitry Andric /// \param __b
2135*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16].
2136*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both
2137*0b57cec5SDimitry Andric ///    parameters.
2138*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2139*0b57cec5SDimitry Andric _mm_add_epi16(__m128i __a, __m128i __b)
2140*0b57cec5SDimitry Andric {
2141*0b57cec5SDimitry Andric   return (__m128i)((__v8hu)__a + (__v8hu)__b);
2142*0b57cec5SDimitry Andric }
2143*0b57cec5SDimitry Andric 
2144*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32],
2145*0b57cec5SDimitry Andric ///    saving the lower 32 bits of each sum in the corresponding element of a
2146*0b57cec5SDimitry Andric ///    128-bit result vector of [4 x i32].
2147*0b57cec5SDimitry Andric ///
2148*0b57cec5SDimitry Andric ///    The integer elements can be signed or unsigned; overflow wraps around.
2149*0b57cec5SDimitry Andric ///
2150*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2151*0b57cec5SDimitry Andric ///
2152*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.
2153*0b57cec5SDimitry Andric ///
2154*0b57cec5SDimitry Andric /// \param __a
2155*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32].
2156*0b57cec5SDimitry Andric /// \param __b
2157*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32].
2158*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both
2159*0b57cec5SDimitry Andric ///    parameters.
2160*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2161*0b57cec5SDimitry Andric _mm_add_epi32(__m128i __a, __m128i __b)
2162*0b57cec5SDimitry Andric {
2163*0b57cec5SDimitry Andric   return (__m128i)((__v4su)__a + (__v4su)__b);
2164*0b57cec5SDimitry Andric }
2165*0b57cec5SDimitry Andric 
2166*0b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the
2167*0b57cec5SDimitry Andric ///    lower 64 bits of the sum.
2168*0b57cec5SDimitry Andric ///
2169*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2170*0b57cec5SDimitry Andric ///
2171*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction.
2172*0b57cec5SDimitry Andric ///
2173*0b57cec5SDimitry Andric /// \param __a
2174*0b57cec5SDimitry Andric ///    A 64-bit integer, passed as an __m64.
2175*0b57cec5SDimitry Andric /// \param __b
2176*0b57cec5SDimitry Andric ///    A 64-bit integer, passed as an __m64.
2177*0b57cec5SDimitry Andric /// \returns An __m64 holding the 64-bit integer sum of both parameters.
2178*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2179*0b57cec5SDimitry Andric _mm_add_si64(__m64 __a, __m64 __b)
2180*0b57cec5SDimitry Andric {
2181*0b57cec5SDimitry Andric   return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
2182*0b57cec5SDimitry Andric }
2183*0b57cec5SDimitry Andric 
2184*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
2185*0b57cec5SDimitry Andric ///    saving the lower 64 bits of each sum in the corresponding element of a
2186*0b57cec5SDimitry Andric ///    128-bit result vector of [2 x i64].
2187*0b57cec5SDimitry Andric ///
2188*0b57cec5SDimitry Andric ///    The integer elements can be signed or unsigned; overflow wraps around.
2189*0b57cec5SDimitry Andric ///
2190*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2191*0b57cec5SDimitry Andric ///
2192*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.
2193*0b57cec5SDimitry Andric ///
2194*0b57cec5SDimitry Andric /// \param __a
2195*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64].
2196*0b57cec5SDimitry Andric /// \param __b
2197*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64].
2198*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both
2199*0b57cec5SDimitry Andric ///    parameters.
2200*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2201*0b57cec5SDimitry Andric _mm_add_epi64(__m128i __a, __m128i __b)
2202*0b57cec5SDimitry Andric {
2203*0b57cec5SDimitry Andric   return (__m128i)((__v2du)__a + (__v2du)__b);
2204*0b57cec5SDimitry Andric }
2205*0b57cec5SDimitry Andric 
2206*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit
2207*0b57cec5SDimitry Andric ///    signed [16 x i8] vectors, saving each sum in the corresponding element of
2208*0b57cec5SDimitry Andric ///    a 128-bit result vector of [16 x i8]. Sums greater than 0x7F (127) are
2209*0b57cec5SDimitry Andric ///    saturated to 0x7F. Sums less than 0x80 (-128) are saturated to 0x80.
2210*0b57cec5SDimitry Andric ///
2211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2212*0b57cec5SDimitry Andric ///
2213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.
2214*0b57cec5SDimitry Andric ///
2215*0b57cec5SDimitry Andric /// \param __a
2216*0b57cec5SDimitry Andric ///    A 128-bit signed [16 x i8] vector.
2217*0b57cec5SDimitry Andric /// \param __b
2218*0b57cec5SDimitry Andric ///    A 128-bit signed [16 x i8] vector.
2219*0b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
2220*0b57cec5SDimitry Andric ///    both parameters.
2221*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2222*0b57cec5SDimitry Andric _mm_adds_epi8(__m128i __a, __m128i __b)
2223*0b57cec5SDimitry Andric {
2224*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
2225*0b57cec5SDimitry Andric }
2226*0b57cec5SDimitry Andric 
2227*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit
2228*0b57cec5SDimitry Andric ///    signed [8 x i16] vectors, saving each sum in the corresponding element of
2229*0b57cec5SDimitry Andric ///    a 128-bit result vector of [8 x i16]. Sums greater than 0x7FFF (32767)
2230*0b57cec5SDimitry Andric ///    are saturated to 0x7FFF. Sums less than 0x8000 (-32768) are saturated
2231*0b57cec5SDimitry Andric ///    to 0x8000.
2232*0b57cec5SDimitry Andric ///
2233*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2234*0b57cec5SDimitry Andric ///
2235*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.
2236*0b57cec5SDimitry Andric ///
2237*0b57cec5SDimitry Andric /// \param __a
2238*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2239*0b57cec5SDimitry Andric /// \param __b
2240*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2241*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
2242*0b57cec5SDimitry Andric ///    both parameters.
2243*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2244*0b57cec5SDimitry Andric _mm_adds_epi16(__m128i __a, __m128i __b)
2245*0b57cec5SDimitry Andric {
2246*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
2247*0b57cec5SDimitry Andric }
2248*0b57cec5SDimitry Andric 
2249*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit
2250*0b57cec5SDimitry Andric ///    unsigned [16 x i8] vectors, saving each sum in the corresponding element
2251*0b57cec5SDimitry Andric ///    of a 128-bit result vector of [16 x i8]. Sums greater than 0xFF (255)
2252*0b57cec5SDimitry Andric ///    are saturated to 0xFF.
2253*0b57cec5SDimitry Andric ///
2254*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2255*0b57cec5SDimitry Andric ///
2256*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
2257*0b57cec5SDimitry Andric ///
2258*0b57cec5SDimitry Andric /// \param __a
2259*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2260*0b57cec5SDimitry Andric /// \param __b
2261*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2262*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
2263*0b57cec5SDimitry Andric ///    of both parameters.
2264*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2265*0b57cec5SDimitry Andric _mm_adds_epu8(__m128i __a, __m128i __b)
2266*0b57cec5SDimitry Andric {
2267*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
2268*0b57cec5SDimitry Andric }
2269*0b57cec5SDimitry Andric 
2270*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit
2271*0b57cec5SDimitry Andric ///    unsigned [8 x i16] vectors, saving each sum in the corresponding element
2272*0b57cec5SDimitry Andric ///    of a 128-bit result vector of [8 x i16]. Sums greater than 0xFFFF
2273*0b57cec5SDimitry Andric ///    (65535) are saturated to 0xFFFF.
2274*0b57cec5SDimitry Andric ///
2275*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2276*0b57cec5SDimitry Andric ///
2277*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSW / PADDUSW </c> instruction.
2278*0b57cec5SDimitry Andric ///
2279*0b57cec5SDimitry Andric /// \param __a
2280*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2281*0b57cec5SDimitry Andric /// \param __b
2282*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2283*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
2284*0b57cec5SDimitry Andric ///    of both parameters.
2285*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2286*0b57cec5SDimitry Andric _mm_adds_epu16(__m128i __a, __m128i __b)
2287*0b57cec5SDimitry Andric {
2288*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
2289*0b57cec5SDimitry Andric }
2290*0b57cec5SDimitry Andric 
2291*0b57cec5SDimitry Andric /// Computes the rounded averages of corresponding elements of two
2292*0b57cec5SDimitry Andric ///    128-bit unsigned [16 x i8] vectors, saving each result in the
2293*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [16 x i8].
2294*0b57cec5SDimitry Andric ///
2295*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2296*0b57cec5SDimitry Andric ///
2297*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.
2298*0b57cec5SDimitry Andric ///
2299*0b57cec5SDimitry Andric /// \param __a
2300*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2301*0b57cec5SDimitry Andric /// \param __b
2302*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2303*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
2304*0b57cec5SDimitry Andric ///    averages of both parameters.
2305*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2306*0b57cec5SDimitry Andric _mm_avg_epu8(__m128i __a, __m128i __b)
2307*0b57cec5SDimitry Andric {
2308*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
2309*0b57cec5SDimitry Andric }
2310*0b57cec5SDimitry Andric 
2311*0b57cec5SDimitry Andric /// Computes the rounded averages of corresponding elements of two
2312*0b57cec5SDimitry Andric ///    128-bit unsigned [8 x i16] vectors, saving each result in the
2313*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [8 x i16].
2314*0b57cec5SDimitry Andric ///
2315*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2316*0b57cec5SDimitry Andric ///
2317*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.
2318*0b57cec5SDimitry Andric ///
2319*0b57cec5SDimitry Andric /// \param __a
2320*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2321*0b57cec5SDimitry Andric /// \param __b
2322*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2323*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
2324*0b57cec5SDimitry Andric ///    averages of both parameters.
2325*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2326*0b57cec5SDimitry Andric _mm_avg_epu16(__m128i __a, __m128i __b)
2327*0b57cec5SDimitry Andric {
2328*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
2329*0b57cec5SDimitry Andric }
2330*0b57cec5SDimitry Andric 
2331*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
2332*0b57cec5SDimitry Andric ///    vectors, producing eight intermediate 32-bit signed integer products, and
2333*0b57cec5SDimitry Andric ///    adds each pair of adjacent products to form the four elements of a
2334*0b57cec5SDimitry Andric ///    128-bit signed [4 x i32] vector.
2335*0b57cec5SDimitry Andric ///
2336*0b57cec5SDimitry Andric ///    For example, bits [15:0] of both parameters are multiplied producing a
2337*0b57cec5SDimitry Andric ///    32-bit product, bits [31:16] of both parameters are multiplied producing
2338*0b57cec5SDimitry Andric ///    a 32-bit product, and the sum of those two products becomes bits [31:0]
2339*0b57cec5SDimitry Andric ///    of the result.
2340*0b57cec5SDimitry Andric ///
2341*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2342*0b57cec5SDimitry Andric ///
2343*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.
2344*0b57cec5SDimitry Andric ///
2345*0b57cec5SDimitry Andric /// \param __a
2346*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2347*0b57cec5SDimitry Andric /// \param __b
2348*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2349*0b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products
2350*0b57cec5SDimitry Andric ///    of both parameters.
2351*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2352*0b57cec5SDimitry Andric _mm_madd_epi16(__m128i __a, __m128i __b)
2353*0b57cec5SDimitry Andric {
2354*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
2355*0b57cec5SDimitry Andric }
2356*0b57cec5SDimitry Andric 
2357*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16]
2358*0b57cec5SDimitry Andric ///    vectors, saving the greater value from each comparison in the
2359*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [8 x i16].
2360*0b57cec5SDimitry Andric ///
2361*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2362*0b57cec5SDimitry Andric ///
2363*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.
2364*0b57cec5SDimitry Andric ///
2365*0b57cec5SDimitry Andric /// \param __a
2366*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2367*0b57cec5SDimitry Andric /// \param __b
2368*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2369*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater of each
2370*0b57cec5SDimitry Andric ///    pair of corresponding elements.
2371*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2372*0b57cec5SDimitry Andric _mm_max_epi16(__m128i __a, __m128i __b)
2373*0b57cec5SDimitry Andric {
2374*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
2375*0b57cec5SDimitry Andric }
2376*0b57cec5SDimitry Andric 
2377*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2378*0b57cec5SDimitry Andric ///    vectors, saving the greater value from each comparison in the
2379*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [16 x i8].
2380*0b57cec5SDimitry Andric ///
2381*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2382*0b57cec5SDimitry Andric ///
2383*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.
2384*0b57cec5SDimitry Andric ///
2385*0b57cec5SDimitry Andric /// \param __a
2386*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2387*0b57cec5SDimitry Andric /// \param __b
2388*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2389*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater of
2390*0b57cec5SDimitry Andric ///    each pair of corresponding elements.
2391*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2392*0b57cec5SDimitry Andric _mm_max_epu8(__m128i __a, __m128i __b)
2393*0b57cec5SDimitry Andric {
2394*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
2395*0b57cec5SDimitry Andric }
2396*0b57cec5SDimitry Andric 
2397*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16]
2398*0b57cec5SDimitry Andric ///    vectors, saving the smaller value from each comparison in the
2399*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [8 x i16].
2400*0b57cec5SDimitry Andric ///
2401*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2402*0b57cec5SDimitry Andric ///
2403*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.
2404*0b57cec5SDimitry Andric ///
2405*0b57cec5SDimitry Andric /// \param __a
2406*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2407*0b57cec5SDimitry Andric /// \param __b
2408*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2409*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller of each
2410*0b57cec5SDimitry Andric ///    pair of corresponding elements.
2411*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2412*0b57cec5SDimitry Andric _mm_min_epi16(__m128i __a, __m128i __b)
2413*0b57cec5SDimitry Andric {
2414*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
2415*0b57cec5SDimitry Andric }
2416*0b57cec5SDimitry Andric 
2417*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2418*0b57cec5SDimitry Andric ///    vectors, saving the smaller value from each comparison in the
2419*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit result vector of [16 x i8].
2420*0b57cec5SDimitry Andric ///
2421*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2422*0b57cec5SDimitry Andric ///
2423*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.
2424*0b57cec5SDimitry Andric ///
2425*0b57cec5SDimitry Andric /// \param __a
2426*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2427*0b57cec5SDimitry Andric /// \param __b
2428*0b57cec5SDimitry Andric ///    A 128-bit unsigned [16 x i8] vector.
2429*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
2430*0b57cec5SDimitry Andric ///    each comparison.
2431*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2432*0b57cec5SDimitry Andric _mm_min_epu8(__m128i __a, __m128i __b)
2433*0b57cec5SDimitry Andric {
2434*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
2435*0b57cec5SDimitry Andric }
2436*0b57cec5SDimitry Andric 
2437*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16]
2438*0b57cec5SDimitry Andric ///    vectors, saving the upper 16 bits of each 32-bit product in the
2439*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit signed [8 x i16] result vector.
2440*0b57cec5SDimitry Andric ///
2441*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2442*0b57cec5SDimitry Andric ///
2443*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.
2444*0b57cec5SDimitry Andric ///
2445*0b57cec5SDimitry Andric /// \param __a
2446*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2447*0b57cec5SDimitry Andric /// \param __b
2448*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2449*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
2450*0b57cec5SDimitry Andric ///    each of the eight 32-bit products.
2451*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2452*0b57cec5SDimitry Andric _mm_mulhi_epi16(__m128i __a, __m128i __b)
2453*0b57cec5SDimitry Andric {
2454*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
2455*0b57cec5SDimitry Andric }
2456*0b57cec5SDimitry Andric 
2457*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16]
2458*0b57cec5SDimitry Andric ///    vectors, saving the upper 16 bits of each 32-bit product in the
2459*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit unsigned [8 x i16] result vector.
2460*0b57cec5SDimitry Andric ///
2461*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2462*0b57cec5SDimitry Andric ///
2463*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.
2464*0b57cec5SDimitry Andric ///
2465*0b57cec5SDimitry Andric /// \param __a
2466*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2467*0b57cec5SDimitry Andric /// \param __b
2468*0b57cec5SDimitry Andric ///    A 128-bit unsigned [8 x i16] vector.
2469*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
2470*0b57cec5SDimitry Andric ///    of each of the eight 32-bit products.
2471*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2472*0b57cec5SDimitry Andric _mm_mulhi_epu16(__m128i __a, __m128i __b)
2473*0b57cec5SDimitry Andric {
2474*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
2475*0b57cec5SDimitry Andric }
2476*0b57cec5SDimitry Andric 
2477*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16]
2478*0b57cec5SDimitry Andric ///    vectors, saving the lower 16 bits of each 32-bit product in the
2479*0b57cec5SDimitry Andric ///    corresponding element of a 128-bit signed [8 x i16] result vector.
2480*0b57cec5SDimitry Andric ///
2481*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2482*0b57cec5SDimitry Andric ///
2483*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.
2484*0b57cec5SDimitry Andric ///
2485*0b57cec5SDimitry Andric /// \param __a
2486*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2487*0b57cec5SDimitry Andric /// \param __b
2488*0b57cec5SDimitry Andric ///    A 128-bit signed [8 x i16] vector.
2489*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
2490*0b57cec5SDimitry Andric ///    each of the eight 32-bit products.
2491*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2492*0b57cec5SDimitry Andric _mm_mullo_epi16(__m128i __a, __m128i __b)
2493*0b57cec5SDimitry Andric {
2494*0b57cec5SDimitry Andric   return (__m128i)((__v8hu)__a * (__v8hu)__b);
2495*0b57cec5SDimitry Andric }
2496*0b57cec5SDimitry Andric 
2497*0b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits
2498*0b57cec5SDimitry Andric ///    of the two 64-bit integer vectors and returns the 64-bit unsigned
2499*0b57cec5SDimitry Andric ///    product.
2500*0b57cec5SDimitry Andric ///
2501*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2502*0b57cec5SDimitry Andric ///
2503*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.
2504*0b57cec5SDimitry Andric ///
2505*0b57cec5SDimitry Andric /// \param __a
2506*0b57cec5SDimitry Andric ///    A 64-bit integer containing one of the source operands.
2507*0b57cec5SDimitry Andric /// \param __b
2508*0b57cec5SDimitry Andric ///    A 64-bit integer containing one of the source operands.
2509*0b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands.
2510*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2511*0b57cec5SDimitry Andric _mm_mul_su32(__m64 __a, __m64 __b)
2512*0b57cec5SDimitry Andric {
2513*0b57cec5SDimitry Andric   return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
2514*0b57cec5SDimitry Andric }
2515*0b57cec5SDimitry Andric 
2516*0b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower
2517*0b57cec5SDimitry Andric ///    bits of the corresponding elements of two [2 x i64] vectors, and returns
2518*0b57cec5SDimitry Andric ///    the 64-bit products in the corresponding elements of a [2 x i64] vector.
2519*0b57cec5SDimitry Andric ///
2520*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2521*0b57cec5SDimitry Andric ///
2522*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.
2523*0b57cec5SDimitry Andric ///
2524*0b57cec5SDimitry Andric /// \param __a
2525*0b57cec5SDimitry Andric ///    A [2 x i64] vector containing one of the source operands.
2526*0b57cec5SDimitry Andric /// \param __b
2527*0b57cec5SDimitry Andric ///    A [2 x i64] vector containing one of the source operands.
2528*0b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands.
2529*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2530*0b57cec5SDimitry Andric _mm_mul_epu32(__m128i __a, __m128i __b)
2531*0b57cec5SDimitry Andric {
2532*0b57cec5SDimitry Andric   return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
2533*0b57cec5SDimitry Andric }
2534*0b57cec5SDimitry Andric 
2535*0b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer
2536*0b57cec5SDimitry Andric ///    values in two 128-bit vectors. Sums the first 8 absolute differences, and
2537*0b57cec5SDimitry Andric ///    separately sums the second 8 absolute differences. Packs these two
2538*0b57cec5SDimitry Andric ///    unsigned 16-bit integer sums into the upper and lower elements of a
2539*0b57cec5SDimitry Andric ///    [2 x i64] vector.
2540*0b57cec5SDimitry Andric ///
2541*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2542*0b57cec5SDimitry Andric ///
2543*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.
2544*0b57cec5SDimitry Andric ///
2545*0b57cec5SDimitry Andric /// \param __a
2546*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2547*0b57cec5SDimitry Andric /// \param __b
2548*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2549*0b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute
2550*0b57cec5SDimitry Andric ///    differences between both operands.
2551*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2552*0b57cec5SDimitry Andric _mm_sad_epu8(__m128i __a, __m128i __b)
2553*0b57cec5SDimitry Andric {
2554*0b57cec5SDimitry Andric   return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
2555*0b57cec5SDimitry Andric }
2556*0b57cec5SDimitry Andric 
2557*0b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands.
2558*0b57cec5SDimitry Andric ///
2559*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2560*0b57cec5SDimitry Andric ///
2561*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.
2562*0b57cec5SDimitry Andric ///
2563*0b57cec5SDimitry Andric /// \param __a
2564*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2565*0b57cec5SDimitry Andric /// \param __b
2566*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2567*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2568*0b57cec5SDimitry Andric ///    in the operands.
2569*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2570*0b57cec5SDimitry Andric _mm_sub_epi8(__m128i __a, __m128i __b)
2571*0b57cec5SDimitry Andric {
2572*0b57cec5SDimitry Andric   return (__m128i)((__v16qu)__a - (__v16qu)__b);
2573*0b57cec5SDimitry Andric }
2574*0b57cec5SDimitry Andric 
2575*0b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands.
2576*0b57cec5SDimitry Andric ///
2577*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2578*0b57cec5SDimitry Andric ///
2579*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.
2580*0b57cec5SDimitry Andric ///
2581*0b57cec5SDimitry Andric /// \param __a
2582*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2583*0b57cec5SDimitry Andric /// \param __b
2584*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2585*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2586*0b57cec5SDimitry Andric ///    in the operands.
2587*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2588*0b57cec5SDimitry Andric _mm_sub_epi16(__m128i __a, __m128i __b)
2589*0b57cec5SDimitry Andric {
2590*0b57cec5SDimitry Andric   return (__m128i)((__v8hu)__a - (__v8hu)__b);
2591*0b57cec5SDimitry Andric }
2592*0b57cec5SDimitry Andric 
2593*0b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands.
2594*0b57cec5SDimitry Andric ///
2595*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2596*0b57cec5SDimitry Andric ///
2597*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.
2598*0b57cec5SDimitry Andric ///
2599*0b57cec5SDimitry Andric /// \param __a
2600*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2601*0b57cec5SDimitry Andric /// \param __b
2602*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2603*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2604*0b57cec5SDimitry Andric ///    in the operands.
2605*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2606*0b57cec5SDimitry Andric _mm_sub_epi32(__m128i __a, __m128i __b)
2607*0b57cec5SDimitry Andric {
2608*0b57cec5SDimitry Andric   return (__m128i)((__v4su)__a - (__v4su)__b);
2609*0b57cec5SDimitry Andric }
2610*0b57cec5SDimitry Andric 
2611*0b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the
2612*0b57cec5SDimitry Andric ///    difference to the corresponding bits in the destination.
2613*0b57cec5SDimitry Andric ///
2614*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2615*0b57cec5SDimitry Andric ///
2616*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction.
2617*0b57cec5SDimitry Andric ///
2618*0b57cec5SDimitry Andric /// \param __a
2619*0b57cec5SDimitry Andric ///    A 64-bit integer vector containing the minuend.
2620*0b57cec5SDimitry Andric /// \param __b
2621*0b57cec5SDimitry Andric ///    A 64-bit integer vector containing the subtrahend.
2622*0b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in
2623*0b57cec5SDimitry Andric ///    the operands.
2624*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2625*0b57cec5SDimitry Andric _mm_sub_si64(__m64 __a, __m64 __b)
2626*0b57cec5SDimitry Andric {
2627*0b57cec5SDimitry Andric   return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
2628*0b57cec5SDimitry Andric }
2629*0b57cec5SDimitry Andric 
2630*0b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors.
2631*0b57cec5SDimitry Andric ///
2632*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2633*0b57cec5SDimitry Andric ///
2634*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.
2635*0b57cec5SDimitry Andric ///
2636*0b57cec5SDimitry Andric /// \param __a
2637*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2638*0b57cec5SDimitry Andric /// \param __b
2639*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2640*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2641*0b57cec5SDimitry Andric ///    in the operands.
2642*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2643*0b57cec5SDimitry Andric _mm_sub_epi64(__m128i __a, __m128i __b)
2644*0b57cec5SDimitry Andric {
2645*0b57cec5SDimitry Andric   return (__m128i)((__v2du)__a - (__v2du)__b);
2646*0b57cec5SDimitry Andric }
2647*0b57cec5SDimitry Andric 
2648*0b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and
2649*0b57cec5SDimitry Andric ///    returns the differences in the corresponding bytes in the destination.
2650*0b57cec5SDimitry Andric ///    Differences greater than 0x7F are saturated to 0x7F, and differences less
2651*0b57cec5SDimitry Andric ///    than 0x80 are saturated to 0x80.
2652*0b57cec5SDimitry Andric ///
2653*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2654*0b57cec5SDimitry Andric ///
2655*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.
2656*0b57cec5SDimitry Andric ///
2657*0b57cec5SDimitry Andric /// \param __a
2658*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2659*0b57cec5SDimitry Andric /// \param __b
2660*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2661*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2662*0b57cec5SDimitry Andric ///    in the operands.
2663*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2664*0b57cec5SDimitry Andric _mm_subs_epi8(__m128i __a, __m128i __b)
2665*0b57cec5SDimitry Andric {
2666*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
2667*0b57cec5SDimitry Andric }
2668*0b57cec5SDimitry Andric 
2669*0b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and
2670*0b57cec5SDimitry Andric ///    returns the differences in the corresponding bytes in the destination.
2671*0b57cec5SDimitry Andric ///    Differences greater than 0x7FFF are saturated to 0x7FFF, and values less
2672*0b57cec5SDimitry Andric ///    than 0x8000 are saturated to 0x8000.
2673*0b57cec5SDimitry Andric ///
2674*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2675*0b57cec5SDimitry Andric ///
2676*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.
2677*0b57cec5SDimitry Andric ///
2678*0b57cec5SDimitry Andric /// \param __a
2679*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2680*0b57cec5SDimitry Andric /// \param __b
2681*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2682*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values
2683*0b57cec5SDimitry Andric ///    in the operands.
2684*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2685*0b57cec5SDimitry Andric _mm_subs_epi16(__m128i __a, __m128i __b)
2686*0b57cec5SDimitry Andric {
2687*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
2688*0b57cec5SDimitry Andric }
2689*0b57cec5SDimitry Andric 
2690*0b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input
2691*0b57cec5SDimitry Andric ///    and returns the differences in the corresponding bytes in the
2692*0b57cec5SDimitry Andric ///    destination. Differences less than 0x00 are saturated to 0x00.
2693*0b57cec5SDimitry Andric ///
2694*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2695*0b57cec5SDimitry Andric ///
2696*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.
2697*0b57cec5SDimitry Andric ///
2698*0b57cec5SDimitry Andric /// \param __a
2699*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2700*0b57cec5SDimitry Andric /// \param __b
2701*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2702*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer
2703*0b57cec5SDimitry Andric ///    differences of the values in the operands.
2704*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2705*0b57cec5SDimitry Andric _mm_subs_epu8(__m128i __a, __m128i __b)
2706*0b57cec5SDimitry Andric {
2707*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
2708*0b57cec5SDimitry Andric }
2709*0b57cec5SDimitry Andric 
2710*0b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input
2711*0b57cec5SDimitry Andric ///    and returns the differences in the corresponding bytes in the
2712*0b57cec5SDimitry Andric ///    destination. Differences less than 0x0000 are saturated to 0x0000.
2713*0b57cec5SDimitry Andric ///
2714*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2715*0b57cec5SDimitry Andric ///
2716*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.
2717*0b57cec5SDimitry Andric ///
2718*0b57cec5SDimitry Andric /// \param __a
2719*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the minuends.
2720*0b57cec5SDimitry Andric /// \param __b
2721*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the subtrahends.
2722*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer
2723*0b57cec5SDimitry Andric ///    differences of the values in the operands.
2724*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2725*0b57cec5SDimitry Andric _mm_subs_epu16(__m128i __a, __m128i __b)
2726*0b57cec5SDimitry Andric {
2727*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
2728*0b57cec5SDimitry Andric }
2729*0b57cec5SDimitry Andric 
2730*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors.
2731*0b57cec5SDimitry Andric ///
2732*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2733*0b57cec5SDimitry Andric ///
2734*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
2735*0b57cec5SDimitry Andric ///
2736*0b57cec5SDimitry Andric /// \param __a
2737*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2738*0b57cec5SDimitry Andric /// \param __b
2739*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2740*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values
2741*0b57cec5SDimitry Andric ///    in both operands.
2742*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2743*0b57cec5SDimitry Andric _mm_and_si128(__m128i __a, __m128i __b)
2744*0b57cec5SDimitry Andric {
2745*0b57cec5SDimitry Andric   return (__m128i)((__v2du)__a & (__v2du)__b);
2746*0b57cec5SDimitry Andric }
2747*0b57cec5SDimitry Andric 
2748*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the
2749*0b57cec5SDimitry Andric ///    one's complement of the values contained in the first source operand.
2750*0b57cec5SDimitry Andric ///
2751*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2752*0b57cec5SDimitry Andric ///
2753*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
2754*0b57cec5SDimitry Andric ///
2755*0b57cec5SDimitry Andric /// \param __a
2756*0b57cec5SDimitry Andric ///    A 128-bit vector containing the left source operand. The one's complement
2757*0b57cec5SDimitry Andric ///    of this value is used in the bitwise AND.
2758*0b57cec5SDimitry Andric /// \param __b
2759*0b57cec5SDimitry Andric ///    A 128-bit vector containing the right source operand.
2760*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's
2761*0b57cec5SDimitry Andric ///    complement of the first operand and the values in the second operand.
2762*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2763*0b57cec5SDimitry Andric _mm_andnot_si128(__m128i __a, __m128i __b)
2764*0b57cec5SDimitry Andric {
2765*0b57cec5SDimitry Andric   return (__m128i)(~(__v2du)__a & (__v2du)__b);
2766*0b57cec5SDimitry Andric }
2767*0b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors.
2768*0b57cec5SDimitry Andric ///
2769*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2770*0b57cec5SDimitry Andric ///
2771*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
2772*0b57cec5SDimitry Andric ///
2773*0b57cec5SDimitry Andric /// \param __a
2774*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2775*0b57cec5SDimitry Andric /// \param __b
2776*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2777*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values
2778*0b57cec5SDimitry Andric ///    in both operands.
2779*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2780*0b57cec5SDimitry Andric _mm_or_si128(__m128i __a, __m128i __b)
2781*0b57cec5SDimitry Andric {
2782*0b57cec5SDimitry Andric   return (__m128i)((__v2du)__a | (__v2du)__b);
2783*0b57cec5SDimitry Andric }
2784*0b57cec5SDimitry Andric 
2785*0b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors.
2786*0b57cec5SDimitry Andric ///
2787*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2788*0b57cec5SDimitry Andric ///
2789*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
2790*0b57cec5SDimitry Andric ///
2791*0b57cec5SDimitry Andric /// \param __a
2792*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2793*0b57cec5SDimitry Andric /// \param __b
2794*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing one of the source operands.
2795*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
2796*0b57cec5SDimitry Andric ///    values in both operands.
2797*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2798*0b57cec5SDimitry Andric _mm_xor_si128(__m128i __a, __m128i __b)
2799*0b57cec5SDimitry Andric {
2800*0b57cec5SDimitry Andric   return (__m128i)((__v2du)__a ^ (__v2du)__b);
2801*0b57cec5SDimitry Andric }
2802*0b57cec5SDimitry Andric 
/// Left-shifts the 128-bit integer vector operand by the specified
///    number of bytes. Low-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_slli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
/* The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses an expression such as
   _mm_slli_si128(v, 1)[0] would apply the subscript to the builtin's result
   before the cast to __m128i. */
#define _mm_slli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))
2822*0b57cec5SDimitry Andric 
/// Left-shifts the 128-bit integer vector operand by the specified
///    number of bytes. Expands to the same builtin call, with the same
///    arguments, as _mm_slli_si128.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_bslli_si128(__m128i a, const int imm);
/// \endcode
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
/* The whole expansion is parenthesized so that the cast to __m128i is applied
   before any postfix operator written after the macro invocation. */
#define _mm_bslli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))
2825*0b57cec5SDimitry Andric 
2826*0b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand
2827*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2828*0b57cec5SDimitry Andric ///
2829*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2830*0b57cec5SDimitry Andric ///
2831*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
2832*0b57cec5SDimitry Andric ///
2833*0b57cec5SDimitry Andric /// \param __a
2834*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2835*0b57cec5SDimitry Andric /// \param __count
2836*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to left-shift each value
2837*0b57cec5SDimitry Andric ///    in operand \a __a.
2838*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2839*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2840*0b57cec5SDimitry Andric _mm_slli_epi16(__m128i __a, int __count)
2841*0b57cec5SDimitry Andric {
2842*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
2843*0b57cec5SDimitry Andric }
2844*0b57cec5SDimitry Andric 
2845*0b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand
2846*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2847*0b57cec5SDimitry Andric ///
2848*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2849*0b57cec5SDimitry Andric ///
2850*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
2851*0b57cec5SDimitry Andric ///
2852*0b57cec5SDimitry Andric /// \param __a
2853*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2854*0b57cec5SDimitry Andric /// \param __count
2855*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2856*0b57cec5SDimitry Andric ///    to left-shift each value in operand \a __a.
2857*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2858*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2859*0b57cec5SDimitry Andric _mm_sll_epi16(__m128i __a, __m128i __count)
2860*0b57cec5SDimitry Andric {
2861*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
2862*0b57cec5SDimitry Andric }
2863*0b57cec5SDimitry Andric 
2864*0b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand
2865*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2866*0b57cec5SDimitry Andric ///
2867*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2868*0b57cec5SDimitry Andric ///
2869*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
2870*0b57cec5SDimitry Andric ///
2871*0b57cec5SDimitry Andric /// \param __a
2872*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2873*0b57cec5SDimitry Andric /// \param __count
2874*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to left-shift each value
2875*0b57cec5SDimitry Andric ///    in operand \a __a.
2876*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2877*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2878*0b57cec5SDimitry Andric _mm_slli_epi32(__m128i __a, int __count)
2879*0b57cec5SDimitry Andric {
2880*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
2881*0b57cec5SDimitry Andric }
2882*0b57cec5SDimitry Andric 
2883*0b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand
2884*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2885*0b57cec5SDimitry Andric ///
2886*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2887*0b57cec5SDimitry Andric ///
2888*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
2889*0b57cec5SDimitry Andric ///
2890*0b57cec5SDimitry Andric /// \param __a
2891*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2892*0b57cec5SDimitry Andric /// \param __count
2893*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2894*0b57cec5SDimitry Andric ///    to left-shift each value in operand \a __a.
2895*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2896*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2897*0b57cec5SDimitry Andric _mm_sll_epi32(__m128i __a, __m128i __count)
2898*0b57cec5SDimitry Andric {
2899*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
2900*0b57cec5SDimitry Andric }
2901*0b57cec5SDimitry Andric 
2902*0b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand
2903*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2904*0b57cec5SDimitry Andric ///
2905*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2906*0b57cec5SDimitry Andric ///
2907*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
2908*0b57cec5SDimitry Andric ///
2909*0b57cec5SDimitry Andric /// \param __a
2910*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2911*0b57cec5SDimitry Andric /// \param __count
2912*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to left-shift each value
2913*0b57cec5SDimitry Andric ///    in operand \a __a.
2914*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2915*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2916*0b57cec5SDimitry Andric _mm_slli_epi64(__m128i __a, int __count)
2917*0b57cec5SDimitry Andric {
2918*0b57cec5SDimitry Andric   return __builtin_ia32_psllqi128((__v2di)__a, __count);
2919*0b57cec5SDimitry Andric }
2920*0b57cec5SDimitry Andric 
2921*0b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand
2922*0b57cec5SDimitry Andric ///    by the specified number of bits. Low-order bits are cleared.
2923*0b57cec5SDimitry Andric ///
2924*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2925*0b57cec5SDimitry Andric ///
2926*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
2927*0b57cec5SDimitry Andric ///
2928*0b57cec5SDimitry Andric /// \param __a
2929*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2930*0b57cec5SDimitry Andric /// \param __count
2931*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2932*0b57cec5SDimitry Andric ///    to left-shift each value in operand \a __a.
2933*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values.
2934*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2935*0b57cec5SDimitry Andric _mm_sll_epi64(__m128i __a, __m128i __count)
2936*0b57cec5SDimitry Andric {
2937*0b57cec5SDimitry Andric   return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
2938*0b57cec5SDimitry Andric }
2939*0b57cec5SDimitry Andric 
2940*0b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand
2941*0b57cec5SDimitry Andric ///    by the specified number of bits. High-order bits are filled with the sign
2942*0b57cec5SDimitry Andric ///    bit of the initial value.
2943*0b57cec5SDimitry Andric ///
2944*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2945*0b57cec5SDimitry Andric ///
2946*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
2947*0b57cec5SDimitry Andric ///
2948*0b57cec5SDimitry Andric /// \param __a
2949*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2950*0b57cec5SDimitry Andric /// \param __count
2951*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to right-shift each value
2952*0b57cec5SDimitry Andric ///    in operand \a __a.
2953*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
2954*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2955*0b57cec5SDimitry Andric _mm_srai_epi16(__m128i __a, int __count)
2956*0b57cec5SDimitry Andric {
2957*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
2958*0b57cec5SDimitry Andric }
2959*0b57cec5SDimitry Andric 
2960*0b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand
2961*0b57cec5SDimitry Andric ///    by the specified number of bits. High-order bits are filled with the sign
2962*0b57cec5SDimitry Andric ///    bit of the initial value.
2963*0b57cec5SDimitry Andric ///
2964*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2965*0b57cec5SDimitry Andric ///
2966*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
2967*0b57cec5SDimitry Andric ///
2968*0b57cec5SDimitry Andric /// \param __a
2969*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2970*0b57cec5SDimitry Andric /// \param __count
2971*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2972*0b57cec5SDimitry Andric ///    to right-shift each value in operand \a __a.
2973*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
2974*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2975*0b57cec5SDimitry Andric _mm_sra_epi16(__m128i __a, __m128i __count)
2976*0b57cec5SDimitry Andric {
2977*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
2978*0b57cec5SDimitry Andric }
2979*0b57cec5SDimitry Andric 
2980*0b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand
2981*0b57cec5SDimitry Andric ///    by the specified number of bits. High-order bits are filled with the sign
2982*0b57cec5SDimitry Andric ///    bit of the initial value.
2983*0b57cec5SDimitry Andric ///
2984*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2985*0b57cec5SDimitry Andric ///
2986*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
2987*0b57cec5SDimitry Andric ///
2988*0b57cec5SDimitry Andric /// \param __a
2989*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
2990*0b57cec5SDimitry Andric /// \param __count
2991*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to right-shift each value
2992*0b57cec5SDimitry Andric ///    in operand \a __a.
2993*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
2994*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2995*0b57cec5SDimitry Andric _mm_srai_epi32(__m128i __a, int __count)
2996*0b57cec5SDimitry Andric {
2997*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
2998*0b57cec5SDimitry Andric }
2999*0b57cec5SDimitry Andric 
3000*0b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand
3001*0b57cec5SDimitry Andric ///    by the specified number of bits. High-order bits are filled with the sign
3002*0b57cec5SDimitry Andric ///    bit of the initial value.
3003*0b57cec5SDimitry Andric ///
3004*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3005*0b57cec5SDimitry Andric ///
3006*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
3007*0b57cec5SDimitry Andric ///
3008*0b57cec5SDimitry Andric /// \param __a
3009*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3010*0b57cec5SDimitry Andric /// \param __count
3011*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3012*0b57cec5SDimitry Andric ///    to right-shift each value in operand \a __a.
3013*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3014*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3015*0b57cec5SDimitry Andric _mm_sra_epi32(__m128i __a, __m128i __count)
3016*0b57cec5SDimitry Andric {
3017*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
3018*0b57cec5SDimitry Andric }
3019*0b57cec5SDimitry Andric 
/// Right-shifts the 128-bit integer vector operand by the specified
///    number of bytes. High-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_srli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to right-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
/* The whole replacement list is parenthesized so that the expansion behaves
   as a single primary expression: without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would bind
   to the builtin call before the (__m128i) cast is applied. */
#define _mm_srli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))
3039*0b57cec5SDimitry Andric 
/// Right-shifts the 128-bit integer vector operand by the specified
///    number of bytes. This macro has the same expansion as
///    \c _mm_srli_si128.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_bsrli_si128(__m128i a, const int imm);
/// \endcode
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to right-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
/* The whole replacement list is parenthesized so that the expansion behaves
   as a single primary expression: without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would bind
   to the builtin call before the (__m128i) cast is applied. */
#define _mm_bsrli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))
3042*0b57cec5SDimitry Andric 
3043*0b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector
3044*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3045*0b57cec5SDimitry Andric ///
3046*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3047*0b57cec5SDimitry Andric ///
3048*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
3049*0b57cec5SDimitry Andric ///
3050*0b57cec5SDimitry Andric /// \param __a
3051*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3052*0b57cec5SDimitry Andric /// \param __count
3053*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to right-shift each value
3054*0b57cec5SDimitry Andric ///    in operand \a __a.
3055*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3056*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3057*0b57cec5SDimitry Andric _mm_srli_epi16(__m128i __a, int __count)
3058*0b57cec5SDimitry Andric {
3059*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
3060*0b57cec5SDimitry Andric }
3061*0b57cec5SDimitry Andric 
3062*0b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector
3063*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3064*0b57cec5SDimitry Andric ///
3065*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3066*0b57cec5SDimitry Andric ///
3067*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
3068*0b57cec5SDimitry Andric ///
3069*0b57cec5SDimitry Andric /// \param __a
3070*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3071*0b57cec5SDimitry Andric /// \param __count
3072*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3073*0b57cec5SDimitry Andric ///    to right-shift each value in operand \a __a.
3074*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3075*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3076*0b57cec5SDimitry Andric _mm_srl_epi16(__m128i __a, __m128i __count)
3077*0b57cec5SDimitry Andric {
3078*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
3079*0b57cec5SDimitry Andric }
3080*0b57cec5SDimitry Andric 
3081*0b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector
3082*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3083*0b57cec5SDimitry Andric ///
3084*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3085*0b57cec5SDimitry Andric ///
3086*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
3087*0b57cec5SDimitry Andric ///
3088*0b57cec5SDimitry Andric /// \param __a
3089*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3090*0b57cec5SDimitry Andric /// \param __count
3091*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to right-shift each value
3092*0b57cec5SDimitry Andric ///    in operand \a __a.
3093*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3094*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3095*0b57cec5SDimitry Andric _mm_srli_epi32(__m128i __a, int __count)
3096*0b57cec5SDimitry Andric {
3097*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
3098*0b57cec5SDimitry Andric }
3099*0b57cec5SDimitry Andric 
3100*0b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector
3101*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3102*0b57cec5SDimitry Andric ///
3103*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3104*0b57cec5SDimitry Andric ///
3105*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
3106*0b57cec5SDimitry Andric ///
3107*0b57cec5SDimitry Andric /// \param __a
3108*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3109*0b57cec5SDimitry Andric /// \param __count
3110*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3111*0b57cec5SDimitry Andric ///    to right-shift each value in operand \a __a.
3112*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3113*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3114*0b57cec5SDimitry Andric _mm_srl_epi32(__m128i __a, __m128i __count)
3115*0b57cec5SDimitry Andric {
3116*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
3117*0b57cec5SDimitry Andric }
3118*0b57cec5SDimitry Andric 
3119*0b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector
3120*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3121*0b57cec5SDimitry Andric ///
3122*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3123*0b57cec5SDimitry Andric ///
3124*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
3125*0b57cec5SDimitry Andric ///
3126*0b57cec5SDimitry Andric /// \param __a
3127*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3128*0b57cec5SDimitry Andric /// \param __count
3129*0b57cec5SDimitry Andric ///    An integer value specifying the number of bits to right-shift each value
3130*0b57cec5SDimitry Andric ///    in operand \a __a.
3131*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3132*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3133*0b57cec5SDimitry Andric _mm_srli_epi64(__m128i __a, int __count)
3134*0b57cec5SDimitry Andric {
3135*0b57cec5SDimitry Andric   return __builtin_ia32_psrlqi128((__v2di)__a, __count);
3136*0b57cec5SDimitry Andric }
3137*0b57cec5SDimitry Andric 
3138*0b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector
3139*0b57cec5SDimitry Andric ///    operand by the specified number of bits. High-order bits are cleared.
3140*0b57cec5SDimitry Andric ///
3141*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3142*0b57cec5SDimitry Andric ///
3143*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
3144*0b57cec5SDimitry Andric ///
3145*0b57cec5SDimitry Andric /// \param __a
3146*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the source operand.
3147*0b57cec5SDimitry Andric /// \param __count
3148*0b57cec5SDimitry Andric ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3149*0b57cec5SDimitry Andric ///    to right-shift each value in operand \a __a.
3150*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values.
3151*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3152*0b57cec5SDimitry Andric _mm_srl_epi64(__m128i __a, __m128i __count)
3153*0b57cec5SDimitry Andric {
3154*0b57cec5SDimitry Andric   return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
3155*0b57cec5SDimitry Andric }
3156*0b57cec5SDimitry Andric 
3157*0b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit
3158*0b57cec5SDimitry Andric ///    integer vectors for equality. Each comparison yields 0x0 for false, 0xFF
3159*0b57cec5SDimitry Andric ///    for true.
3160*0b57cec5SDimitry Andric ///
3161*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3162*0b57cec5SDimitry Andric ///
3163*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.
3164*0b57cec5SDimitry Andric ///
3165*0b57cec5SDimitry Andric /// \param __a
3166*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3167*0b57cec5SDimitry Andric /// \param __b
3168*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3169*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3170*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3171*0b57cec5SDimitry Andric _mm_cmpeq_epi8(__m128i __a, __m128i __b)
3172*0b57cec5SDimitry Andric {
3173*0b57cec5SDimitry Andric   return (__m128i)((__v16qi)__a == (__v16qi)__b);
3174*0b57cec5SDimitry Andric }
3175*0b57cec5SDimitry Andric 
3176*0b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit
3177*0b57cec5SDimitry Andric ///    integer vectors for equality. Each comparison yields 0x0 for false,
3178*0b57cec5SDimitry Andric ///    0xFFFF for true.
3179*0b57cec5SDimitry Andric ///
3180*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3181*0b57cec5SDimitry Andric ///
3182*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.
3183*0b57cec5SDimitry Andric ///
3184*0b57cec5SDimitry Andric /// \param __a
3185*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3186*0b57cec5SDimitry Andric /// \param __b
3187*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3188*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3189*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3190*0b57cec5SDimitry Andric _mm_cmpeq_epi16(__m128i __a, __m128i __b)
3191*0b57cec5SDimitry Andric {
3192*0b57cec5SDimitry Andric   return (__m128i)((__v8hi)__a == (__v8hi)__b);
3193*0b57cec5SDimitry Andric }
3194*0b57cec5SDimitry Andric 
3195*0b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit
3196*0b57cec5SDimitry Andric ///    integer vectors for equality. Each comparison yields 0x0 for false,
3197*0b57cec5SDimitry Andric ///    0xFFFFFFFF for true.
3198*0b57cec5SDimitry Andric ///
3199*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3200*0b57cec5SDimitry Andric ///
3201*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.
3202*0b57cec5SDimitry Andric ///
3203*0b57cec5SDimitry Andric /// \param __a
3204*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3205*0b57cec5SDimitry Andric /// \param __b
3206*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3207*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3208*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3209*0b57cec5SDimitry Andric _mm_cmpeq_epi32(__m128i __a, __m128i __b)
3210*0b57cec5SDimitry Andric {
3211*0b57cec5SDimitry Andric   return (__m128i)((__v4si)__a == (__v4si)__b);
3212*0b57cec5SDimitry Andric }
3213*0b57cec5SDimitry Andric 
3214*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit
3215*0b57cec5SDimitry Andric ///    integer vectors to determine if the values in the first operand are
3216*0b57cec5SDimitry Andric ///    greater than those in the second operand. Each comparison yields 0x0 for
3217*0b57cec5SDimitry Andric ///    false, 0xFF for true.
3218*0b57cec5SDimitry Andric ///
3219*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3220*0b57cec5SDimitry Andric ///
3221*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
3222*0b57cec5SDimitry Andric ///
3223*0b57cec5SDimitry Andric /// \param __a
3224*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3225*0b57cec5SDimitry Andric /// \param __b
3226*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3227*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3228*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3229*0b57cec5SDimitry Andric _mm_cmpgt_epi8(__m128i __a, __m128i __b)
3230*0b57cec5SDimitry Andric {
3231*0b57cec5SDimitry Andric   /* This function always performs a signed comparison, but __v16qi is a char
3232*0b57cec5SDimitry Andric      which may be signed or unsigned, so use __v16qs. */
3233*0b57cec5SDimitry Andric   return (__m128i)((__v16qs)__a > (__v16qs)__b);
3234*0b57cec5SDimitry Andric }
3235*0b57cec5SDimitry Andric 
3236*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the
3237*0b57cec5SDimitry Andric ///    128-bit integer vectors to determine if the values in the first operand
3238*0b57cec5SDimitry Andric ///    are greater than those in the second operand.
3239*0b57cec5SDimitry Andric ///
3240*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFF for true.
3241*0b57cec5SDimitry Andric ///
3242*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3243*0b57cec5SDimitry Andric ///
3244*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
3245*0b57cec5SDimitry Andric ///
3246*0b57cec5SDimitry Andric /// \param __a
3247*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3248*0b57cec5SDimitry Andric /// \param __b
3249*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3250*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3251*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3252*0b57cec5SDimitry Andric _mm_cmpgt_epi16(__m128i __a, __m128i __b)
3253*0b57cec5SDimitry Andric {
3254*0b57cec5SDimitry Andric   return (__m128i)((__v8hi)__a > (__v8hi)__b);
3255*0b57cec5SDimitry Andric }
3256*0b57cec5SDimitry Andric 
3257*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the
3258*0b57cec5SDimitry Andric ///    128-bit integer vectors to determine if the values in the first operand
3259*0b57cec5SDimitry Andric ///    are greater than those in the second operand.
3260*0b57cec5SDimitry Andric ///
3261*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
3262*0b57cec5SDimitry Andric ///
3263*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3264*0b57cec5SDimitry Andric ///
3265*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
3266*0b57cec5SDimitry Andric ///
3267*0b57cec5SDimitry Andric /// \param __a
3268*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3269*0b57cec5SDimitry Andric /// \param __b
3270*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3271*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3272*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3273*0b57cec5SDimitry Andric _mm_cmpgt_epi32(__m128i __a, __m128i __b)
3274*0b57cec5SDimitry Andric {
3275*0b57cec5SDimitry Andric   return (__m128i)((__v4si)__a > (__v4si)__b);
3276*0b57cec5SDimitry Andric }
3277*0b57cec5SDimitry Andric 
3278*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit
3279*0b57cec5SDimitry Andric ///    integer vectors to determine if the values in the first operand are less
3280*0b57cec5SDimitry Andric ///    than those in the second operand.
3281*0b57cec5SDimitry Andric ///
3282*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFF for true.
3283*0b57cec5SDimitry Andric ///
3284*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3285*0b57cec5SDimitry Andric ///
3286*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
3287*0b57cec5SDimitry Andric ///
3288*0b57cec5SDimitry Andric /// \param __a
3289*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3290*0b57cec5SDimitry Andric /// \param __b
3291*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3292*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3293*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3294*0b57cec5SDimitry Andric _mm_cmplt_epi8(__m128i __a, __m128i __b)
3295*0b57cec5SDimitry Andric {
3296*0b57cec5SDimitry Andric   return _mm_cmpgt_epi8(__b, __a);
3297*0b57cec5SDimitry Andric }
3298*0b57cec5SDimitry Andric 
3299*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the
3300*0b57cec5SDimitry Andric ///    128-bit integer vectors to determine if the values in the first operand
3301*0b57cec5SDimitry Andric ///    are less than those in the second operand.
3302*0b57cec5SDimitry Andric ///
3303*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFF for true.
3304*0b57cec5SDimitry Andric ///
3305*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3306*0b57cec5SDimitry Andric ///
3307*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
3308*0b57cec5SDimitry Andric ///
3309*0b57cec5SDimitry Andric /// \param __a
3310*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3311*0b57cec5SDimitry Andric /// \param __b
3312*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3313*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3314*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3315*0b57cec5SDimitry Andric _mm_cmplt_epi16(__m128i __a, __m128i __b)
3316*0b57cec5SDimitry Andric {
3317*0b57cec5SDimitry Andric   return _mm_cmpgt_epi16(__b, __a);
3318*0b57cec5SDimitry Andric }
3319*0b57cec5SDimitry Andric 
3320*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the
3321*0b57cec5SDimitry Andric ///    128-bit integer vectors to determine if the values in the first operand
3322*0b57cec5SDimitry Andric ///    are less than those in the second operand.
3323*0b57cec5SDimitry Andric ///
3324*0b57cec5SDimitry Andric ///    Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
3325*0b57cec5SDimitry Andric ///
3326*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3327*0b57cec5SDimitry Andric ///
3328*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
3329*0b57cec5SDimitry Andric ///
3330*0b57cec5SDimitry Andric /// \param __a
3331*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3332*0b57cec5SDimitry Andric /// \param __b
3333*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3334*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results.
3335*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3336*0b57cec5SDimitry Andric _mm_cmplt_epi32(__m128i __a, __m128i __b)
3337*0b57cec5SDimitry Andric {
3338*0b57cec5SDimitry Andric   return _mm_cmpgt_epi32(__b, __a);
3339*0b57cec5SDimitry Andric }
3340*0b57cec5SDimitry Andric 
3341*0b57cec5SDimitry Andric #ifdef __x86_64__
3342*0b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a
3343*0b57cec5SDimitry Andric ///    double-precision value and returns it in the lower element of a [2 x
3344*0b57cec5SDimitry Andric ///    double] vector; the upper element of the returned vector is copied from
3345*0b57cec5SDimitry Andric ///    the upper element of the first operand.
3346*0b57cec5SDimitry Andric ///
3347*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3348*0b57cec5SDimitry Andric ///
3349*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
3350*0b57cec5SDimitry Andric ///
3351*0b57cec5SDimitry Andric /// \param __a
3352*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
3353*0b57cec5SDimitry Andric ///    copied to the upper 64 bits of the destination.
3354*0b57cec5SDimitry Andric /// \param __b
3355*0b57cec5SDimitry Andric ///    A 64-bit signed integer operand containing the value to be converted.
3356*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
3357*0b57cec5SDimitry Andric ///    converted value of the second operand. The upper 64 bits are copied from
3358*0b57cec5SDimitry Andric ///    the upper 64 bits of the first operand.
3359*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
3360*0b57cec5SDimitry Andric _mm_cvtsi64_sd(__m128d __a, long long __b)
3361*0b57cec5SDimitry Andric {
3362*0b57cec5SDimitry Andric   __a[0] = __b;
3363*0b57cec5SDimitry Andric   return __a;
3364*0b57cec5SDimitry Andric }
3365*0b57cec5SDimitry Andric 
3366*0b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a
3367*0b57cec5SDimitry Andric ///    64-bit signed integer value, according to the current rounding mode.
3368*0b57cec5SDimitry Andric ///
3369*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3370*0b57cec5SDimitry Andric ///
3371*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
3372*0b57cec5SDimitry Andric ///
3373*0b57cec5SDimitry Andric /// \param __a
3374*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3375*0b57cec5SDimitry Andric ///    conversion.
3376*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value.
3377*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS
3378*0b57cec5SDimitry Andric _mm_cvtsd_si64(__m128d __a)
3379*0b57cec5SDimitry Andric {
3380*0b57cec5SDimitry Andric   return __builtin_ia32_cvtsd2si64((__v2df)__a);
3381*0b57cec5SDimitry Andric }
3382*0b57cec5SDimitry Andric 
3383*0b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a
3384*0b57cec5SDimitry Andric ///    64-bit signed integer value, truncating the result when it is inexact.
3385*0b57cec5SDimitry Andric ///
3386*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3387*0b57cec5SDimitry Andric ///
3388*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
3389*0b57cec5SDimitry Andric ///   instruction.
3390*0b57cec5SDimitry Andric ///
3391*0b57cec5SDimitry Andric /// \param __a
3392*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3393*0b57cec5SDimitry Andric ///    conversion.
3394*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value.
3395*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS
3396*0b57cec5SDimitry Andric _mm_cvttsd_si64(__m128d __a)
3397*0b57cec5SDimitry Andric {
3398*0b57cec5SDimitry Andric   return __builtin_ia32_cvttsd2si64((__v2df)__a);
3399*0b57cec5SDimitry Andric }
3400*0b57cec5SDimitry Andric #endif
3401*0b57cec5SDimitry Andric 
3402*0b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float].
3403*0b57cec5SDimitry Andric ///
3404*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3405*0b57cec5SDimitry Andric ///
3406*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.
3407*0b57cec5SDimitry Andric ///
3408*0b57cec5SDimitry Andric /// \param __a
3409*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
3410*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values.
3411*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
3412*0b57cec5SDimitry Andric _mm_cvtepi32_ps(__m128i __a)
3413*0b57cec5SDimitry Andric {
3414*0b57cec5SDimitry Andric   return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);
3415*0b57cec5SDimitry Andric }
3416*0b57cec5SDimitry Andric 
3417*0b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32].
3418*0b57cec5SDimitry Andric ///
3419*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3420*0b57cec5SDimitry Andric ///
3421*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.
3422*0b57cec5SDimitry Andric ///
3423*0b57cec5SDimitry Andric /// \param __a
3424*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float].
3425*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted
3426*0b57cec5SDimitry Andric ///    values.
3427*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3428*0b57cec5SDimitry Andric _mm_cvtps_epi32(__m128 __a)
3429*0b57cec5SDimitry Andric {
3430*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
3431*0b57cec5SDimitry Andric }
3432*0b57cec5SDimitry Andric 
3433*0b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32],
3434*0b57cec5SDimitry Andric ///    truncating the result when it is inexact.
3435*0b57cec5SDimitry Andric ///
3436*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3437*0b57cec5SDimitry Andric ///
3438*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
3439*0b57cec5SDimitry Andric ///   instruction.
3440*0b57cec5SDimitry Andric ///
3441*0b57cec5SDimitry Andric /// \param __a
3442*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float].
3443*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values.
3444*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3445*0b57cec5SDimitry Andric _mm_cvttps_epi32(__m128 __a)
3446*0b57cec5SDimitry Andric {
3447*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
3448*0b57cec5SDimitry Andric }
3449*0b57cec5SDimitry Andric 
3450*0b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input
3451*0b57cec5SDimitry Andric ///    operand and the remaining elements are zero.
3452*0b57cec5SDimitry Andric ///
3453*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3454*0b57cec5SDimitry Andric ///
3455*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
3456*0b57cec5SDimitry Andric ///
3457*0b57cec5SDimitry Andric /// \param __a
3458*0b57cec5SDimitry Andric ///    A 32-bit signed integer operand.
3459*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32].
3460*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3461*0b57cec5SDimitry Andric _mm_cvtsi32_si128(int __a)
3462*0b57cec5SDimitry Andric {
3463*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };
3464*0b57cec5SDimitry Andric }
3465*0b57cec5SDimitry Andric 
3466*0b57cec5SDimitry Andric #ifdef __x86_64__
3467*0b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input
3468*0b57cec5SDimitry Andric ///    operand and the upper element is zero.
3469*0b57cec5SDimitry Andric ///
3470*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3471*0b57cec5SDimitry Andric ///
3472*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3473*0b57cec5SDimitry Andric ///
3474*0b57cec5SDimitry Andric /// \param __a
3475*0b57cec5SDimitry Andric ///    A 64-bit signed integer operand containing the value to be converted.
3476*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value.
3477*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3478*0b57cec5SDimitry Andric _mm_cvtsi64_si128(long long __a)
3479*0b57cec5SDimitry Andric {
3480*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v2di){ __a, 0 };
3481*0b57cec5SDimitry Andric }
3482*0b57cec5SDimitry Andric #endif
3483*0b57cec5SDimitry Andric 
3484*0b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a
3485*0b57cec5SDimitry Andric ///    32-bit signed integer value.
3486*0b57cec5SDimitry Andric ///
3487*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3488*0b57cec5SDimitry Andric ///
3489*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
3490*0b57cec5SDimitry Andric ///
3491*0b57cec5SDimitry Andric /// \param __a
3492*0b57cec5SDimitry Andric ///    A vector of [4 x i32]. The least significant 32 bits are moved to the
3493*0b57cec5SDimitry Andric ///    destination.
3494*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value.
3495*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
3496*0b57cec5SDimitry Andric _mm_cvtsi128_si32(__m128i __a)
3497*0b57cec5SDimitry Andric {
3498*0b57cec5SDimitry Andric   __v4si __b = (__v4si)__a;
3499*0b57cec5SDimitry Andric   return __b[0];
3500*0b57cec5SDimitry Andric }
3501*0b57cec5SDimitry Andric 
3502*0b57cec5SDimitry Andric #ifdef __x86_64__
3503*0b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a
3504*0b57cec5SDimitry Andric ///    64-bit signed integer value.
3505*0b57cec5SDimitry Andric ///
3506*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3507*0b57cec5SDimitry Andric ///
3508*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3509*0b57cec5SDimitry Andric ///
3510*0b57cec5SDimitry Andric /// \param __a
3511*0b57cec5SDimitry Andric ///    A vector of [2 x i64]. The least significant 64 bits are moved to the
3512*0b57cec5SDimitry Andric ///    destination.
3513*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value.
3514*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS
3515*0b57cec5SDimitry Andric _mm_cvtsi128_si64(__m128i __a)
3516*0b57cec5SDimitry Andric {
3517*0b57cec5SDimitry Andric   return __a[0];
3518*0b57cec5SDimitry Andric }
3519*0b57cec5SDimitry Andric #endif
3520*0b57cec5SDimitry Andric 
3521*0b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location
3522*0b57cec5SDimitry Andric ///    to elements in a 128-bit integer vector.
3523*0b57cec5SDimitry Andric ///
3524*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3525*0b57cec5SDimitry Andric ///
3526*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.
3527*0b57cec5SDimitry Andric ///
3528*0b57cec5SDimitry Andric /// \param __p
3529*0b57cec5SDimitry Andric ///    An aligned pointer to a memory location containing integer values.
3530*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values.
3531*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3532*0b57cec5SDimitry Andric _mm_load_si128(__m128i const *__p)
3533*0b57cec5SDimitry Andric {
3534*0b57cec5SDimitry Andric   return *__p;
3535*0b57cec5SDimitry Andric }
3536*0b57cec5SDimitry Andric 
3537*0b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location
3538*0b57cec5SDimitry Andric ///    to elements in a 128-bit integer vector.
3539*0b57cec5SDimitry Andric ///
3540*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3541*0b57cec5SDimitry Andric ///
3542*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.
3543*0b57cec5SDimitry Andric ///
3544*0b57cec5SDimitry Andric /// \param __p
3545*0b57cec5SDimitry Andric ///    A pointer to a memory location containing integer values.
3546*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values.
3547*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3548*0b57cec5SDimitry Andric _mm_loadu_si128(__m128i_u const *__p)
3549*0b57cec5SDimitry Andric {
3550*0b57cec5SDimitry Andric   struct __loadu_si128 {
3551*0b57cec5SDimitry Andric     __m128i_u __v;
3552*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
3553*0b57cec5SDimitry Andric   return ((struct __loadu_si128*)__p)->__v;
3554*0b57cec5SDimitry Andric }
3555*0b57cec5SDimitry Andric 
3556*0b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from
3557*0b57cec5SDimitry Andric ///    the lower element of the operand, and the upper element is zero.
3558*0b57cec5SDimitry Andric ///
3559*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3560*0b57cec5SDimitry Andric ///
3561*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3562*0b57cec5SDimitry Andric ///
3563*0b57cec5SDimitry Andric /// \param __p
3564*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
3565*0b57cec5SDimitry Andric ///    the destination.
3566*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
3567*0b57cec5SDimitry Andric ///    moved value. The higher order bits are cleared.
3568*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3569*0b57cec5SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p)
3570*0b57cec5SDimitry Andric {
3571*0b57cec5SDimitry Andric   struct __mm_loadl_epi64_struct {
3572*0b57cec5SDimitry Andric     long long __u;
3573*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
3574*0b57cec5SDimitry Andric   return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
3575*0b57cec5SDimitry Andric }
3576*0b57cec5SDimitry Andric 
3577*0b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content.
3578*0b57cec5SDimitry Andric ///    This could be used as an argument to another intrinsic function where the
3579*0b57cec5SDimitry Andric ///    argument is required but the value is not actually used.
3580*0b57cec5SDimitry Andric ///
3581*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3582*0b57cec5SDimitry Andric ///
3583*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
3584*0b57cec5SDimitry Andric ///
3585*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content.
3586*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3587*0b57cec5SDimitry Andric _mm_undefined_si128(void)
3588*0b57cec5SDimitry Andric {
3589*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_undef128();
3590*0b57cec5SDimitry Andric }
3591*0b57cec5SDimitry Andric 
3592*0b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3593*0b57cec5SDimitry Andric ///    the specified 64-bit integer values.
3594*0b57cec5SDimitry Andric ///
3595*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3596*0b57cec5SDimitry Andric ///
3597*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3598*0b57cec5SDimitry Andric ///    instruction.
3599*0b57cec5SDimitry Andric ///
3600*0b57cec5SDimitry Andric /// \param __q1
3601*0b57cec5SDimitry Andric ///    A 64-bit integer value used to initialize the upper 64 bits of the
3602*0b57cec5SDimitry Andric ///    destination vector of [2 x i64].
3603*0b57cec5SDimitry Andric /// \param __q0
3604*0b57cec5SDimitry Andric ///    A 64-bit integer value used to initialize the lower 64 bits of the
3605*0b57cec5SDimitry Andric ///    destination vector of [2 x i64].
3606*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values
3607*0b57cec5SDimitry Andric ///    provided in the operands.
3608*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3609*0b57cec5SDimitry Andric _mm_set_epi64x(long long __q1, long long __q0)
3610*0b57cec5SDimitry Andric {
3611*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v2di){ __q0, __q1 };
3612*0b57cec5SDimitry Andric }
3613*0b57cec5SDimitry Andric 
3614*0b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3615*0b57cec5SDimitry Andric ///    the specified 64-bit integer values.
3616*0b57cec5SDimitry Andric ///
3617*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3618*0b57cec5SDimitry Andric ///
3619*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3620*0b57cec5SDimitry Andric ///    instruction.
3621*0b57cec5SDimitry Andric ///
3622*0b57cec5SDimitry Andric /// \param __q1
3623*0b57cec5SDimitry Andric ///    A 64-bit integer value used to initialize the upper 64 bits of the
3624*0b57cec5SDimitry Andric ///    destination vector of [2 x i64].
3625*0b57cec5SDimitry Andric /// \param __q0
3626*0b57cec5SDimitry Andric ///    A 64-bit integer value used to initialize the lower 64 bits of the
3627*0b57cec5SDimitry Andric ///    destination vector of [2 x i64].
3628*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values
3629*0b57cec5SDimitry Andric ///    provided in the operands.
3630*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3631*0b57cec5SDimitry Andric _mm_set_epi64(__m64 __q1, __m64 __q0)
3632*0b57cec5SDimitry Andric {
3633*0b57cec5SDimitry Andric   return _mm_set_epi64x((long long)__q1, (long long)__q0);
3634*0b57cec5SDimitry Andric }
3635*0b57cec5SDimitry Andric 
3636*0b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
3637*0b57cec5SDimitry Andric ///    the specified 32-bit integer values.
3638*0b57cec5SDimitry Andric ///
3639*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3640*0b57cec5SDimitry Andric ///
3641*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3642*0b57cec5SDimitry Andric ///    instruction.
3643*0b57cec5SDimitry Andric ///
3644*0b57cec5SDimitry Andric /// \param __i3
3645*0b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize bits [127:96] of the
3646*0b57cec5SDimitry Andric ///    destination vector.
3647*0b57cec5SDimitry Andric /// \param __i2
3648*0b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize bits [95:64] of the destination
3649*0b57cec5SDimitry Andric ///    vector.
3650*0b57cec5SDimitry Andric /// \param __i1
3651*0b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize bits [63:32] of the destination
3652*0b57cec5SDimitry Andric ///    vector.
3653*0b57cec5SDimitry Andric /// \param __i0
3654*0b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize bits [31:0] of the destination
3655*0b57cec5SDimitry Andric ///    vector.
3656*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values
3657*0b57cec5SDimitry Andric ///    provided in the operands.
3658*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3659*0b57cec5SDimitry Andric _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
3660*0b57cec5SDimitry Andric {
3661*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
3662*0b57cec5SDimitry Andric }
3663*0b57cec5SDimitry Andric 
3664*0b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
3665*0b57cec5SDimitry Andric ///    the specified 16-bit integer values.
3666*0b57cec5SDimitry Andric ///
3667*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3668*0b57cec5SDimitry Andric ///
3669*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3670*0b57cec5SDimitry Andric ///    instruction.
3671*0b57cec5SDimitry Andric ///
3672*0b57cec5SDimitry Andric /// \param __w7
3673*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [127:112] of the
3674*0b57cec5SDimitry Andric ///    destination vector.
3675*0b57cec5SDimitry Andric /// \param __w6
3676*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [111:96] of the
3677*0b57cec5SDimitry Andric ///    destination vector.
3678*0b57cec5SDimitry Andric /// \param __w5
3679*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [95:80] of the destination
3680*0b57cec5SDimitry Andric ///    vector.
3681*0b57cec5SDimitry Andric /// \param __w4
3682*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [79:64] of the destination
3683*0b57cec5SDimitry Andric ///    vector.
3684*0b57cec5SDimitry Andric /// \param __w3
3685*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [63:48] of the destination
3686*0b57cec5SDimitry Andric ///    vector.
3687*0b57cec5SDimitry Andric /// \param __w2
3688*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [47:32] of the destination
3689*0b57cec5SDimitry Andric ///    vector.
3690*0b57cec5SDimitry Andric /// \param __w1
3691*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [31:16] of the destination
3692*0b57cec5SDimitry Andric ///    vector.
3693*0b57cec5SDimitry Andric /// \param __w0
3694*0b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [15:0] of the destination
3695*0b57cec5SDimitry Andric ///    vector.
3696*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values
3697*0b57cec5SDimitry Andric ///    provided in the operands.
3698*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3699*0b57cec5SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
3700*0b57cec5SDimitry Andric {
3701*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
3702*0b57cec5SDimitry Andric }
3703*0b57cec5SDimitry Andric 
3704*0b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
3705*0b57cec5SDimitry Andric ///    the specified 8-bit integer values.
3706*0b57cec5SDimitry Andric ///
3707*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3708*0b57cec5SDimitry Andric ///
3709*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3710*0b57cec5SDimitry Andric ///    instruction.
3711*0b57cec5SDimitry Andric ///
3712*0b57cec5SDimitry Andric /// \param __b15
3713*0b57cec5SDimitry Andric ///    Initializes bits [127:120] of the destination vector.
3714*0b57cec5SDimitry Andric /// \param __b14
3715*0b57cec5SDimitry Andric ///    Initializes bits [119:112] of the destination vector.
3716*0b57cec5SDimitry Andric /// \param __b13
3717*0b57cec5SDimitry Andric ///    Initializes bits [111:104] of the destination vector.
3718*0b57cec5SDimitry Andric /// \param __b12
3719*0b57cec5SDimitry Andric ///    Initializes bits [103:96] of the destination vector.
3720*0b57cec5SDimitry Andric /// \param __b11
3721*0b57cec5SDimitry Andric ///    Initializes bits [95:88] of the destination vector.
3722*0b57cec5SDimitry Andric /// \param __b10
3723*0b57cec5SDimitry Andric ///    Initializes bits [87:80] of the destination vector.
3724*0b57cec5SDimitry Andric /// \param __b9
3725*0b57cec5SDimitry Andric ///    Initializes bits [79:72] of the destination vector.
3726*0b57cec5SDimitry Andric /// \param __b8
3727*0b57cec5SDimitry Andric ///    Initializes bits [71:64] of the destination vector.
3728*0b57cec5SDimitry Andric /// \param __b7
3729*0b57cec5SDimitry Andric ///    Initializes bits [63:56] of the destination vector.
3730*0b57cec5SDimitry Andric /// \param __b6
3731*0b57cec5SDimitry Andric ///    Initializes bits [55:48] of the destination vector.
3732*0b57cec5SDimitry Andric /// \param __b5
3733*0b57cec5SDimitry Andric ///    Initializes bits [47:40] of the destination vector.
3734*0b57cec5SDimitry Andric /// \param __b4
3735*0b57cec5SDimitry Andric ///    Initializes bits [39:32] of the destination vector.
3736*0b57cec5SDimitry Andric /// \param __b3
3737*0b57cec5SDimitry Andric ///    Initializes bits [31:24] of the destination vector.
3738*0b57cec5SDimitry Andric /// \param __b2
3739*0b57cec5SDimitry Andric ///    Initializes bits [23:16] of the destination vector.
3740*0b57cec5SDimitry Andric /// \param __b1
3741*0b57cec5SDimitry Andric ///    Initializes bits [15:8] of the destination vector.
3742*0b57cec5SDimitry Andric /// \param __b0
3743*0b57cec5SDimitry Andric ///    Initializes bits [7:0] of the destination vector.
3744*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values
3745*0b57cec5SDimitry Andric ///    provided in the operands.
3746*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3747*0b57cec5SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
3748*0b57cec5SDimitry Andric {
3749*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
3750*0b57cec5SDimitry Andric }
3751*0b57cec5SDimitry Andric 
3752*0b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the
3753*0b57cec5SDimitry Andric ///    specified 64-bit integer value.
3754*0b57cec5SDimitry Andric ///
3755*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3756*0b57cec5SDimitry Andric ///
3757*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3758*0b57cec5SDimitry Andric ///    instruction.
3759*0b57cec5SDimitry Andric ///
3760*0b57cec5SDimitry Andric /// \param __q
3761*0b57cec5SDimitry Andric ///    Integer value used to initialize the elements of the destination integer
3762*0b57cec5SDimitry Andric ///    vector.
3763*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both
3764*0b57cec5SDimitry Andric ///    elements containing the value provided in the operand.
3765*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3766*0b57cec5SDimitry Andric _mm_set1_epi64x(long long __q)
3767*0b57cec5SDimitry Andric {
3768*0b57cec5SDimitry Andric   return _mm_set_epi64x(__q, __q);
3769*0b57cec5SDimitry Andric }
3770*0b57cec5SDimitry Andric 
3771*0b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the
3772*0b57cec5SDimitry Andric ///    specified 64-bit value.
3773*0b57cec5SDimitry Andric ///
3774*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3775*0b57cec5SDimitry Andric ///
3776*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3777*0b57cec5SDimitry Andric ///    instruction.
3778*0b57cec5SDimitry Andric ///
3779*0b57cec5SDimitry Andric /// \param __q
3780*0b57cec5SDimitry Andric ///    A 64-bit value used to initialize the elements of the destination integer
3781*0b57cec5SDimitry Andric ///    vector.
3782*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements
3783*0b57cec5SDimitry Andric ///    containing the value provided in the operand.
3784*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3785*0b57cec5SDimitry Andric _mm_set1_epi64(__m64 __q)
3786*0b57cec5SDimitry Andric {
3787*0b57cec5SDimitry Andric   return _mm_set_epi64(__q, __q);
3788*0b57cec5SDimitry Andric }
3789*0b57cec5SDimitry Andric 
3790*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the
3791*0b57cec5SDimitry Andric ///    specified 32-bit value.
3792*0b57cec5SDimitry Andric ///
3793*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3794*0b57cec5SDimitry Andric ///
3795*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3796*0b57cec5SDimitry Andric ///    instruction.
3797*0b57cec5SDimitry Andric ///
3798*0b57cec5SDimitry Andric /// \param __i
3799*0b57cec5SDimitry Andric ///    A 32-bit value used to initialize the elements of the destination integer
3800*0b57cec5SDimitry Andric ///    vector.
3801*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements
3802*0b57cec5SDimitry Andric ///    containing the value provided in the operand.
3803*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3804*0b57cec5SDimitry Andric _mm_set1_epi32(int __i)
3805*0b57cec5SDimitry Andric {
3806*0b57cec5SDimitry Andric   return _mm_set_epi32(__i, __i, __i, __i);
3807*0b57cec5SDimitry Andric }
3808*0b57cec5SDimitry Andric 
3809*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the
3810*0b57cec5SDimitry Andric ///    specified 16-bit value.
3811*0b57cec5SDimitry Andric ///
3812*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3813*0b57cec5SDimitry Andric ///
3814*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3815*0b57cec5SDimitry Andric ///    instruction.
3816*0b57cec5SDimitry Andric ///
3817*0b57cec5SDimitry Andric /// \param __w
3818*0b57cec5SDimitry Andric ///    A 16-bit value used to initialize the elements of the destination integer
3819*0b57cec5SDimitry Andric ///    vector.
3820*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements
3821*0b57cec5SDimitry Andric ///    containing the value provided in the operand.
3822*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3823*0b57cec5SDimitry Andric _mm_set1_epi16(short __w)
3824*0b57cec5SDimitry Andric {
3825*0b57cec5SDimitry Andric   return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
3826*0b57cec5SDimitry Andric }
3827*0b57cec5SDimitry Andric 
3828*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the
3829*0b57cec5SDimitry Andric ///    specified 8-bit value.
3830*0b57cec5SDimitry Andric ///
3831*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3832*0b57cec5SDimitry Andric ///
3833*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3834*0b57cec5SDimitry Andric ///    instruction.
3835*0b57cec5SDimitry Andric ///
3836*0b57cec5SDimitry Andric /// \param __b
3837*0b57cec5SDimitry Andric ///    An 8-bit value used to initialize the elements of the destination integer
3838*0b57cec5SDimitry Andric ///    vector.
3839*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements
3840*0b57cec5SDimitry Andric ///    containing the value provided in the operand.
3841*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3842*0b57cec5SDimitry Andric _mm_set1_epi8(char __b)
3843*0b57cec5SDimitry Andric {
3844*0b57cec5SDimitry Andric   return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);
3845*0b57cec5SDimitry Andric }
3846*0b57cec5SDimitry Andric 
3847*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order
3848*0b57cec5SDimitry Andric ///     with the specified 64-bit integral values.
3849*0b57cec5SDimitry Andric ///
3850*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3851*0b57cec5SDimitry Andric ///
3852*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction.
3853*0b57cec5SDimitry Andric ///
3854*0b57cec5SDimitry Andric /// \param __q0
3855*0b57cec5SDimitry Andric ///    A 64-bit integral value used to initialize the lower 64 bits of the
3856*0b57cec5SDimitry Andric ///    result.
3857*0b57cec5SDimitry Andric /// \param __q1
3858*0b57cec5SDimitry Andric ///    A 64-bit integral value used to initialize the upper 64 bits of the
3859*0b57cec5SDimitry Andric ///    result.
3860*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector.
3861*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3862*0b57cec5SDimitry Andric _mm_setr_epi64(__m64 __q0, __m64 __q1)
3863*0b57cec5SDimitry Andric {
3864*0b57cec5SDimitry Andric   return _mm_set_epi64(__q1, __q0);
3865*0b57cec5SDimitry Andric }
3866*0b57cec5SDimitry Andric 
3867*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order
3868*0b57cec5SDimitry Andric ///     with the specified 32-bit integral values.
3869*0b57cec5SDimitry Andric ///
3870*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3871*0b57cec5SDimitry Andric ///
3872*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3873*0b57cec5SDimitry Andric ///    instruction.
3874*0b57cec5SDimitry Andric ///
3875*0b57cec5SDimitry Andric /// \param __i0
3876*0b57cec5SDimitry Andric ///    A 32-bit integral value used to initialize bits [31:0] of the result.
3877*0b57cec5SDimitry Andric /// \param __i1
3878*0b57cec5SDimitry Andric ///    A 32-bit integral value used to initialize bits [63:32] of the result.
3879*0b57cec5SDimitry Andric /// \param __i2
3880*0b57cec5SDimitry Andric ///    A 32-bit integral value used to initialize bits [95:64] of the result.
3881*0b57cec5SDimitry Andric /// \param __i3
3882*0b57cec5SDimitry Andric ///    A 32-bit integral value used to initialize bits [127:96] of the result.
3883*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector.
3884*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3885*0b57cec5SDimitry Andric _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
3886*0b57cec5SDimitry Andric {
3887*0b57cec5SDimitry Andric   return _mm_set_epi32(__i3, __i2, __i1, __i0);
3888*0b57cec5SDimitry Andric }
3889*0b57cec5SDimitry Andric 
3890*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order
3891*0b57cec5SDimitry Andric ///     with the specified 16-bit integral values.
3892*0b57cec5SDimitry Andric ///
3893*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3894*0b57cec5SDimitry Andric ///
3895*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3896*0b57cec5SDimitry Andric ///    instruction.
3897*0b57cec5SDimitry Andric ///
3898*0b57cec5SDimitry Andric /// \param __w0
3899*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [15:0] of the result.
3900*0b57cec5SDimitry Andric /// \param __w1
3901*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [31:16] of the result.
3902*0b57cec5SDimitry Andric /// \param __w2
3903*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [47:32] of the result.
3904*0b57cec5SDimitry Andric /// \param __w3
3905*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [63:48] of the result.
3906*0b57cec5SDimitry Andric /// \param __w4
3907*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [79:64] of the result.
3908*0b57cec5SDimitry Andric /// \param __w5
3909*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [95:80] of the result.
3910*0b57cec5SDimitry Andric /// \param __w6
3911*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [111:96] of the result.
3912*0b57cec5SDimitry Andric /// \param __w7
3913*0b57cec5SDimitry Andric ///    A 16-bit integral value used to initialize bits [127:112] of the result.
3914*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector.
3915*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3916*0b57cec5SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
3917*0b57cec5SDimitry Andric {
3918*0b57cec5SDimitry Andric   return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
3919*0b57cec5SDimitry Andric }
3920*0b57cec5SDimitry Andric 
3921*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order
3922*0b57cec5SDimitry Andric ///     with the specified 8-bit integral values.
3923*0b57cec5SDimitry Andric ///
3924*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3925*0b57cec5SDimitry Andric ///
3926*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
3927*0b57cec5SDimitry Andric ///    instruction.
3928*0b57cec5SDimitry Andric ///
3929*0b57cec5SDimitry Andric /// \param __b0
3930*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [7:0] of the result.
3931*0b57cec5SDimitry Andric /// \param __b1
3932*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [15:8] of the result.
3933*0b57cec5SDimitry Andric /// \param __b2
3934*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [23:16] of the result.
3935*0b57cec5SDimitry Andric /// \param __b3
3936*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [31:24] of the result.
3937*0b57cec5SDimitry Andric /// \param __b4
3938*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [39:32] of the result.
3939*0b57cec5SDimitry Andric /// \param __b5
3940*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [47:40] of the result.
3941*0b57cec5SDimitry Andric /// \param __b6
3942*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [55:48] of the result.
3943*0b57cec5SDimitry Andric /// \param __b7
3944*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [63:56] of the result.
3945*0b57cec5SDimitry Andric /// \param __b8
3946*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [71:64] of the result.
3947*0b57cec5SDimitry Andric /// \param __b9
3948*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [79:72] of the result.
3949*0b57cec5SDimitry Andric /// \param __b10
3950*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [87:80] of the result.
3951*0b57cec5SDimitry Andric /// \param __b11
3952*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [95:88] of the result.
3953*0b57cec5SDimitry Andric /// \param __b12
3954*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [103:96] of the result.
3955*0b57cec5SDimitry Andric /// \param __b13
3956*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [111:104] of the result.
3957*0b57cec5SDimitry Andric /// \param __b14
3958*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [119:112] of the result.
3959*0b57cec5SDimitry Andric /// \param __b15
3960*0b57cec5SDimitry Andric ///    An 8-bit integral value used to initialize bits [127:120] of the result.
3961*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector.
3962*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3963*0b57cec5SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
3964*0b57cec5SDimitry Andric {
3965*0b57cec5SDimitry Andric   return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
3966*0b57cec5SDimitry Andric }
3967*0b57cec5SDimitry Andric 
3968*0b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero.
3969*0b57cec5SDimitry Andric ///
3970*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3971*0b57cec5SDimitry Andric ///
3972*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
3973*0b57cec5SDimitry Andric ///
3974*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to
3975*0b57cec5SDimitry Andric ///    zero.
3976*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3977*0b57cec5SDimitry Andric _mm_setzero_si128(void)
3978*0b57cec5SDimitry Andric {
3979*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v2di){ 0LL, 0LL };
3980*0b57cec5SDimitry Andric }
3981*0b57cec5SDimitry Andric 
3982*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a
3983*0b57cec5SDimitry Andric ///    128-bit boundary.
3984*0b57cec5SDimitry Andric ///
3985*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3986*0b57cec5SDimitry Andric ///
3987*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.
3988*0b57cec5SDimitry Andric ///
3989*0b57cec5SDimitry Andric /// \param __p
3990*0b57cec5SDimitry Andric ///    A pointer to an aligned memory location that will receive the integer
3991*0b57cec5SDimitry Andric ///    values.
3992*0b57cec5SDimitry Andric /// \param __b
3993*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be moved.
3994*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
3995*0b57cec5SDimitry Andric _mm_store_si128(__m128i *__p, __m128i __b)
3996*0b57cec5SDimitry Andric {
3997*0b57cec5SDimitry Andric   *__p = __b;
3998*0b57cec5SDimitry Andric }
3999*0b57cec5SDimitry Andric 
4000*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location.
4001*0b57cec5SDimitry Andric ///
4002*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4003*0b57cec5SDimitry Andric ///
4004*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.
4005*0b57cec5SDimitry Andric ///
4006*0b57cec5SDimitry Andric /// \param __p
4007*0b57cec5SDimitry Andric ///    A pointer to a memory location that will receive the integer values.
4008*0b57cec5SDimitry Andric /// \param __b
4009*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be moved.
4010*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4011*0b57cec5SDimitry Andric _mm_storeu_si128(__m128i_u *__p, __m128i __b)
4012*0b57cec5SDimitry Andric {
4013*0b57cec5SDimitry Andric   struct __storeu_si128 {
4014*0b57cec5SDimitry Andric     __m128i_u __v;
4015*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4016*0b57cec5SDimitry Andric   ((struct __storeu_si128*)__p)->__v = __b;
4017*0b57cec5SDimitry Andric }
4018*0b57cec5SDimitry Andric 
4019*0b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer
4020*0b57cec5SDimitry Andric ///    vector.
4021*0b57cec5SDimitry Andric ///
4022*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4023*0b57cec5SDimitry Andric ///
4024*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
4025*0b57cec5SDimitry Andric ///
4026*0b57cec5SDimitry Andric /// \param __p
4027*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location. The address of the memory
4028*0b57cec5SDimitry Andric ///    location does not have to be algned.
4029*0b57cec5SDimitry Andric /// \param __b
4030*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the value to be stored.
4031*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4032*0b57cec5SDimitry Andric _mm_storeu_si64(void *__p, __m128i __b)
4033*0b57cec5SDimitry Andric {
4034*0b57cec5SDimitry Andric   struct __storeu_si64 {
4035*0b57cec5SDimitry Andric     long long __v;
4036*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4037*0b57cec5SDimitry Andric   ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
4038*0b57cec5SDimitry Andric }
4039*0b57cec5SDimitry Andric 
4040*0b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer
4041*0b57cec5SDimitry Andric ///    vector.
4042*0b57cec5SDimitry Andric ///
4043*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4044*0b57cec5SDimitry Andric ///
4045*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
4046*0b57cec5SDimitry Andric ///
4047*0b57cec5SDimitry Andric /// \param __p
4048*0b57cec5SDimitry Andric ///    A pointer to a 32-bit memory location. The address of the memory
4049*0b57cec5SDimitry Andric ///    location does not have to be aligned.
4050*0b57cec5SDimitry Andric /// \param __b
4051*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the value to be stored.
4052*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4053*0b57cec5SDimitry Andric _mm_storeu_si32(void *__p, __m128i __b)
4054*0b57cec5SDimitry Andric {
4055*0b57cec5SDimitry Andric   struct __storeu_si32 {
4056*0b57cec5SDimitry Andric     int __v;
4057*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4058*0b57cec5SDimitry Andric   ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
4059*0b57cec5SDimitry Andric }
4060*0b57cec5SDimitry Andric 
4061*0b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer
4062*0b57cec5SDimitry Andric ///    vector.
4063*0b57cec5SDimitry Andric ///
4064*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4065*0b57cec5SDimitry Andric ///
4066*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction.
4067*0b57cec5SDimitry Andric ///
4068*0b57cec5SDimitry Andric /// \param __p
4069*0b57cec5SDimitry Andric ///    A pointer to a 16-bit memory location. The address of the memory
4070*0b57cec5SDimitry Andric ///    location does not have to be aligned.
4071*0b57cec5SDimitry Andric /// \param __b
4072*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the value to be stored.
4073*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4074*0b57cec5SDimitry Andric _mm_storeu_si16(void *__p, __m128i __b)
4075*0b57cec5SDimitry Andric {
4076*0b57cec5SDimitry Andric   struct __storeu_si16 {
4077*0b57cec5SDimitry Andric     short __v;
4078*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4079*0b57cec5SDimitry Andric   ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
4080*0b57cec5SDimitry Andric }
4081*0b57cec5SDimitry Andric 
4082*0b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the
4083*0b57cec5SDimitry Andric ///    specified unaligned memory location. When a mask bit is 1, the
4084*0b57cec5SDimitry Andric ///    corresponding byte is written, otherwise it is not written.
4085*0b57cec5SDimitry Andric ///
4086*0b57cec5SDimitry Andric ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4087*0b57cec5SDimitry Andric ///    used again soon). Exception and trap behavior for elements not selected
4088*0b57cec5SDimitry Andric ///    for storage to memory are implementation dependent.
4089*0b57cec5SDimitry Andric ///
4090*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4091*0b57cec5SDimitry Andric ///
4092*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
4093*0b57cec5SDimitry Andric ///   instruction.
4094*0b57cec5SDimitry Andric ///
4095*0b57cec5SDimitry Andric /// \param __d
4096*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be moved.
4097*0b57cec5SDimitry Andric /// \param __n
4098*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the mask. The most significant bit of
4099*0b57cec5SDimitry Andric ///    each byte represents the mask bits.
4100*0b57cec5SDimitry Andric /// \param __p
4101*0b57cec5SDimitry Andric ///    A pointer to an unaligned 128-bit memory location where the specified
4102*0b57cec5SDimitry Andric ///    values are moved.
4103*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4104*0b57cec5SDimitry Andric _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
4105*0b57cec5SDimitry Andric {
4106*0b57cec5SDimitry Andric   __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
4107*0b57cec5SDimitry Andric }
4108*0b57cec5SDimitry Andric 
4109*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
4110*0b57cec5SDimitry Andric ///    a memory location.
4111*0b57cec5SDimitry Andric ///
4112*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4113*0b57cec5SDimitry Andric ///
4114*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.
4115*0b57cec5SDimitry Andric ///
4116*0b57cec5SDimitry Andric /// \param __p
4117*0b57cec5SDimitry Andric ///    A pointer to a 64-bit memory location that will receive the lower 64 bits
4118*0b57cec5SDimitry Andric ///    of the integer vector parameter.
4119*0b57cec5SDimitry Andric /// \param __a
4120*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
4121*0b57cec5SDimitry Andric ///    value to be stored.
4122*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4123*0b57cec5SDimitry Andric _mm_storel_epi64(__m128i_u *__p, __m128i __a)
4124*0b57cec5SDimitry Andric {
4125*0b57cec5SDimitry Andric   struct __mm_storel_epi64_struct {
4126*0b57cec5SDimitry Andric     long long __u;
4127*0b57cec5SDimitry Andric   } __attribute__((__packed__, __may_alias__));
4128*0b57cec5SDimitry Andric   ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
4129*0b57cec5SDimitry Andric }
4130*0b57cec5SDimitry Andric 
4131*0b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
4132*0b57cec5SDimitry Andric ///    aligned memory location.
4133*0b57cec5SDimitry Andric ///
4134*0b57cec5SDimitry Andric ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4135*0b57cec5SDimitry Andric ///    used again soon).
4136*0b57cec5SDimitry Andric ///
4137*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4138*0b57cec5SDimitry Andric ///
4139*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
4140*0b57cec5SDimitry Andric ///
4141*0b57cec5SDimitry Andric /// \param __p
4142*0b57cec5SDimitry Andric ///    A pointer to the 128-bit aligned memory location used to store the value.
4143*0b57cec5SDimitry Andric /// \param __a
4144*0b57cec5SDimitry Andric ///    A vector of [2 x double] containing the 64-bit values to be stored.
4145*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4146*0b57cec5SDimitry Andric _mm_stream_pd(double *__p, __m128d __a)
4147*0b57cec5SDimitry Andric {
4148*0b57cec5SDimitry Andric   __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
4149*0b57cec5SDimitry Andric }
4150*0b57cec5SDimitry Andric 
4151*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location.
4152*0b57cec5SDimitry Andric ///
4153*0b57cec5SDimitry Andric ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4154*0b57cec5SDimitry Andric ///    used again soon).
4155*0b57cec5SDimitry Andric ///
4156*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4157*0b57cec5SDimitry Andric ///
4158*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.
4159*0b57cec5SDimitry Andric ///
4160*0b57cec5SDimitry Andric /// \param __p
4161*0b57cec5SDimitry Andric ///    A pointer to the 128-bit aligned memory location used to store the value.
4162*0b57cec5SDimitry Andric /// \param __a
4163*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be stored.
4164*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
4165*0b57cec5SDimitry Andric _mm_stream_si128(__m128i *__p, __m128i __a)
4166*0b57cec5SDimitry Andric {
4167*0b57cec5SDimitry Andric   __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
4168*0b57cec5SDimitry Andric }
4169*0b57cec5SDimitry Andric 
/// Writes a 32-bit integer to the given memory location using a
///    non-temporal store.
///
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.
///
/// \param __p
///    A pointer to the 32-bit memory location that receives the value.
/// \param __a
///    The 32-bit integer value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a)
{
  /* Scalar store: the attribute list is spelled out instead of using
     __DEFAULT_FN_ATTRS. */
  __builtin_ia32_movnti(__p, __a);
}
4188*0b57cec5SDimitry Andric 
#if defined(__x86_64__)
/// Writes a 64-bit integer to the given memory location using a
///    non-temporal store. Only available when targeting x86-64.
///
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
///
/// \param __p
///    A pointer to the 64-bit memory location that receives the value.
/// \param __a
///    The 64-bit integer value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a)
{
  /* Scalar store: the attribute list is spelled out instead of using
     __DEFAULT_FN_ATTRS. */
  __builtin_ia32_movnti64(__p, __a);
}
#endif /* __x86_64__ */
4209*0b57cec5SDimitry Andric 
#ifdef __cplusplus
extern "C" {
#endif

/// Flushes and invalidates the cache line containing \a __p from every
///    cache in the coherency domain.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
///
/// \param __p
///    A pointer to the memory location that identifies the cache line to be
///    flushed.
void _mm_clflush(const void *__p);

/// Forces strong memory ordering (serialization) between the load
///    instructions that precede this instruction and the load instructions
///    that follow it, so that all earlier loads complete before any later
///    load executes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
///
void _mm_lfence(void);

/// Forces strong memory ordering (serialization) between the load and store
///    instructions that precede this instruction and the load and store
///    instructions that follow it, so that all earlier memory accesses
///    complete before any later memory access executes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
///
void _mm_mfence(void);

#ifdef __cplusplus
} /* extern "C" */
#endif
4251*0b57cec5SDimitry Andric 
4252*0b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector
4253*0b57cec5SDimitry Andric ///    operands into 8-bit signed integers, and packs the results into the
4254*0b57cec5SDimitry Andric ///    destination. Positive values greater than 0x7F are saturated to 0x7F.
4255*0b57cec5SDimitry Andric ///    Negative values less than 0x80 are saturated to 0x80.
4256*0b57cec5SDimitry Andric ///
4257*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4258*0b57cec5SDimitry Andric ///
4259*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
4260*0b57cec5SDimitry Andric ///
4261*0b57cec5SDimitry Andric /// \param __a
4262*0b57cec5SDimitry Andric ///   A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4263*0b57cec5SDimitry Andric ///   a signed integer and is converted to a 8-bit signed integer with
4264*0b57cec5SDimitry Andric ///   saturation. Values greater than 0x7F are saturated to 0x7F. Values less
4265*0b57cec5SDimitry Andric ///   than 0x80 are saturated to 0x80. The converted [8 x i8] values are
4266*0b57cec5SDimitry Andric ///   written to the lower 64 bits of the result.
4267*0b57cec5SDimitry Andric /// \param __b
4268*0b57cec5SDimitry Andric ///   A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4269*0b57cec5SDimitry Andric ///   a signed integer and is converted to a 8-bit signed integer with
4270*0b57cec5SDimitry Andric ///   saturation. Values greater than 0x7F are saturated to 0x7F. Values less
4271*0b57cec5SDimitry Andric ///   than 0x80 are saturated to 0x80. The converted [8 x i8] values are
4272*0b57cec5SDimitry Andric ///   written to the higher 64 bits of the result.
4273*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values.
4274*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4275*0b57cec5SDimitry Andric _mm_packs_epi16(__m128i __a, __m128i __b)
4276*0b57cec5SDimitry Andric {
4277*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
4278*0b57cec5SDimitry Andric }
4279*0b57cec5SDimitry Andric 
4280*0b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector
4281*0b57cec5SDimitry Andric ///    operands into 16-bit signed integers, and packs the results into the
4282*0b57cec5SDimitry Andric ///    destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
4283*0b57cec5SDimitry Andric ///    Negative values less than 0x8000 are saturated to 0x8000.
4284*0b57cec5SDimitry Andric ///
4285*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4286*0b57cec5SDimitry Andric ///
4287*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.
4288*0b57cec5SDimitry Andric ///
4289*0b57cec5SDimitry Andric /// \param __a
4290*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
4291*0b57cec5SDimitry Andric ///    a signed integer and is converted to a 16-bit signed integer with
4292*0b57cec5SDimitry Andric ///    saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
4293*0b57cec5SDimitry Andric ///    less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
4294*0b57cec5SDimitry Andric ///    are written to the lower 64 bits of the result.
4295*0b57cec5SDimitry Andric /// \param __b
4296*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
4297*0b57cec5SDimitry Andric ///    a signed integer and is converted to a 16-bit signed integer with
4298*0b57cec5SDimitry Andric ///    saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
4299*0b57cec5SDimitry Andric ///    less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
4300*0b57cec5SDimitry Andric ///    are written to the higher 64 bits of the result.
4301*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values.
4302*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4303*0b57cec5SDimitry Andric _mm_packs_epi32(__m128i __a, __m128i __b)
4304*0b57cec5SDimitry Andric {
4305*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
4306*0b57cec5SDimitry Andric }
4307*0b57cec5SDimitry Andric 
4308*0b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector
4309*0b57cec5SDimitry Andric ///    operands into 8-bit unsigned integers, and packs the results into the
4310*0b57cec5SDimitry Andric ///    destination. Values greater than 0xFF are saturated to 0xFF. Values less
4311*0b57cec5SDimitry Andric ///    than 0x00 are saturated to 0x00.
4312*0b57cec5SDimitry Andric ///
4313*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4314*0b57cec5SDimitry Andric ///
4315*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.
4316*0b57cec5SDimitry Andric ///
4317*0b57cec5SDimitry Andric /// \param __a
4318*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4319*0b57cec5SDimitry Andric ///    a signed integer and is converted to an 8-bit unsigned integer with
4320*0b57cec5SDimitry Andric ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
4321*0b57cec5SDimitry Andric ///    than 0x00 are saturated to 0x00. The converted [8 x i8] values are
4322*0b57cec5SDimitry Andric ///    written to the lower 64 bits of the result.
4323*0b57cec5SDimitry Andric /// \param __b
4324*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
4325*0b57cec5SDimitry Andric ///    a signed integer and is converted to an 8-bit unsigned integer with
4326*0b57cec5SDimitry Andric ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
4327*0b57cec5SDimitry Andric ///    than 0x00 are saturated to 0x00. The converted [8 x i8] values are
4328*0b57cec5SDimitry Andric ///    written to the higher 64 bits of the result.
4329*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values.
4330*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4331*0b57cec5SDimitry Andric _mm_packus_epi16(__m128i __a, __m128i __b)
4332*0b57cec5SDimitry Andric {
4333*0b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
4334*0b57cec5SDimitry Andric }
4335*0b57cec5SDimitry Andric 
/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
///    the immediate-value parameter as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector.
/// \param imm
///    An immediate value. Bits [2:0] select the value from \a a that is
///    assigned to bits [15:0] of the result. \n
///    000: assign values from bits [15:0] of \a a. \n
///    001: assign values from bits [31:16] of \a a. \n
///    010: assign values from bits [47:32] of \a a. \n
///    011: assign values from bits [63:48] of \a a. \n
///    100: assign values from bits [79:64] of \a a. \n
///    101: assign values from bits [95:80] of \a a. \n
///    110: assign values from bits [111:96] of \a a. \n
///    111: assign values from bits [127:112] of \a a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
///
/// The whole expansion is parenthesized so that the leading casts cannot
/// bind to operators that follow the macro invocation; going through
/// <c> unsigned short </c> is what zero-extends the element.
#define _mm_extract_epi16(a, imm) \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                    (int)(imm)))
4361*0b57cec5SDimitry Andric 
/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then replacing one of its 16-bit
///    elements, chosen by the immediate-value parameter, with the lower 16
///    bits of an integer parameter.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a b.
/// \param b
///    An integer. The lower 16 bits of this parameter are written to the
///    result element selected by \a imm.
/// \param imm
///    An immediate value. Bits [2:0] give the index of the 16-bit element of
///    the result that receives the lower 16 bits of \a b (index 0 is bits
///    [15:0], index 7 is bits [127:112]).
/// \returns A 128-bit integer vector containing the constructed values.
///
/// The whole expansion is parenthesized so that the leading cast cannot
/// bind to operators that follow the macro invocation.
#define _mm_insert_epi16(a, b, imm) \
  ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
                                        (int)(imm)))
4385*0b57cec5SDimitry Andric 
4386*0b57cec5SDimitry Andric /// Copies the values of the most significant bits from each 8-bit
4387*0b57cec5SDimitry Andric ///    element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
4388*0b57cec5SDimitry Andric ///    value, zero-extends the value, and writes it to the destination.
4389*0b57cec5SDimitry Andric ///
4390*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4391*0b57cec5SDimitry Andric ///
4392*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
4393*0b57cec5SDimitry Andric ///
4394*0b57cec5SDimitry Andric /// \param __a
4395*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [16 x i8] whose most significant bits are read.
4396*0b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a,
4397*0b57cec5SDimitry Andric ///    written to bits [15:0]. The other bits are assigned zeros.
4398*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
4399*0b57cec5SDimitry Andric _mm_movemask_epi8(__m128i __a)
4400*0b57cec5SDimitry Andric {
4401*0b57cec5SDimitry Andric   return __builtin_ia32_pmovmskb128((__v16qi)__a);
4402*0b57cec5SDimitry Andric }
4403*0b57cec5SDimitry Andric 
4404*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit
4405*0b57cec5SDimitry Andric ///    elements of a 128-bit integer vector parameter, using the immediate-value
4406*0b57cec5SDimitry Andric ///    parameter as a specifier.
4407*0b57cec5SDimitry Andric ///
4408*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4409*0b57cec5SDimitry Andric ///
4410*0b57cec5SDimitry Andric /// \code
4411*0b57cec5SDimitry Andric /// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
4412*0b57cec5SDimitry Andric /// \endcode
4413*0b57cec5SDimitry Andric ///
4414*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
4415*0b57cec5SDimitry Andric ///
4416*0b57cec5SDimitry Andric /// \param a
4417*0b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be copied.
4418*0b57cec5SDimitry Andric /// \param imm
4419*0b57cec5SDimitry Andric ///    An immediate value containing an 8-bit value specifying which elements to
4420*0b57cec5SDimitry Andric ///    copy from a. The destinations within the 128-bit destination are assigned
4421*0b57cec5SDimitry Andric ///    values as follows: \n
4422*0b57cec5SDimitry Andric ///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
4423*0b57cec5SDimitry Andric ///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
4424*0b57cec5SDimitry Andric ///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
4425*0b57cec5SDimitry Andric ///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
4426*0b57cec5SDimitry Andric ///    Bit value assignments: \n
4427*0b57cec5SDimitry Andric ///    00: assign values from bits [31:0] of \a a. \n
4428*0b57cec5SDimitry Andric ///    01: assign values from bits [63:32] of \a a. \n
4429*0b57cec5SDimitry Andric ///    10: assign values from bits [95:64] of \a a. \n
4430*0b57cec5SDimitry Andric ///    11: assign values from bits [127:96] of \a a.
4431*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values.
4432*0b57cec5SDimitry Andric #define _mm_shuffle_epi32(a, imm) \
4433*0b57cec5SDimitry Andric   (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
4434*0b57cec5SDimitry Andric 
4435*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four lower 16-bit
4436*0b57cec5SDimitry Andric ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
4437*0b57cec5SDimitry Andric ///    value parameter as a specifier.
4438*0b57cec5SDimitry Andric ///
4439*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4440*0b57cec5SDimitry Andric ///
4441*0b57cec5SDimitry Andric /// \code
4442*0b57cec5SDimitry Andric /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
4443*0b57cec5SDimitry Andric /// \endcode
4444*0b57cec5SDimitry Andric ///
4445*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
4446*0b57cec5SDimitry Andric ///
4447*0b57cec5SDimitry Andric /// \param a
4448*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
4449*0b57cec5SDimitry Andric ///    [127:64] of the result.
4450*0b57cec5SDimitry Andric /// \param imm
4451*0b57cec5SDimitry Andric ///    An 8-bit immediate value specifying which elements to copy from \a a. \n
4452*0b57cec5SDimitry Andric ///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
4453*0b57cec5SDimitry Andric ///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
4454*0b57cec5SDimitry Andric ///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
4455*0b57cec5SDimitry Andric ///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
4456*0b57cec5SDimitry Andric ///    Bit value assignments: \n
4457*0b57cec5SDimitry Andric ///    00: assign values from bits [15:0] of \a a. \n
4458*0b57cec5SDimitry Andric ///    01: assign values from bits [31:16] of \a a. \n
4459*0b57cec5SDimitry Andric ///    10: assign values from bits [47:32] of \a a. \n
4460*0b57cec5SDimitry Andric ///    11: assign values from bits [63:48] of \a a. \n
4461*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values.
4462*0b57cec5SDimitry Andric #define _mm_shufflelo_epi16(a, imm) \
4463*0b57cec5SDimitry Andric   (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
4464*0b57cec5SDimitry Andric 
4465*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four upper 16-bit
4466*0b57cec5SDimitry Andric ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
4467*0b57cec5SDimitry Andric ///    value parameter as a specifier.
4468*0b57cec5SDimitry Andric ///
4469*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4470*0b57cec5SDimitry Andric ///
4471*0b57cec5SDimitry Andric /// \code
4472*0b57cec5SDimitry Andric /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
4473*0b57cec5SDimitry Andric /// \endcode
4474*0b57cec5SDimitry Andric ///
4475*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
4476*0b57cec5SDimitry Andric ///
4477*0b57cec5SDimitry Andric /// \param a
4478*0b57cec5SDimitry Andric ///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
4479*0b57cec5SDimitry Andric ///    [63:0] of the result.
4480*0b57cec5SDimitry Andric /// \param imm
4481*0b57cec5SDimitry Andric ///    An 8-bit immediate value specifying which elements to copy from \a a. \n
4482*0b57cec5SDimitry Andric ///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
4483*0b57cec5SDimitry Andric ///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
4484*0b57cec5SDimitry Andric ///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
4485*0b57cec5SDimitry Andric ///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
4486*0b57cec5SDimitry Andric ///    Bit value assignments: \n
4487*0b57cec5SDimitry Andric ///    00: assign values from bits [79:64] of \a a. \n
4488*0b57cec5SDimitry Andric ///    01: assign values from bits [95:80] of \a a. \n
4489*0b57cec5SDimitry Andric ///    10: assign values from bits [111:96] of \a a. \n
4490*0b57cec5SDimitry Andric ///    11: assign values from bits [127:112] of \a a. \n
4491*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values.
4492*0b57cec5SDimitry Andric #define _mm_shufflehi_epi16(a, imm) \
4493*0b57cec5SDimitry Andric   (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
4494*0b57cec5SDimitry Andric 
4495*0b57cec5SDimitry Andric /// Unpacks the high-order (index 8-15) values from two 128-bit vectors
4496*0b57cec5SDimitry Andric ///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4497*0b57cec5SDimitry Andric ///
4498*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4499*0b57cec5SDimitry Andric ///
4500*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
4501*0b57cec5SDimitry Andric ///   instruction.
4502*0b57cec5SDimitry Andric ///
4503*0b57cec5SDimitry Andric /// \param __a
4504*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8]. \n
4505*0b57cec5SDimitry Andric ///    Bits [71:64] are written to bits [7:0] of the result. \n
4506*0b57cec5SDimitry Andric ///    Bits [79:72] are written to bits [23:16] of the result. \n
4507*0b57cec5SDimitry Andric ///    Bits [87:80] are written to bits [39:32] of the result. \n
4508*0b57cec5SDimitry Andric ///    Bits [95:88] are written to bits [55:48] of the result. \n
4509*0b57cec5SDimitry Andric ///    Bits [103:96] are written to bits [71:64] of the result. \n
4510*0b57cec5SDimitry Andric ///    Bits [111:104] are written to bits [87:80] of the result. \n
4511*0b57cec5SDimitry Andric ///    Bits [119:112] are written to bits [103:96] of the result. \n
4512*0b57cec5SDimitry Andric ///    Bits [127:120] are written to bits [119:112] of the result.
4513*0b57cec5SDimitry Andric /// \param __b
4514*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8]. \n
4515*0b57cec5SDimitry Andric ///    Bits [71:64] are written to bits [15:8] of the result. \n
4516*0b57cec5SDimitry Andric ///    Bits [79:72] are written to bits [31:24] of the result. \n
4517*0b57cec5SDimitry Andric ///    Bits [87:80] are written to bits [47:40] of the result. \n
4518*0b57cec5SDimitry Andric ///    Bits [95:88] are written to bits [63:56] of the result. \n
4519*0b57cec5SDimitry Andric ///    Bits [103:96] are written to bits [79:72] of the result. \n
4520*0b57cec5SDimitry Andric ///    Bits [111:104] are written to bits [95:88] of the result. \n
4521*0b57cec5SDimitry Andric ///    Bits [119:112] are written to bits [111:104] of the result. \n
4522*0b57cec5SDimitry Andric ///    Bits [127:120] are written to bits [127:120] of the result.
4523*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4524*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4525*0b57cec5SDimitry Andric _mm_unpackhi_epi8(__m128i __a, __m128i __b)
4526*0b57cec5SDimitry Andric {
4527*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
4528*0b57cec5SDimitry Andric }
4529*0b57cec5SDimitry Andric 
4530*0b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of
4531*0b57cec5SDimitry Andric ///    [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
4532*0b57cec5SDimitry Andric ///
4533*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4534*0b57cec5SDimitry Andric ///
4535*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
4536*0b57cec5SDimitry Andric ///   instruction.
4537*0b57cec5SDimitry Andric ///
4538*0b57cec5SDimitry Andric /// \param __a
4539*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16]. \n
4540*0b57cec5SDimitry Andric ///    Bits [79:64] are written to bits [15:0] of the result. \n
4541*0b57cec5SDimitry Andric ///    Bits [95:80] are written to bits [47:32] of the result. \n
4542*0b57cec5SDimitry Andric ///    Bits [111:96] are written to bits [79:64] of the result. \n
4543*0b57cec5SDimitry Andric ///    Bits [127:112] are written to bits [111:96] of the result.
4544*0b57cec5SDimitry Andric /// \param __b
4545*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16]. \n
4546*0b57cec5SDimitry Andric ///    Bits [79:64] are written to bits [31:16] of the result. \n
4547*0b57cec5SDimitry Andric ///    Bits [95:80] are written to bits [63:48] of the result. \n
4548*0b57cec5SDimitry Andric ///    Bits [111:96] are written to bits [95:80] of the result. \n
4549*0b57cec5SDimitry Andric ///    Bits [127:112] are written to bits [127:112] of the result.
4550*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4551*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4552*0b57cec5SDimitry Andric _mm_unpackhi_epi16(__m128i __a, __m128i __b)
4553*0b57cec5SDimitry Andric {
4554*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
4555*0b57cec5SDimitry Andric }
4556*0b57cec5SDimitry Andric 
4557*0b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
4558*0b57cec5SDimitry Andric ///    [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4559*0b57cec5SDimitry Andric ///
4560*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4561*0b57cec5SDimitry Andric ///
4562*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
4563*0b57cec5SDimitry Andric ///   instruction.
4564*0b57cec5SDimitry Andric ///
4565*0b57cec5SDimitry Andric /// \param __a
4566*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32]. \n
4567*0b57cec5SDimitry Andric ///    Bits [95:64] are written to bits [31:0] of the destination. \n
4568*0b57cec5SDimitry Andric ///    Bits [127:96] are written to bits [95:64] of the destination.
4569*0b57cec5SDimitry Andric /// \param __b
4570*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32]. \n
4571*0b57cec5SDimitry Andric ///    Bits [95:64] are written to bits [63:32] of the destination. \n
4572*0b57cec5SDimitry Andric ///    Bits [127:96] are written to bits [127:96] of the destination.
4573*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4574*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4575*0b57cec5SDimitry Andric _mm_unpackhi_epi32(__m128i __a, __m128i __b)
4576*0b57cec5SDimitry Andric {
4577*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
4578*0b57cec5SDimitry Andric }
4579*0b57cec5SDimitry Andric 
4580*0b57cec5SDimitry Andric /// Unpacks the high-order (index 1) values from two 128-bit vectors of
4581*0b57cec5SDimitry Andric ///    [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4582*0b57cec5SDimitry Andric ///
4583*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4584*0b57cec5SDimitry Andric ///
4585*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
4586*0b57cec5SDimitry Andric ///   instruction.
4587*0b57cec5SDimitry Andric ///
4588*0b57cec5SDimitry Andric /// \param __a
4589*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64]. \n
4590*0b57cec5SDimitry Andric ///    Bits [127:64] are written to bits [63:0] of the destination.
4591*0b57cec5SDimitry Andric /// \param __b
4592*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64]. \n
4593*0b57cec5SDimitry Andric ///    Bits [127:64] are written to bits [127:64] of the destination.
4594*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4595*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4596*0b57cec5SDimitry Andric _mm_unpackhi_epi64(__m128i __a, __m128i __b)
4597*0b57cec5SDimitry Andric {
4598*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
4599*0b57cec5SDimitry Andric }
4600*0b57cec5SDimitry Andric 
4601*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of
4602*0b57cec5SDimitry Andric ///    [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4603*0b57cec5SDimitry Andric ///
4604*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4605*0b57cec5SDimitry Andric ///
4606*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
4607*0b57cec5SDimitry Andric ///   instruction.
4608*0b57cec5SDimitry Andric ///
4609*0b57cec5SDimitry Andric /// \param __a
4610*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8]. \n
4611*0b57cec5SDimitry Andric ///    Bits [7:0] are written to bits [7:0] of the result. \n
4612*0b57cec5SDimitry Andric ///    Bits [15:8] are written to bits [23:16] of the result. \n
4613*0b57cec5SDimitry Andric ///    Bits [23:16] are written to bits [39:32] of the result. \n
4614*0b57cec5SDimitry Andric ///    Bits [31:24] are written to bits [55:48] of the result. \n
4615*0b57cec5SDimitry Andric ///    Bits [39:32] are written to bits [71:64] of the result. \n
4616*0b57cec5SDimitry Andric ///    Bits [47:40] are written to bits [87:80] of the result. \n
4617*0b57cec5SDimitry Andric ///    Bits [55:48] are written to bits [103:96] of the result. \n
4618*0b57cec5SDimitry Andric ///    Bits [63:56] are written to bits [119:112] of the result.
4619*0b57cec5SDimitry Andric /// \param __b
4620*0b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8]. \n
4621*0b57cec5SDimitry Andric ///    Bits [7:0] are written to bits [15:8] of the result. \n
4622*0b57cec5SDimitry Andric ///    Bits [15:8] are written to bits [31:24] of the result. \n
4623*0b57cec5SDimitry Andric ///    Bits [23:16] are written to bits [47:40] of the result. \n
4624*0b57cec5SDimitry Andric ///    Bits [31:24] are written to bits [63:56] of the result. \n
4625*0b57cec5SDimitry Andric ///    Bits [39:32] are written to bits [79:72] of the result. \n
4626*0b57cec5SDimitry Andric ///    Bits [47:40] are written to bits [95:88] of the result. \n
4627*0b57cec5SDimitry Andric ///    Bits [55:48] are written to bits [111:104] of the result. \n
4628*0b57cec5SDimitry Andric ///    Bits [63:56] are written to bits [127:120] of the result.
4629*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4630*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4631*0b57cec5SDimitry Andric _mm_unpacklo_epi8(__m128i __a, __m128i __b)
4632*0b57cec5SDimitry Andric {
4633*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
4634*0b57cec5SDimitry Andric }
4635*0b57cec5SDimitry Andric 
4636*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit
4637*0b57cec5SDimitry Andric ///    vectors of [8 x i16] and interleaves them into a 128-bit vector of
4638*0b57cec5SDimitry Andric ///    [8 x i16].
4639*0b57cec5SDimitry Andric ///
4640*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4641*0b57cec5SDimitry Andric ///
4642*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
4643*0b57cec5SDimitry Andric ///   instruction.
4644*0b57cec5SDimitry Andric ///
4645*0b57cec5SDimitry Andric /// \param __a
4646*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16]. \n
4647*0b57cec5SDimitry Andric ///    Bits [15:0] are written to bits [15:0] of the result. \n
4648*0b57cec5SDimitry Andric ///    Bits [31:16] are written to bits [47:32] of the result. \n
4649*0b57cec5SDimitry Andric ///    Bits [47:32] are written to bits [79:64] of the result. \n
4650*0b57cec5SDimitry Andric ///    Bits [63:48] are written to bits [111:96] of the result.
4651*0b57cec5SDimitry Andric /// \param __b
4652*0b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16]. \n
4653*0b57cec5SDimitry Andric ///    Bits [15:0] are written to bits [31:16] of the result. \n
4654*0b57cec5SDimitry Andric ///    Bits [31:16] are written to bits [63:48] of the result. \n
4655*0b57cec5SDimitry Andric ///    Bits [47:32] are written to bits [95:80] of the result. \n
4656*0b57cec5SDimitry Andric ///    Bits [63:48] are written to bits [127:112] of the result.
4657*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4658*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4659*0b57cec5SDimitry Andric _mm_unpacklo_epi16(__m128i __a, __m128i __b)
4660*0b57cec5SDimitry Andric {
4661*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
4662*0b57cec5SDimitry Andric }
4663*0b57cec5SDimitry Andric 
4664*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of
4665*0b57cec5SDimitry Andric ///    [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4666*0b57cec5SDimitry Andric ///
4667*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4668*0b57cec5SDimitry Andric ///
4669*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
4670*0b57cec5SDimitry Andric ///   instruction.
4671*0b57cec5SDimitry Andric ///
4672*0b57cec5SDimitry Andric /// \param __a
4673*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32]. \n
4674*0b57cec5SDimitry Andric ///    Bits [31:0] are written to bits [31:0] of the destination. \n
4675*0b57cec5SDimitry Andric ///    Bits [63:32] are written to bits [95:64] of the destination.
4676*0b57cec5SDimitry Andric /// \param __b
4677*0b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32]. \n
4678*0b57cec5SDimitry Andric ///    Bits [31:0] are written to bits [63:32] of the destination. \n
4679*0b57cec5SDimitry Andric ///    Bits [63:32] are written to bits [127:96] of the destination.
4680*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4681*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4682*0b57cec5SDimitry Andric _mm_unpacklo_epi32(__m128i __a, __m128i __b)
4683*0b57cec5SDimitry Andric {
4684*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
4685*0b57cec5SDimitry Andric }
4686*0b57cec5SDimitry Andric 
4687*0b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of
4688*0b57cec5SDimitry Andric ///    [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4689*0b57cec5SDimitry Andric ///
4690*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4691*0b57cec5SDimitry Andric ///
4692*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
4693*0b57cec5SDimitry Andric ///   instruction.
4694*0b57cec5SDimitry Andric ///
4695*0b57cec5SDimitry Andric /// \param __a
4696*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64]. \n
4697*0b57cec5SDimitry Andric ///    Bits [63:0] are written to bits [63:0] of the destination.
4698*0b57cec5SDimitry Andric /// \param __b
4699*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x i64]. \n
4700*0b57cec5SDimitry Andric ///    Bits [63:0] are written to bits [127:64] of the destination.
4701*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4702*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4703*0b57cec5SDimitry Andric _mm_unpacklo_epi64(__m128i __a, __m128i __b)
4704*0b57cec5SDimitry Andric {
4705*0b57cec5SDimitry Andric   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
4706*0b57cec5SDimitry Andric }
4707*0b57cec5SDimitry Andric 
4708*0b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
4709*0b57cec5SDimitry Andric ///    integer.
4710*0b57cec5SDimitry Andric ///
4711*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4712*0b57cec5SDimitry Andric ///
4713*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
4714*0b57cec5SDimitry Andric ///
4715*0b57cec5SDimitry Andric /// \param __a
4716*0b57cec5SDimitry Andric ///    A 128-bit integer vector operand. The lower 64 bits are moved to the
4717*0b57cec5SDimitry Andric ///    destination.
4718*0b57cec5SDimitry Andric /// \returns A 64-bit \c __m64 value holding the lower 64 bits of the parameter.
4719*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4720*0b57cec5SDimitry Andric _mm_movepi64_pi64(__m128i __a)
4721*0b57cec5SDimitry Andric {
4722*0b57cec5SDimitry Andric   return (__m64)__a[0];
4723*0b57cec5SDimitry Andric }
4724*0b57cec5SDimitry Andric 
4725*0b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
4726*0b57cec5SDimitry Andric ///    upper bits.
4727*0b57cec5SDimitry Andric ///
4728*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4729*0b57cec5SDimitry Andric ///
4730*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
4731*0b57cec5SDimitry Andric ///
4732*0b57cec5SDimitry Andric /// \param __a
4733*0b57cec5SDimitry Andric ///    A 64-bit value. It is written to bits [63:0] of the result.
4734*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4735*0b57cec5SDimitry Andric ///    the operand. The upper 64 bits are assigned zeros.
4736*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4737*0b57cec5SDimitry Andric _mm_movpi64_epi64(__m64 __a)
4738*0b57cec5SDimitry Andric {
4739*0b57cec5SDimitry Andric   return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };
4740*0b57cec5SDimitry Andric }
4741*0b57cec5SDimitry Andric 
4742*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
4743*0b57cec5SDimitry Andric ///    integer vector, zeroing the upper bits.
4744*0b57cec5SDimitry Andric ///
4745*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4746*0b57cec5SDimitry Andric ///
4747*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
4748*0b57cec5SDimitry Andric ///
4749*0b57cec5SDimitry Andric /// \param __a
4750*0b57cec5SDimitry Andric ///    A 128-bit integer vector operand. Bits [63:0] are copied to bits [63:0]
4751*0b57cec5SDimitry Andric ///    of the result.
4752*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4753*0b57cec5SDimitry Andric ///    the operand. The upper 64 bits are assigned zeros.
4754*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4755*0b57cec5SDimitry Andric _mm_move_epi64(__m128i __a)
4756*0b57cec5SDimitry Andric {
4757*0b57cec5SDimitry Andric   return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);
4758*0b57cec5SDimitry Andric }
4759*0b57cec5SDimitry Andric 
4760*0b57cec5SDimitry Andric /// Unpacks the high-order (index 1) elements from two 128-bit vectors of
4761*0b57cec5SDimitry Andric ///    [2 x double] and interleaves them into a 128-bit vector of [2 x
4762*0b57cec5SDimitry Andric ///    double].
4763*0b57cec5SDimitry Andric ///
4764*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4765*0b57cec5SDimitry Andric ///
4766*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
4767*0b57cec5SDimitry Andric ///
4768*0b57cec5SDimitry Andric /// \param __a
4769*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
4770*0b57cec5SDimitry Andric ///    Bits [127:64] are written to bits [63:0] of the destination.
4771*0b57cec5SDimitry Andric /// \param __b
4772*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
4773*0b57cec5SDimitry Andric ///    Bits [127:64] are written to bits [127:64] of the destination.
4774*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4775*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
4776*0b57cec5SDimitry Andric _mm_unpackhi_pd(__m128d __a, __m128d __b)
4777*0b57cec5SDimitry Andric {
4778*0b57cec5SDimitry Andric   return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
4779*0b57cec5SDimitry Andric }
4780*0b57cec5SDimitry Andric 
4781*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0) elements from two 128-bit vectors
4782*0b57cec5SDimitry Andric ///    of [2 x double] and interleaves them into a 128-bit vector of [2 x
4783*0b57cec5SDimitry Andric ///    double].
4784*0b57cec5SDimitry Andric ///
4785*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4786*0b57cec5SDimitry Andric ///
4787*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
4788*0b57cec5SDimitry Andric ///
4789*0b57cec5SDimitry Andric /// \param __a
4790*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
4791*0b57cec5SDimitry Andric ///    Bits [63:0] are written to bits [63:0] of the destination.
4792*0b57cec5SDimitry Andric /// \param __b
4793*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. \n
4794*0b57cec5SDimitry Andric ///    Bits [63:0] are written to bits [127:64] of the destination.
4795*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4796*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
4797*0b57cec5SDimitry Andric _mm_unpacklo_pd(__m128d __a, __m128d __b)
4798*0b57cec5SDimitry Andric {
4799*0b57cec5SDimitry Andric   return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
4800*0b57cec5SDimitry Andric }
4801*0b57cec5SDimitry Andric 
4802*0b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit
4803*0b57cec5SDimitry Andric ///    vector of [2 x double], zero-extends the value, and writes it to the
4804*0b57cec5SDimitry Andric ///    low-order bits of the destination.
4805*0b57cec5SDimitry Andric ///
4806*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4807*0b57cec5SDimitry Andric ///
4808*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.
4809*0b57cec5SDimitry Andric ///
4810*0b57cec5SDimitry Andric /// \param __a
4811*0b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the values with sign bits to
4812*0b57cec5SDimitry Andric ///    be extracted.
4813*0b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a,
4814*0b57cec5SDimitry Andric ///    written to bits [1:0]. The other bits are assigned zeros.
4815*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
4816*0b57cec5SDimitry Andric _mm_movemask_pd(__m128d __a)
4817*0b57cec5SDimitry Andric {
4818*0b57cec5SDimitry Andric   return __builtin_ia32_movmskpd((__v2df)__a);
4819*0b57cec5SDimitry Andric }
4820*0b57cec5SDimitry Andric 
4821*0b57cec5SDimitry Andric 
/// Constructs a 128-bit floating-point vector of [2 x double] from two
///    128-bit vector parameters of [2 x double], using the immediate-value
///    parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);
/// \endcode
///
/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param i
///    An 8-bit immediate value. The least significant two bits specify which
///    elements to copy from \a a and \a b: \n
///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
/* The whole expansion is parenthesized so that the macro acts as a single
   primary expression: without the outer parentheses the leading cast would
   bind more loosely than a postfix or unary operator applied at the use site
   (for example a subscript or sizeof). */
#define _mm_shuffle_pd(a, b, i) \
  ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), \
                                  (__v2df)(__m128d)(b), (int)(i)))
4849*0b57cec5SDimitry Andric 
4850*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4851*0b57cec5SDimitry Andric ///    floating-point vector of [4 x float].
4852*0b57cec5SDimitry Andric ///
4853*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4854*0b57cec5SDimitry Andric ///
4855*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4856*0b57cec5SDimitry Andric ///
4857*0b57cec5SDimitry Andric /// \param __a
4858*0b57cec5SDimitry Andric ///    A 128-bit floating-point vector of [2 x double].
4859*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same
4860*0b57cec5SDimitry Andric ///    bitwise pattern as the parameter.
4861*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
4862*0b57cec5SDimitry Andric _mm_castpd_ps(__m128d __a)
4863*0b57cec5SDimitry Andric {
4864*0b57cec5SDimitry Andric   return (__m128)__a;
4865*0b57cec5SDimitry Andric }
4866*0b57cec5SDimitry Andric 
4867*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4868*0b57cec5SDimitry Andric ///    integer vector.
4869*0b57cec5SDimitry Andric ///
4870*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4871*0b57cec5SDimitry Andric ///
4872*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4873*0b57cec5SDimitry Andric ///
4874*0b57cec5SDimitry Andric /// \param __a
4875*0b57cec5SDimitry Andric ///    A 128-bit floating-point vector of [2 x double].
4876*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the
4877*0b57cec5SDimitry Andric ///    parameter.
4878*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4879*0b57cec5SDimitry Andric _mm_castpd_si128(__m128d __a)
4880*0b57cec5SDimitry Andric {
4881*0b57cec5SDimitry Andric   return (__m128i)__a;
4882*0b57cec5SDimitry Andric }
4883*0b57cec5SDimitry Andric 
4884*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4885*0b57cec5SDimitry Andric ///    floating-point vector of [2 x double].
4886*0b57cec5SDimitry Andric ///
4887*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4888*0b57cec5SDimitry Andric ///
4889*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4890*0b57cec5SDimitry Andric ///
4891*0b57cec5SDimitry Andric /// \param __a
4892*0b57cec5SDimitry Andric ///    A 128-bit floating-point vector of [4 x float].
4893*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same
4894*0b57cec5SDimitry Andric ///    bitwise pattern as the parameter.
4895*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
4896*0b57cec5SDimitry Andric _mm_castps_pd(__m128 __a)
4897*0b57cec5SDimitry Andric {
4898*0b57cec5SDimitry Andric   return (__m128d)__a;
4899*0b57cec5SDimitry Andric }
4900*0b57cec5SDimitry Andric 
4901*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4902*0b57cec5SDimitry Andric ///    integer vector.
4903*0b57cec5SDimitry Andric ///
4904*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4905*0b57cec5SDimitry Andric ///
4906*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4907*0b57cec5SDimitry Andric ///
4908*0b57cec5SDimitry Andric /// \param __a
4909*0b57cec5SDimitry Andric ///    A 128-bit floating-point vector of [4 x float].
4910*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the
4911*0b57cec5SDimitry Andric ///    parameter.
4912*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4913*0b57cec5SDimitry Andric _mm_castps_si128(__m128 __a)
4914*0b57cec5SDimitry Andric {
4915*0b57cec5SDimitry Andric   return (__m128i)__a;
4916*0b57cec5SDimitry Andric }
4917*0b57cec5SDimitry Andric 
4918*0b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector
4919*0b57cec5SDimitry Andric ///    of [4 x float].
4920*0b57cec5SDimitry Andric ///
4921*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4922*0b57cec5SDimitry Andric ///
4923*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4924*0b57cec5SDimitry Andric ///
4925*0b57cec5SDimitry Andric /// \param __a
4926*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
4927*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same
4928*0b57cec5SDimitry Andric ///    bitwise pattern as the parameter.
4929*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
4930*0b57cec5SDimitry Andric _mm_castsi128_ps(__m128i __a)
4931*0b57cec5SDimitry Andric {
4932*0b57cec5SDimitry Andric   return (__m128)__a;
4933*0b57cec5SDimitry Andric }
4934*0b57cec5SDimitry Andric 
4935*0b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector
4936*0b57cec5SDimitry Andric ///    of [2 x double].
4937*0b57cec5SDimitry Andric ///
4938*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4939*0b57cec5SDimitry Andric ///
4940*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction.
4941*0b57cec5SDimitry Andric ///
4942*0b57cec5SDimitry Andric /// \param __a
4943*0b57cec5SDimitry Andric ///    A 128-bit integer vector.
4944*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same
4945*0b57cec5SDimitry Andric ///    bitwise pattern as the parameter.
4946*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
4947*0b57cec5SDimitry Andric _mm_castsi128_pd(__m128i __a)
4948*0b57cec5SDimitry Andric {
4949*0b57cec5SDimitry Andric   return (__m128d)__a;
4950*0b57cec5SDimitry Andric }
4951*0b57cec5SDimitry Andric 
#ifdef __cplusplus
extern "C" {
#endif

/// Signals that the caller is executing a spin loop, so that power
///    consumption can be optimized for the duration of the loop.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PAUSE </c> instruction.
///
void _mm_pause(void);

#ifdef __cplusplus
} /* extern "C" */
#endif
4968*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
4969*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX
4970*0b57cec5SDimitry Andric 
/* Packs two one-bit selectors into a two-bit immediate: x lands in bit 1 and
   y in bit 0. The result is suitable as the immediate of _mm_shuffle_pd. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
4972*0b57cec5SDimitry Andric 
/* Values of the denormals-are-zero flag within the word that is read by
   _mm_getcsr() and written by _mm_setcsr(). */
#define _MM_DENORMALS_ZERO_OFF  (0x0000)
#define _MM_DENORMALS_ZERO_ON   (0x0040)

/* Mask that isolates the denormals-are-zero flag. */
#define _MM_DENORMALS_ZERO_MASK (0x0040)

/* Yields the current flag setting: either _MM_DENORMALS_ZERO_ON or
   _MM_DENORMALS_ZERO_OFF. */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)

/* Clears the flag and then ORs in x. x is not masked, so pass only
   _MM_DENORMALS_ZERO_ON or _MM_DENORMALS_ZERO_OFF. */
#define _MM_SET_DENORMALS_ZERO_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
4980*0b57cec5SDimitry Andric 
4981*0b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */
4982