emmintrin.h - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/Headers/emmintrin.h

Lines Matching +full:2 +full:- +full:bit
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7  *===-----------------------------------------------------------------------===
54                  __target__("sse2,no-evex512"), __min_vector_width__(128)))
57                  __target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
59 /// Adds lower double-precision values in both operands and returns the
61 ///    are copied from the upper double-precision value of the first operand.
68 ///    A 128-bit vector of [2 x double] containing one of the source operands.
70 ///    A 128-bit vector of [2 x double] containing one of the source operands.
71 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
80 /// Adds two 128-bit vectors of [2 x double].
87 ///    A 128-bit vector of [2 x double] containing one of the source operands.
89 ///    A 128-bit vector of [2 x double] containing one of the source operands.
90 /// \returns A 128-bit vector of [2 x double] containing the sums of both
97 /// Subtracts the lower double-precision value of the second operand
98 ///    from the lower double-precision value of the first operand and returns
100 ///    the result are copied from the upper double-precision value of the first
108 ///    A 128-bit vector of [2 x double] containing the minuend.
110 ///    A 128-bit vector of [2 x double] containing the subtrahend.
111 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
116   __a[0] -= __b[0];  in _mm_sub_sd()
120 /// Subtracts two 128-bit vectors of [2 x double].
127 ///    A 128-bit vector of [2 x double] containing the minuend.
129 ///    A 128-bit vector of [2 x double] containing the subtrahend.
130 /// \returns A 128-bit vector of [2 x double] containing the differences between
134   return (__m128d)((__v2df)__a - (__v2df)__b);  in _mm_sub_pd()
137 /// Multiplies lower double-precision values in both operands and returns
139 ///    result are copied from the upper double-precision value of the first
147 ///    A 128-bit vector of [2 x double] containing one of the source operands.
149 ///    A 128-bit vector of [2 x double] containing one of the source operands.
150 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
159 /// Multiplies two 128-bit vectors of [2 x double].
166 ///    A 128-bit vector of [2 x double] containing one of the operands.
168 ///    A 128-bit vector of [2 x double] containing one of the operands.
169 /// \returns A 128-bit vector of [2 x double] containing the products of both
176 /// Divides the lower double-precision value of the first operand by the
177 ///    lower double-precision value of the second operand and returns the
179 ///    result are copied from the upper double-precision value of the first
187 ///    A 128-bit vector of [2 x double] containing the dividend.
189 ///    A 128-bit vector of [2 x double] containing the divisor.
190 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
199 /// Performs an element-by-element division of two 128-bit vectors of
200 ///    [2 x double].
207 ///    A 128-bit vector of [2 x double] containing the dividend.
209 ///    A 128-bit vector of [2 x double] containing the divisor.
210 /// \returns A 128-bit vector of [2 x double] containing the quotients of both
217 /// Calculates the square root of the lower double-precision value of
220 ///    double-precision value of the first operand.
227 ///    A 128-bit vector of [2 x double] containing one of the operands. The
231 ///    A 128-bit vector of [2 x double] containing one of the operands. The
233 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
243 ///    128-bit vector of [2 x double].
250 ///    A 128-bit vector of [2 x double].
251 /// \returns A 128-bit vector of [2 x double] containing the square roots of the
257 /// Compares lower 64-bit double-precision values of both operands, and
258 ///    returns the lesser of the pair of values in the lower 64-bits of the
260 ///    double-precision value of the first operand.
269 ///    A 128-bit vector of [2 x double] containing one of the operands. The
272 ///    A 128-bit vector of [2 x double] containing one of the operands. The
274 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
282 /// Performs element-by-element comparison of the two 128-bit vectors of
283 ///    [2 x double] and returns a vector containing the lesser of each pair of
293 ///    A 128-bit vector of [2 x double] containing one of the operands.
295 ///    A 128-bit vector of [2 x double] containing one of the operands.
296 /// \returns A 128-bit vector of [2 x double] containing the minimum values
303 /// Compares lower 64-bit double-precision values of both operands, and
304 ///    returns the greater of the pair of values in the lower 64-bits of the
306 ///    double-precision value of the first operand.
315 ///    A 128-bit vector of [2 x double] containing one of the operands. The
318 ///    A 128-bit vector of [2 x double] containing one of the operands. The
320 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
328 /// Performs element-by-element comparison of the two 128-bit vectors of
329 ///    [2 x double] and returns a vector containing the greater of each pair
339 ///    A 128-bit vector of [2 x double] containing one of the operands.
341 ///    A 128-bit vector of [2 x double] containing one of the operands.
342 /// \returns A 128-bit vector of [2 x double] containing the maximum values
349 /// Performs a bitwise AND of two 128-bit vectors of [2 x double].
356 ///    A 128-bit vector of [2 x double] containing one of the source operands.
358 ///    A 128-bit vector of [2 x double] containing one of the source operands.
359 /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
366 /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using
374 ///    A 128-bit vector of [2 x double] containing the left source operand. The
377 ///    A 128-bit vector of [2 x double] containing the right source operand.
378 /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
386 /// Performs a bitwise OR of two 128-bit vectors of [2 x double].
393 ///    A 128-bit vector of [2 x double] containing one of the source operands.
395 ///    A 128-bit vector of [2 x double] containing one of the source operands.
396 /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
403 /// Performs a bitwise XOR of two 128-bit vectors of [2 x double].
410 ///    A 128-bit vector of [2 x double] containing one of the source operands.
412 ///    A 128-bit vector of [2 x double] containing one of the source operands.
413 /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
420 /// Compares each of the corresponding double-precision values of the
421 ///    128-bit vectors of [2 x double] for equality.
431 ///    A 128-bit vector of [2 x double].
433 ///    A 128-bit vector of [2 x double].
434 /// \returns A 128-bit vector containing the comparison results.
440 /// Compares each of the corresponding double-precision values of the
441 ///    128-bit vectors of [2 x double] to determine if the values in the first
452 ///    A 128-bit vector of [2 x double].
454 ///    A 128-bit vector of [2 x double].
455 /// \returns A 128-bit vector containing the comparison results.
461 /// Compares each of the corresponding double-precision values of the
462 ///    128-bit vectors of [2 x double] to determine if the values in the first
473 ///    A 128-bit vector of [2 x double].
475 ///    A 128-bit vector of [2 x double].
476 /// \returns A 128-bit vector containing the comparison results.
482 /// Compares each of the corresponding double-precision values of the
483 ///    128-bit vectors of [2 x double] to determine if the values in the first
494 ///    A 128-bit vector of [2 x double].
496 ///    A 128-bit vector of [2 x double].
497 /// \returns A 128-bit vector containing the comparison results.
503 /// Compares each of the corresponding double-precision values of the
504 ///    128-bit vectors of [2 x double] to determine if the values in the first
515 ///    A 128-bit vector of [2 x double].
517 ///    A 128-bit vector of [2 x double].
518 /// \returns A 128-bit vector containing the comparison results.
524 /// Compares each of the corresponding double-precision values of the
525 ///    128-bit vectors of [2 x double] to determine if the values in the first
528 ///    A pair of double-precision values are ordered with respect to each
537 ///    A 128-bit vector of [2 x double].
539 ///    A 128-bit vector of [2 x double].
540 /// \returns A 128-bit vector containing the comparison results.
546 /// Compares each of the corresponding double-precision values of the
547 ///    128-bit vectors of [2 x double] to determine if the values in the first
550 ///    A pair of double-precision values are unordered with respect to each
560 ///    A 128-bit vector of [2 x double].
562 ///    A 128-bit vector of [2 x double].
563 /// \returns A 128-bit vector containing the comparison results.
569 /// Compares each of the corresponding double-precision values of the
570 ///    128-bit vectors of [2 x double] to determine if the values in the first
581 ///    A 128-bit vector of [2 x double].
583 ///    A 128-bit vector of [2 x double].
584 /// \returns A 128-bit vector containing the comparison results.
590 /// Compares each of the corresponding double-precision values of the
591 ///    128-bit vectors of [2 x double] to determine if the values in the first
602 ///    A 128-bit vector of [2 x double].
604 ///    A 128-bit vector of [2 x double].
605 /// \returns A 128-bit vector containing the comparison results.
611 /// Compares each of the corresponding double-precision values of the
612 ///    128-bit vectors of [2 x double] to determine if the values in the first
623 ///    A 128-bit vector of [2 x double].
625 ///    A 128-bit vector of [2 x double].
626 /// \returns A 128-bit vector containing the comparison results.
632 /// Compares each of the corresponding double-precision values of the
633 ///    128-bit vectors of [2 x double] to determine if the values in the first
644 ///    A 128-bit vector of [2 x double].
646 ///    A 128-bit vector of [2 x double].
647 /// \returns A 128-bit vector containing the comparison results.
653 /// Compares each of the corresponding double-precision values of the
654 ///    128-bit vectors of [2 x double] to determine if the values in the first
665 ///    A 128-bit vector of [2 x double].
667 ///    A 128-bit vector of [2 x double].
668 /// \returns A 128-bit vector containing the comparison results.
674 /// Compares the lower double-precision floating-point values in each of
675 ///    the two 128-bit floating-point vectors of [2 x double] for equality.
685 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
686 ///    compared to the lower double-precision value of \a __b.
688 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
689 ///    compared to the lower double-precision value of \a __a.
690 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
697 /// Compares the lower double-precision floating-point values in each of
698 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
710 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
711 ///    compared to the lower double-precision value of \a __b.
713 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
714 ///    compared to the lower double-precision value of \a __a.
715 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
722 /// Compares the lower double-precision floating-point values in each of
723 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
735 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
736 ///    compared to the lower double-precision value of \a __b.
738 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
739 ///    compared to the lower double-precision value of \a __a.
740 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
747 /// Compares the lower double-precision floating-point values in each of
748 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
760 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
761 ///     compared to the lower double-precision value of \a __b.
763 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
764 ///     compared to the lower double-precision value of \a __a.
765 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
773 /// Compares the lower double-precision floating-point values in each of
774 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
786 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
787 ///    compared to the lower double-precision value of \a __b.
789 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
790 ///    compared to the lower double-precision value of \a __a.
791 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
799 /// Compares the lower double-precision floating-point values in each of
800 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
805 ///    of double-precision values are ordered with respect to each other if
813 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
814 ///    compared to the lower double-precision value of \a __b.
816 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
817 ///    compared to the lower double-precision value of \a __a.
818 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
825 /// Compares the lower double-precision floating-point values in each of
826 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
831 ///    of double-precision values are unordered with respect to each other if
840 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
841 ///    compared to the lower double-precision value of \a __b.
843 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
844 ///    compared to the lower double-precision value of \a __a.
845 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
852 /// Compares the lower double-precision floating-point values in each of
853 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
865 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
866 ///    compared to the lower double-precision value of \a __b.
868 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
869 ///    compared to the lower double-precision value of \a __a.
870 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
877 /// Compares the lower double-precision floating-point values in each of
878 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
890 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
891 ///    compared to the lower double-precision value of \a __b.
893 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
894 ///    compared to the lower double-precision value of \a __a.
895 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
902 /// Compares the lower double-precision floating-point values in each of
903 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
915 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
916 ///    compared to the lower double-precision value of \a __b.
918 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
919 ///    compared to the lower double-precision value of \a __a.
920 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
927 /// Compares the lower double-precision floating-point values in each of
928 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
940 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
941 ///    compared to the lower double-precision value of \a __b.
943 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
944 ///    compared to the lower double-precision value of \a __a.
945 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
953 /// Compares the lower double-precision floating-point values in each of
954 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
966 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
967 ///    compared to the lower double-precision value of \a __b.
969 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
970 ///    compared to the lower double-precision value of \a __a.
971 /// \returns A 128-bit vector. The lower 64 bits contain the comparison
979 /// Compares the lower double-precision floating-point values in each of
980 ///    the two 128-bit floating-point vectors of [2 x double] for equality.
990 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
991 ///    compared to the lower double-precision value of \a __b.
993 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
994 ///    compared to the lower double-precision value of \a __a.
1001 /// Compares the lower double-precision floating-point values in each of
1002 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1014 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1015 ///    compared to the lower double-precision value of \a __b.
1017 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1018 ///    compared to the lower double-precision value of \a __a.
1025 /// Compares the lower double-precision floating-point values in each of
1026 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1038 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1039 ///    compared to the lower double-precision value of \a __b.
1041 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1042 ///     compared to the lower double-precision value of \a __a.
1049 /// Compares the lower double-precision floating-point values in each of
1050 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1062 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1063 ///    compared to the lower double-precision value of \a __b.
1065 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1066 ///    compared to the lower double-precision value of \a __a.
1073 /// Compares the lower double-precision floating-point values in each of
1074 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1086 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1087 ///    compared to the lower double-precision value of \a __b.
1089 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1090 ///    compared to the lower double-precision value of \a __a.
1097 /// Compares the lower double-precision floating-point values in each of
1098 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1110 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1111 ///    compared to the lower double-precision value of \a __b.
1113 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1114 ///    compared to the lower double-precision value of \a __a.
1121 /// Compares the lower double-precision floating-point values in each of
1122 ///    the two 128-bit floating-point vectors of [2 x double] for equality.
1132 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1133 ///    compared to the lower double-precision value of \a __b.
1135 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1136 ///    compared to the lower double-precision value of \a __a.
1143 /// Compares the lower double-precision floating-point values in each of
1144 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1156 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1157 ///    compared to the lower double-precision value of \a __b.
1159 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1160 ///    compared to the lower double-precision value of \a __a.
1167 /// Compares the lower double-precision floating-point values in each of
1168 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1180 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1181 ///    compared to the lower double-precision value of \a __b.
1183 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1184 ///     compared to the lower double-precision value of \a __a.
1191 /// Compares the lower double-precision floating-point values in each of
1192 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1204 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1205 ///    compared to the lower double-precision value of \a __b.
1207 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
1208 ///     compared to the lower double-precision value of \a __a.
1215 /// Compares the lower double-precision floating-point values in each of
1216 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1228 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1229 ///    compared to the lower double-precision value of \a __b.
1231 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1232 ///    compared to the lower double-precision value of \a __a.
1239 /// Compares the lower double-precision floating-point values in each of
1240 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
1252 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1253 ///    compared to the lower double-precision value of \a __b.
1255 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
1256 ///    compared to the lower double-precision value of \a __a.
1263 /// Converts the two double-precision floating-point elements of a
1264 ///    128-bit vector of [2 x double] into two single-precision floating-point
1265 ///    values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
1273 ///    A 128-bit vector of [2 x double].
1274 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1280 /// Converts the lower two single-precision floating-point elements of a
1281 ///    128-bit vector of [4 x float] into two double-precision floating-point
1282 ///    values, returned in a 128-bit vector of [2 x double]. The upper two
1290 ///    A 128-bit vector of [4 x float]. The lower two single-precision
1291 ///    floating-point elements are converted to double-precision values. The
1293 /// \returns A 128-bit vector of [2 x double] containing the converted values.
1299 /// Converts the lower two integer elements of a 128-bit vector of
1300 ///    [4 x i32] into two double-precision floating-point values, returned in a
1301 ///    128-bit vector of [2 x double].
1310 ///    A 128-bit integer vector of [4 x i32]. The lower two integer elements are
1311 ///    converted to double-precision values.
1314 /// \returns A 128-bit vector of [2 x double] containing the converted values.
1320 /// Converts the two double-precision floating-point elements of a
1321 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1322 ///    returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
1325 ///    If a converted value does not fit in a 32-bit integer, raises a
1326 ///    floating-point invalid exception. If the exception is masked, returns
1334 ///    A 128-bit vector of [2 x double].
1335 /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1341 /// Converts the low-order element of a 128-bit vector of [2 x double]
1342 ///    into a 32-bit signed integer value.
1344 ///    If the converted value does not fit in a 32-bit integer, raises a
1345 ///    floating-point invalid exception. If the exception is masked, returns
1353 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1355 /// \returns A 32-bit signed integer containing the converted value.
1360 /// Converts the lower double-precision floating-point element of a
1361 ///    128-bit vector of [2 x double], in the second parameter, into a
1362 ///    single-precision floating-point value, returned in the lower 32 bits of a
1363 ///    128-bit vector of [4 x float]. The upper 96 bits of the result vector are
1371 ///    A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are
1374 ///    A 128-bit vector of [2 x double]. The lower double-precision
1375 ///    floating-point element is used in the conversion.
1376 /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
1384 /// Converts a 32-bit signed integer value, in the second parameter, into
1385 ///    a double-precision floating-point value, returned in the lower 64 bits of
1386 ///    a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1394 ///    A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1397 ///    A 32-bit signed integer containing the value to be converted.
1398 /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1407 /// Converts the lower single-precision floating-point element of a
1408 ///    128-bit vector of [4 x float], in the second parameter, into a
1409 ///    double-precision floating-point value, returned in the lower 64 bits of
1410 ///    a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1418 ///    A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1421 ///    A 128-bit vector of [4 x float]. The lower single-precision
1422 ///    floating-point element is used in the conversion.
1423 /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1432 /// Converts the two double-precision floating-point elements of a
1433 ///    128-bit vector of [2 x double] into two signed truncated (rounded
1434 ///    toward zero) 32-bit integer values, returned in the lower 64 bits
1435 ///    of a 128-bit vector of [4 x i32].
1437 ///    If a converted value does not fit in a 32-bit integer, raises a
1438 ///    floating-point invalid exception. If the exception is masked, returns
1447 ///    A 128-bit vector of [2 x double].
1448 /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1454 /// Converts the low-order element of a [2 x double] vector into a 32-bit
1457 ///    If the converted value does not fit in a 32-bit integer, raises a
1458 ///    floating-point invalid exception. If the exception is masked, returns
1467 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1469 /// \returns A 32-bit signed integer containing the converted value.
1474 /// Converts the two double-precision floating-point elements of a
1475 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
1476 ///    returned in a 64-bit vector of [2 x i32].
1478 ///    If a converted value does not fit in a 32-bit integer, raises a
1479 ///    floating-point invalid exception. If the exception is masked, returns
1487 ///    A 128-bit vector of [2 x double].
1488 /// \returns A 64-bit vector of [2 x i32] containing the converted values.
1493 /// Converts the two double-precision floating-point elements of a
1494 ///    128-bit vector of [2 x double] into two signed truncated (rounded toward
1495 ///    zero) 32-bit integer values, returned in a 64-bit vector of [2 x i32].
1497 ///    If a converted value does not fit in a 32-bit integer, raises a
1498 ///    floating-point invalid exception. If the exception is masked, returns
1506 ///    A 128-bit vector of [2 x double].
1507 /// \returns A 64-bit vector of [2 x i32] containing the converted values.
1512 /// Converts the two signed 32-bit integer elements of a 64-bit vector of
1513 ///    [2 x i32] into two double-precision floating-point values, returned in a
1514 ///    128-bit vector of [2 x double].
1521 ///    A 64-bit vector of [2 x i32].
1522 /// \returns A 128-bit vector of [2 x double] containing the converted values.
1527 /// Returns the low-order element of a 128-bit vector of [2 x double] as
1528 ///    a double-precision floating-point value.
1535 ///    A 128-bit vector of [2 x double]. The lower 64 bits are returned.
1536 /// \returns A double-precision floating-point value copied from the lower 64
1542 /// Loads a 128-bit floating-point vector of [2 x double] from an aligned
1550 ///    A pointer to a 128-bit memory location. The address of the memory
1551 ///    location has to be 16-byte aligned.
1552 /// \returns A 128-bit vector of [2 x double] containing the loaded values.
1557 /// Loads a double-precision floating-point value from a specified memory
1558 ///    location and duplicates it to both vector elements of a 128-bit vector of
1559 ///    [2 x double].
1566 ///    A pointer to a memory location containing a double-precision value.
1567 /// \returns A 128-bit vector of [2 x double] containing the loaded and
1573   double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u;  in _mm_load1_pd()
1579 /// Loads two double-precision values, in reverse order, from an aligned
1580 ///    memory location into a 128-bit vector of [2 x double].
1589 ///    A 16-byte aligned pointer to an array of double-precision values to be
1591 /// \returns A 128-bit vector of [2 x double] containing the reversed loaded
1598 /// Loads a 128-bit floating-point vector of [2 x double] from an
1606 ///    A pointer to a 128-bit memory location. The address of the memory
1608 /// \returns A 128-bit vector of [2 x double] containing the loaded values.
1613   return ((const struct __loadu_pd *)__dp)->__v;  in _mm_loadu_pd()
1616 /// Loads a 64-bit integer value to the low element of a 128-bit integer
1624 ///    A pointer to a 64-bit memory location. The address of the memory
1626 /// \returns A 128-bit vector of [2 x i64] containing the loaded value.
1631   long long __u = ((const struct __loadu_si64 *)__a)->__v;  in _mm_loadu_si64()
1635 /// Loads a 32-bit integer value to the low element of a 128-bit integer
1643 ///    A pointer to a 32-bit memory location. The address of the memory
1645 /// \returns A 128-bit vector of [4 x i32] containing the loaded value.
1650   int __u = ((const struct __loadu_si32 *)__a)->__v;  in _mm_loadu_si32()
1654 /// Loads a 16-bit integer value to the low element of a 128-bit integer
1662 ///    A pointer to a 16-bit memory location. The address of the memory
1664 /// \returns A 128-bit vector of [8 x i16] containing the loaded value.
1669   short __u = ((const struct __loadu_si16 *)__a)->__v;  in _mm_loadu_si16()
1673 /// Loads a 64-bit double-precision value to the low element of a
1674 ///    128-bit vector of [2 x double] and clears the upper element.
1681 ///    A pointer to a memory location containing a double-precision value.
1683 /// \returns A 128-bit vector of [2 x double] containing the loaded value.
1688   double __u = ((const struct __mm_load_sd_struct *)__dp)->__u;  in _mm_load_sd()
1692 /// Loads a double-precision value into the high-order bits of a 128-bit
1693 ///    vector of [2 x double]. The low-order bits are copied from the low-order
1701 ///    A 128-bit vector of [2 x double]. \n
1704 ///    A pointer to a 64-bit memory location containing a double-precision
1705 ///    floating-point value that is loaded. The loaded value is written to bits
1708 /// \returns A 128-bit vector of [2 x double] containing the moved values.
1714   double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u;  in _mm_loadh_pd()
1718 /// Loads a double-precision value into the low-order bits of a 128-bit
1719 ///    vector of [2 x double]. The high-order bits are copied from the
1720 ///    high-order bits of the first operand.
1727 ///    A 128-bit vector of [2 x double]. \n
1730 ///    A pointer to a 64-bit memory location containing a double-precision
1731 ///    floating-point value that is loaded. The loaded value is written to bits
1734 /// \returns A 128-bit vector of [2 x double] containing the moved values.
1740   double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u;  in _mm_loadl_pd()
1744 /// Constructs a 128-bit floating-point vector of [2 x double] with
1753 /// \returns A 128-bit floating-point vector of [2 x double] with unspecified
1759 /// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1760 ///    64 bits of the vector are initialized with the specified double-precision
1761 ///    floating-point value. The upper 64 bits are set to zero.
1768 ///    A double-precision floating-point value used to initialize the lower 64
1770 /// \returns An initialized 128-bit floating-point vector of [2 x double]. The
1777 /// Constructs a 128-bit floating-point vector of [2 x double], with each
1778 ///    of the two double-precision floating-point vector elements set to the
1779 ///    specified double-precision floating-point value.
1786 ///    A double-precision floating-point value used to initialize each vector
1788 /// \returns An initialized 128-bit floating-point vector of [2 x double].
1793 /// Constructs a 128-bit floating-point vector of [2 x double], with each
1794 ///    of the two double-precision floating-point vector elements set to the
1795 ///    specified double-precision floating-point value.
1802 ///    A double-precision floating-point value used to initialize each vector
1804 /// \returns An initialized 128-bit floating-point vector of [2 x double].
1809 /// Constructs a 128-bit floating-point vector of [2 x double]
1810 ///    initialized with the specified double-precision floating-point values.
1817 ///    A double-precision floating-point value used to initialize the upper 64
1820 ///    A double-precision floating-point value used to initialize the lower 64
1822 /// \returns An initialized 128-bit floating-point vector of [2 x double].
1828 /// Constructs a 128-bit floating-point vector of [2 x double],
1829 ///    initialized in reverse order with the specified double-precision
1830 ///    floating-point values.
1837 ///    A double-precision floating-point value used to initialize the lower 64
1840 ///    A double-precision floating-point value used to initialize the upper 64
1842 /// \returns An initialized 128-bit floating-point vector of [2 x double].
1848 /// Constructs a 128-bit floating-point vector of [2 x double]
1855 /// \returns An initialized 128-bit floating-point vector of [2 x double] with
1861 /// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1870 ///    A 128-bit vector of [2 x double]. The upper 64 bits are written to the
1873 ///    A 128-bit vector of [2 x double]. The lower 64 bits are written to the
1875 /// \returns A 128-bit vector of [2 x double] containing the moved values.
1882 /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
1890 ///    A pointer to a 64-bit memory location.
1892 ///    A 128-bit vector of [2 x double] containing the value to be stored.
1898   ((struct __mm_store_sd_struct *)__dp)->__u = __a[0];  in _mm_store_sd()
1901 /// Moves packed double-precision values from a 128-bit vector of
1902 ///    [2 x double] to a memory location.
1910 ///    double-precision values.
1912 ///    A packed 128-bit vector of [2 x double] containing the values to be
1919 /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1928 ///    A pointer to a memory location that can store two double-precision
1931 ///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
1939 /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1948 ///    A pointer to a memory location that can store two double-precision
1951 ///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
1958 /// Stores a 128-bit vector of [2 x double] into an unaligned memory
1966 ///    A pointer to a 128-bit memory location. The address of the memory
1969 ///    A 128-bit vector of [2 x double] containing the values to be stored.
1975   ((struct __storeu_pd *)__dp)->__v = __a;  in _mm_storeu_pd()
1978 /// Stores two double-precision values, in reverse order, from a 128-bit
1979 ///    vector of [2 x double] to a 16-byte aligned memory location.
1987 ///    A pointer to a 16-byte aligned memory location that can store two
1988 ///    double-precision values.
1990 ///    A 128-bit vector of [2 x double] containing the values to be reversed and
1998 /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
2006 ///    A pointer to a 64-bit memory location.
2008 ///    A 128-bit vector of [2 x double] containing the value to be stored.
2014   ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1];  in _mm_storeh_pd()
2017 /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
2025 ///    A pointer to a 64-bit memory location.
2027 ///    A 128-bit vector of [2 x double] containing the value to be stored.
2033   ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0];  in _mm_storel_pd()
2036 /// Adds the corresponding elements of two 128-bit vectors of [16 x i8],
2038 ///    128-bit result vector of [16 x i8].
2047 ///    A 128-bit vector of [16 x i8].
2049 ///    A 128-bit vector of [16 x i8].
2050 /// \returns A 128-bit vector of [16 x i8] containing the sums of both
2057 /// Adds the corresponding elements of two 128-bit vectors of [8 x i16],
2059 ///    128-bit result vector of [8 x i16].
2068 ///    A 128-bit vector of [8 x i16].
2070 ///    A 128-bit vector of [8 x i16].
2071 /// \returns A 128-bit vector of [8 x i16] containing the sums of both
2078 /// Adds the corresponding elements of two 128-bit vectors of [4 x i32],
2080 ///    128-bit result vector of [4 x i32].
2089 ///    A 128-bit vector of [4 x i32].
2091 ///    A 128-bit vector of [4 x i32].
2092 /// \returns A 128-bit vector of [4 x i32] containing the sums of both
2099 /// Adds two signed or unsigned 64-bit integer values, returning the
2107 ///    A 64-bit integer.
2109 ///    A 64-bit integer.
2110 /// \returns A 64-bit integer containing the sum of both parameters.
2116 /// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
2118 ///    128-bit result vector of [2 x i64].
2127 ///    A 128-bit vector of [2 x i64].
2129 ///    A 128-bit vector of [2 x i64].
2130 /// \returns A 128-bit vector of [2 x i64] containing the sums of both
2137 /// Adds, with saturation, the corresponding elements of two 128-bit
2139 ///    of a 128-bit result vector of [16 x i8].
2149 ///    A 128-bit signed [16 x i8] vector.
2151 ///    A 128-bit signed [16 x i8] vector.
2152 /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
2159 /// Adds, with saturation, the corresponding elements of two 128-bit
2161 ///    of a 128-bit result vector of [8 x i16].
2171 ///    A 128-bit signed [8 x i16] vector.
2173 ///    A 128-bit signed [8 x i16] vector.
2174 /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
2181 /// Adds, with saturation, the corresponding elements of two 128-bit
2183 ///    of a 128-bit result vector of [16 x i8].
2193 ///    A 128-bit unsigned [16 x i8] vector.
2195 ///    A 128-bit unsigned [16 x i8] vector.
2196 /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
2203 /// Adds, with saturation, the corresponding elements of two 128-bit
2205 ///    of a 128-bit result vector of [8 x i16].
2215 ///    A 128-bit unsigned [8 x i16] vector.
2217 ///    A 128-bit unsigned [8 x i16] vector.
2218 /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
2226 ///    128-bit unsigned [16 x i8] vectors, saving each result in the
2227 ///    corresponding element of a 128-bit result vector of [16 x i8].
2234 ///    A 128-bit unsigned [16 x i8] vector.
2236 ///    A 128-bit unsigned [16 x i8] vector.
2237 /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
2245 ///    128-bit unsigned [8 x i16] vectors, saving each result in the
2246 ///    corresponding element of a 128-bit result vector of [8 x i16].
2253 ///    A 128-bit unsigned [8 x i16] vector.
2255 ///    A 128-bit unsigned [8 x i16] vector.
2256 /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
2263 /// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
2264 ///    vectors, producing eight intermediate 32-bit signed integer products, and
2265 ///    adds the consecutive pairs of 32-bit products to form a 128-bit signed
2269 ///    32-bit product, bits [31:16] of both parameters are multiplied producing
2270 ///    a 32-bit product, and the sum of those two products becomes bits [31:0]
2278 ///    A 128-bit signed [8 x i16] vector.
2280 ///    A 128-bit signed [8 x i16] vector.
2281 /// \returns A 128-bit signed [4 x i32] vector containing the sums of products
2288 /// Compares corresponding elements of two 128-bit signed [8 x i16]
2290 ///    corresponding element of a 128-bit result vector of [8 x i16].
2297 ///    A 128-bit signed [8 x i16] vector.
2299 ///    A 128-bit signed [8 x i16] vector.
2300 /// \returns A 128-bit signed [8 x i16] vector containing the greater value of
2307 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2309 ///    corresponding element of a 128-bit result vector of [16 x i8].
2316 ///    A 128-bit unsigned [16 x i8] vector.
2318 ///    A 128-bit unsigned [16 x i8] vector.
2319 /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
2326 /// Compares corresponding elements of two 128-bit signed [8 x i16]
2328 ///    corresponding element of a 128-bit result vector of [8 x i16].
2335 ///    A 128-bit signed [8 x i16] vector.
2337 ///    A 128-bit signed [8 x i16] vector.
2338 /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
2345 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2347 ///    corresponding element of a 128-bit result vector of [16 x i8].
2354 ///    A 128-bit unsigned [16 x i8] vector.
2356 ///    A 128-bit unsigned [16 x i8] vector.
2357 /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
2365 ///    vectors, saving the upper 16 bits of each 32-bit product in the
2366 ///    corresponding element of a 128-bit signed [8 x i16] result vector.
2373 ///    A 128-bit signed [8 x i16] vector.
2375 ///    A 128-bit signed [8 x i16] vector.
2376 /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
2377 ///    each of the eight 32-bit products.
2384 ///    vectors, saving the upper 16 bits of each 32-bit product in the
2385 ///    corresponding element of a 128-bit unsigned [8 x i16] result vector.
2392 ///    A 128-bit unsigned [8 x i16] vector.
2394 ///    A 128-bit unsigned [8 x i16] vector.
2395 /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
2396 ///    of each of the eight 32-bit products.
2403 ///    vectors, saving the lower 16 bits of each 32-bit product in the
2404 ///    corresponding element of a 128-bit signed [8 x i16] result vector.
2411 ///    A 128-bit signed [8 x i16] vector.
2413 ///    A 128-bit signed [8 x i16] vector.
2414 /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
2415 ///    each of the eight 32-bit products.
2421 /// Multiplies 32-bit unsigned integer values contained in the lower bits
2422 ///    of the two 64-bit integer vectors and returns the 64-bit unsigned
2430 ///    A 64-bit integer containing one of the source operands.
2432 ///    A 64-bit integer containing one of the source operands.
2433 /// \returns A 64-bit integer vector containing the product of both operands.
2439 /// Multiplies 32-bit unsigned integer values contained in the lower
2440 ///    bits of the corresponding elements of two [2 x i64] vectors, and returns
2441 ///    the 64-bit products in the corresponding elements of a [2 x i64] vector.
2448 ///    A [2 x i64] vector containing one of the source operands.
2450 ///    A [2 x i64] vector containing one of the source operands.
2451 /// \returns A [2 x i64] vector containing the product of both operands.
2457 /// Computes the absolute differences of corresponding 8-bit integer
2458 ///    values in two 128-bit vectors. Sums the first 8 absolute differences, and
2460 ///    unsigned 16-bit integer sums into the upper and lower elements of a
2461 ///    [2 x i64] vector.
2468 ///    A 128-bit integer vector containing one of the source operands.
2470 ///    A 128-bit integer vector containing one of the source operands.
2471 /// \returns A [2 x i64] vector containing the sums of the sets of absolute
2478 /// Subtracts the corresponding 8-bit integer values in the operands.
2485 ///    A 128-bit integer vector containing the minuends.
2487 ///    A 128-bit integer vector containing the subtrahends.
2488 /// \returns A 128-bit integer vector containing the differences of the values
2492   return (__m128i)((__v16qu)__a - (__v16qu)__b);  in _mm_sub_epi8()
2495 /// Subtracts the corresponding 16-bit integer values in the operands.
2502 ///    A 128-bit integer vector containing the minuends.
2504 ///    A 128-bit integer vector containing the subtrahends.
2505 /// \returns A 128-bit integer vector containing the differences of the values
2509   return (__m128i)((__v8hu)__a - (__v8hu)__b);  in _mm_sub_epi16()
2512 /// Subtracts the corresponding 32-bit integer values in the operands.
2519 ///    A 128-bit integer vector containing the minuends.
2521 ///    A 128-bit integer vector containing the subtrahends.
2522 /// \returns A 128-bit integer vector containing the differences of the values
2526   return (__m128i)((__v4su)__a - (__v4su)__b);  in _mm_sub_epi32()
2529 /// Subtracts signed or unsigned 64-bit integer values and writes the
2537 ///    A 64-bit integer vector containing the minuend.
2539 ///    A 64-bit integer vector containing the subtrahend.
2540 /// \returns A 64-bit integer vector containing the difference of the values in
2547 /// Subtracts the corresponding elements of two [2 x i64] vectors.
2554 ///    A 128-bit integer vector containing the minuends.
2556 ///    A 128-bit integer vector containing the subtrahends.
2557 /// \returns A 128-bit integer vector containing the differences of the values
2561   return (__m128i)((__v2du)__a - (__v2du)__b);  in _mm_sub_epi64()
2564 /// Subtracts, with saturation, corresponding 8-bit signed integer values in
2576 ///    A 128-bit integer vector containing the minuends.
2578 ///    A 128-bit integer vector containing the subtrahends.
2579 /// \returns A 128-bit integer vector containing the differences of the values
2586 /// Subtracts, with saturation, corresponding 16-bit signed integer values in
2598 ///    A 128-bit integer vector containing the minuends.
2600 ///    A 128-bit integer vector containing the subtrahends.
2601 /// \returns A 128-bit integer vector containing the differences of the values
2608 /// Subtracts, with saturation, corresponding 8-bit unsigned integer values in
2619 ///    A 128-bit integer vector containing the minuends.
2621 ///    A 128-bit integer vector containing the subtrahends.
2622 /// \returns A 128-bit integer vector containing the unsigned integer
2629 /// Subtracts, with saturation, corresponding 16-bit unsigned integer values in
2640 ///    A 128-bit integer vector containing the minuends.
2642 ///    A 128-bit integer vector containing the subtrahends.
2643 /// \returns A 128-bit integer vector containing the unsigned integer
2650 /// Performs a bitwise AND of two 128-bit integer vectors.
2657 ///    A 128-bit integer vector containing one of the source operands.
2659 ///    A 128-bit integer vector containing one of the source operands.
2660 /// \returns A 128-bit integer vector containing the bitwise AND of the values
2667 /// Performs a bitwise AND of two 128-bit integer vectors, using the
2675 ///    A 128-bit vector containing the left source operand. The one's complement
2678 ///    A 128-bit vector containing the right source operand.
2679 /// \returns A 128-bit integer vector containing the bitwise AND of the one's
2685 /// Performs a bitwise OR of two 128-bit integer vectors.
2692 ///    A 128-bit integer vector containing one of the source operands.
2694 ///    A 128-bit integer vector containing one of the source operands.
2695 /// \returns A 128-bit integer vector containing the bitwise OR of the values
2702 /// Performs a bitwise exclusive OR of two 128-bit integer vectors.
2709 ///    A 128-bit integer vector containing one of the source operands.
2711 ///    A 128-bit integer vector containing one of the source operands.
2712 /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
2719 /// Left-shifts the 128-bit integer vector operand by the specified
2720 ///    number of bytes. Low-order bits are cleared.
2731 ///    A 128-bit integer vector containing the source operand.
2733 ///    An immediate value specifying the number of bytes to left-shift operand
2735 /// \returns A 128-bit integer vector containing the left-shifted value.
2744 /// Left-shifts each 16-bit value in the 128-bit integer vector operand
2745 ///    by the specified number of bits. Low-order bits are cleared.
2752 ///    A 128-bit integer vector containing the source operand.
2754 ///    An integer value specifying the number of bits to left-shift each value
2756 /// \returns A 128-bit integer vector containing the left-shifted values.
2762 /// Left-shifts each 16-bit value in the 128-bit integer vector operand
2763 ///    by the specified number of bits. Low-order bits are cleared.
2770 ///    A 128-bit integer vector containing the source operand.
2772 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2773 ///    to left-shift each value in operand \a __a.
2774 /// \returns A 128-bit integer vector containing the left-shifted values.
2780 /// Left-shifts each 32-bit value in the 128-bit integer vector operand
2781 ///    by the specified number of bits. Low-order bits are cleared.
2788 ///    A 128-bit integer vector containing the source operand.
2790 ///    An integer value specifying the number of bits to left-shift each value
2792 /// \returns A 128-bit integer vector containing the left-shifted values.
2798 /// Left-shifts each 32-bit value in the 128-bit integer vector operand
2799 ///    by the specified number of bits. Low-order bits are cleared.
2806 ///    A 128-bit integer vector containing the source operand.
2808 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2809 ///    to left-shift each value in operand \a __a.
2810 /// \returns A 128-bit integer vector containing the left-shifted values.
2816 /// Left-shifts each 64-bit value in the 128-bit integer vector operand
2817 ///    by the specified number of bits. Low-order bits are cleared.
2824 ///    A 128-bit integer vector containing the source operand.
2826 ///    An integer value specifying the number of bits to left-shift each value
2828 /// \returns A 128-bit integer vector containing the left-shifted values.
2834 /// Left-shifts each 64-bit value in the 128-bit integer vector operand
2835 ///    by the specified number of bits. Low-order bits are cleared.
2842 ///    A 128-bit integer vector containing the source operand.
2844 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2845 ///    to left-shift each value in operand \a __a.
2846 /// \returns A 128-bit integer vector containing the left-shifted values.
2852 /// Right-shifts each 16-bit value in the 128-bit integer vector operand
2853 ///    by the specified number of bits. High-order bits are filled with the sign
2854 ///    bit of the initial value.
2861 ///    A 128-bit integer vector containing the source operand.
2863 ///    An integer value specifying the number of bits to right-shift each value
2865 /// \returns A 128-bit integer vector containing the right-shifted values.
2871 /// Right-shifts each 16-bit value in the 128-bit integer vector operand
2872 ///    by the specified number of bits. High-order bits are filled with the sign
2873 ///    bit of the initial value.
2880 ///    A 128-bit integer vector containing the source operand.
2882 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2883 ///    to right-shift each value in operand \a __a.
2884 /// \returns A 128-bit integer vector containing the right-shifted values.
2890 /// Right-shifts each 32-bit value in the 128-bit integer vector operand
2891 ///    by the specified number of bits. High-order bits are filled with the sign
2892 ///    bit of the initial value.
2899 ///    A 128-bit integer vector containing the source operand.
2901 ///    An integer value specifying the number of bits to right-shift each value
2903 /// \returns A 128-bit integer vector containing the right-shifted values.
2909 /// Right-shifts each 32-bit value in the 128-bit integer vector operand
2910 ///    by the specified number of bits. High-order bits are filled with the sign
2911 ///    bit of the initial value.
2918 ///    A 128-bit integer vector containing the source operand.
2920 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2921 ///    to right-shift each value in operand \a __a.
2922 /// \returns A 128-bit integer vector containing the right-shifted values.
2928 /// Right-shifts the 128-bit integer vector operand by the specified
2929 ///    number of bytes. High-order bits are cleared.
2940 ///    A 128-bit integer vector containing the source operand.
2942 ///    An immediate value specifying the number of bytes to right-shift operand
2944 /// \returns A 128-bit integer vector containing the right-shifted value.
2953 /// Right-shifts each 16-bit value in the 128-bit integer vector
2954 ///    operand by the specified number of bits. High-order bits are cleared.
2961 ///    A 128-bit integer vector containing the source operand.
2963 ///    An integer value specifying the number of bits to right-shift each value
2965 /// \returns A 128-bit integer vector containing the right-shifted values.
2971 /// Right-shifts each 16-bit value in the 128-bit integer vector
2972 ///    operand by the specified number of bits. High-order bits are cleared.
2979 ///    A 128-bit integer vector containing the source operand.
2981 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
2982 ///    to right-shift each value in operand \a __a.
2983 /// \returns A 128-bit integer vector containing the right-shifted values.
2989 /// Right-shifts each 32-bit value in the 128-bit integer vector
2990 ///    operand by the specified number of bits. High-order bits are cleared.
2997 ///    A 128-bit integer vector containing the source operand.
2999 ///    An integer value specifying the number of bits to right-shift each value
3001 /// \returns A 128-bit integer vector containing the right-shifted values.
3007 /// Right-shifts each 32-bit value in the 128-bit integer vector
3008 ///    operand by the specified number of bits. High-order bits are cleared.
3015 ///    A 128-bit integer vector containing the source operand.
3017 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3018 ///    to right-shift each value in operand \a __a.
3019 /// \returns A 128-bit integer vector containing the right-shifted values.
3025 /// Right-shifts each 64-bit value in the 128-bit integer vector
3026 ///    operand by the specified number of bits. High-order bits are cleared.
3033 ///    A 128-bit integer vector containing the source operand.
3035 ///    An integer value specifying the number of bits to right-shift each value
3037 /// \returns A 128-bit integer vector containing the right-shifted values.
3043 /// Right-shifts each 64-bit value in the 128-bit integer vector
3044 ///    operand by the specified number of bits. High-order bits are cleared.
3051 ///    A 128-bit integer vector containing the source operand.
3053 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
3054 ///    to right-shift each value in operand \a __a.
3055 /// \returns A 128-bit integer vector containing the right-shifted values.
3061 /// Compares each of the corresponding 8-bit values of the 128-bit
3071 ///    A 128-bit integer vector.
3073 ///    A 128-bit integer vector.
3074 /// \returns A 128-bit integer vector containing the comparison results.
3080 /// Compares each of the corresponding 16-bit values of the 128-bit
3090 ///    A 128-bit integer vector.
3092 ///    A 128-bit integer vector.
3093 /// \returns A 128-bit integer vector containing the comparison results.
3099 /// Compares each of the corresponding 32-bit values of the 128-bit
3109 ///    A 128-bit integer vector.
3111 ///    A 128-bit integer vector.
3112 /// \returns A 128-bit integer vector containing the comparison results.
3118 /// Compares each of the corresponding signed 8-bit values of the 128-bit
3129 ///    A 128-bit integer vector.
3131 ///    A 128-bit integer vector.
3132 /// \returns A 128-bit integer vector containing the comparison results.
3140 /// Compares each of the corresponding signed 16-bit values of the
3141 ///    128-bit integer vectors to determine if the values in the first operand
3151 ///    A 128-bit integer vector.
3153 ///    A 128-bit integer vector.
3154 /// \returns A 128-bit integer vector containing the comparison results.
3160 /// Compares each of the corresponding signed 32-bit values of the
3161 ///    128-bit integer vectors to determine if the values in the first operand
3171 ///    A 128-bit integer vector.
3173 ///    A 128-bit integer vector.
3174 /// \returns A 128-bit integer vector containing the comparison results.
3180 /// Compares each of the corresponding signed 8-bit values of the 128-bit
3191 ///    A 128-bit integer vector.
3193 ///    A 128-bit integer vector.
3194 /// \returns A 128-bit integer vector containing the comparison results.
3200 /// Compares each of the corresponding signed 16-bit values of the
3201 ///    128-bit integer vectors to determine if the values in the first operand
3211 ///    A 128-bit integer vector.
3213 ///    A 128-bit integer vector.
3214 /// \returns A 128-bit integer vector containing the comparison results.
3220 /// Compares each of the corresponding signed 32-bit values of the
3221 ///    128-bit integer vectors to determine if the values in the first operand
3231 ///    A 128-bit integer vector.
3233 ///    A 128-bit integer vector.
3234 /// \returns A 128-bit integer vector containing the comparison results.
3241 /// Converts a 64-bit signed integer value from the second operand into a
3242 ///    double-precision value and returns it in the lower element of a [2 x
3251 ///    A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
3254 ///    A 64-bit signed integer operand containing the value to be converted.
3255 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
3264 /// Converts the first (lower) element of a vector of [2 x double] into a
3265 ///    64-bit signed integer value.
3267 ///    If the converted value does not fit in a 64-bit integer, raises a
3268 ///    floating-point invalid exception. If the exception is masked, returns
3276 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3278 /// \returns A 64-bit signed integer containing the converted value.
3283 /// Converts the first (lower) element of a vector of [2 x double] into a
3284 ///    64-bit signed truncated (rounded toward zero) integer value.
3286 ///    If a converted value does not fit in a 64-bit integer, raises a
3287 ///    floating-point invalid exception. If the exception is masked, returns
3296 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3298 /// \returns A 64-bit signed integer containing the converted value.
3311 ///    A 128-bit integer vector.
3312 /// \returns A 128-bit vector of [4 x float] containing the converted values.
3319 ///    If a converted value does not fit in a 32-bit integer, raises a
3320 ///    floating-point invalid exception. If the exception is masked, returns
3328 ///    A 128-bit vector of [4 x float].
3329 /// \returns A 128-bit integer vector of [4 x i32] containing the converted
3336 ///    zero) 32-bit integers, returned in a vector of [4 x i32].
3338 ///    If a converted value does not fit in a 32-bit integer, raises a
3339 ///    floating-point invalid exception. If the exception is masked, returns
3348 ///    A 128-bit vector of [4 x float].
3349 /// \returns A 128-bit vector of [4 x i32] containing the converted values.
3362 ///    A 32-bit signed integer operand.
3363 /// \returns A 128-bit vector of [4 x i32].
3368 /// Returns a vector of [2 x i64] where the lower element is the input
3374 /// in 64-bit mode.
3377 ///    A 64-bit signed integer operand containing the value to be converted.
3378 /// \returns A 128-bit vector of [2 x i64] containing the converted value.
3384 ///    32-bit signed integer value.
3393 /// \returns A 32-bit signed integer containing the moved value.
3399 /// Moves the least significant 64 bits of a vector of [2 x i64] to a
3400 ///    64-bit signed integer value.
3407 ///    A vector of [2 x i64]. The least significant 64 bits are moved to the
3409 /// \returns A 64-bit signed integer containing the moved value.
3414 /// Moves packed integer values from an aligned 128-bit memory location
3415 ///    to elements in a 128-bit integer vector.
3423 /// \returns A 128-bit integer vector containing the moved values.
3429 /// Moves packed integer values from an unaligned 128-bit memory location
3430 ///    to elements in a 128-bit integer vector.
3438 /// \returns A 128-bit integer vector containing the moved values.
3444   return ((const struct __loadu_si128 *)__p)->__v;  in _mm_loadu_si128()
3447 /// Returns a vector of [2 x i64] where the lower element is taken from
3455 ///    A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
3457 /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
3465       ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0};  in _mm_loadl_epi64()
3468 /// Generates a 128-bit vector of [4 x i32] with unspecified content.
3476 /// \returns A 128-bit vector of [4 x i32] with unspecified content.
3481 /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3482 ///    the specified 64-bit integer values.
3490 ///    A 64-bit integer value used to initialize the upper 64 bits of the
3491 ///    destination vector of [2 x i64].
3493 ///    A 64-bit integer value used to initialize the lower 64 bits of the
3494 ///    destination vector of [2 x i64].
3495 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
3502 /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3503 ///    the specified 64-bit integer values.
3511 ///    A 64-bit integer value used to initialize the upper 64 bits of the
3512 ///    destination vector of [2 x i64].
3514 ///    A 64-bit integer value used to initialize the lower 64 bits of the
3515 ///    destination vector of [2 x i64].
3516 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
3523 /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
3524 ///    the specified 32-bit integer values.
3532 ///    A 32-bit integer value used to initialize bits [127:96] of the
3535 ///    A 32-bit integer value used to initialize bits [95:64] of the destination
3538 ///    A 32-bit integer value used to initialize bits [63:32] of the destination
3541 ///    A 32-bit integer value used to initialize bits [31:0] of the destination
3543 /// \returns An initialized 128-bit vector of [4 x i32] containing the values
3550 /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
3551 ///    the specified 16-bit integer values.
3559 ///    A 16-bit integer value used to initialize bits [127:112] of the
3562 ///    A 16-bit integer value used to initialize bits [111:96] of the
3565 ///    A 16-bit integer value used to initialize bits [95:80] of the destination
3568 ///    A 16-bit integer value used to initialize bits [79:64] of the destination
3571 ///    A 16-bit integer value used to initialize bits [63:48] of the destination
3574 ///    A 16-bit integer value used to initialize bits [47:32] of the destination
3577 ///    A 16-bit integer value used to initialize bits [31:16] of the destination
3580 ///    A 16-bit integer value used to initialize bits [15:0] of the destination
3582 /// \returns An initialized 128-bit vector of [8 x i16] containing the values
3591 /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
3592 ///    the specified 8-bit integer values.
3631 /// \returns An initialized 128-bit vector of [16 x i8] containing the values
3642 /// Initializes both values in a 128-bit integer vector with the
3643 ///    specified 64-bit integer value.
3653 /// \returns An initialized 128-bit integer vector of [2 x i64] with both
3659 /// Initializes both values in a 128-bit vector of [2 x i64] with the
3660 ///    specified 64-bit value.
3668 ///    A 64-bit value used to initialize the elements of the destination integer
3670 /// \returns An initialized 128-bit vector of [2 x i64] with all elements
3676 /// Initializes all values in a 128-bit vector of [4 x i32] with the
3677 ///    specified 32-bit value.
3685 ///    A 32-bit value used to initialize the elements of the destination integer
3687 /// \returns An initialized 128-bit vector of [4 x i32] with all elements
3693 /// Initializes all values in a 128-bit vector of [8 x i16] with the
3694 ///    specified 16-bit value.
3702 ///    A 16-bit value used to initialize the elements of the destination integer
3704 /// \returns An initialized 128-bit vector of [8 x i16] with all elements
3710 /// Initializes all values in a 128-bit vector of [16 x i8] with the
3711 ///    specified 8-bit value.
3719 ///    An 8-bit value used to initialize the elements of the destination integer
3721 /// \returns An initialized 128-bit vector of [16 x i8] with all elements
3728 /// Constructs a 128-bit integer vector, initialized in reverse order
3729 ///     with the specified 64-bit integral values.
3736 ///    A 64-bit integral value used to initialize the lower 64 bits of the
3739 ///    A 64-bit integral value used to initialize the upper 64 bits of the
3741 /// \returns An initialized 128-bit integer vector.
3747 /// Constructs a 128-bit integer vector, initialized in reverse order
3748 ///     with the specified 32-bit integral values.
3756 ///    A 32-bit integral value used to initialize bits [31:0] of the result.
3758 ///    A 32-bit integral value used to initialize bits [63:32] of the result.
3760 ///    A 32-bit integral value used to initialize bits [95:64] of the result.
3762 ///    A 32-bit integral value used to initialize bits [127:96] of the result.
3763 /// \returns An initialized 128-bit integer vector.
3770 /// Constructs a 128-bit integer vector, initialized in reverse order
3771 ///     with the specified 16-bit integral values.
3779 ///    A 16-bit integral value used to initialize bits [15:0] of the result.
3781 ///    A 16-bit integral value used to initialize bits [31:16] of the result.
3783 ///    A 16-bit integral value used to initialize bits [47:32] of the result.
3785 ///    A 16-bit integral value used to initialize bits [63:48] of the result.
3787 ///    A 16-bit integral value used to initialize bits [79:64] of the result.
3789 ///    A 16-bit integral value used to initialize bits [95:80] of the result.
3791 ///    A 16-bit integral value used to initialize bits [111:96] of the result.
3793 ///    A 16-bit integral value used to initialize bits [127:112] of the result.
3794 /// \returns An initialized 128-bit integer vector.
3801 /// Constructs a 128-bit integer vector, initialized in reverse order
3802 ///     with the specified 8-bit integral values.
3810 ///    An 8-bit integral value used to initialize bits [7:0] of the result.
3812 ///    An 8-bit integral value used to initialize bits [15:8] of the result.
3814 ///    An 8-bit integral value used to initialize bits [23:16] of the result.
3816 ///    An 8-bit integral value used to initialize bits [31:24] of the result.
3818 ///    An 8-bit integral value used to initialize bits [39:32] of the result.
3820 ///    An 8-bit integral value used to initialize bits [47:40] of the result.
3822 ///    An 8-bit integral value used to initialize bits [55:48] of the result.
3824 ///    An 8-bit integral value used to initialize bits [63:56] of the result.
3826 ///    An 8-bit integral value used to initialize bits [71:64] of the result.
3828 ///    An 8-bit integral value used to initialize bits [79:72] of the result.
3830 ///    An 8-bit integral value used to initialize bits [87:80] of the result.
3832 ///    An 8-bit integral value used to initialize bits [95:88] of the result.
3834 ///    An 8-bit integral value used to initialize bits [103:96] of the result.
3836 ///    An 8-bit integral value used to initialize bits [111:104] of the result.
3838 ///    An 8-bit integral value used to initialize bits [119:112] of the result.
3840 ///    An 8-bit integral value used to initialize bits [127:120] of the result.
3841 /// \returns An initialized 128-bit integer vector.
3850 /// Creates a 128-bit integer vector initialized to zero.
3856 /// \returns An initialized 128-bit integer vector with all elements set to
3862 /// Stores a 128-bit integer vector to a memory location aligned on a
3863 ///    128-bit boundary.
3873 ///    A 128-bit integer vector containing the values to be moved.
3879 /// Stores a 128-bit integer vector to an unaligned memory location.
3888 ///    A 128-bit integer vector containing the values to be moved.
3894   ((struct __storeu_si128 *)__p)->__v = __b;  in _mm_storeu_si128()
3897 /// Stores a 64-bit integer value from the low element of a 128-bit integer
3905 ///    A pointer to a 64-bit memory location. The address of the memory
3908 ///    A 128-bit integer vector containing the value to be stored.
3914   ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0];  in _mm_storeu_si64()
3917 /// Stores a 32-bit integer value from the low element of a 128-bit integer
3925 ///    A pointer to a 32-bit memory location. The address of the memory
3928 ///    A 128-bit integer vector containing the value to be stored.
3934   ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0];  in _mm_storeu_si32()
3937 /// Stores a 16-bit integer value from the low element of a 128-bit integer
3945 ///    A pointer to a 16-bit memory location. The address of the memory
3948 ///    A 128-bit integer vector containing the value to be stored.
3954   ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0];  in _mm_storeu_si16()
3958 ///    specified unaligned memory location. When a mask bit is 1, the
3961 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
3971 ///    A 128-bit integer vector containing the values to be moved.
3973 ///    A 128-bit integer vector containing the mask. The most significant bit of
3976 ///    A pointer to an unaligned 128-bit memory location where the specified
3984 /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
3992 ///    A pointer to a 64-bit memory location that will receive the lower 64 bits
3995 ///    A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
4002   ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0];  in _mm_storel_epi64()
4005 /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
4008 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4016 ///    A pointer to the 128-bit aligned memory location used to store the value.
4018 ///    A vector of [2 x double] containing the 64-bit values to be stored.
4024 /// Stores a 128-bit integer vector to a 128-bit aligned memory location.
4026 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4034 ///    A pointer to the 128-bit aligned memory location used to store the value.
4036 ///    A 128-bit integer vector containing the values to be stored.
4042 /// Stores a 32-bit integer value in the specified memory location.
4044 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4052 ///    A pointer to the 32-bit memory location used to store the value.
4054 ///    A 32-bit integer containing the value to be stored.
4062 /// Stores a 64-bit integer value in the specified memory location.
4064 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
4072 ///    A pointer to the 64-bit memory location used to store the value.
4074 ///    A 64-bit integer containing the value to be stored.
4124 /// Converts, with saturation, 16-bit signed integers from both 128-bit integer
4125 ///    vector operands into 8-bit signed integers, and packs the results into
4136 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4139 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4141 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
4147 /// Converts, with saturation, 32-bit signed integers from both 128-bit integer
4148 ///    vector operands into 16-bit signed integers, and packs the results into
4159 ///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
4162 ///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
4164 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
4170 /// Converts, with saturation, 16-bit signed integers from both 128-bit integer
4171 ///    vector operands into 8-bit unsigned integers, and packs the results into
4182 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4185 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4187 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
4193 /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
4194 ///    the immediate-value parameter as a selector.
4205 ///    A 128-bit integer vector.
4207 ///    An immediate value. Bits [2:0] select values from \a a to be assigned
4217 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
4223 /// Constructs a 128-bit integer vector by first making a copy of the
4224 ///    128-bit integer vector parameter, and then inserting the lower 16 bits
4225 ///    of an integer parameter into an offset specified by the immediate-value
4237 ///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
4244 ///    An immediate value specifying the bit offset in the result at which the
4246 /// \returns A 128-bit integer vector containing the constructed values.
4251 /// Copies the values of the most significant bits from each 8-bit
4252 ///    element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
4253 ///    value, zero-extends the value, and writes it to the destination.
4260 ///    A 128-bit integer vector containing the values with bits to be extracted.
4261 /// \returns The most significant bits from each 8-bit element in \a __a,
4267 /// Constructs a 128-bit integer vector by shuffling four 32-bit
4268 ///    elements of a 128-bit integer vector parameter, using the immediate-value
4280 ///    A 128-bit integer vector containing the values to be copied.
4282 ///    An immediate value containing an 8-bit value specifying which elements to
4283 ///    copy from \a a. The destinations within the 128-bit result are assigned
4286 ///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
4289 ///    Bit value assignments: \n
4295 ///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4297 /// \returns A 128-bit integer vector containing the shuffled values.
4301 /// Constructs a 128-bit integer vector by shuffling four lower 16-bit
4302 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
4314 ///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
4317 ///    An 8-bit immediate value specifying which elements to copy from \a a. \n
4319 ///    Bits [3:2] are used to assign values to bits [31:16] of the result. \n
4322 ///    Bit value assignments: \n
4328 ///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4330 /// \returns A 128-bit integer vector containing the shuffled values.
4334 /// Constructs a 128-bit integer vector by shuffling four upper 16-bit
4335 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
4347 ///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
4350 ///    An 8-bit immediate value specifying which elements to copy from \a a. \n
4352 ///    Bits [3:2] are used to assign values to bits [95:80] of the result. \n
4355 ///    Bit value assignments: \n
4361 ///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4363 /// \returns A 128-bit integer vector containing the shuffled values.
4367 /// Unpacks the high-order (index 8-15) values from two 128-bit vectors
4368 ///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4376 ///    A 128-bit vector of [16 x i8].
4386 ///    A 128-bit vector of [16 x i8]. \n
4395 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4403 /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of
4404 ///    [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
4412 ///    A 128-bit vector of [8 x i16].
4418 ///    A 128-bit vector of [8 x i16].
4423 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4430 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
4431 ///    [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4439 ///    A 128-bit vector of [4 x i32]. \n
4443 ///    A 128-bit vector of [4 x i32]. \n
4446 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4449   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3,  in _mm_unpackhi_epi32()
4453 /// Unpacks the high-order 64-bit elements from two 128-bit vectors of
4454 ///    [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4462 ///    A 128-bit vector of [2 x i64]. \n
4465 ///    A 128-bit vector of [2 x i64]. \n
4467 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4470   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1);  in _mm_unpackhi_epi64()
4473 /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of
4474 ///    [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4482 ///    A 128-bit vector of [16 x i8]. \n
4492 ///    A 128-bit vector of [16 x i8].
4501 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4505       (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,  in _mm_unpacklo_epi8()
4509 /// Unpacks the low-order (index 0-3) values from each of the two 128-bit
4510 ///    vectors of [8 x i16] and interleaves them into a 128-bit vector of
4519 ///    A 128-bit vector of [8 x i16].
4525 ///    A 128-bit vector of [8 x i16].
4530 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4534                                           8 + 1, 2, 8 + 2, 3, 8 + 3);  in _mm_unpacklo_epi16()
4537 /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of
4538 ///    [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4546 ///    A 128-bit vector of [4 x i32]. \n
4550 ///    A 128-bit vector of [4 x i32]. \n
4553 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4560 /// Unpacks the low-order 64-bit elements from two 128-bit vectors of
4561 ///    [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4569 ///    A 128-bit vector of [2 x i64]. \n
4572 ///    A 128-bit vector of [2 x i64]. \n
4574 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4577   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0);  in _mm_unpacklo_epi64()
4580 /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
4588 ///    A 128-bit integer vector operand. The lower 64 bits are moved to the
4590 /// \returns A 64-bit integer containing the lower 64 bits of the parameter.
4595 /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
4603 ///    A 64-bit value.
4604 /// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4610 /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
4618 ///    A 128-bit integer vector operand. The lower 64 bits are moved to the
4620 /// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4623   return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);  in _mm_move_epi64()
4626 /// Unpacks the high-order 64-bit elements from two 128-bit vectors of
4627 ///    [2 x double] and interleaves them into a 128-bit vector of [2 x
4635 ///    A 128-bit vector of [2 x double]. \n
4638 ///    A 128-bit vector of [2 x double]. \n
4640 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4643   return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1);  in _mm_unpackhi_pd()
4646 /// Unpacks the low-order 64-bit elements from two 128-bit vectors
4647 ///    of [2 x double] and interleaves them into a 128-bit vector of [2 x
4655 ///    A 128-bit vector of [2 x double]. \n
4658 ///    A 128-bit vector of [2 x double]. \n
4660 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4663   return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0);  in _mm_unpacklo_pd()
4666 /// Extracts the sign bits of the double-precision values in the 128-bit
4667 ///    vector of [2 x double], zero-extends the value, and writes it to the
4668 ///    low-order bits of the destination.
4675 ///    A 128-bit vector of [2 x double] containing the values with sign bits to
4677 /// \returns The sign bits from each of the double-precision elements in \a __a,
4683 /// Constructs a 128-bit floating-point vector of [2 x double] from two
4684 ///    128-bit vector parameters of [2 x double], using the immediate-value
4696 ///    A 128-bit vector of [2 x double].
4698 ///    A 128-bit vector of [2 x double].
4700 ///    An 8-bit immediate value. The least significant two bits specify which
4702 ///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
4703 ///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
4704 ///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
4705 ///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n
4707 ///    <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form
4709 /// \returns A 128-bit vector of [2 x double] containing the shuffled values.
4714 /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4715 ///    floating-point vector of [4 x float].
4722 ///    A 128-bit floating-point vector of [2 x double].
4723 /// \returns A 128-bit floating-point vector of [4 x float] containing the same
4729 /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4737 ///    A 128-bit floating-point vector of [2 x double].
4738 /// \returns A 128-bit integer vector containing the same bitwise pattern as the
4744 /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4745 ///    floating-point vector of [2 x double].
4752 ///    A 128-bit floating-point vector of [4 x float].
4753 /// \returns A 128-bit floating-point vector of [2 x double] containing the same
4759 /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4767 ///    A 128-bit floating-point vector of [4 x float].
4768 /// \returns A 128-bit integer vector containing the same bitwise pattern as the
4774 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
4782 ///    A 128-bit integer vector.
4783 /// \returns A 128-bit floating-point vector of [4 x float] containing the same
4789 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
4790 ///    of [2 x double].
4797 ///    A 128-bit integer vector.
4798 /// \returns A 128-bit floating-point vector of [2 x double] containing the same
4804 /// Compares each of the corresponding double-precision values of two
4805 ///    128-bit vectors of [2 x double], using the operation specified by the
4821 ///    A 128-bit vector of [2 x double].
4823 ///    A 128-bit vector of [2 x double].
4827 ///    0x00: Equal (ordered, non-signaling) \n
4828 ///    0x01: Less-than (ordered, signaling) \n
4829 ///    0x02: Less-than-or-equal (ordered, signaling) \n
4830 ///    0x03: Unordered (non-signaling) \n
4831 ///    0x04: Not-equal (unordered, non-signaling) \n
4832 ///    0x05: Not-less-than (unordered, signaling) \n
4833 ///    0x06: Not-less-than-or-equal (unordered, signaling) \n
4834 ///    0x07: Ordered (non-signaling) \n
4835 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
4840 /// Compares each of the corresponding scalar double-precision values of
4841 ///    two 128-bit vectors of [2 x double], using the operation specified by the
4857 ///    A 128-bit vector of [2 x double].
4859 ///    A 128-bit vector of [2 x double].
4863 ///    0x00: Equal (ordered, non-signaling) \n
4864 ///    0x01: Less-than (ordered, signaling) \n
4865 ///    0x02: Less-than-or-equal (ordered, signaling) \n
4866 ///    0x03: Unordered (non-signaling) \n
4867 ///    0x04: Not-equal (unordered, non-signaling) \n
4868 ///    0x05: Not-less-than (unordered, signaling) \n
4869 ///    0x06: Not-less-than-or-equal (unordered, signaling) \n
4870 ///    0x07: Ordered (non-signaling) \n
4871 /// \returns A 128-bit vector of [2 x double] containing the comparison results.