avxintrin.h - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/Headers/avxintrin.h

Lines Matching +full:128 +full:a
58                  __min_vector_width__(128)))
68 ///    A 256-bit vector of [4 x double] containing one of the source operands.
70 ///    A 256-bit vector of [4 x double] containing one of the source operands.
71 /// \returns A 256-bit vector of [4 x double] containing the sums of both
86 ///    A 256-bit vector of [8 x float] containing one of the source operands.
88 ///    A 256-bit vector of [8 x float] containing one of the source operands.
89 /// \returns A 256-bit vector of [8 x float] containing the sums of both
104 ///    A 256-bit vector of [4 x double] containing the minuend.
106 ///    A 256-bit vector of [4 x double] containing the subtrahend.
107 /// \returns A 256-bit vector of [4 x double] containing the differences between
122 ///    A 256-bit vector of [8 x float] containing the minuend.
124 ///    A 256-bit vector of [8 x float] containing the subtrahend.
125 /// \returns A 256-bit vector of [8 x float] containing the differences between
141 ///    A 256-bit vector of [4 x double] containing the left source operand.
143 ///    A 256-bit vector of [4 x double] containing the right source operand.
144 /// \returns A 256-bit vector of [4 x double] containing the alternating sums
160 ///    A 256-bit vector of [8 x float] containing the left source operand.
162 ///    A 256-bit vector of [8 x float] containing the right source operand.
163 /// \returns A 256-bit vector of [8 x float] containing the alternating sums and
178 ///    A 256-bit vector of [4 x double] containing the dividend.
180 ///    A 256-bit vector of [4 x double] containing the divisor.
181 /// \returns A 256-bit vector of [4 x double] containing the quotients of both
196 ///    A 256-bit vector of [8 x float] containing the dividend.
198 ///    A 256-bit vector of [8 x float] containing the divisor.
199 /// \returns A 256-bit vector of [8 x float] containing the quotients of both
210 ///    If either value in a comparison is NaN, returns the value from \a __b.
217 ///    A 256-bit vector of [4 x double] containing one of the operands.
219 ///    A 256-bit vector of [4 x double] containing one of the operands.
220 /// \returns A 256-bit vector of [4 x double] containing the maximum values
231 ///    If either value in a comparison is NaN, returns the value from \a __b.
238 ///    A 256-bit vector of [8 x float] containing one of the operands.
240 ///    A 256-bit vector of [8 x float] containing one of the operands.
241 /// \returns A 256-bit vector of [8 x float] containing the maximum values
252 ///    If either value in a comparison is NaN, returns the value from \a __b.
259 ///    A 256-bit vector of [4 x double] containing one of the operands.
261 ///    A 256-bit vector of [4 x double] containing one of the operands.
262 /// \returns A 256-bit vector of [4 x double] containing the minimum values
273 ///    If either value in a comparison is NaN, returns the value from \a __b.
280 ///    A 256-bit vector of [8 x float] containing one of the operands.
282 ///    A 256-bit vector of [8 x float] containing one of the operands.
283 /// \returns A 256-bit vector of [8 x float] containing the minimum values
298 ///    A 256-bit vector of [4 x double] containing one of the operands.
300 ///    A 256-bit vector of [4 x double] containing one of the operands.
301 /// \returns A 256-bit vector of [4 x double] containing the products of both
316 ///    A 256-bit vector of [8 x float] containing one of the operands.
318 ///    A 256-bit vector of [8 x float] containing one of the operands.
319 /// \returns A 256-bit vector of [8 x float] containing the products of both
327 /// Calculates the square roots of the values in a 256-bit vector of
335 ///    A 256-bit vector of [4 x double].
336 /// \returns A 256-bit vector of [4 x double] containing the square roots of the
344 /// Calculates the square roots of the values in a 256-bit vector of
352 ///    A 256-bit vector of [8 x float].
353 /// \returns A 256-bit vector of [8 x float] containing the square roots of the
361 /// Calculates the reciprocal square roots of the values in a 256-bit
369 ///    A 256-bit vector of [8 x float].
370 /// \returns A 256-bit vector of [8 x float] containing the reciprocal square
378 /// Calculates the reciprocals of the values in a 256-bit vector of
386 ///    A 256-bit vector of [8 x float].
387 /// \returns A 256-bit vector of [8 x float] containing the reciprocals of the
395 /// Rounds the values in a 256-bit vector of [4 x double] as specified
408 ///    A 256-bit vector of [4 x double].
412 ///    Bit [3] is a precision exception value: \n
413 ///      0: A normal PE exception is used. \n
416 ///      0: Use bits [1:0] of \a M. \n
423 /// \returns A 256-bit vector of [4 x double] containing the rounded values.
427 /// Rounds the values stored in a 256-bit vector of [8 x float] as
440 ///    A 256-bit vector of [8 x float].
444 ///    Bit [3] is a precision exception value: \n
445 ///      0: A normal PE exception is used. \n
448 ///      0: Use bits [1:0] of \a M. \n
455 /// \returns A 256-bit vector of [8 x float] containing the rounded values.
459 /// Rounds up the values stored in a 256-bit vector of [4 x double]. The
472 ///    A 256-bit vector of [4 x double].
473 /// \returns A 256-bit vector of [4 x double] containing the rounded up values.
476 /// Rounds down the values stored in a 256-bit vector of [4 x double].
489 ///    A 256-bit vector of [4 x double].
490 /// \returns A 256-bit vector of [4 x double] containing the rounded down
494 /// Rounds up the values stored in a 256-bit vector of [8 x float]. The
507 ///    A 256-bit vector of [8 x float].
508 /// \returns A 256-bit vector of [8 x float] containing the rounded up values.
511 /// Rounds down the values stored in a 256-bit vector of [8 x float]. The
524 ///    A 256-bit vector of [8 x float].
525 /// \returns A 256-bit vector of [8 x float] containing the rounded down values.
529 /// Performs a bitwise AND of two 256-bit vectors of [4 x double].
536 ///    A 256-bit vector of [4 x double] containing one of the source operands.
538 ///    A 256-bit vector of [4 x double] containing one of the source operands.
539 /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
547 /// Performs a bitwise AND of two 256-bit vectors of [8 x float].
554 ///    A 256-bit vector of [8 x float] containing one of the source operands.
556 ///    A 256-bit vector of [8 x float] containing one of the source operands.
557 /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
565 /// Performs a bitwise AND of two 256-bit vectors of [4 x double], using
573 ///    A 256-bit vector of [4 x double] containing the left source operand. The
576 ///    A 256-bit vector of [4 x double] containing the right source operand.
577 /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
586 /// Performs a bitwise AND of two 256-bit vectors of [8 x float], using
594 ///    A 256-bit vector of [8 x float] containing the left source operand. The
597 ///    A 256-bit vector of [8 x float] containing the right source operand.
598 /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
607 /// Performs a bitwise OR of two 256-bit vectors of [4 x double].
614 ///    A 256-bit vector of [4 x double] containing one of the source operands.
616 ///    A 256-bit vector of [4 x double] containing one of the source operands.
617 /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
625 /// Performs a bitwise OR of two 256-bit vectors of [8 x float].
632 ///    A 256-bit vector of [8 x float] containing one of the source operands.
634 ///    A 256-bit vector of [8 x float] containing one of the source operands.
635 /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
643 /// Performs a bitwise XOR of two 256-bit vectors of [4 x double].
650 ///    A 256-bit vector of [4 x double] containing one of the source operands.
652 ///    A 256-bit vector of [4 x double] containing one of the source operands.
653 /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
661 /// Performs a bitwise XOR of two 256-bit vectors of [8 x float].
668 ///    A 256-bit vector of [8 x float] containing one of the source operands.
670 ///    A 256-bit vector of [8 x float] containing one of the source operands.
671 /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
688 ///    A 256-bit vector of [4 x double] containing one of the source operands.
690 ///    elements of a vector of [4 x double].
692 ///    A 256-bit vector of [4 x double] containing one of the source operands.
694 ///    elements of a vector of [4 x double].
695 /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
711 ///    A 256-bit vector of [8 x float] containing one of the source operands.
713 ///    index 0, 1, 4, 5 of a vector of [8 x float].
715 ///    A 256-bit vector of [8 x float] containing one of the source operands.
717 ///    index 2, 3, 6, 7 of a vector of [8 x float].
718 /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
734 ///    A 256-bit vector of [4 x double] containing one of the source operands.
736 ///    even-indexed elements of a vector of [4 x double].
738 ///    A 256-bit vector of [4 x double] containing one of the source operands.
740 ///    odd-indexed elements of a vector of [4 x double].
741 /// \returns A 256-bit vector of [4 x double] containing the horizontal
757 ///    A 256-bit vector of [8 x float] containing one of the source operands.
759 ///    elements with index 0, 1, 4, 5 of a vector of [8 x float].
761 ///    A 256-bit vector of [8 x float] containing one of the source operands.
763 ///    elements with index 2, 3, 6, 7 of a vector of [8 x float].
764 /// \returns A 256-bit vector of [8 x float] containing the horizontal
773 /// Copies the values in a 128-bit vector of [2 x double] as specified
774 ///    by the 128-bit integer vector operand.
781 ///    A 128-bit vector of [2 x double].
783 ///    A 128-bit integer vector operand specifying how the values are to be
795 /// \returns A 128-bit vector of [2 x double] containing the copied values.
802 /// Copies the values in a 256-bit vector of [4 x double] as specified
810 ///    A 256-bit vector of [4 x double].
812 ///    A 256-bit integer vector operand specifying how the values are to be
825 ///      0: Bits [191:128] of the source are copied to bits [191:128] of the
827 ///      1: Bits [255:192] of the source are copied to bits [191:128] of the
830 ///      0: Bits [191:128] of the source are copied to bits [255:192] of the
834 /// \returns A 256-bit vector of [4 x double] containing the copied values.
841 /// Copies the values stored in a 128-bit vector of [4 x float] as
842 ///    specified by the 128-bit integer vector operand.
849 ///    A 128-bit vector of [4 x float].
851 ///    A 128-bit integer vector operand specifying how the values are to be
889 /// \returns A 128-bit vector of [4 x float] containing the copied values.
896 /// Copies the values stored in a 256-bit vector of [8 x float] as
904 ///    A 256-bit vector of [8 x float].
906 ///    A 256-bit integer vector operand specifying how the values are to be
944 ///    Bits [129:128]: \n
945 ///      00: Bits [159:128] of the source are copied to bits [159:128] of the
947 ///      01: Bits [191:160] of the source are copied to bits [159:128] of the
949 ///      10: Bits [223:192] of the source are copied to bits [159:128] of the
951 ///      11: Bits [255:224] of the source are copied to bits [159:128] of the
954 ///      00: Bits [159:128] of the source are copied to bits [191:160] of the
963 ///      00: Bits [159:128] of the source are copied to bits [223:192] of the
972 ///      00: Bits [159:128] of the source are copied to bits [255:224] of the
980 /// \returns A 256-bit vector of [8 x float] containing the copied values.
987 /// Copies the values in a 128-bit vector of [2 x double] as specified
993 /// __m128d _mm_permute_pd(__m128d A, const int C);
998 /// \param A
999 ///    A 128-bit vector of [2 x double].
1013 /// \returns A 128-bit vector of [2 x double] containing the copied values.
1014 #define _mm_permute_pd(A, C) \  argument
1015   ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)))
1017 /// Copies the values in a 256-bit vector of [4 x double] as specified by
1023 /// __m256d _mm256_permute_pd(__m256d A, const int C);
1028 /// \param A
1029 ///    A 256-bit vector of [4 x double].
1044 ///      0: Bits [191:128] of the source are copied to bits [191:128] of the
1046 ///      1: Bits [255:192] of the source are copied to bits [191:128] of the
1049 ///      0: Bits [191:128] of the source are copied to bits [255:192] of the
1053 /// \returns A 256-bit vector of [4 x double] containing the copied values.
1054 #define _mm256_permute_pd(A, C) \  argument
1055   ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)))
1057 /// Copies the values in a 128-bit vector of [4 x float] as specified by
1063 /// __m128 _mm_permute_ps(__m128 A, const int C);
1068 /// \param A
1069 ///    A 128-bit vector of [4 x float].
1109 /// \returns A 128-bit vector of [4 x float] containing the copied values.
1110 #define _mm_permute_ps(A, C) \  argument
1111   ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)))
1113 /// Copies the values in a 256-bit vector of [8 x float] as specified by
1119 /// __m256 _mm256_permute_ps(__m256 A, const int C);
1124 /// \param A
1125 ///    A 256-bit vector of [8 x float].
1166 ///      00: Bits [159:128] of the source are copied to bits [159:128] of the
1168 ///      01: Bits [191:160] of the source are copied to bits [159:128] of the
1170 ///      10: Bits [223:192] of the source are copied to bits [159:128] of the
1172 ///      11: Bits [255:224] of the source are copied to bits [159:128] of the
1175 ///      00: Bits [159:128] of the source are copied to bits [191:160] of the
1184 ///      00: Bits [159:128] of the source are copied to bits [223:192] of the
1193 ///      00: Bits [159:128] of the source are copied to bits [255:224] of the
1201 /// \returns A 256-bit vector of [8 x float] containing the copied values.
1202 #define _mm256_permute_ps(A, C) \  argument
1203   ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)))
1205 /// Permutes 128-bit data values stored in two 256-bit vectors of
1217 ///    A 256-bit vector of [4 x double].
1219 ///    A 256-bit vector of [4 x double].
1224 ///      00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
1226 ///      01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1228 ///      10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
1230 ///      11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1233 ///      00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1235 ///      01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1237 ///      10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1239 ///      11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1241 /// \returns A 256-bit vector of [4 x double] containing the copied values.
1246 /// Permutes 128-bit data values stored in two 256-bit vectors of
1258 ///    A 256-bit vector of [8 x float].
1260 ///    A 256-bit vector of [8 x float].
1265 ///    00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
1267 ///    01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1269 ///    10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
1271 ///    11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1274 ///    00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1276 ///    01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1278 ///    10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1280 ///    11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1282 /// \returns A 256-bit vector of [8 x float] containing the copied values.
1287 /// Permutes 128-bit data values stored in two 256-bit integer vectors,
1299 ///    A 256-bit integer vector.
1301 ///    A 256-bit integer vector.
1305 ///    00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
1307 ///    01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1309 ///    10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
1311 ///    11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1314 ///    00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1316 ///    01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1318 ///    10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1320 ///    11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1322 /// \returns A 256-bit integer vector containing the copied values.
1341 ///    A 256-bit vector of [4 x double].
1343 ///    A 256-bit vector of [4 x double].
1347 ///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
1348 ///    element in operand \a V1 is copied to the same position in the
1349 ///    destination. When a mask bit is 1, the corresponding 64-bit element in
1350 ///    operand \a V2 is copied to the same position in the destination.
1351 /// \returns A 256-bit vector of [4 x double] containing the copied values.
1369 ///    A 256-bit vector of [8 x float].
1371 ///    A 256-bit vector of [8 x float].
1375 ///    index of a copied value. When a mask bit is 0, the corresponding 32-bit
1376 ///    element in operand \a V1 is copied to the same position in the
1377 ///    destination. When a mask bit is 1, the corresponding 32-bit element in
1378 ///    operand \a V2 is copied to the same position in the destination.
1379 /// \returns A 256-bit vector of [8 x float] containing the copied values.
1393 ///    A 256-bit vector of [4 x double].
1395 ///    A 256-bit vector of [4 x double].
1397 ///    A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying
1399 ///    to the most significant bit of a copied value. When a mask bit is 0, the
1400 ///    corresponding 64-bit element in operand \a __a is copied to the same
1401 ///    position in the destination. When a mask bit is 1, the corresponding
1402 ///    64-bit element in operand \a __b is copied to the same position in the
1404 /// \returns A 256-bit vector of [4 x double] containing the copied values.
1421 ///    A 256-bit vector of [8 x float].
1423 ///    A 256-bit vector of [8 x float].
1425 ///    A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,
1427 ///    mask bit corresponds to the most significant bit of a copied value. When
1428 ///    a mask bit is 0, the corresponding 32-bit element in operand \a __a is
1429 ///    copied to the same position in the destination. When a mask bit is 1, the
1430 ///    corresponding 32-bit element in operand \a __b is copied to the same
1432 /// \returns A 256-bit vector of [8 x float] containing the copied values.
1461 ///    A vector of [8 x float] values, treated as two [4 x float] vectors.
1463 ///    A vector of [8 x float] values, treated as two [4 x float] vectors.
1468 ///    float] subvector. If a bit is set, the corresponding elements from the
1471 ///    result will receive a copy of the final dot product, with bit [0]
1473 ///    highest element of each [4 x float] subvector. If a bit is set, the dot
1477 /// \returns A 256-bit vector of [8 x float] containing the two dot products.
1489 ///    bits [191:128] of the destination, and the selected elements from the
1492 ///    contain a value of 0xFF, the 256-bit destination vector would contain the
1493 ///    following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
1498 /// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);
1503 /// \param a
1504 ///    A 256-bit vector of [8 x float]. The four selected elements in this
1505 ///    operand are copied to bits [63:0] and bits [191:128] in the destination,
1508 ///    A 256-bit vector of [8 x float]. The four selected elements in this
1513 ///    copy from \a a and \a b: \n
1514 ///    Bits [3:0] specify the values copied from operand \a a. \n
1515 ///    Bits [7:4] specify the values copied from operand \a b. \n
1518 ///    Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the
1527 ///    00: Bits [31:0] and [159:128] are copied from the selected operand. \n
1531 ///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
1534 /// \returns A 256-bit vector of [8 x float] containing the shuffled values.
1535 #define _mm256_shuffle_ps(a, b, mask) \  argument
1536   ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
1543 ///    [63:0] and bits [191:128] in the destination, and the selected elements
1546 ///    operand contain a value of 0xF, the 256-bit destination vector would
1547 ///    contain the following values: b[3], a[3], b[1], a[1].
1552 /// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);
1557 /// \param a
1558 ///    A 256-bit vector of [4 x double].
1560 ///    A 256-bit vector of [4 x double].
1563 ///    copy from \a a and \a b: \n
1564 ///    Bit [0]=0: Bits [63:0] are copied from \a a to bits [63:0] of the
1566 ///    Bit [0]=1: Bits [127:64] are copied from \a a to bits [63:0] of the
1568 ///    Bit [1]=0: Bits [63:0] are copied from \a b to bits [127:64] of the
1570 ///    Bit [1]=1: Bits [127:64] are copied from \a b to bits [127:64] of the
1572 ///    Bit [2]=0: Bits [191:128] are copied from \a a to bits [191:128] of the
1574 ///    Bit [2]=1: Bits [255:192] are copied from \a a to bits [191:128] of the
1576 ///    Bit [3]=0: Bits [191:128] are copied from \a b to bits [255:192] of the
1578 ///    Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the
1580 /// \returns A 256-bit vector of [4 x double] containing the shuffled values.
1581 #define _mm256_shuffle_pd(a, b, mask) \  argument
1582   ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
1613 ///    128-bit vectors of [2 x double], using the operation specified by the
1617 ///    If either value in a comparison is NaN, comparisons that are ordered
1623 /// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
1628 /// \param a
1629 ///    A 128-bit vector of [2 x double].
1631 ///    A 128-bit vector of [2 x double].
1667 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
1668 /// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
1671 /// Compares each of the corresponding values of two 128-bit vectors of
1676 ///    If either value in a comparison is NaN, comparisons that are ordered
1682 /// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
1687 /// \param a
1688 ///    A 128-bit vector of [4 x float].
1690 ///    A 128-bit vector of [4 x float].
1726 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1727 /// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
1734 ///    If either value in a comparison is NaN, comparisons that are ordered
1740 /// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);
1745 /// \param a
1746 ///    A 256-bit vector of [4 x double].
1748 ///    A 256-bit vector of [4 x double].
1784 /// \returns A 256-bit vector of [4 x double] containing the comparison results.
1785 #define _mm256_cmp_pd(a, b, c) \  argument
1786   ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
1794 ///    If either value in a comparison is NaN, comparisons that are ordered
1800 /// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);
1805 /// \param a
1806 ///    A 256-bit vector of [8 x float].
1808 ///    A 256-bit vector of [8 x float].
1844 /// \returns A 256-bit vector of [8 x float] containing the comparison results.
1845 #define _mm256_cmp_ps(a, b, c) \  argument
1846   ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
1851 ///    two 128-bit vectors of [2 x double], using the operation specified by the
1855 ///    If either value in a comparison is NaN, comparisons that are ordered
1861 /// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
1866 /// \param a
1867 ///    A 128-bit vector of [2 x double].
1869 ///    A 128-bit vector of [2 x double].
1905 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
1906 /// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
1909 /// Compares each of the corresponding scalar values of two 128-bit
1914 ///    If either value in a comparison is NaN, comparisons that are ordered
1920 /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
1925 /// \param a
1926 ///    A 128-bit vector of [4 x float].
1928 ///    A 128-bit vector of [4 x float].
1964 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1965 /// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
1967 /// Takes a [8 x i32] vector and returns the vector element value
1980 ///    A 256-bit vector of [8 x i32].
1984 /// \returns A 32-bit integer containing the extracted 32 bits of extended
1989 /// Takes a [16 x i16] vector and returns the vector element value
2002 ///    A 256-bit integer vector of [16 x i16].
2006 /// \returns A 32-bit integer containing the extracted 16 bits of zero extended
2012 /// Takes a [32 x i8] vector and returns the vector element value
2025 ///    A 256-bit integer vector of [32 x i8].
2029 /// \returns A 32-bit integer containing the extracted 8 bits of zero extended
2036 /// Takes a [4 x i64] vector and returns the vector element value
2049 ///    A 256-bit integer vector of [4 x i64].
2053 /// \returns A 64-bit integer containing the extracted 64 bits of extended
2059 /// Takes a [8 x i32] vector and replaces the vector element value
2060 ///    indexed by the immediate constant operand with a new value. Returns the
2073 ///    A vector of [8 x i32] to be used by the insert operation.
2079 /// \returns A copy of vector \a X, after replacing its element indexed by
2080 ///    \a N with \a I.
2086 /// Takes a [16 x i16] vector and replaces the vector element value
2087 ///    indexed by the immediate constant operand with a new value. Returns the
2100 ///    A vector of [16 x i16] to be used by the insert operation.
2106 /// \returns A copy of vector \a X, after replacing its element indexed by
2107 ///    \a N with \a I.
2112 /// Takes a [32 x i8] vector and replaces the vector element value
2113 ///    indexed by the immediate constant operand with a new value. Returns the
2126 ///    A vector of [32 x i8] to be used by the insert operation.
2132 /// \returns A copy of vector \a X, after replacing its element indexed by
2133 ///    \a N with \a I.
2139 /// Takes a [4 x i64] vector and replaces the vector element value
2140 ///    indexed by the immediate constant operand with a new value. Returns the
2153 ///    A vector of [4 x i64] to be used by the insert operation.
2155 ///    A 64-bit integer value. The replacement value for the insert operation.
2159 /// \returns A copy of vector \a X, after replacing its element indexed by
2160 ///    \a N with \a I.
2167 /// Converts a vector of [4 x i32] into a vector of [4 x double].
2174 ///    A 128-bit integer vector of [4 x i32].
2175 /// \returns A 256-bit vector of [4 x double] containing the converted values.
2182 /// Converts a vector of [8 x i32] into a vector of [8 x float].
2189 ///    A 256-bit integer vector.
2190 /// \returns A 256-bit vector of [8 x float] containing the converted values.
2197 /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2205 ///    A 256-bit vector of [4 x double].
2206 /// \returns A 128-bit vector of [4 x float] containing the converted values.
2213 /// Converts a vector of [8 x float] into a vector of [8 x i32].
2215 ///    If a converted value does not fit in a 32-bit integer, raises a
2224 ///    A 256-bit vector of [8 x float].
2225 /// \returns A 256-bit integer vector containing the converted values.
2232 /// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4
2240 ///    A 128-bit vector of [4 x float].
2241 /// \returns A 256-bit vector of [4 x double] containing the converted values.
2248 /// Converts a 256-bit vector of [4 x double] into four signed truncated
2249 ///    (rounded toward zero) 32-bit integers returned in a 128-bit vector of
2252 ///    If a converted value does not fit in a 32-bit integer, raises a
2261 ///    A 256-bit vector of [4 x double].
2262 /// \returns A 128-bit integer vector containing the converted values.
2269 /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2272 ///    If a converted value does not fit in a 32-bit integer, raises a
2281 ///    A 256-bit vector of [4 x double].
2282 /// \returns A 128-bit integer vector containing the converted values.
2289 /// Converts a vector of [8 x float] into eight signed truncated (rounded
2290 ///    toward zero) 32-bit integers returned in a vector of [8 x i32].
2292 ///    If a converted value does not fit in a 32-bit integer, raises a
2301 ///    A 256-bit vector of [8 x float].
2302 /// \returns A 256-bit integer vector containing the converted values.
2313 /// This intrinsic is a utility function and does not correspond to a specific
2317 ///    A 256-bit vector of [4 x double].
2318 /// \returns A 64-bit double containing the first element of the input vector.
2329 /// This intrinsic is a utility function and does not correspond to a specific
2333 ///    A 256-bit vector of [8 x i32].
2334 /// \returns A 32-bit integer containing the first element of the input vector.
2346 /// This intrinsic is a utility function and does not correspond to a specific
2350 ///    A 256-bit vector of [8 x float].
2351 /// \returns A 32-bit float containing the first element of the input vector.
2359 /// Moves and duplicates odd-indexed values from a 256-bit vector of
2360 ///    [8 x float] to float values in a 256-bit vector of [8 x float].
2367 ///    A 256-bit vector of [8 x float]. \n
2368 ///    Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of
2370 ///    Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2372 ///    Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the
2374 ///    Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the
2376 /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
2384 /// Moves and duplicates even-indexed values from a 256-bit vector of
2385 ///    [8 x float] to float values in a 256-bit vector of [8 x float].
2392 ///    A 256-bit vector of [8 x float]. \n
2393 ///    Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of
2395 ///    Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2397 ///    Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the
2399 ///    Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the
2401 /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
2409 /// Moves and duplicates double-precision floating point values from a
2410 ///    256-bit vector of [4 x double] to double-precision values in a 256-bit
2418 ///    A 256-bit vector of [4 x double]. \n
2419 ///    Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the
2421 ///    Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2423 /// \returns A 256-bit vector of [4 x double] containing the moved and
2433 ///    [4 x double] and interleaves them into a 256-bit vector of [4 x double].
2440 ///    A 256-bit floating-point vector of [4 x double]. \n
2442 ///    Bits [255:192] are written to bits [191:128] of the return value. \n
2444 ///    A 256-bit floating-point vector of [4 x double]. \n
2447 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2455 ///    [4 x double] and interleaves them into a 256-bit vector of [4 x double].
2462 ///    A 256-bit floating-point vector of [4 x double]. \n
2464 ///    Bits [191:128] are written to bits [191:128] of the return value.
2466 ///    A 256-bit floating-point vector of [4 x double]. \n
2468 ///    Bits [191:128] are written to bits [255:192] of the return value. \n
2469 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2477 ///    two 256-bit vectors of [8 x float] and interleaves them into a 256-bit
2485 ///    A 256-bit vector of [8 x float]. \n
2488 ///    Bits [223:192] are written to bits [159:128] of the return value. \n
2491 ///    A 256-bit vector of [8 x float]. \n
2496 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2504 ///    two 256-bit vectors of [8 x float] and interleaves them into a 256-bit
2512 ///    A 256-bit vector of [8 x float]. \n
2515 ///    Bits [159:128] are written to bits [159:128] of the return value. \n
2518 ///    A 256-bit vector of [8 x float]. \n
2521 ///    Bits [159:128] are written to bits [191:160] of the return value. \n
2523 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2531 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2550 ///    A 128-bit vector of [2 x double].
2552 ///    A 128-bit vector of [2 x double].
2560 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2579 ///    A 128-bit vector of [2 x double].
2581 ///    A 128-bit vector of [2 x double].
2589 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2609 ///    A 128-bit vector of [2 x double].
2611 ///    A 128-bit vector of [2 x double].
2619 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2638 ///    A 128-bit vector of [4 x float].
2640 ///    A 128-bit vector of [4 x float].
2648 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2667 ///    A 128-bit vector of [4 x float].
2669 ///    A 128-bit vector of [4 x float].
2677 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2697 ///    A 128-bit vector of [4 x float].
2699 ///    A 128-bit vector of [4 x float].
2726 ///    A 256-bit vector of [4 x double].
2728 ///    A 256-bit vector of [4 x double].
2755 ///    A 256-bit vector of [4 x double].
2757 ///    A 256-bit vector of [4 x double].
2785 ///    A 256-bit vector of [4 x double].
2787 ///    A 256-bit vector of [4 x double].
2814 ///    A 256-bit vector of [8 x float].
2816 ///    A 256-bit vector of [8 x float].
2843 ///    A 256-bit vector of [8 x float].
2845 ///    A 256-bit vector of [8 x float].
2873 ///    A 256-bit vector of [8 x float].
2875 ///    A 256-bit vector of [8 x float].
2883 /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2899 ///    A 256-bit integer vector.
2901 ///    A 256-bit integer vector.
2909 /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2925 ///    A 256-bit integer vector.
2927 ///    A 256-bit integer vector.
2935 /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2952 ///    A 256-bit integer vector.
2954 ///    A 256-bit integer vector.
2964 ///    in a 256-bit vector of [4 x double] and writes them to the lower order
2972 ///    A 256-bit vector of [4 x double] containing the double-precision
2982 ///    in a 256-bit vector of [8 x float] and writes them to the lower order
2990 ///    A 256-bit vector of [8 x float] containing the single-precision floating
3011 /// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
3023 /// Loads a scalar single-precision floating point value from the
3024 ///    specified address pointed to by \a __a and broadcasts it to the elements
3025 ///    of a [4 x float] vector.
3033 /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set
3045 /// Loads a scalar double-precision floating point value from the
3046 ///    specified address pointed to by \a __a and broadcasts it to the elements
3047 ///    of a [4 x double] vector.
3055 /// \returns A 256-bit vector of [4 x double] whose 64-bit elements are set
3067 /// Loads a scalar single-precision floating point value from the
3068 ///    specified address pointed to by \a __a and broadcasts it to the elements
3069 ///    of an [8 x float] vector.
3077 /// \returns A 256-bit vector of [8 x float] whose 32-bit elements are set
3089 /// Loads the data from a 128-bit vector of [2 x double] from the
3090 ///    specified address pointed to by \a __a and broadcasts it to 128-bit
3091 ///    elements in a 256-bit vector of [4 x double].
3098 ///    The 128-bit vector of [2 x double] to be broadcast.
3099 /// \returns A 256-bit vector of [4 x double] whose 128-bit elements are set
3109 /// Loads the data from a 128-bit vector of [4 x float] from the
3110 ///    specified address pointed to by \a __a and broadcasts it to 128-bit
3111 ///    elements in a 256-bit vector of [8 x float].
3118 ///    The 128-bit vector of [4 x float] to be broadcast.
3119 /// \returns A 256-bit vector of [8 x float] whose 128-bit elements are set
3130 /// Loads 4 double-precision floating point values from a 32-byte aligned
3131 ///    memory location pointed to by \a __p into a vector of [4 x double].
3138 ///    A 32-byte aligned pointer to a memory location containing
3140 /// \returns A 256-bit vector of [4 x double] containing the moved values.
3147 /// Loads 8 single-precision floating point values from a 32-byte aligned
3148 ///    memory location pointed to by \a __p into a vector of [8 x float].
3155 ///    A 32-byte aligned pointer to a memory location containing float values.
3156 /// \returns A 256-bit vector of [8 x float] containing the moved values.
3164 ///    memory location pointed to by \a __p into a vector of [4 x double].
3171 ///    A pointer to a memory location containing double-precision floating
3173 /// \returns A 256-bit vector of [4 x double] containing the moved values.
3184 ///    memory location pointed to by \a __p into a vector of [8 x float].
3191 ///    A pointer to a memory location containing single-precision floating
3193 /// \returns A 256-bit vector of [8 x float] containing the moved values.
3203 /// Loads 256 bits of integer data from a 32-byte aligned memory
3204 ///    location pointed to by \a __p into elements of a 256-bit integer vector.
3211 ///    A 32-byte aligned pointer to a 256-bit integer vector containing integer
3213 /// \returns A 256-bit integer vector containing the moved values.
3221 ///    pointed to by \a __p into a 256-bit integer vector.
3228 ///    A pointer to a 256-bit integer vector containing integer values.
3229 /// \returns A 256-bit integer vector containing the moved values.
3240 ///    pointed to by \a __p into a 256-bit integer vector. This intrinsic may
3241 ///    perform better than \c _mm256_loadu_si256 when the data crosses a cache
3249 ///    A pointer to a 256-bit integer vector containing integer values.
3250 /// \returns A 256-bit integer vector containing the moved values.
3258 /// Stores double-precision floating point values from a 256-bit vector
3259 ///    of [4 x double] to a 32-byte aligned memory location pointed to by
3260 ///    \a __p.
3267 ///    A 32-byte aligned pointer to a memory location that will receive the
3270 ///    A 256-bit vector of [4 x double] containing the values to be moved.
3277 /// Stores single-precision floating point values from a 256-bit vector
3278 ///    of [8 x float] to a 32-byte aligned memory location pointed to by \a __p.
3285 ///    A 32-byte aligned pointer to a memory location that will receive the
3288 ///    A 256-bit vector of [8 x float] containing the values to be moved.
3295 /// Stores double-precision floating point values from a 256-bit vector
3296 ///    of [4 x double] to an unaligned memory location pointed to by \a __p.
3303 ///    A pointer to a memory location that will receive the double-precision
3306 ///    A 256-bit vector of [4 x double] containing the values to be moved.
3316 /// Stores single-precision floating point values from a 256-bit vector
3317 ///    of [8 x float] to an unaligned memory location pointed to by \a __p.
3324 ///    A pointer to a memory location that will receive the float values.
3326 ///    A 256-bit vector of [8 x float] containing the values to be moved.
3336 /// Stores integer values from a 256-bit integer vector to a 32-byte
3337 ///    aligned memory location pointed to by \a __p.
3344 ///    A 32-byte aligned pointer to a memory location that will receive the
3347 ///    A 256-bit integer vector containing the values to be moved.
3354 /// Stores integer values from a 256-bit integer vector to an unaligned
3355 ///    memory location pointed to by \a __p.
3362 ///    A pointer to a memory location that will receive the integer values.
3364 ///    A 256-bit integer vector containing the values to be moved.
3375 /// Conditionally loads double-precision floating point elements from a
3376 ///    memory location pointed to by \a __p into a 128-bit vector of
3385 ///    A pointer to a memory location that contains the double-precision
3388 ///    A 128-bit integer vector containing the mask. The most significant bit of
3389 ///    each data element represents the mask bits. If a mask bit is zero, the
3392 /// \returns A 128-bit vector of [2 x double] containing the loaded values.
3399 /// Conditionally loads double-precision floating point elements from a
3400 ///    memory location pointed to by \a __p into a 256-bit vector of
3409 ///    A pointer to a memory location that contains the double-precision
3412 ///    A 256-bit integer vector of [4 x quadword] containing the mask. The most
3413 ///    significant bit of each quadword element represents the mask bits. If a
3416 /// \returns A 256-bit vector of [4 x double] containing the loaded values.
3424 /// Conditionally loads single-precision floating point elements from a
3425 ///    memory location pointed to by \a __p into a 128-bit vector of
3434 ///    A pointer to a memory location that contains the single-precision
3437 ///    A 128-bit integer vector containing the mask. The most significant bit of
3438 ///    each data element represents the mask bits. If a mask bit is zero, the
3441 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
3448 /// Conditionally loads single-precision floating point elements from a
3449 ///    memory location pointed to by \a __p into a 256-bit vector of
3458 ///    A pointer to a memory location that contains the single-precision
3461 ///    A 256-bit integer vector of [8 x dword] containing the mask. The most
3462 ///    significant bit of each dword element represents the mask bits. If a mask
3465 /// \returns A 256-bit vector of [8 x float] containing the loaded values.
3473 /// Moves single-precision floating point values from a 256-bit vector
3474 ///    of [8 x float] to a memory location pointed to by \a __p, according to
3482 ///    A pointer to a memory location that will receive the float values.
3484 ///    A 256-bit integer vector of [8 x dword] containing the mask. The most
3486 ///    mask bits. If a mask bit is zero, the corresponding value from vector
3487 ///    \a __a is not stored and the corresponding field in the memory location
3488 ///    pointed to by \a __p is not changed.
3490 ///    A 256-bit vector of [8 x float] containing the values to be stored.
3497 /// Moves double-precision values from a 128-bit vector of [2 x double]
3498 ///    to a memory location pointed to by \a __p, according to the specified
3506 ///    A pointer to a memory location that will receive the double-precision values.
3508 ///    A 128-bit integer vector containing the mask. The most significant bit of
3509 ///    each field in the mask vector represents the mask bits. If a mask bit is
3510 ///    zero, the corresponding value from vector \a __a is not stored and the
3511 ///    corresponding field in the memory location pointed to by \a __p is not
3514 ///    A 128-bit vector of [2 x double] containing the values to be stored.
3521 /// Moves double-precision values from a 256-bit vector of [4 x double]
3522 ///    to a memory location pointed to by \a __p, according to the specified
3530 ///    A pointer to a memory location that will receive the double-precision values.
3532 ///    A 256-bit integer vector of [4 x quadword] containing the mask. The most
3534 ///    the mask bits. If a mask bit is zero, the corresponding value from vector
3536 ///    pointed to by \a __p is not changed.
3538 ///    A 256-bit vector of [4 x double] containing the values to be stored.
3545 /// Moves single-precision floating point values from a 128-bit vector
3546 ///    of [4 x float] to a memory location pointed to by \a __p, according to
3554 ///    A pointer to a memory location that will receive the float values.
3556 ///    A 128-bit integer vector containing the mask. The most significant bit of
3557 ///    each field in the mask vector represents the mask bits. If a mask bit is
3559 ///    corresponding field in the memory location pointed to by \a __p is not
3562 ///    A 128-bit vector of [4 x float] containing the values to be stored.
3570 /// Moves integer data from a 256-bit integer vector to a 32-byte
3579 ///    A pointer to a 32-byte aligned memory location that will receive the
3582 ///    A 256-bit integer vector containing the values to be moved.
3590 /// Moves double-precision values from a 256-bit vector of [4 x double]
3591 ///    to a 32-byte aligned memory location. To minimize caching, the data is
3599 ///    A pointer to a 32-byte aligned memory location that will receive the
3602 ///    A 256-bit vector of [4 x double] containing the values to be moved.
3610 /// Moves single-precision floating point values from a 256-bit vector
3611 ///    of [8 x float] to a 32-byte aligned memory location. To minimize
3620 ///    A pointer to a 32-byte aligned memory location that will receive the
3623 ///    A 256-bit vector of [8 x float] containing the values to be moved.
3632 /// Create a 256-bit vector of [4 x double] with undefined values.
3638 /// \returns A 256-bit vector of [4 x double] containing undefined values.
3645 /// Create a 256-bit vector of [8 x float] with undefined values.
3651 /// \returns A 256-bit vector of [8 x float] containing undefined values.
3658 /// Create a 256-bit integer vector with undefined values.
3664 /// \returns A 256-bit integer vector containing undefined values.
3671 /// Constructs a 256-bit floating-point vector of [4 x double]
3680 ///    A double-precision floating-point value used to initialize bits [255:192]
3683 ///    A double-precision floating-point value used to initialize bits [191:128]
3686 ///    A double-precision floating-point value used to initialize bits [127:64]
3689 ///    A double-precision floating-point value used to initialize bits [63:0]
3698 /// Constructs a 256-bit floating-point vector of [8 x float] initialized
3703 /// This intrinsic is a utility function and does not correspond to a specific
3707 ///    A single-precision floating-point value used to initialize bits [255:224]
3710 ///    A single-precision floating-point value used to initialize bits [223:192]
3713 ///    A single-precision floating-point value used to initialize bits [191:160]
3716 ///    A single-precision floating-point value used to initialize bits [159:128]
3719 ///    A single-precision floating-point value used to initialize bits [127:96]
3722 ///    A single-precision floating-point value used to initialize bits [95:64]
3725 ///    A single-precision floating-point value used to initialize bits [63:32]
3728 ///    A single-precision floating-point value used to initialize bits [31:0]
3738 /// Constructs a 256-bit integer vector initialized with the specified
3743 /// This intrinsic is a utility function and does not correspond to a specific
3747 ///    A 32-bit integral value used to initialize bits [255:224] of the result.
3749 ///    A 32-bit integral value used to initialize bits [223:192] of the result.
3751 ///    A 32-bit integral value used to initialize bits [191:160] of the result.
3753 ///    A 32-bit integral value used to initialize bits [159:128] of the result.
3755 ///    A 32-bit integral value used to initialize bits [127:96] of the result.
3757 ///    A 32-bit integral value used to initialize bits [95:64] of the result.
3759 ///    A 32-bit integral value used to initialize bits [63:32] of the result.
3761 ///    A 32-bit integral value used to initialize bits [31:0] of the result.
3770 /// Constructs a 256-bit integer vector initialized with the specified
3775 /// This intrinsic is a utility function and does not correspond to a specific
3779 ///    A 16-bit integral value used to initialize bits [255:240] of the result.
3781 ///    A 16-bit integral value used to initialize bits [239:224] of the result.
3783 ///    A 16-bit integral value used to initialize bits [223:208] of the result.
3785 ///    A 16-bit integral value used to initialize bits [207:192] of the result.
3787 ///    A 16-bit integral value used to initialize bits [191:176] of the result.
3789 ///    A 16-bit integral value used to initialize bits [175:160] of the result.
3791 ///    A 16-bit integral value used to initialize bits [159:144] of the result.
3793 ///    A 16-bit integral value used to initialize bits [143:128] of the result.
3795 ///    A 16-bit integral value used to initialize bits [127:112] of the result.
3797 ///    A 16-bit integral value used to initialize bits [111:96] of the result.
3799 ///    A 16-bit integral value used to initialize bits [95:80] of the result.
3801 ///    A 16-bit integral value used to initialize bits [79:64] of the result.
3803 ///    A 16-bit integral value used to initialize bits [63:48] of the result.
3805 ///    A 16-bit integral value used to initialize bits [47:32] of the result.
3807 ///    A 16-bit integral value used to initialize bits [31:16] of the result.
3809 ///    A 16-bit integral value used to initialize bits [15:0] of the result.
3821 /// Constructs a 256-bit integer vector initialized with the specified
3826 /// This intrinsic is a utility function and does not correspond to a specific
3860 ///    An 8-bit integral value used to initialize bits [135:128] of the result.
3912 /// Constructs a 256-bit integer vector initialized with the specified
3921 ///    A 64-bit integral value used to initialize bits [255:192] of the result.
3923 ///    A 64-bit integral value used to initialize bits [191:128] of the result.
3925 ///    A 64-bit integral value used to initialize bits [127:64] of the result.
3927 ///    A 64-bit integral value used to initialize bits [63:0] of the result.
3936 /// Constructs a 256-bit floating-point vector of [4 x double],
3946 ///    A double-precision floating-point value used to initialize bits [63:0]
3949 ///    A double-precision floating-point value used to initialize bits [127:64]
3952 ///    A double-precision floating-point value used to initialize bits [191:128]
3955 ///    A double-precision floating-point value used to initialize bits [255:192]
3964 /// Constructs a 256-bit floating-point vector of [8 x float],
3970 /// This intrinsic is a utility function and does not correspond to a specific
3974 ///    A single-precision floating-point value used to initialize bits [31:0]
3977 ///    A single-precision floating-point value used to initialize bits [63:32]
3980 ///    A single-precision floating-point value used to initialize bits [95:64]
3983 ///    A single-precision floating-point value used to initialize bits [127:96]
3986 ///    A single-precision floating-point value used to initialize bits [159:128]
3989 ///    A single-precision floating-point value used to initialize bits [191:160]
3992 ///    A single-precision floating-point value used to initialize bits [223:192]
3995 ///    A single-precision floating-point value used to initialize bits [255:224]
4005 /// Constructs a 256-bit integer vector, initialized in reverse order
4010 /// This intrinsic is a utility function and does not correspond to a specific
4014 ///    A 32-bit integral value used to initialize bits [31:0] of the result.
4016 ///    A 32-bit integral value used to initialize bits [63:32] of the result.
4018 ///    A 32-bit integral value used to initialize bits [95:64] of the result.
4020 ///    A 32-bit integral value used to initialize bits [127:96] of the result.
4022 ///    A 32-bit integral value used to initialize bits [159:128] of the result.
4024 ///    A 32-bit integral value used to initialize bits [191:160] of the result.
4026 ///    A 32-bit integral value used to initialize bits [223:192] of the result.
4028 ///    A 32-bit integral value used to initialize bits [255:224] of the result.
4037 /// Constructs a 256-bit integer vector, initialized in reverse order
4042 /// This intrinsic is a utility function and does not correspond to a specific
4046 ///    A 16-bit integral value used to initialize bits [15:0] of the result.
4048 ///    A 16-bit integral value used to initialize bits [31:16] of the result.
4050 ///    A 16-bit integral value used to initialize bits [47:32] of the result.
4052 ///    A 16-bit integral value used to initialize bits [63:48] of the result.
4054 ///    A 16-bit integral value used to initialize bits [79:64] of the result.
4056 ///    A 16-bit integral value used to initialize bits [95:80] of the result.
4058 ///    A 16-bit integral value used to initialize bits [111:96] of the result.
4060 ///    A 16-bit integral value used to initialize bits [127:112] of the result.
4062 ///    A 16-bit integral value used to initialize bits [143:128] of the result.
4064 ///    A 16-bit integral value used to initialize bits [159:144] of the result.
4066 ///    A 16-bit integral value used to initialize bits [175:160] of the result.
4068 ///    A 16-bit integral value used to initialize bits [191:176] of the result.
4070 ///    A 16-bit integral value used to initialize bits [207:192] of the result.
4072 ///    A 16-bit integral value used to initialize bits [223:208] of the result.
4074 ///    A 16-bit integral value used to initialize bits [239:224] of the result.
4076 ///    A 16-bit integral value used to initialize bits [255:240] of the result.
4090 /// Constructs a 256-bit integer vector, initialized in reverse order
4095 /// This intrinsic is a utility function and does not correspond to a specific
4131 ///    An 8-bit integral value used to initialize bits [135:128] of the result.
4179 /// Constructs a 256-bit integer vector, initialized in reverse order
4188 ///    A 64-bit integral value used to initialize bits [63:0] of the result.
4190 ///    A 64-bit integral value used to initialize bits [127:64] of the result.
4192 ///    A 64-bit integral value used to initialize bits [191:128] of the result.
4194 ///    A 64-bit integral value used to initialize bits [255:192] of the result.
4203 /// Constructs a 256-bit floating-point vector of [4 x double], with each
4212 ///    A double-precision floating-point value used to initialize each vector
4221 /// Constructs a 256-bit floating-point vector of [8 x float], with each
4231 ///    A single-precision floating-point value used to initialize each vector
4240 /// Constructs a 256-bit integer vector of [8 x i32], with each of the
4250 ///    A 32-bit integral value used to initialize each vector element of the
4259 /// Constructs a 256-bit integer vector of [16 x i16], with each of the
4268 ///    A 16-bit integral value used to initialize each vector element of the
4278 /// Constructs a 256-bit integer vector of [32 x i8], with each of the
4298 /// Constructs a 256-bit integer vector of [4 x i64], with each of the
4307 ///    A 64-bit integral value used to initialize each vector element of the
4317 /// Constructs a 256-bit floating-point vector of [4 x double] with all
4324 /// \returns A 256-bit vector of [4 x double] with all elements set to zero.
4331 /// Constructs a 256-bit floating-point vector of [8 x float] with all
4338 /// \returns A 256-bit vector of [8 x float] with all elements set to zero.
4345 /// Constructs a 256-bit integer vector initialized to zero.
4351 /// \returns A 256-bit integer vector initialized to zero.
4359 /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit
4367 ///    A 256-bit floating-point vector of [4 x double].
4368 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
4376 /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit
4384 ///    A 256-bit floating-point vector of [4 x double].
4385 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
4393 /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit
4401 ///    A 256-bit floating-point vector of [8 x float].
4402 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
4410 /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit
4418 ///    A 256-bit floating-point vector of [8 x float].
4419 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
4427 /// Casts a 256-bit integer vector into a 256-bit floating-point vector
4435 ///    A 256-bit integer vector.
4436 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
4444 /// Casts a 256-bit integer vector into a 256-bit floating-point vector
4452 ///    A 256-bit integer vector.
4453 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
4461 /// Returns the lower 128 bits of a 256-bit floating-point vector of
4462 ///    [4 x double] as a 128-bit floating-point vector of [2 x double].
4469 ///    A 256-bit floating-point vector of [4 x double].
4470 /// \returns A 128-bit floating-point vector of [2 x double] containing the
4471 ///    lower 128 bits of the parameter.
4478 /// Returns the lower 128 bits of a 256-bit floating-point vector of
4479 ///    [8 x float] as a 128-bit floating-point vector of [4 x float].
4486 ///    A 256-bit floating-point vector of [8 x float].
4487 /// \returns A 128-bit floating-point vector of [4 x float] containing the
4488 ///    lower 128 bits of the parameter.
4495 /// Truncates a 256-bit integer vector into a 128-bit integer vector.
4502 ///    A 256-bit integer vector.
4503 /// \returns A 128-bit integer vector containing the lower 128 bits of the
4511 /// Constructs a 256-bit floating-point vector of [4 x double] from a
4512 ///    128-bit floating-point vector of [2 x double].
4514 ///    The lower 128 bits contain the value of the source vector. The contents
4515 ///    of the upper 128 bits are undefined.
4522 ///    A 128-bit vector of [2 x double].
4523 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4524 ///    contain the value of the parameter. The contents of the upper 128 bits
4533 /// Constructs a 256-bit floating-point vector of [8 x float] from a
4534 ///    128-bit floating-point vector of [4 x float].
4536 ///    The lower 128 bits contain the value of the source vector. The contents
4537 ///    of the upper 128 bits are undefined.
4544 ///    A 128-bit vector of [4 x float].
4545 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4546 ///    contain the value of the parameter. The contents of the upper 128 bits
4556 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4558 ///    The lower 128 bits contain the value of the source vector. The contents
4559 ///    of the upper 128 bits are undefined.
4566 ///    A 128-bit integer vector.
4567 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4568 ///    the parameter. The contents of the upper 128 bits are undefined.
4576 /// Constructs a 256-bit floating-point vector of [4 x double] from a
4577 ///    128-bit floating-point vector of [2 x double]. The lower 128 bits
4578 ///    contain the value of the source vector. The upper 128 bits are set
4586 ///    A 128-bit vector of [2 x double].
4587 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4588 ///    contain the value of the parameter. The upper 128 bits are set to zero.
4595 /// Constructs a 256-bit floating-point vector of [8 x float] from a
4596 ///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
4597 ///    the value of the source vector. The upper 128 bits are set to zero.
4604 ///    A 128-bit vector of [4 x float].
4605 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4606 ///    contain the value of the parameter. The upper 128 bits are set to zero.
4613 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4614 ///    The lower 128 bits contain the value of the source vector. The upper
4615 ///    128 bits are set to zero.
4622 ///    A 128-bit integer vector.
4623 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4624 ///    the parameter. The upper 128 bits are set to zero.
4634    invocations where the immediate M is a constant expression.
4636 /// Constructs a new 256-bit vector of [8 x float] by first duplicating
4637 ///    a 256-bit vector of [8 x float] given in the first parameter, and then
4638 ///    replacing either the upper or the lower 128 bits with the contents of a
4639 ///    128-bit vector of [4 x float] in the second parameter.
4642 ///    128 bits.
4653 ///    A 256-bit vector of [8 x float]. This vector is copied to the result
4654 ///    first, and then either the upper or the lower 128 bits of the result will
4655 ///    be replaced by the contents of \a V2.
4657 ///    A 128-bit vector of [4 x float]. The contents of this parameter are
4658 ///    written to either the upper or the lower 128 bits of the result depending
4659 ///    on the value of parameter \a M.
4663 ///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
4664 ///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
4666 ///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
4667 ///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
4669 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
4674 /// Constructs a new 256-bit vector of [4 x double] by first duplicating
4675 ///    a 256-bit vector of [4 x double] given in the first parameter, and then
4676 ///    replacing either the upper or the lower 128 bits with the contents of a
4677 ///    128-bit vector of [2 x double] in the second parameter.
4680 ///    128 bits.
4691 ///    A 256-bit vector of [4 x double]. This vector is copied to the result
4692 ///    first, and then either the upper or the lower 128 bits of the result will
4693 ///    be replaced by the contents of \a V2.
4695 ///    A 128-bit vector of [2 x double]. The contents of this parameter are
4696 ///    written to either the upper or the lower 128 bits of the result depending
4697 ///    on the value of parameter \a M.
4701 ///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
4702 ///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
4704 ///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
4705 ///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
4707 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
4712 /// Constructs a new 256-bit integer vector by first duplicating a
4714 ///    either the upper or the lower 128 bits with the contents of a 128-bit
4718 ///    128 bits.
4729 ///    A 256-bit integer vector. This vector is copied to the result first, and
4730 ///    then either the upper or the lower 128 bits of the result will be
4731 ///    replaced by the contents of \a V2.
4733 ///    A 128-bit integer vector. The contents of this parameter are written to
4734 ///    either the upper or the lower 128 bits of the result depending on the
4735 ///    value of parameter \a M.
4739 ///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
4740 ///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
4742 ///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
4743 ///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
4745 /// \returns A 256-bit integer vector containing the interleaved values.
4753    invocations where the immediate M is a constant expression.
4755 /// Extracts either the upper or the lower 128 bits from a 256-bit vector
4757 ///    returns the extracted bits as a 128-bit vector of [4 x float].
4768 ///    A 256-bit vector of [8 x float].
4772 ///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4774 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4775 /// \returns A 128-bit vector of [4 x float] containing the extracted bits.
4779 /// Extracts either the upper or the lower 128 bits from a 256-bit vector
4781 ///    returns the extracted bits as a 128-bit vector of [2 x double].
4792 ///    A 256-bit vector of [4 x double].
4796 ///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4798 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4799 /// \returns A 128-bit vector of [2 x double] containing the extracted bits.
4803 /// Extracts either the upper or the lower 128 bits from a 256-bit
4805 ///    returns the extracted bits as a 128-bit integer vector.
4816 ///    A 256-bit integer vector.
4820 ///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4822 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4823 /// \returns A 128-bit integer vector containing the extracted bits.
4827 /// Constructs a 256-bit floating-point vector of [8 x float] by
4828 ///    concatenating two 128-bit floating-point vectors of [4 x float].
4835 ///    A 128-bit floating-point vector of [4 x float] to be copied to the upper
4836 ///    128 bits of the result.
4838 ///    A 128-bit floating-point vector of [4 x float] to be copied to the lower
4839 ///    128 bits of the result.
4840 /// \returns A 256-bit floating-point vector of [8 x float] containing the
4848 /// Constructs a 256-bit floating-point vector of [4 x double] by
4849 ///    concatenating two 128-bit floating-point vectors of [2 x double].
4856 ///    A 128-bit floating-point vector of [2 x double] to be copied to the upper
4857 ///    128 bits of the result.
4859 ///    A 128-bit floating-point vector of [2 x double] to be copied to the lower
4860 ///    128 bits of the result.
4861 /// \returns A 256-bit floating-point vector of [4 x double] containing the
4869 /// Constructs a 256-bit integer vector by concatenating two 128-bit
4877 ///    A 128-bit integer vector to be copied to the upper 128 bits of the
4880 ///    A 128-bit integer vector to be copied to the lower 128 bits of the
4882 /// \returns A 256-bit integer vector containing the concatenated result.
4889 /// Constructs a 256-bit floating-point vector of [8 x float] by
4890 ///    concatenating two 128-bit floating-point vectors of [4 x float]. This is
4899 ///    A 128-bit floating-point vector of [4 x float] to be copied to the lower
4900 ///    128 bits of the result.
4902 ///    A 128-bit floating-point vector of [4 x float] to be copied to the upper
4903 ///    128 bits of the result.
4904 /// \returns A 256-bit floating-point vector of [8 x float] containing the
4912 /// Constructs a 256-bit floating-point vector of [4 x double] by
4913 ///    concatenating two 128-bit floating-point vectors of [2 x double]. This is
4922 ///    A 128-bit floating-point vector of [2 x double] to be copied to the lower
4923 ///    128 bits of the result.
4925 ///    A 128-bit floating-point vector of [2 x double] to be copied to the upper
4926 ///    128 bits of the result.
4927 /// \returns A 256-bit floating-point vector of [4 x double] containing the
4935 /// Constructs a 256-bit integer vector by concatenating two 128-bit
4944 ///    A 128-bit integer vector to be copied to the lower 128 bits of the
4947 ///    A 128-bit integer vector to be copied to the upper 128 bits of the
4949 /// \returns A 256-bit integer vector containing the concatenated result.
4957 /// Loads two 128-bit floating-point vectors of [4 x float] from
4958 ///    unaligned memory locations and constructs a 256-bit floating-point vector
4959 ///    of [8 x float] by concatenating the two 128-bit vectors.
4967 ///    A pointer to a 128-bit memory location containing 4 consecutive
4969 ///    bits[255:128] of the result. The address of the memory location does not
4972 ///    A pointer to a 128-bit memory location containing 4 consecutive
4976 /// \returns A 256-bit floating-point vector of [8 x float] containing the
4984 /// Loads two 128-bit floating-point vectors of [2 x double] from
4985 ///    unaligned memory locations and constructs a 256-bit floating-point vector
4986 ///    of [4 x double] by concatenating the two 128-bit vectors.
4994 ///    A pointer to a 128-bit memory location containing two consecutive
4996 ///    bits[255:128] of the result. The address of the memory location does not
4999 ///    A pointer to a 128-bit memory location containing two consecutive
5003 /// \returns A 256-bit floating-point vector of [4 x double] containing the
5011 /// Loads two 128-bit integer vectors from unaligned memory locations and
5012 ///    constructs a 256-bit integer vector by concatenating the two 128-bit
5021 ///    A pointer to a 128-bit memory location containing a 128-bit integer
5022 ///    vector. This vector is to be copied to bits[255:128] of the result. The
5025 ///    A pointer to a 128-bit memory location containing a 128-bit integer
5028 /// \returns A 256-bit integer vector containing the concatenated result.
5036 /// Stores the upper and lower 128 bits of a 256-bit floating-point
5045 ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5049 ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5053 ///    A 256-bit floating-point vector of [8 x float].
5065 /// Stores the upper and lower 128 bits of a 256-bit floating-point
5074 ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5078 ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5082 ///    A 256-bit floating-point vector of [4 x double].
5094 /// Stores the upper and lower 128 bits of a 256-bit integer vector into
5103 ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5107 ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5111 ///    A 256-bit integer vector.