Lines Matching +full:64 +full:- +full:bit
1 /*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
36 __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \
40 __target__("mmx,sse,no-evex512"), __min_vector_width__(64)))
42 /// Adds the 32-bit float values in the low-order bits of the operands.
49 /// A 128-bit vector of [4 x float] containing one of the source operands.
52 /// A 128-bit vector of [4 x float] containing one of the source operands.
54 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
64 /// Adds two 128-bit vectors of [4 x float], and returns the results of
72 /// A 128-bit vector of [4 x float] containing one of the source operands.
74 /// A 128-bit vector of [4 x float] containing one of the source operands.
75 /// \returns A 128-bit vector of [4 x float] containing the sums of both
83 /// Subtracts the 32-bit float value in the low-order bits of the second
91 /// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits
94 /// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32
96 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
102 __a[0] -= __b[0]; in _mm_sub_ss()
107 /// operand, both of which are 128-bit vectors of [4 x float] and returns
115 /// A 128-bit vector of [4 x float] containing the minuend.
117 /// A 128-bit vector of [4 x float] containing the subtrahend.
118 /// \returns A 128-bit vector of [4 x float] containing the differences between
123 return (__m128)((__v4sf)__a - (__v4sf)__b); in _mm_sub_ps()
126 /// Multiplies two 32-bit float values in the low-order bits of the
134 /// A 128-bit vector of [4 x float] containing one of the source operands.
137 /// A 128-bit vector of [4 x float] containing one of the source operands.
139 /// \returns A 128-bit vector of [4 x float] containing the product of the lower
149 /// Multiplies two 128-bit vectors of [4 x float] and returns the
157 /// A 128-bit vector of [4 x float] containing one of the source operands.
159 /// A 128-bit vector of [4 x float] containing one of the source operands.
160 /// \returns A 128-bit vector of [4 x float] containing the products of both
168 /// Divides the value in the low-order 32 bits of the first operand by
176 /// A 128-bit vector of [4 x float] containing the dividend. The lower 32
179 /// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits
181 /// \returns A 128-bit vector of [4 x float] containing the quotients of the
191 /// Divides two 128-bit vectors of [4 x float].
198 /// A 128-bit vector of [4 x float] containing the dividend.
200 /// A 128-bit vector of [4 x float] containing the divisor.
201 /// \returns A 128-bit vector of [4 x float] containing the quotients of both
209 /// Calculates the square root of the value stored in the low-order bits
210 /// of a 128-bit vector of [4 x float].
217 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
219 /// \returns A 128-bit vector of [4 x float] containing the square root of the
220 /// value in the low-order bits of the operand.
227 /// Calculates the square roots of the values stored in a 128-bit vector
235 /// A 128-bit vector of [4 x float].
236 /// \returns A 128-bit vector of [4 x float] containing the square roots of the
245 /// low-order bits of a 128-bit vector of [4 x float].
252 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
254 /// \returns A 128-bit vector of [4 x float] containing the approximate
255 /// reciprocal of the value in the low-order bits of the operand.
263 /// 128-bit vector of [4 x float].
270 /// A 128-bit vector of [4 x float].
271 /// \returns A 128-bit vector of [4 x float] containing the approximate
280 /// stored in the low-order bits of a 128-bit vector of [4 x float].
287 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
289 /// \returns A 128-bit vector of [4 x float] containing the approximate
290 /// reciprocal of the square root of the value in the low-order bits of the
299 /// values stored in a 128-bit vector of [4 x float].
306 /// A 128-bit vector of [4 x float].
307 /// \returns A 128-bit vector of [4 x float] containing the approximate
315 /// Compares two 32-bit float values in the low-order bits of both
316 /// operands and returns the lesser value in the low-order bits of the
326 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
329 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
331 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
340 /// Compares two 128-bit vectors of [4 x float] and returns the lesser
350 /// A 128-bit vector of [4 x float] containing one of the operands.
352 /// A 128-bit vector of [4 x float] containing one of the operands.
353 /// \returns A 128-bit vector of [4 x float] containing the minimum values
361 /// Compares two 32-bit float values in the low-order bits of both
362 /// operands and returns the greater value in the low-order bits of a 128-bit
372 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
375 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
377 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
386 /// Compares two 128-bit vectors of [4 x float] and returns the greater
396 /// A 128-bit vector of [4 x float] containing one of the operands.
398 /// A 128-bit vector of [4 x float] containing one of the operands.
399 /// \returns A 128-bit vector of [4 x float] containing the maximum values
407 /// Performs a bitwise AND of two 128-bit vectors of [4 x float].
414 /// A 128-bit vector containing one of the source operands.
416 /// A 128-bit vector containing one of the source operands.
417 /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
425 /// Performs a bitwise AND of two 128-bit vectors of [4 x float], using
434 /// A 128-bit vector of [4 x float] containing the first source operand. The
437 /// A 128-bit vector of [4 x float] containing the second source operand.
438 /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
447 /// Performs a bitwise OR of two 128-bit vectors of [4 x float].
454 /// A 128-bit vector of [4 x float] containing one of the source operands.
456 /// A 128-bit vector of [4 x float] containing one of the source operands.
457 /// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
465 /// Performs a bitwise exclusive OR of two 128-bit vectors of
473 /// A 128-bit vector of [4 x float] containing one of the source operands.
475 /// A 128-bit vector of [4 x float] containing one of the source operands.
476 /// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
484 /// Compares two 32-bit float values in the low-order bits of both
488 /// low-order bits of a vector of [4 x float].
496 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
499 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
501 /// \returns A 128-bit vector of [4 x float] containing the comparison results
502 /// in the low-order bits.
509 /// Compares each of the corresponding 32-bit float values of the
510 /// 128-bit vectors of [4 x float] for equality.
520 /// A 128-bit vector of [4 x float].
522 /// A 128-bit vector of [4 x float].
523 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
530 /// Compares two 32-bit float values in the low-order bits of both
535 /// low-order bits of a vector of [4 x float].
543 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
546 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
548 /// \returns A 128-bit vector of [4 x float] containing the comparison results
549 /// in the low-order bits.
556 /// Compares each of the corresponding 32-bit float values of the
557 /// 128-bit vectors of [4 x float] to determine if the values in the first
568 /// A 128-bit vector of [4 x float].
570 /// A 128-bit vector of [4 x float].
571 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
578 /// Compares two 32-bit float values in the low-order bits of both
583 /// the low-order bits of a vector of [4 x float].
591 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
594 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
596 /// \returns A 128-bit vector of [4 x float] containing the comparison results
597 /// in the low-order bits.
604 /// Compares each of the corresponding 32-bit float values of the
605 /// 128-bit vectors of [4 x float] to determine if the values in the first
616 /// A 128-bit vector of [4 x float].
618 /// A 128-bit vector of [4 x float].
619 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
626 /// Compares two 32-bit float values in the low-order bits of both
631 /// low-order bits of a vector of [4 x float].
639 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
642 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
644 /// \returns A 128-bit vector of [4 x float] containing the comparison results
645 /// in the low-order bits.
654 /// Compares each of the corresponding 32-bit float values of the
655 /// 128-bit vectors of [4 x float] to determine if the values in the first
666 /// A 128-bit vector of [4 x float].
668 /// A 128-bit vector of [4 x float].
669 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
676 /// Compares two 32-bit float values in the low-order bits of both
681 /// low-order bits of a vector of [4 x float].
689 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
692 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
694 /// \returns A 128-bit vector of [4 x float] containing the comparison results
695 /// in the low-order bits.
704 /// Compares each of the corresponding 32-bit float values of the
705 /// 128-bit vectors of [4 x float] to determine if the values in the first
716 /// A 128-bit vector of [4 x float].
718 /// A 128-bit vector of [4 x float].
719 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
726 /// Compares two 32-bit float values in the low-order bits of both operands
730 /// low-order bits of a vector of [4 x float].
739 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
742 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
744 /// \returns A 128-bit vector of [4 x float] containing the comparison results
745 /// in the low-order bits.
752 /// Compares each of the corresponding 32-bit float values of the
753 /// 128-bit vectors of [4 x float] for inequality.
764 /// A 128-bit vector of [4 x float].
766 /// A 128-bit vector of [4 x float].
767 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
774 /// Compares two 32-bit float values in the low-order bits of both
779 /// low-order bits of a vector of [4 x float].
788 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
791 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
793 /// \returns A 128-bit vector of [4 x float] containing the comparison results
794 /// in the low-order bits.
801 /// Compares each of the corresponding 32-bit float values of the
802 /// 128-bit vectors of [4 x float] to determine if the values in the first
814 /// A 128-bit vector of [4 x float].
816 /// A 128-bit vector of [4 x float].
817 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
824 /// Compares two 32-bit float values in the low-order bits of both
829 /// low-order bits of a vector of [4 x float].
838 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
841 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
843 /// \returns A 128-bit vector of [4 x float] containing the comparison results
844 /// in the low-order bits.
851 /// Compares each of the corresponding 32-bit float values of the
852 /// 128-bit vectors of [4 x float] to determine if the values in the first
864 /// A 128-bit vector of [4 x float].
866 /// A 128-bit vector of [4 x float].
867 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
874 /// Compares two 32-bit float values in the low-order bits of both
879 /// low-order bits of a vector of [4 x float].
888 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
891 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
893 /// \returns A 128-bit vector of [4 x float] containing the comparison results
894 /// in the low-order bits.
903 /// Compares each of the corresponding 32-bit float values of the
904 /// 128-bit vectors of [4 x float] to determine if the values in the first
916 /// A 128-bit vector of [4 x float].
918 /// A 128-bit vector of [4 x float].
919 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
926 /// Compares two 32-bit float values in the low-order bits of both
931 /// low-order bits of a vector of [4 x float].
940 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
943 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
945 /// \returns A 128-bit vector of [4 x float] containing the comparison results
946 /// in the low-order bits.
955 /// Compares each of the corresponding 32-bit float values of the
956 /// 128-bit vectors of [4 x float] to determine if the values in the first
968 /// A 128-bit vector of [4 x float].
970 /// A 128-bit vector of [4 x float].
971 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
978 /// Compares two 32-bit float values in the low-order bits of both
982 /// A pair of floating-point values are ordered with respect to each
992 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
995 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
997 /// \returns A 128-bit vector of [4 x float] containing the comparison results
998 /// in the low-order bits.
1005 /// Compares each of the corresponding 32-bit float values of the
1006 /// 128-bit vectors of [4 x float] to determine if the values in the first
1009 /// A pair of floating-point values are ordered with respect to each
1019 /// A 128-bit vector of [4 x float].
1021 /// A 128-bit vector of [4 x float].
1022 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1029 /// Compares two 32-bit float values in the low-order bits of both
1033 /// A pair of floating-point values are unordered with respect to each
1043 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
1046 /// A 128-bit vector of [4 x float] containing one of the operands. The lower
1048 /// \returns A 128-bit vector of [4 x float] containing the comparison results
1049 /// in the low-order bits.
1056 /// Compares each of the corresponding 32-bit float values of the
1057 /// 128-bit vectors of [4 x float] to determine if the values in the first
1060 /// A pair of floating-point values are unordered with respect to each
1070 /// A 128-bit vector of [4 x float].
1072 /// A 128-bit vector of [4 x float].
1073 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1080 /// Compares two 32-bit float values in the low-order bits of both
1092 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1095 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1104 /// Compares two 32-bit float values in the low-order bits of both
1117 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1120 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1129 /// Compares two 32-bit float values in the low-order bits of both
1141 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1144 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1153 /// Compares two 32-bit float values in the low-order bits of both
1165 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1168 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1177 /// Compares two 32-bit float values in the low-order bits of both
1189 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1192 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1201 /// Compares two 32-bit float values in the low-order bits of both
1213 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1216 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1225 /// Performs an unordered comparison of two 32-bit float values using
1226 /// the low-order bits of both operands to determine equality.
1236 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1239 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1248 /// Performs an unordered comparison of two 32-bit float values using
1249 /// the low-order bits of both operands to determine if the first operand is
1260 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1263 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1272 /// Performs an unordered comparison of two 32-bit float values using
1273 /// the low-order bits of both operands to determine if the first operand is
1284 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1287 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1296 /// Performs an unordered comparison of two 32-bit float values using
1297 /// the low-order bits of both operands to determine if the first operand is
1308 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1311 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1320 /// Performs an unordered comparison of two 32-bit float values using
1321 /// the low-order bits of both operands to determine if the first operand is
1332 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1335 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1344 /// Performs an unordered comparison of two 32-bit float values using
1345 /// the low-order bits of both operands to determine inequality.
1355 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1358 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1368 /// [4 x float] into a 32-bit integer.
1370 /// If the converted value does not fit in a 32-bit integer, raises a
1371 /// floating-point invalid exception. If the exception is masked, returns
1380 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1382 /// \returns A 32-bit integer containing the converted value.
1390 /// [4 x float] into a 32-bit integer.
1392 /// If the converted value does not fit in a 32-bit integer, raises a
1393 /// floating-point invalid exception. If the exception is masked, returns
1402 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1404 /// \returns A 32-bit integer containing the converted value.
1414 /// [4 x float] into a 64-bit integer.
1416 /// If the converted value does not fit in a 64-bit integer, raises a
1417 /// floating-point invalid exception. If the exception is masked, returns
1426 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1428 /// \returns A 64-bit integer containing the converted value.
1437 /// Converts two low-order float values in a 128-bit vector of
1438 /// [4 x float] into a 64-bit vector of [2 x i32].
1440 /// If a converted value does not fit in a 32-bit integer, raises a
1441 /// floating-point invalid exception. If the exception is masked, returns
1449 /// A 128-bit vector of [4 x float].
1450 /// \returns A 64-bit integer vector containing the converted values.
1457 /// Converts two low-order float values in a 128-bit vector of
1458 /// [4 x float] into a 64-bit vector of [2 x i32].
1460 /// If a converted value does not fit in a 32-bit integer, raises a
1461 /// floating-point invalid exception. If the exception is masked, returns
1469 /// A 128-bit vector of [4 x float].
1470 /// \returns A 64-bit integer vector containing the converted values.
1478 /// truncated (rounded toward zero) 32-bit integer.
1480 /// If the converted value does not fit in a 32-bit integer, raises a
1481 /// floating-point invalid exception. If the exception is masked, returns
1490 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1492 /// \returns A 32-bit integer containing the converted value.
1500 /// truncated (rounded toward zero) 32-bit integer.
1502 /// If the converted value does not fit in a 32-bit integer, raises a
1503 /// floating-point invalid exception. If the exception is masked, returns
1512 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1514 /// \returns A 32-bit integer containing the converted value.
1523 /// truncated (rounded toward zero) 64-bit integer.
1525 /// If the converted value does not fit in a 64-bit integer, raises a
1526 /// floating-point invalid exception. If the exception is masked, returns
1535 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1537 /// \returns A 64-bit integer containing the converted value.
1545 /// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
1546 /// into two signed truncated (rounded toward zero) 32-bit integers,
1547 /// returned in a 64-bit vector of [2 x i32].
1549 /// If a converted value does not fit in a 32-bit integer, raises a
1550 /// floating-point invalid exception. If the exception is masked, returns
1559 /// A 128-bit vector of [4 x float].
1560 /// \returns A 64-bit integer vector containing the converted values.
1567 /// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
1568 /// into two signed truncated (rounded toward zero) 32-bit integers,
1569 /// returned in a 64-bit vector of [2 x i32].
1571 /// If a converted value does not fit in a 32-bit integer, raises a
1572 /// floating-point invalid exception. If the exception is masked, returns
1580 /// A 128-bit vector of [4 x float].
1581 /// \returns A 64-bit integer vector containing the converted values.
1588 /// Converts a 32-bit signed integer value into a floating point value
1598 /// A 128-bit vector of [4 x float].
1600 /// A 32-bit signed integer operand containing the value to be converted.
1601 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1611 /// Converts a 32-bit signed integer value into a floating point value
1621 /// A 128-bit vector of [4 x float].
1623 /// A 32-bit signed integer operand containing the value to be converted.
1624 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1635 /// Converts a 64-bit signed integer value into a floating point value
1645 /// A 128-bit vector of [4 x float].
1647 /// A 64-bit signed integer operand containing the value to be converted.
1648 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1660 /// Converts two elements of a 64-bit vector of [2 x i32] into two
1661 /// floating point values and writes them to the lower 64 bits of the
1670 /// A 128-bit vector of [4 x float].
1672 /// A 64-bit vector of [2 x i32]. The elements in this vector are converted
1673 /// and written to the corresponding low-order elements in the destination.
1674 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1675 /// converted value of the second operand. The upper 64 bits are copied from
1676 /// the upper 64 bits of the first operand.
1683 /// Converts two elements of a 64-bit vector of [2 x i32] into two
1684 /// floating point values and writes them to the lower 64 bits of the
1693 /// A 128-bit vector of [4 x float].
1695 /// A 64-bit vector of [2 x i32]. The elements in this vector are converted
1696 /// and written to the corresponding low-order elements in the destination.
1697 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1698 /// converted value from the second operand. The upper 64 bits are copied
1699 /// from the upper 64 bits of the first operand.
1714 /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1716 /// \returns A 32-bit float containing the extracted value.
1724 /// high-order bits of a 128-bit vector of [4 x float]. The low-order bits
1725 /// are copied from the low-order bits of the first operand.
1732 /// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]
1736 /// [127:64] of the destination.
1737 /// \returns A 128-bit vector of [4 x float] containing the moved values.
1745 __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u; in _mm_loadh_pi()
1751 /// low-order bits of a 128-bit vector of [4 x float]. The high-order bits
1752 /// are copied from the high-order bits of the first operand.
1759 /// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits
1760 /// [127:64] of the destination.
1764 /// \returns A 128-bit vector of [4 x float] containing the moved values.
1772 __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u; in _mm_loadl_pi()
1777 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
1778 /// 32 bits of the vector are initialized with the single-precision
1779 /// floating-point value loaded from a specified memory location. The upper
1787 /// A pointer to a 32-bit memory location containing a single-precision
1788 /// floating-point value.
1789 /// \returns An initialized 128-bit floating-point vector of [4 x float]. The
1798 float __u = ((const struct __mm_load_ss_struct*)__p)->__u; in _mm_load_ss()
1802 /// Loads a 32-bit float value and duplicates it to all four vector
1803 /// elements of a 128-bit vector of [4 x float].
1812 /// \returns A 128-bit vector of [4 x float] containing the loaded and
1820 float __u = ((const struct __mm_load1_ps_struct*)__p)->__u; in _mm_load1_ps()
1826 /// Loads a 128-bit floating-point vector of [4 x float] from an aligned
1834 /// A pointer to a 128-bit memory location. The address of the memory
1835 /// location has to be 128-bit aligned.
1836 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
1843 /// Loads a 128-bit floating-point vector of [4 x float] from an
1851 /// A pointer to a 128-bit memory location. The address of the memory
1853 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
1860 return ((const struct __loadu_ps*)__p)->__v; in _mm_loadu_ps()
1864 /// memory location to 32-bit elements in a 128-bit vector of [4 x float].
1872 /// A pointer to a 128-bit memory location. The address of the memory
1873 /// location has to be 128-bit aligned.
1874 /// \returns A 128-bit vector of [4 x float] containing the moved values, loaded
1883 /// Creates a 128-bit vector of [4 x float] with undefined values.
1889 /// \returns A 128-bit vector of [4 x float] containing undefined values.
1896 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
1897 /// 32 bits of the vector are initialized with the specified single-precision
1898 /// floating-point value. The upper 96 bits are set to zero.
1905 /// A single-precision floating-point value used to initialize the lower 32
1907 /// \returns An initialized 128-bit floating-point vector of [4 x float]. The
1916 /// Constructs a 128-bit floating-point vector of [4 x float], with each
1917 /// of the four single-precision floating-point vector elements set to the
1918 /// specified single-precision floating-point value.
1925 /// A single-precision floating-point value used to initialize each vector
1927 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1935 /// Constructs a 128-bit floating-point vector of [4 x float], with each
1936 /// of the four single-precision floating-point vector elements set to the
1937 /// specified single-precision floating-point value.
1944 /// A single-precision floating-point value used to initialize each vector
1946 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1953 /// Constructs a 128-bit floating-point vector of [4 x float]
1954 /// initialized with the specified single-precision floating-point values.
1962 /// A single-precision floating-point value used to initialize bits [127:96]
1965 /// A single-precision floating-point value used to initialize bits [95:64]
1968 /// A single-precision floating-point value used to initialize bits [63:32]
1971 /// A single-precision floating-point value used to initialize bits [31:0]
1973 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1980 /// Constructs a 128-bit floating-point vector of [4 x float],
1981 /// initialized in reverse order with the specified 32-bit single-precision
1982 /// floating-point values.
1990 /// A single-precision floating-point value used to initialize bits [31:0]
1993 /// A single-precision floating-point value used to initialize bits [63:32]
1996 /// A single-precision floating-point value used to initialize bits [95:64]
1999 /// A single-precision floating-point value used to initialize bits [127:96]
2001 /// \returns An initialized 128-bit floating-point vector of [4 x float].
2008 /// Constructs a 128-bit floating-point vector of [4 x float] initialized
2015 /// \returns An initialized 128-bit floating-point vector of [4 x float] with
2023 /// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a
2031 /// A pointer to a 64-bit memory location.
2033 /// A 128-bit vector of [4 x float] containing the values to be stored.
2041 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3); in _mm_storeh_pi()
2044 /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a
2054 /// A 128-bit vector of [4 x float] containing the values to be stored.
2062 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1); in _mm_storel_pi()
2065 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a
2073 /// A pointer to a 32-bit memory location.
2075 /// A 128-bit vector of [4 x float] containing the value to be stored.
2082 ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; in _mm_store_ss()
2085 /// Stores a 128-bit vector of [4 x float] to an unaligned memory
2093 /// A pointer to a 128-bit memory location. The address of the memory
2096 /// A 128-bit vector of [4 x float] containing the values to be stored.
2103 ((struct __storeu_ps*)__p)->__v = __a; in _mm_storeu_ps()
2106 /// Stores a 128-bit vector of [4 x float] into an aligned memory
2114 /// A pointer to a 128-bit memory location. The address of the memory
2115 /// location has to be 16-byte aligned.
2117 /// A 128-bit vector of [4 x float] containing the values to be stored.
2124 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into
2133 /// A pointer to a 128-bit memory location.
2135 /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
2144 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into
2153 /// A pointer to a 128-bit memory location.
2155 /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
2163 /// Stores float values from a 128-bit vector of [4 x float] to an
2172 /// A pointer to a 128-bit memory location. The address of the memory
2173 /// location has to be 128-bit aligned.
2175 /// A 128-bit vector of [4 x float] containing the values to be stored.
2210 /// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The
2222 /// Stores a 64-bit integer in the specified aligned memory location. To
2223 /// minimize caching, the data is flagged as non-temporal (unlikely to be
2233 /// A 64-bit integer containing the value to be stored.
2240 /// Moves packed float values from a 128-bit vector of [4 x float] to a
2241 /// 128-bit aligned memory location. To minimize caching, the data is flagged
2242 /// as non-temporal (unlikely to be used again soon).
2249 /// A pointer to a 128-bit aligned memory location that will receive the
2250 /// single-precision floating-point values.
2252 /// A 128-bit vector of [4 x float] containing the values to be moved.
2278 /// Extracts a 16-bit element from a 64-bit vector of [4 x i16] and
2290 /// A 64-bit vector of [4 x i16].
2297 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
2301 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
2302 /// and inserts the lower 16 bits of an integer operand at the 16-bit offset
2314 /// A 64-bit vector of [4 x i16].
2316 /// An integer. The lower 16-bit value from this operand is written to the
2327 /// \returns A 64-bit integer vector containing the copied packed data from the
2332 /// Compares each of the corresponding packed 16-bit integer values of
2333 /// the 64-bit integer vectors, and writes the greater value to the
2341 /// A 64-bit integer vector containing one of the source operands.
2343 /// A 64-bit integer vector containing one of the source operands.
2344 /// \returns A 64-bit integer vector containing the comparison results.
2351 /// Compares each of the corresponding packed 8-bit unsigned integer
2352 /// values of the 64-bit integer vectors, and writes the greater value to the
2360 /// A 64-bit integer vector containing one of the source operands.
2362 /// A 64-bit integer vector containing one of the source operands.
2363 /// \returns A 64-bit integer vector containing the comparison results.
2370 /// Compares each of the corresponding packed 16-bit integer values of
2371 /// the 64-bit integer vectors, and writes the lesser value to the
2379 /// A 64-bit integer vector containing one of the source operands.
2381 /// A 64-bit integer vector containing one of the source operands.
2382 /// \returns A 64-bit integer vector containing the comparison results.
2389 /// Compares each of the corresponding packed 8-bit unsigned integer
2390 /// values of the 64-bit integer vectors, and writes the lesser value to the
2398 /// A 64-bit integer vector containing one of the source operands.
2400 /// A 64-bit integer vector containing one of the source operands.
2401 /// \returns A 64-bit integer vector containing the comparison results.
2408 /// Takes the most significant bit from each 8-bit element in a 64-bit
2409 /// integer vector to create an 8-bit mask value. Zero-extends the value to
2410 /// 32-bit integer and writes it to the destination.
2417 /// A 64-bit integer vector containing the values with bits to be extracted.
2418 /// \returns The most significant bit from each 8-bit element in \a __a,
2426 /// Multiplies packed 16-bit unsigned integer values and writes the
2427 /// high-order 16 bits of each 32-bit product to the corresponding bits in
2435 /// A 64-bit integer vector containing one of the source operands.
2437 /// A 64-bit integer vector containing one of the source operands.
2438 /// \returns A 64-bit integer vector containing the products of both operands.
2445 /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
2457 /// A 64-bit integer vector containing the values to be shuffled.
2459 /// An immediate value containing an 8-bit value specifying which elements to
2460 /// copy from \a a. The destinations within the 64-bit destination are
2470 /// Bit value assignments: \n
2476 /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
2478 /// \returns A 64-bit integer vector containing the shuffled values.
2482 /// Conditionally copies the values from each 8-bit element in the first
2483 /// 64-bit integer vector operand to the specified memory location, as
2484 /// specified by the most significant bit in the corresponding element in the
2485 /// second 64-bit integer vector operand.
2487 /// To minimize caching, the data is flagged as non-temporal
2495 /// A 64-bit integer vector containing the values with elements to be copied.
2497 /// A 64-bit integer vector operand. The most significant bit from each 8-bit
2499 /// is copied. If the most significant bit of a given element is 1, the
2502 /// A pointer to a 64-bit memory location that will receive the conditionally
2511 /// Computes the rounded averages of the packed unsigned 8-bit integer
2520 /// A 64-bit integer vector containing one of the source operands.
2522 /// A 64-bit integer vector containing one of the source operands.
2523 /// \returns A 64-bit integer vector containing the averages of both operands.
2530 /// Computes the rounded averages of the packed unsigned 16-bit integer
2539 /// A 64-bit integer vector containing one of the source operands.
2541 /// A 64-bit integer vector containing one of the source operands.
2542 /// \returns A 64-bit integer vector containing the averages of both operands.
2549 /// Subtracts the corresponding 8-bit unsigned integer values of the two
2550 /// 64-bit vector operands and computes the absolute value for each of the
2559 /// A 64-bit integer vector containing one of the source operands.
2561 /// A 64-bit integer vector containing one of the source operands.
2562 /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the
2575 /// Returns the contents of the MXCSR register as a 32-bit unsigned
2598 /// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
2602 /// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,
2623 /// \returns A 32-bit unsigned integer containing the contents of the MXCSR
2627 /// Sets the MXCSR register with the 32-bit unsigned integer value.
2650 /// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
2655 /// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,
2661 /// For example, the following expression causes subsequent floating-point
2678 /// A 32-bit unsigned integer value to be written to the MXCSR register.
2685 /// Selects 4 float values from the 128-bit operands of [4 x float], as
2697 /// A 128-bit vector of [4 x float].
2699 /// A 128-bit vector of [4 x float].
2701 /// An immediate value containing an 8-bit value specifying which elements to
2705 /// The destinations within the 128-bit destination are assigned values as
2711 /// Bits [5:4] are used to assign values to bits [95:64] in the
2715 /// Bit value assignments: \n
2718 /// 10: Bits [95:64] copied from the specified operand. \n
2721 /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
2723 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
2728 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
2729 /// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
2736 /// A 128-bit vector of [4 x float]. \n
2737 /// Bits [95:64] are written to bits [31:0] of the destination. \n
2738 /// Bits [127:96] are written to bits [95:64] of the destination.
2740 /// A 128-bit vector of [4 x float]. \n
2741 /// Bits [95:64] are written to bits [63:32] of the destination. \n
2743 /// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2750 /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of
2751 /// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
2758 /// A 128-bit vector of [4 x float]. \n
2760 /// Bits [63:32] are written to bits [95:64] of the destination.
2762 /// A 128-bit vector of [4 x float]. \n
2765 /// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2772 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2782 /// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are
2785 /// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
2787 /// \returns A 128-bit floating-point vector of [4 x float].
2795 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2796 /// 64 bits are set to the upper 64 bits of the second parameter. The upper
2797 /// 64 bits are set to the upper 64 bits of the first parameter.
2804 /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
2805 /// written to the upper 64 bits of the result.
2807 /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
2808 /// written to the lower 64 bits of the result.
2809 /// \returns A 128-bit floating-point vector of [4 x float].
2816 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2817 /// 64 bits are set to the lower 64 bits of the first parameter. The upper
2818 /// 64 bits are set to the lower 64 bits of the second parameter.
2825 /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
2826 /// written to the lower 64 bits of the result.
2828 /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
2829 /// written to the upper 64 bits of the result.
2830 /// \returns A 128-bit floating-point vector of [4 x float].
2837 /// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
2845 /// A 64-bit vector of [4 x i16]. The elements of the destination are copied
2847 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2867 /// Converts a 64-bit vector of 16-bit unsigned integer values into a
2868 /// 128-bit vector of [4 x float].
2875 /// A 64-bit vector of 16-bit unsigned integer values. The elements of the
2877 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2896 /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
2897 /// into a 128-bit vector of [4 x float].
2904 /// A 64-bit vector of [8 x i8]. The elements of the destination are copied
2906 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2920 /// Converts the lower four unsigned 8-bit integer values from a 64-bit
2921 /// vector of [8 x u8] into a 128-bit vector of [4 x float].
2928 /// A 64-bit vector of unsigned 8-bit integer values. The elements of the
2931 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2944 /// Converts the two 32-bit signed integer values from each 64-bit vector
2945 /// operand of [2 x i32] into a 128-bit vector of [4 x float].
2952 /// A 64-bit vector of [2 x i32]. The lower elements of the destination are
2955 /// A 64-bit vector of [2 x i32]. The upper elements of the destination are
2957 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
2958 /// copied and converted values from the first operand. The upper 64 bits
2972 /// Converts each single-precision floating-point element of a 128-bit
2973 /// floating-point vector of [4 x float] into a 16-bit signed integer, and
2974 /// packs the results into a 64-bit integer vector of [4 x i16].
2976 /// If the floating-point element is NaN or infinity, or if the
2977 /// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,
2978 /// it is converted to 0x8000. Otherwise if the floating-point element is
2986 /// A 128-bit floating-point vector of [4 x float].
2987 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
3001 /// Converts each single-precision floating-point element of a 128-bit
3002 /// floating-point vector of [4 x float] into an 8-bit signed integer, and
3003 /// packs the results into the lower 32 bits of a 64-bit integer vector of
3006 /// If the floating-point element is NaN or infinity, or if the
3007 /// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it
3008 /// is converted to 0x80. Otherwise if the floating-point element is greater
3016 /// A 128-bit floating-point vector of [4 x float].
3017 /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the
3030 /// Extracts the sign bits from each single-precision floating-point
3031 /// element of a 128-bit floating-point vector of [4 x float] and returns the
3040 /// A 128-bit floating-point vector of [4 x float].
3041 /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
3042 /// single-precision floating-point element of the parameter. Bits [31:4] are
3051 #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
3052 #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
3053 #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
3054 #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
3055 #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
3056 #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
3057 #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
3058 #define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
3060 /// Compares each of the corresponding values of two 128-bit vectors of
3077 /// A 128-bit vector of [4 x float].
3079 /// A 128-bit vector of [4 x float].
3083 /// 0x00: Equal (ordered, non-signaling) \n
3084 /// 0x01: Less-than (ordered, signaling) \n
3085 /// 0x02: Less-than-or-equal (ordered, signaling) \n
3086 /// 0x03: Unordered (non-signaling) \n
3087 /// 0x04: Not-equal (unordered, non-signaling) \n
3088 /// 0x05: Not-less-than (unordered, signaling) \n
3089 /// 0x06: Not-less-than-or-equal (unordered, signaling) \n
3090 /// 0x07: Ordered (non-signaling) \n
3091 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
3095 /// Compares each of the corresponding scalar values of two 128-bit
3112 /// A 128-bit vector of [4 x float].
3114 /// A 128-bit vector of [4 x float].
3118 /// 0x00: Equal (ordered, non-signaling) \n
3119 /// 0x01: Less-than (ordered, signaling) \n
3120 /// 0x02: Less-than-or-equal (ordered, signaling) \n
3121 /// 0x03: Unordered (non-signaling) \n
3122 /// 0x04: Not-equal (unordered, non-signaling) \n
3123 /// 0x05: Not-less-than (unordered, signaling) \n
3124 /// 0x06: Not-less-than-or-equal (unordered, signaling) \n
3125 /// 0x07: Ordered (non-signaling) \n
3126 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
3202 /* Ugly hack for backwards-compatibility (compatible with gcc) */