xmmintrin.h - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/Headers/xmmintrin.h

Lines Matching +full:128 +full:a
28 /* This header should only be included in a hosted environment as it depends on
29  * a standard library to provide allocation routines. */
37                  __min_vector_width__(128)))
49 ///    A 128-bit vector of [4 x float] containing one of the source operands.
52 ///    A 128-bit vector of [4 x float] containing one of the source operands.
54 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
64 /// Adds two 128-bit vectors of [4 x float], and returns the results of
72 ///    A 128-bit vector of [4 x float] containing one of the source operands.
74 ///    A 128-bit vector of [4 x float] containing one of the source operands.
75 /// \returns A 128-bit vector of [4 x float] containing the sums of both
91 ///    A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits
94 ///    A 128-bit vector of [4 x float] containing the subtrahend. The lower 32
96 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
107 ///    operand, both of which are 128-bit vectors of [4 x float] and returns
115 ///    A 128-bit vector of [4 x float] containing the minuend.
117 ///    A 128-bit vector of [4 x float] containing the subtrahend.
118 /// \returns A 128-bit vector of [4 x float] containing the differences between
134 ///    A 128-bit vector of [4 x float] containing one of the source operands.
137 ///    A 128-bit vector of [4 x float] containing one of the source operands.
139 /// \returns A 128-bit vector of [4 x float] containing the product of the lower
149 /// Multiplies two 128-bit vectors of [4 x float] and returns the
157 ///    A 128-bit vector of [4 x float] containing one of the source operands.
159 ///    A 128-bit vector of [4 x float] containing one of the source operands.
160 /// \returns A 128-bit vector of [4 x float] containing the products of both
176 ///    A 128-bit vector of [4 x float] containing the dividend. The lower 32
179 ///    A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits
181 /// \returns A 128-bit vector of [4 x float] containing the quotients of the
191 /// Divides two 128-bit vectors of [4 x float].
198 ///    A 128-bit vector of [4 x float] containing the dividend.
200 ///    A 128-bit vector of [4 x float] containing the divisor.
201 /// \returns A 128-bit vector of [4 x float] containing the quotients of both
210 ///    of a 128-bit vector of [4 x float].
217 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
219 /// \returns A 128-bit vector of [4 x float] containing the square root of the
227 /// Calculates the square roots of the values stored in a 128-bit vector
235 ///    A 128-bit vector of [4 x float].
236 /// \returns A 128-bit vector of [4 x float] containing the square roots of the
245 ///    low-order bits of a 128-bit vector of [4 x float].
252 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
254 /// \returns A 128-bit vector of [4 x float] containing the approximate
262 /// Calculates the approximate reciprocals of the values stored in a
263 ///    128-bit vector of [4 x float].
270 ///    A 128-bit vector of [4 x float].
271 /// \returns A 128-bit vector of [4 x float] containing the approximate
280 ///    stored in the low-order bits of a 128-bit vector of [4 x float].
287 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
289 /// \returns A 128-bit vector of [4 x float] containing the approximate
299 ///    values stored in a 128-bit vector of [4 x float].
306 ///    A 128-bit vector of [4 x float].
307 /// \returns A 128-bit vector of [4 x float] containing the approximate
319 ///    If either value in a comparison is NaN, returns the value from \a __b.
326 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
329 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
331 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
340 /// Compares two 128-bit vectors of [4 x float] and returns the lesser
343 ///    If either value in a comparison is NaN, returns the value from \a __b.
350 ///    A 128-bit vector of [4 x float] containing one of the operands.
352 ///    A 128-bit vector of [4 x float] containing one of the operands.
353 /// \returns A 128-bit vector of [4 x float] containing the minimum values
362 ///    operands and returns the greater value in the low-order bits of a 128-bit
365 ///    If either value in a comparison is NaN, returns the value from \a __b.
372 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
375 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
377 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
386 /// Compares two 128-bit vectors of [4 x float] and returns the greater
389 ///    If either value in a comparison is NaN, returns the value from \a __b.
396 ///    A 128-bit vector of [4 x float] containing one of the operands.
398 ///    A 128-bit vector of [4 x float] containing one of the operands.
399 /// \returns A 128-bit vector of [4 x float] containing the maximum values
407 /// Performs a bitwise AND of two 128-bit vectors of [4 x float].
414 ///    A 128-bit vector containing one of the source operands.
416 ///    A 128-bit vector containing one of the source operands.
417 /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
425 /// Performs a bitwise AND of two 128-bit vectors of [4 x float], using
434 ///    A 128-bit vector of [4 x float] containing the first source operand. The
437 ///    A 128-bit vector of [4 x float] containing the second source operand.
438 /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
447 /// Performs a bitwise OR of two 128-bit vectors of [4 x float].
454 ///    A 128-bit vector of [4 x float] containing one of the source operands.
456 ///    A 128-bit vector of [4 x float] containing one of the source operands.
457 /// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
465 /// Performs a bitwise exclusive OR of two 128-bit vectors of
473 ///    A 128-bit vector of [4 x float] containing one of the source operands.
475 ///    A 128-bit vector of [4 x float] containing one of the source operands.
476 /// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
488 ///    low-order bits of a vector of [4 x float].
489 ///    If either value in a comparison is NaN, returns false.
496 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
499 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
501 /// \returns A 128-bit vector of [4 x float] containing the comparison results
510 ///    128-bit vectors of [4 x float] for equality.
513 ///    If either value in a comparison is NaN, returns false.
520 ///    A 128-bit vector of [4 x float].
522 ///    A 128-bit vector of [4 x float].
523 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
535 ///    low-order bits of a vector of [4 x float].
536 ///    If either value in a comparison is NaN, returns false.
543 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
546 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
548 /// \returns A 128-bit vector of [4 x float] containing the comparison results
557 ///    128-bit vectors of [4 x float] to determine if the values in the first
561 ///    If either value in a comparison is NaN, returns false.
568 ///    A 128-bit vector of [4 x float].
570 ///    A 128-bit vector of [4 x float].
571 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
583 ///    the low-order bits of a vector of [4 x float].
584 ///    If either value in a comparison is NaN, returns false.
591 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
594 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
596 /// \returns A 128-bit vector of [4 x float] containing the comparison results
605 ///    128-bit vectors of [4 x float] to determine if the values in the first
609 ///    If either value in a comparison is NaN, returns false.
616 ///    A 128-bit vector of [4 x float].
618 ///    A 128-bit vector of [4 x float].
619 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
631 ///    low-order bits of a vector of [4 x float].
632 ///    If either value in a comparison is NaN, returns false.
639 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
642 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
644 /// \returns A 128-bit vector of [4 x float] containing the comparison results
655 ///    128-bit vectors of [4 x float] to determine if the values in the first
659 ///    If either value in a comparison is NaN, returns false.
666 ///    A 128-bit vector of [4 x float].
668 ///    A 128-bit vector of [4 x float].
669 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
681 ///    low-order bits of a vector of [4 x float].
682 ///    If either value in a comparison is NaN, returns false.
689 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
692 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
694 /// \returns A 128-bit vector of [4 x float] containing the comparison results
705 ///    128-bit vectors of [4 x float] to determine if the values in the first
709 ///    If either value in a comparison is NaN, returns false.
716 ///    A 128-bit vector of [4 x float].
718 ///    A 128-bit vector of [4 x float].
719 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
730 ///    low-order bits of a vector of [4 x float].
731 ///    If either value in a comparison is NaN, returns true.
739 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
742 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
744 /// \returns A 128-bit vector of [4 x float] containing the comparison results
753 ///    128-bit vectors of [4 x float] for inequality.
756 ///    If either value in a comparison is NaN, returns true.
764 ///    A 128-bit vector of [4 x float].
766 ///    A 128-bit vector of [4 x float].
767 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
779 ///    low-order bits of a vector of [4 x float].
780 ///    If either value in a comparison is NaN, returns true.
788 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
791 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
793 /// \returns A 128-bit vector of [4 x float] containing the comparison results
802 ///    128-bit vectors of [4 x float] to determine if the values in the first
806 ///    If either value in a comparison is NaN, returns true.
814 ///    A 128-bit vector of [4 x float].
816 ///    A 128-bit vector of [4 x float].
817 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
829 ///    low-order bits of a vector of [4 x float].
830 ///    If either value in a comparison is NaN, returns true.
838 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
841 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
843 /// \returns A 128-bit vector of [4 x float] containing the comparison results
852 ///    128-bit vectors of [4 x float] to determine if the values in the first
856 ///    If either value in a comparison is NaN, returns true.
864 ///    A 128-bit vector of [4 x float].
866 ///    A 128-bit vector of [4 x float].
867 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
879 ///    low-order bits of a vector of [4 x float].
880 ///    If either value in a comparison is NaN, returns true.
888 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
891 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
893 /// \returns A 128-bit vector of [4 x float] containing the comparison results
904 ///    128-bit vectors of [4 x float] to determine if the values in the first
908 ///    If either value in a comparison is NaN, returns true.
916 ///    A 128-bit vector of [4 x float].
918 ///    A 128-bit vector of [4 x float].
919 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
931 ///    low-order bits of a vector of [4 x float].
932 ///    If either value in a comparison is NaN, returns true.
940 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
943 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
945 /// \returns A 128-bit vector of [4 x float] containing the comparison results
956 ///    128-bit vectors of [4 x float] to determine if the values in the first
960 ///    If either value in a comparison is NaN, returns true.
968 ///    A 128-bit vector of [4 x float].
970 ///    A 128-bit vector of [4 x float].
971 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
982 ///    A pair of floating-point values are ordered with respect to each
983 ///    other if neither value is a NaN. Each comparison returns 0x0 for false,
992 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
995 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
997 /// \returns A 128-bit vector of [4 x float] containing the comparison results
1006 ///    128-bit vectors of [4 x float] to determine if the values in the first
1009 ///    A pair of floating-point values are ordered with respect to each
1010 ///    other if neither value is a NaN. Each comparison returns 0x0 for false,
1019 ///    A 128-bit vector of [4 x float].
1021 ///    A 128-bit vector of [4 x float].
1022 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1033 ///    A pair of floating-point values are unordered with respect to each
1043 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
1046 ///    A 128-bit vector of [4 x float] containing one of the operands. The lower
1048 /// \returns A 128-bit vector of [4 x float] containing the comparison results
1057 ///    128-bit vectors of [4 x float] to determine if the values in the first
1060 ///    A pair of floating-point values are unordered with respect to each
1070 ///    A 128-bit vector of [4 x float].
1072 ///    A 128-bit vector of [4 x float].
1073 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1083 ///    The comparison returns 0 for false, 1 for true. If either value in a
1092 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1095 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1108 ///    The comparison returns 0 for false, 1 for true. If either value in a
1117 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1120 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1133 ///    The comparison returns 0 for false, 1 for true. If either value in a
1141 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1144 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1157 ///    The comparison returns 0 for false, 1 for true. If either value in a
1165 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1168 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1181 ///    The comparison returns 0 for false, 1 for true. If either value in a
1189 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1192 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1205 ///    The comparison returns 0 for false, 1 for true. If either value in a
1213 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1216 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1228 ///    The comparison returns 0 for false, 1 for true. If either value in a
1236 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1239 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1252 ///    The comparison returns 0 for false, 1 for true. If either value in a
1260 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1263 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1276 ///    The comparison returns 0 for false, 1 for true. If either value in a
1284 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1287 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1300 ///    The comparison returns 0 for false, 1 for true. If either value in a
1308 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1311 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1324 ///    The comparison returns 0 for false, 1 for true. If either value in a
1332 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1335 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1347 ///    The comparison returns 0 for false, 1 for true. If either value in a
1355 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1358 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1367 /// Converts a float value contained in the lower 32 bits of a vector of
1368 ///    [4 x float] into a 32-bit integer.
1370 ///    If the converted value does not fit in a 32-bit integer, raises a
1380 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1382 /// \returns A 32-bit integer containing the converted value.
1389 /// Converts a float value contained in the lower 32 bits of a vector of
1390 ///    [4 x float] into a 32-bit integer.
1392 ///    If the converted value does not fit in a 32-bit integer, raises a
1402 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1404 /// \returns A 32-bit integer containing the converted value.
1413 /// Converts a float value contained in the lower 32 bits of a vector of
1414 ///    [4 x float] into a 64-bit integer.
1416 ///    If the converted value does not fit in a 64-bit integer, raises a
1426 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1428 /// \returns A 64-bit integer containing the converted value.
1437 /// Converts two low-order float values in a 128-bit vector of
1438 ///    [4 x float] into a 64-bit vector of [2 x i32].
1440 ///    If a converted value does not fit in a 32-bit integer, raises a
1449 ///    A 128-bit vector of [4 x float].
1450 /// \returns A 64-bit integer vector containing the converted values.
1457 /// Converts two low-order float values in a 128-bit vector of
1458 ///    [4 x float] into a 64-bit vector of [2 x i32].
1460 ///    If a converted value does not fit in a 32-bit integer, raises a
1469 ///    A 128-bit vector of [4 x float].
1470 /// \returns A 64-bit integer vector containing the converted values.
1477 /// Converts the lower (first) element of a vector of [4 x float] into a signed
1480 ///    If the converted value does not fit in a 32-bit integer, raises a
1490 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1492 /// \returns A 32-bit integer containing the converted value.
1499 /// Converts the lower (first) element of a vector of [4 x float] into a signed
1502 ///    If the converted value does not fit in a 32-bit integer, raises a
1512 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1514 /// \returns A 32-bit integer containing the converted value.
1522 /// Converts the lower (first) element of a vector of [4 x float] into a signed
1525 ///    If the converted value does not fit in a 64-bit integer, raises a
1535 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1537 /// \returns A 64-bit integer containing the converted value.
1545 /// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
1547 ///    returned in a 64-bit vector of [2 x i32].
1549 ///    If a converted value does not fit in a 32-bit integer, raises a
1559 ///    A 128-bit vector of [4 x float].
1560 /// \returns A 64-bit integer vector containing the converted values.
1567 /// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
1569 ///    returned in a 64-bit vector of [2 x i32].
1571 ///    If a converted value does not fit in a 32-bit integer, raises a
1580 ///    A 128-bit vector of [4 x float].
1581 /// \returns A 64-bit integer vector containing the converted values.
1588 /// Converts a 32-bit signed integer value into a floating point value
1598 ///    A 128-bit vector of [4 x float].
1600 ///    A 32-bit signed integer operand containing the value to be converted.
1601 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1611 /// Converts a 32-bit signed integer value into a floating point value
1621 ///    A 128-bit vector of [4 x float].
1623 ///    A 32-bit signed integer operand containing the value to be converted.
1624 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1635 /// Converts a 64-bit signed integer value into a floating point value
1645 ///    A 128-bit vector of [4 x float].
1647 ///    A 64-bit signed integer operand containing the value to be converted.
1648 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
1660 /// Converts two elements of a 64-bit vector of [2 x i32] into two
1670 ///    A 128-bit vector of [4 x float].
1672 ///    A 64-bit vector of [2 x i32]. The elements in this vector are converted
1674 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1683 /// Converts two elements of a 64-bit vector of [2 x i32] into two
1693 ///    A 128-bit vector of [4 x float].
1695 ///    A 64-bit vector of [2 x i32]. The elements in this vector are converted
1697 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1706 /// Extracts a float value contained in the lower 32 bits of a vector of
1714 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
1716 /// \returns A 32-bit float containing the extracted value.
1723 /// Loads two packed float values from the address \a __p into the
1724 ///    high-order bits of a 128-bit vector of [4 x float]. The low-order bits
1732 ///    A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]
1735 ///    A pointer to two packed float values. Bits [63:0] are written to bits
1737 /// \returns A 128-bit vector of [4 x float] containing the moved values.
1750 /// Loads two packed float values from the address \a __p into the
1751 ///    low-order bits of a 128-bit vector of [4 x float]. The high-order bits
1759 ///    A 128-bit vector of [4 x float]. Bits [127:64] are written to bits
1762 ///    A pointer to two packed float values. Bits [63:0] are written to bits
1764 /// \returns A 128-bit vector of [4 x float] containing the moved values.
1777 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
1779 ///    floating-point value loaded from a specified memory location. The upper
1787 ///    A pointer to a 32-bit memory location containing a single-precision
1789 /// \returns An initialized 128-bit floating-point vector of [4 x float]. The
1802 /// Loads a 32-bit float value and duplicates it to all four vector
1803 ///    elements of a 128-bit vector of [4 x float].
1811 ///    A pointer to a float value to be loaded and duplicated.
1812 /// \returns A 128-bit vector of [4 x float] containing the loaded and
1826 /// Loads a 128-bit floating-point vector of [4 x float] from an aligned
1834 ///    A pointer to a 128-bit memory location. The address of the memory
1835 ///    location has to be 128-bit aligned.
1836 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
1843 /// Loads a 128-bit floating-point vector of [4 x float] from an
1851 ///    A pointer to a 128-bit memory location. The address of the memory
1853 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
1864 ///    memory location to 32-bit elements in a 128-bit vector of [4 x float].
1872 ///    A pointer to a 128-bit memory location. The address of the memory
1873 ///    location has to be 128-bit aligned.
1874 /// \returns A 128-bit vector of [4 x float] containing the moved values, loaded
1883 /// Create a 128-bit vector of [4 x float] with undefined values.
1889 /// \returns A 128-bit vector of [4 x float] containing undefined values.
1896 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
1905 ///    A single-precision floating-point value used to initialize the lower 32
1907 /// \returns An initialized 128-bit floating-point vector of [4 x float]. The
1916 /// Constructs a 128-bit floating-point vector of [4 x float], with each
1925 ///    A single-precision floating-point value used to initialize each vector
1927 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1935 /// Constructs a 128-bit floating-point vector of [4 x float], with each
1944 ///    A single-precision floating-point value used to initialize each vector
1946 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1953 /// Constructs a 128-bit floating-point vector of [4 x float]
1958 /// This intrinsic is a utility function and does not correspond to a specific
1962 ///    A single-precision floating-point value used to initialize bits [127:96]
1965 ///    A single-precision floating-point value used to initialize bits [95:64]
1968 ///    A single-precision floating-point value used to initialize bits [63:32]
1971 ///    A single-precision floating-point value used to initialize bits [31:0]
1973 /// \returns An initialized 128-bit floating-point vector of [4 x float].
1980 /// Constructs a 128-bit floating-point vector of [4 x float],
1986 /// This intrinsic is a utility function and does not correspond to a specific
1990 ///    A single-precision floating-point value used to initialize bits [31:0]
1993 ///    A single-precision floating-point value used to initialize bits [63:32]
1996 ///    A single-precision floating-point value used to initialize bits [95:64]
1999 ///    A single-precision floating-point value used to initialize bits [127:96]
2001 /// \returns An initialized 128-bit floating-point vector of [4 x float].
2008 /// Constructs a 128-bit floating-point vector of [4 x float] initialized
2015 /// \returns An initialized 128-bit floating-point vector of [4 x float] with
2023 /// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a
2031 ///    A pointer to a 64-bit memory location.
2033 ///    A 128-bit vector of [4 x float] containing the values to be stored.
2044 /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a
2052 ///    A pointer to a memory location that will receive the float values.
2054 ///    A 128-bit vector of [4 x float] containing the values to be stored.
2065 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a
2073 ///    A pointer to a 32-bit memory location.
2075 ///    A 128-bit vector of [4 x float] containing the value to be stored.
2085 /// Stores a 128-bit vector of [4 x float] to an unaligned memory
2093 ///    A pointer to a 128-bit memory location. The address of the memory
2096 ///    A 128-bit vector of [4 x float] containing the values to be stored.
2106 /// Stores a 128-bit vector of [4 x float] into an aligned memory
2114 ///    A pointer to a 128-bit memory location. The address of the memory
2117 ///    A 128-bit vector of [4 x float] containing the values to be stored.
2124 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into
2133 ///    A pointer to a 128-bit memory location.
2135 ///    A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
2136 ///    of the four contiguous elements pointed to by \a __p.
2144 /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into
2153 ///    A pointer to a 128-bit memory location.
2155 ///    A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
2156 ///    of the four contiguous elements pointed to by \a __p.
2163 /// Stores float values from a 128-bit vector of [4 x float] to an
2172 ///    A pointer to a 128-bit memory location. The address of the memory
2173 ///    location has to be 128-bit aligned.
2175 ///    A 128-bit vector of [4 x float] containing the values to be stored.
2191 /* FIXME: We have to #define this because "sel" must be a constant integer, and
2194 /// Loads one cache line of data from the specified address to a location
2200 /// void _mm_prefetch(const void *a, const int sel);
2205 /// \param a
2206 ///    A pointer to a memory location containing a cache line of data.
2208 ///    A predefined integer constant specifying the type of prefetch
2218 #define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
2222 /// Stores a 64-bit integer in the specified aligned memory location. To
2231 ///    A pointer to an aligned memory location used to store the register value.
2233 ///    A 64-bit integer containing the value to be stored.
2240 /// Moves packed float values from a 128-bit vector of [4 x float] to a
2241 ///    128-bit aligned memory location. To minimize caching, the data is flagged
2249 ///    A pointer to a 128-bit aligned memory location that will receive the
2252 ///    A 128-bit vector of [4 x float] containing the values to be moved.
2278 /// Extracts a 16-bit element from a 64-bit vector of [4 x i16] and
2284 /// int _mm_extract_pi16(__m64 a, int n);
2289 /// \param a
2290 ///    A 64-bit vector of [4 x i16].
2297 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
2298 #define _mm_extract_pi16(a, n) \
2299   ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
2303 ///    specified by the immediate operand \a n.
2308 /// __m64 _mm_insert_pi16(__m64 a, int d, int n);
2313 /// \param a
2314 ///    A 64-bit vector of [4 x i16].
2317 ///    destination at the offset specified by operand \a n.
2326 ///    bits in operand \a a.
2327 /// \returns A 64-bit integer vector containing the copied packed data from the
2329 #define _mm_insert_pi16(a, d, n) \
2330   ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
2341 ///    A 64-bit integer vector containing one of the source operands.
2343 ///    A 64-bit integer vector containing one of the source operands.
2344 /// \returns A 64-bit integer vector containing the comparison results.
2360 ///    A 64-bit integer vector containing one of the source operands.
2362 ///    A 64-bit integer vector containing one of the source operands.
2363 /// \returns A 64-bit integer vector containing the comparison results.
2379 ///    A 64-bit integer vector containing one of the source operands.
2381 ///    A 64-bit integer vector containing one of the source operands.
2382 /// \returns A 64-bit integer vector containing the comparison results.
2398 ///    A 64-bit integer vector containing one of the source operands.
2400 ///    A 64-bit integer vector containing one of the source operands.
2401 /// \returns A 64-bit integer vector containing the comparison results.
2408 /// Takes the most significant bit from each 8-bit element in a 64-bit
2417 ///    A 64-bit integer vector containing the values with bits to be extracted.
2418 /// \returns The most significant bit from each 8-bit element in \a __a,
2435 ///    A 64-bit integer vector containing one of the source operands.
2437 ///    A 64-bit integer vector containing one of the source operands.
2438 /// \returns A 64-bit integer vector containing the products of both operands.
2445 /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
2451 /// __m64 _mm_shuffle_pi16(__m64 a, const int n);
2456 /// \param a
2457 ///    A 64-bit integer vector containing the values to be shuffled.
2460 ///    copy from \a a. The destinations within the 64-bit destination are
2471 ///    00: assigned from bits [15:0] of \a a. \n
2472 ///    01: assigned from bits [31:16] of \a a. \n
2473 ///    10: assigned from bits [47:32] of \a a. \n
2474 ///    11: assigned from bits [63:48] of \a a. \n
2475 ///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
2478 /// \returns A 64-bit integer vector containing the shuffled values.
2479 #define _mm_shuffle_pi16(a, n) \  argument
2480   ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
2495 ///    A 64-bit integer vector containing the values with elements to be copied.
2497 ///    A 64-bit integer vector operand. The most significant bit from each 8-bit
2498 ///    element determines whether the corresponding element in operand \a __d
2499 ///    is copied. If the most significant bit of a given element is 1, the
2500 ///    corresponding element in operand \a __d is copied.
2502 ///    A pointer to a 64-bit memory location that will receive the conditionally
2520 ///    A 64-bit integer vector containing one of the source operands.
2522 ///    A 64-bit integer vector containing one of the source operands.
2523 /// \returns A 64-bit integer vector containing the averages of both operands.
2539 ///    A 64-bit integer vector containing one of the source operands.
2541 ///    A 64-bit integer vector containing one of the source operands.
2542 /// \returns A 64-bit integer vector containing the averages of both operands.
2559 ///    A 64-bit integer vector containing one of the source operands.
2561 ///    A 64-bit integer vector containing one of the source operands.
2562 /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the
2575 /// Returns the contents of the MXCSR register as a 32-bit unsigned
2584 ///      _MM_EXCEPT_INEXACT. There is a convenience wrapper
2590 ///      There is a convenience wrapper _MM_GET_EXCEPTION_MASK().
2594 ///      _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
2599 ///      There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().
2603 ///      _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper
2623 /// \returns A 32-bit unsigned integer containing the contents of the MXCSR
2635 ///      _MM_EXCEPT_INEXACT. There is a convenience wrapper
2641 ///      There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one
2646 ///      _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
2651 ///      There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is
2656 ///      _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper
2678 ///    A 32-bit unsigned integer value to be written to the MXCSR register.
2685 /// Selects 4 float values from the 128-bit operands of [4 x float], as
2691 /// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);
2696 /// \param a
2697 ///    A 128-bit vector of [4 x float].
2699 ///    A 128-bit vector of [4 x float].
2702 ///    copy from \a a and \a b. \n
2703 ///    Bits [3:0] specify the values copied from operand \a a. \n
2704 ///    Bits [7:4] specify the values copied from operand \a b. \n
2705 ///    The destinations within the 128-bit destination are assigned values as
2720 ///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
2723 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
2724 #define _mm_shuffle_ps(a, b, mask) \  argument
2725   ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
2728 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
2729 ///    [4 x float] and interleaves them into a 128-bit vector of [4 x float].
2736 ///    A 128-bit vector of [4 x float]. \n
2740 ///    A 128-bit vector of [4 x float].
2743 /// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2750 /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of
2751 ///    [4 x float] and interleaves them into a 128-bit vector of [4 x float].
2758 ///    A 128-bit vector of [4 x float]. \n
2762 ///    A 128-bit vector of [4 x float]. \n
2765 /// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2772 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2782 ///    A 128-bit floating-point vector of [4 x float]. The upper 96 bits are
2785 ///    A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
2787 /// \returns A 128-bit floating-point vector of [4 x float].
2795 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2804 ///    A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
2807 ///    A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
2809 /// \returns A 128-bit floating-point vector of [4 x float].
2816 /// Constructs a 128-bit floating-point vector of [4 x float]. The lower
2825 ///    A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
2828 ///    A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
2830 /// \returns A 128-bit floating-point vector of [4 x float].
2837 /// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
2845 ///    A 64-bit vector of [4 x i16]. The elements of the destination are copied
2847 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2867 /// Converts a 64-bit vector of 16-bit unsigned integer values into a
2868 ///    128-bit vector of [4 x float].
2875 ///    A 64-bit vector of 16-bit unsigned integer values. The elements of the
2877 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2896 /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
2897 ///    into a 128-bit vector of [4 x float].
2904 ///    A 64-bit vector of [8 x i8]. The elements of the destination are copied
2906 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2920 /// Converts the lower four unsigned 8-bit integer values from a 64-bit
2921 ///    vector of [8 x u8] into a 128-bit vector of [4 x float].
2928 ///    A 64-bit vector of unsigned 8-bit integer values. The elements of the
2931 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
2945 ///    operand of [2 x i32] into a 128-bit vector of [4 x float].
2952 ///    A 64-bit vector of [2 x i32]. The lower elements of the destination are
2955 ///    A 64-bit vector of [2 x i32]. The upper elements of the destination are
2957 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
2972 /// Converts each single-precision floating-point element of a 128-bit
2973 ///    floating-point vector of [4 x float] into a 16-bit signed integer, and
2974 ///    packs the results into a 64-bit integer vector of [4 x i16].
2986 ///    A 128-bit floating-point vector of [4 x float].
2987 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
3001 /// Converts each single-precision floating-point element of a 128-bit
3003 ///    packs the results into the lower 32 bits of a 64-bit integer vector of
3016 ///    A 128-bit floating-point vector of [4 x float].
3017 /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the
3031 ///    element of a 128-bit floating-point vector of [4 x float] and returns the
3040 ///    A 128-bit floating-point vector of [4 x float].
3041 /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
3060 /// Compares each of the corresponding values of two 128-bit vectors of
3065 ///    If either value in a comparison is NaN, comparisons that are ordered
3071 /// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
3076 /// \param a
3077 ///    A 128-bit vector of [4 x float].
3079 ///    A 128-bit vector of [4 x float].
3091 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
3092 #define _mm_cmp_ps(a, b, c)                                                    \  argument
3093   ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
3095 /// Compares each of the corresponding scalar values of two 128-bit
3100 ///    If either value in a comparison is NaN, comparisons that are ordered
3106 /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
3111 /// \param a
3112 ///    A 128-bit vector of [4 x float].
3114 ///    A 128-bit vector of [4 x float].
3126 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
3127 #define _mm_cmp_ss(a, b, c)                                                    \  argument
3128   ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))