Lines Matching +full:64 +full:- +full:byte
41 * Pseudo-code to aid in understanding the control flow of the
46 * %l6 = curthread->t_lofault;
49 * curthread->t_lofault = .copyerr;
59 * if (curthread->t_lwp == NULL) {
71 * if (length < (large_length + (64 * 4))) {
72 * if (curthread->t_lwp == NULL)
86 * curthread->t_lofault = .copyerr2;
98 * if (curthread->t_lwp == NULL) {
103 * curthread->t_lofault = (%l6 & ~3);
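
The fragments above come from the pseudo-code comment describing the copy routines' control flow: the caller's t_lofault handler is saved, a local .copyerr handler is installed for the duration of the copy, and the saved value (with its low flag bits cleared, the "%l6 & ~3" above) is put back on the way out. A minimal C rendering of that save/install/restore pattern, using stand-in types and names (kthread, copyerr_handler, bcopy_flow_sketch are illustrative; the real definitions are not part of these fragments):

#include <stddef.h>
#include <string.h>

/* Stand-ins for the kernel types; illustrative only. */
typedef void (*lofault_t)(void);
struct kthread {
        lofault_t t_lofault;
};
static struct kthread thread0;
#define curthread (&thread0)

static void
copyerr_handler(void)
{
        /* would unwind the copy and make the caller return an error */
}

static int
bcopy_flow_sketch(const void *from, void *to, size_t len)
{
        lofault_t saved = curthread->t_lofault;  /* %l6 = curthread->t_lofault */

        curthread->t_lofault = copyerr_handler;  /* curthread->t_lofault = .copyerr */
        memcpy(to, from, len);                   /* stands in for the tuned loops */
        curthread->t_lofault = saved;            /* restored with the low flag
                                                    bits cleared in the real code */
        return (0);
}
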
118 * preserve - the rest of the kernel does not use fp and, anyway, fp
121 * - userland has fp state and is interrupted (device interrupt
124 * - another (higher level) interrupt or trap handler uses bcopy
126 * - an asynchronous error trap occurs while fp state exists (in
129 * - a user process with fp state incurs a copy-on-write fault and
132 * We therefore need a per-call place in which to preserve fp state -
164 * Less than or equal to this number of bytes we will always copy byte-for-byte
183 * Size of stack frame in order to accommodate a 64-byte aligned
184 * floating-point register save area and 2 32-bit temp locations.
186 #define HWCOPYFRAMESIZE ((64 * 5) + (2 * 4))
188 #define SAVED_FPREGS_OFFSET (64 * 5)
486 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
513 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
516 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
523 add %fp, STACK_BIAS - 257, %o2
524 and %o2, -64, %o2
526 add %o2, 64, %o2
528 add %o2, 64, %o2
530 add %o2, 64, %o2
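
HWCOPYFRAMESIZE reserves (64 * 5) + (2 * 4) = 328 bytes: five 64-byte units so that four 64-byte blocks (256 bytes) of floating-point registers can be saved at a 64-byte boundary no matter where %fp lands, plus two 32-bit slots (used for the saved %gsr and %fprs values restored just above, as far as these fragments show). The add/and sequence above derives the aligned pointer into that area. A small sketch of the same arithmetic, with STACK_BIAS treated as an opaque constant and the function name being illustrative:

#include <stdint.h>

#define HWCOPYFRAMESIZE         ((64 * 5) + (2 * 4))    /* 328, from above */
#define SAVED_FPREGS_OFFSET     (64 * 5)                /* 320, from above */

/*
 * Rounding (%fp + STACK_BIAS - 257) down to a 64-byte boundary yields a
 * pointer p with  fp + bias - 320 <= p <= fp + bias - 257,  so four
 * 64-byte register blocks (256 bytes) fit entirely inside the 320-byte
 * save area sized by SAVED_FPREGS_OFFSET.
 */
static uintptr_t
aligned_fpsave_area(uintptr_t fp, uintptr_t stack_bias)
{
        return ((fp + stack_bias - 257) & ~(uintptr_t)63);
}
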
610 * Copy a block of storage - must not overlap (from + len <= to).
611 * Registers: l6 - saved t_lofault
619 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
660 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
661 * src = dest + (64 * 3) + 63.
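
As an illustrative check of that claim: with dest = 0 and src = (64 * 3) + 63 = 255, the block-aligned addresses are src & ~63 = 192 and dest & ~63 = 0, a full three 64-byte blocks apart even though the raw pointers differ by fewer than 64 * 4 bytes.
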
679 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
685 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
715 ! save in-use fpregs on stack
717 add %fp, STACK_BIAS - 257, %o2
718 and %o2, -64, %o2
720 add %o2, 64, %o2
722 add %o2, 64, %o2
724 add %o2, 64, %o2
732 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
755 ! Align Destination on double-word boundary
762 stb %o4, [%i0 - 1]
766 sub %i3, 64, %i3
770 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
797 ! %i3 = blocks (length - 64) / 64
798 ! %i4 = doubles remaining (length - blocks)
799 sub %i2, 64, %i3
816 add %l7, 64, %l7
818 add %l7, 64, %l7
820 add %l7, 64, %l7
823 ! switch statement to get us to the right 8 byte blk within a
824 ! 64 byte block
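
That "switch statement" is a computed dispatch: after the destination has been made block aligned, the position of the next 8-byte doubleword within its 64-byte block (the exact selector is not visible in these fragments) picks one of the eight specialized unrolled copy loops that follow. A hedged C illustration of the dispatch idea only; the handler table and function names are hypothetical, and the real code branches into assembly loops rather than calling through a table:

#include <stddef.h>
#include <stdint.h>

typedef void (*blkcopy_fn)(const void *, void *, size_t);

/* Pick one of eight copy loops based on which 8-byte slot of a 64-byte
 * block the copy resumes at; illustrative only. */
static void
dispatch_block_copy(const void *src, void *dst, size_t nblocks,
    const blkcopy_fn handlers[8])
{
        unsigned slot = ((uintptr_t)src & 63) >> 3;     /* 0 .. 7 */

        handlers[slot](src, dst, nblocks);
}
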
858 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
862 add %l7, 64, %l7
863 subcc %i3, 64, %i3
865 add %i0, 64, %i0
866 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
870 add %l7, 64, %l7
871 subcc %i3, 64, %i3
873 add %i0, 64, %i0
874 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
878 add %l7, 64, %l7
879 subcc %i3, 64, %i3
881 add %i0, 64, %i0
887 add %i0, 64, %i0
892 add %i0, 64, %i0
897 add %i0, 64, %i0
902 add %i0, 64, %i0
907 add %i0, 64, %i0
912 add %i0, 64, %i0
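
The "1st/2nd/3rd chunk" commentary describes a three-stage software pipeline: the %d0, %d16 and %d32 register groups rotate through the roles the comments call "low", "high" and "pre" (prefetched), while %d48 stages the data being stored, so each 64-byte store overlaps with the load of a later block. A hedged C analogue using rotating buffers in place of the register groups (the function name is illustrative and the faligndata-style realignment is omitted):

#include <stdint.h>
#include <string.h>

static void
pipelined_block_copy(const uint8_t *src, uint8_t *dst, size_t nblocks)
{
        uint8_t buf[3][64];     /* stand-ins for the %d0/%d16/%d32 groups */
        size_t i;

        if (nblocks == 0)
                return;
        memcpy(buf[0], src, 64);                        /* prime the pipe  */
        for (i = 1; i < nblocks; i++) {
                memcpy(buf[i % 3], src + i * 64, 64);   /* load block i    */
                memcpy(dst + (i - 1) * 64,              /* store block i-1 */
                    buf[(i - 1) % 3], 64);
        }
        memcpy(dst + (nblocks - 1) * 64, buf[(nblocks - 1) % 3], 64);
}
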
915 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
919 add %l7, 64, %l7
920 subcc %i3, 64, %i3
922 add %i0, 64, %i0
923 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
927 add %l7, 64, %l7
928 subcc %i3, 64, %i3
930 add %i0, 64, %i0
931 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
935 add %l7, 64, %l7
936 subcc %i3, 64, %i3
938 add %i0, 64, %i0
943 add %i0, 64, %i0
948 add %i0, 64, %i0
953 add %i0, 64, %i0
958 add %i0, 64, %i0
963 add %i0, 64, %i0
968 add %i0, 64, %i0
971 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
975 add %l7, 64, %l7
976 subcc %i3, 64, %i3
978 add %i0, 64, %i0
979 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
983 add %l7, 64, %l7
984 subcc %i3, 64, %i3
986 add %i0, 64, %i0
987 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
991 add %l7, 64, %l7
992 subcc %i3, 64, %i3
994 add %i0, 64, %i0
1000 add %i0, 64, %i0
1005 add %i0, 64, %i0
1010 add %i0, 64, %i0
1015 add %i0, 64, %i0
1020 add %i0, 64, %i0
1025 add %i0, 64, %i0
1028 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1032 add %l7, 64, %l7
1033 subcc %i3, 64, %i3
1035 add %i0, 64, %i0
1036 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1040 add %l7, 64, %l7
1041 subcc %i3, 64, %i3
1043 add %i0, 64, %i0
1044 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1048 add %l7, 64, %l7
1049 subcc %i3, 64, %i3
1051 add %i0, 64, %i0
1057 add %i0, 64, %i0
1062 add %i0, 64, %i0
1067 add %i0, 64, %i0
1072 add %i0, 64, %i0
1077 add %i0, 64, %i0
1082 add %i0, 64, %i0
1085 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1089 add %l7, 64, %l7
1090 subcc %i3, 64, %i3
1092 add %i0, 64, %i0
1093 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1097 add %l7, 64, %l7
1098 subcc %i3, 64, %i3
1100 add %i0, 64, %i0
1101 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1105 add %l7, 64, %l7
1106 subcc %i3, 64, %i3
1108 add %i0, 64, %i0
1114 add %i0, 64, %i0
1119 add %i0, 64, %i0
1124 add %i0, 64, %i0
1129 add %i0, 64, %i0
1134 add %i0, 64, %i0
1139 add %i0, 64, %i0
1142 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1146 add %l7, 64, %l7
1147 subcc %i3, 64, %i3
1149 add %i0, 64, %i0
1150 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1154 add %l7, 64, %l7
1155 subcc %i3, 64, %i3
1157 add %i0, 64, %i0
1158 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1162 add %l7, 64, %l7
1163 subcc %i3, 64, %i3
1165 add %i0, 64, %i0
1171 add %i0, 64, %i0
1176 add %i0, 64, %i0
1181 add %i0, 64, %i0
1186 add %i0, 64, %i0
1191 add %i0, 64, %i0
1196 add %i0, 64, %i0
1199 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1203 add %l7, 64, %l7
1204 subcc %i3, 64, %i3
1206 add %i0, 64, %i0
1207 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1211 add %l7, 64, %l7
1212 subcc %i3, 64, %i3
1214 add %i0, 64, %i0
1215 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1219 add %l7, 64, %l7
1220 subcc %i3, 64, %i3
1222 add %i0, 64, %i0
1228 add %i0, 64, %i0
1233 add %i0, 64, %i0
1238 add %i0, 64, %i0
1243 add %i0, 64, %i0
1248 add %i0, 64, %i0
1253 add %i0, 64, %i0
1256 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1260 add %l7, 64, %l7
1261 subcc %i3, 64, %i3
1263 add %i0, 64, %i0
1264 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1268 add %l7, 64, %l7
1269 subcc %i3, 64, %i3
1271 add %i0, 64, %i0
1272 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1276 add %l7, 64, %l7
1277 subcc %i3, 64, %i3
1279 add %i0, 64, %i0
1285 add %i0, 64, %i0
1290 add %i0, 64, %i0
1295 add %i0, 64, %i0
1300 add %i0, 64, %i0
1305 add %i0, 64, %i0
1310 add %i0, 64, %i0
1485 stb %i4, [%i0 - 1]
1496 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
1499 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1506 add %fp, STACK_BIAS - 257, %o2
1507 and %o2, -64, %o2
1509 add %o2, 64, %o2
1511 add %o2, 64, %o2
1513 add %o2, 64, %o2
1604 ! i0 - src address, i1 - dest address, i2 - count
1605 ! i3, i4 - tmps used for generating a complete word
1608 ! l1 size in bits of lower part of source word (LS = 32 - US)
1610 ! l3 size in bits of lower part of destination word (LD = 32 - UD)
1614 mov 32, %l5 ! load an oft-needed constant
1624 ldub [%i0], %i3 ! read a byte from source address
1630 sll %i4, 8, %i4 ! make room for next byte
1640 srl %i5, %l3, %i4 ! prepare to write a single byte
1641 stb %i4, [%i1] ! write a byte
1653 sll %i3, %l1, %i3 ! clear upper used byte(s)
1670 sll %i3, %l0, %i3 ! save remaining byte(s)
1685 srl %i4, %l0, %i3 ! prepare to write a single byte
1686 stb %i3, [%i1] ! write a byte
1715 srl %i3, %l0, %i4 ! upper src byte into lower dst byte
1716 stb %i4, [%i1] ! write a byte
1729 ldub [%i0], %i3 ! read a byte from source address
1735 sll %i4, 8, %i4 ! make room for next byte
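
The register notes above (US/LS for the source word, UD/LD for the destination word) describe the classic shift-and-merge copy for mutually misaligned buffers: aligned 32-bit words are read from the source and adjacent words are spliced together with shifts so that aligned words can be written to the destination, with single bytes peeled off at the edges (the ldub/sll/srl/stb lines). A hedged C sketch of the word-splicing core only, assuming 0 < us < 32 and the big-endian word layout SPARC uses; the function name is illustrative:

#include <stddef.h>
#include <stdint.h>

static void
shift_merge_copy(const uint32_t *src, uint32_t *dst, size_t nwords,
    unsigned us)            /* us = bits of the source word already consumed */
{
        unsigned ls = 32 - us;                  /* LS = 32 - US, as above   */
        uint32_t hold = *src++ << us;           /* unconsumed part of the
                                                   first word, shifted up   */
        size_t i;

        for (i = 0; i < nwords; i++) {
                uint32_t next = *src++;

                dst[i] = hold | (next >> ls);   /* splice two source words  */
                hold = next << us;              /* carry the rest forward   */
        }
}
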
1738 ! if from address unaligned for double-word moves,
1750 ! source and destination are now double-word aligned
1779 call .alignit ! go word-align it
1785 ! byte copy, works with any alignment
1792 ! differenced byte copy, works with any alignment
1793 ! assumes dest in %i1 and (source - dest) in %i0
1839 stb %o4, [%i1] ! write a byte
1845 ldub [%i0], %o4 ! read next byte
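
The "differenced byte copy" noted above keeps a single induction variable: only the destination pointer advances, while the constant difference (source - dest) sits in a register, so every load comes from dest + diff. A minimal sketch of the same idea (pointer arithmetic across separate objects is fine in assembly but loose in strict C, so treat this as illustrative):

#include <stddef.h>

static void
differenced_byte_copy(unsigned char *dst, ptrdiff_t diff, size_t len)
{
        while (len-- != 0) {
                *dst = *(dst + diff);   /* source byte lives at dst + diff */
                dst++;
        }
}
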
1865 2: cmp %o2, %o3 ! cmp size and abs(from - to)
1889 ldub [%o0 + %o2], %o3 ! get byte at end of src
1906 save %sp, -SA(MINFRAME + 4*64), %sp
1908 ! %i0 - source address (arg)
1909 ! %i1 - destination address (arg)
1910 ! %i2 - length of region (not arg)
1911 ! %l0 - saved fprs
1912 ! %l1 - pointer to saved fpregs
1919 ! save in-use fpregs on stack
1920 add %fp, STACK_BIAS - 193, %l1
1921 and %l1, -64, %l1
1923 add %l1, 64, %l3
1925 add %l3, 64, %l3
1931 add %i0, 64, %i0
1932 set PAGESIZE - 64, %i2
1944 add %i0, 64, %i0
1945 subcc %i2, 64, %i2
1947 add %i1, 64, %i1
1958 add %i0, 64, %i0
1959 sub %i2, 64, %i2
1961 add %i1, 64, %i1
1971 add %l1, 64, %l3
1973 add %l3, 64, %l3
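
These fragments belong to the page-sized block copy: the register comments give the arguments, the first 64-byte block is loaded before the loop, the loop then covers PAGESIZE - 64 bytes so each iteration can overlap the store of one block with the load of the next, and the final block is stored after the loop. A hedged C rendering of that structure (PAGESIZE_SKETCH and the function name are illustrative stand-ins):

#include <stdint.h>
#include <string.h>

#define PAGESIZE_SKETCH 8192            /* stand-in for the platform PAGESIZE */

static void
blk_pagecopy_sketch(const uint8_t *src, uint8_t *dst)
{
        uint8_t cur[64], next[64];
        size_t remaining = PAGESIZE_SKETCH - 64;

        memcpy(cur, src, 64);                   /* prime with block 0   */
        src += 64;
        while (remaining != 0) {
                memcpy(next, src, 64);          /* load the next block  */
                memcpy(dst, cur, 64);           /* store current block  */
                memcpy(cur, next, 64);
                src += 64;
                dst += 64;
                remaining -= 64;
        }
        memcpy(dst, cur, 64);                   /* store the last block */
}
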
1984 * Transfer data to and from user space -
1990 * DDI/DKI which specifies that they return '-1' on "errors."
1994 * So there's two extremely similar routines - xcopyin() and xcopyout()
2014 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
2029 * single byte aligned - 900 (hw_copy_limit_1)
2030 * two byte aligned - 1800 (hw_copy_limit_2)
2031 * four byte aligned - 3600 (hw_copy_limit_4)
2032 * eight byte aligned - 7200 (hw_copy_limit_8)
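
A hedged sketch of that table in C: the mutual alignment of the buffers picks one of the four tunables, and only copies larger than the selected limit take the hardware-assisted (VIS) path, while counts at or below it use the simple chunked loops. Whether the real code derives alignment from (src | dst), as below, or checks the operands separately is not visible in these fragments; the function name is illustrative and the default values are the ones listed above.

#include <stddef.h>
#include <stdint.h>

static size_t hw_copy_limit_1 = 900;    /* 1-byte aligned */
static size_t hw_copy_limit_2 = 1800;   /* 2-byte aligned */
static size_t hw_copy_limit_4 = 3600;   /* 4-byte aligned */
static size_t hw_copy_limit_8 = 7200;   /* 8-byte aligned */

static size_t
hw_copy_limit(const void *src, const void *dst)
{
        unsigned align = ((uintptr_t)src | (uintptr_t)dst) & 7;

        if (align == 0)
                return (hw_copy_limit_8);
        if ((align & 3) == 0)
                return (hw_copy_limit_4);
        if ((align & 1) == 0)
                return (hw_copy_limit_2);
        return (hw_copy_limit_1);
}
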
2045 * --> We'll use this handler if we end up grabbing a window
2046 * --> before we use VIS instructions.
2049 * always do a byte for byte copy.
2058 * If we don't exceed one of the limits, we store -count in %o3,
2059 * we store the number of chunks (8, 4, 2 or 1 byte) operated
2067 * unless there's only one byte left. We load that as we're
2073 * the single byte aligned copy limit and what VIS treats as its
2094 * kaddr - %g2
2095 * uaddr - %g3
2096 * count - %g4
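
When one of the simple (non-VIS) loops is chosen, the bookkeeping described a few lines up negates the byte count, keeps the number of whole 8-, 4-, 2- or 1-byte chunks separately, and finishes any remainder byte for byte; the negated value serves as an offset that climbs toward zero. A hedged C sketch of that indexing scheme (the real code keeps the values in %o3/%o2; everything else below, including the function name, is illustrative):

#include <stddef.h>
#include <stdint.h>

static void
chunked_copy_sketch(const uint8_t *src, uint8_t *dst, size_t count,
    size_t chunk)           /* chunk = 8, 4, 2 or 1 */
{
        size_t nchunks = count / chunk;
        size_t done = nchunks * chunk;
        const uint8_t *send = src + done;       /* base = end of chunked part */
        uint8_t *dend = dst + done;
        ptrdiff_t off = -(ptrdiff_t)done;       /* "-count": climbs toward 0  */

        while (off != 0)
                for (size_t b = 0; b < chunk; b++, off++)
                        dend[off] = send[off];
        for (size_t t = count - done; t != 0; t--)
                *dend++ = *send++;              /* leftovers, byte for byte   */
}
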
2123 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
2126 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
2133 add %fp, STACK_BIAS - 257, %o2
2134 and %o2, -64, %o2
2136 add %o2, 64, %o2
2138 add %o2, 64, %o2
2140 add %o2, 64, %o2
2213 ldub [%o0 + %o3], %o4 ! load first byte
2246 ! See if we're single byte aligned. If we are, check the
2247 ! limit for single byte copies. If we're smaller or equal,
2248 ! bounce to the byte for byte copy loop. Otherwise do it in
2255 ! Single byte aligned. Do we do it via HW or via
2256 ! byte for byte? Do a quick no memory reference
2268 ! Is HW copy on? If not, do everything byte for byte.
2274 ! If we're less than or equal to the single byte copy limit,
2286 ! 8 byte aligned?
2319 ! Housekeeping for copy loops. Uses same idea as in the byte for
2320 ! byte copy loop above.
2326 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
2328 ! 4 byte aligned?
2364 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
2366 ! We must be 2 byte aligned. Off we go.
2389 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
2394 ! and bounce back to a non-HW assisted copy. This dispatches those
2420 ! eight byte aligned copies end here.
2425 ! Something is left - do it byte for byte.
2428 ldub [%o0 + %o3], %o4 ! load next byte
2430 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
2441 ! four byte aligned copies end here.
2446 ! Something is left. Do it byte for byte.
2449 ldub [%o0 + %o3], %o4 ! load next byte
2451 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
2467 ! Deal with the last byte
2492 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2496 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2505 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
2513 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2514 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2516 ! save in-use fpregs on stack
2520 add %fp, STACK_BIAS - 257, %o2
2521 and %o2, -64, %o2
2523 add %o2, 64, %o2
2525 add %o2, 64, %o2
2527 add %o2, 64, %o2
2535 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
2558 ! Align Destination on double-word boundary
2569 sub %i3, 64, %i3
2573 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
2600 ! %i3 = blocks (length - 64) / 64
2601 ! %i4 = doubles remaining (length - blocks)
2602 sub %i2, 64, %i3
2619 add %l7, 64, %l7
2621 add %l7, 64, %l7
2623 add %l7, 64, %l7
2626 ! switch statement to get us to the right 8 byte blk within a
2627 ! 64 byte block
2661 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2665 add %l7, 64, %l7
2666 subcc %i3, 64, %i3
2668 add %i0, 64, %i0
2669 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2673 add %l7, 64, %l7
2674 subcc %i3, 64, %i3
2676 add %i0, 64, %i0
2677 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2681 add %l7, 64, %l7
2682 subcc %i3, 64, %i3
2684 add %i0, 64, %i0
2690 add %i0, 64, %i0
2695 add %i0, 64, %i0
2700 add %i0, 64, %i0
2705 add %i0, 64, %i0
2710 add %i0, 64, %i0
2715 add %i0, 64, %i0
2718 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2722 add %l7, 64, %l7
2723 subcc %i3, 64, %i3
2725 add %i0, 64, %i0
2726 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2730 add %l7, 64, %l7
2731 subcc %i3, 64, %i3
2733 add %i0, 64, %i0
2734 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2738 add %l7, 64, %l7
2739 subcc %i3, 64, %i3
2741 add %i0, 64, %i0
2746 add %i0, 64, %i0
2751 add %i0, 64, %i0
2756 add %i0, 64, %i0
2761 add %i0, 64, %i0
2766 add %i0, 64, %i0
2771 add %i0, 64, %i0
2774 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2778 add %l7, 64, %l7
2779 subcc %i3, 64, %i3
2781 add %i0, 64, %i0
2782 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2786 add %l7, 64, %l7
2787 subcc %i3, 64, %i3
2789 add %i0, 64, %i0
2790 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2794 add %l7, 64, %l7
2795 subcc %i3, 64, %i3
2797 add %i0, 64, %i0
2803 add %i0, 64, %i0
2808 add %i0, 64, %i0
2813 add %i0, 64, %i0
2818 add %i0, 64, %i0
2823 add %i0, 64, %i0
2828 add %i0, 64, %i0
2831 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2835 add %l7, 64, %l7
2836 subcc %i3, 64, %i3
2838 add %i0, 64, %i0
2839 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2843 add %l7, 64, %l7
2844 subcc %i3, 64, %i3
2846 add %i0, 64, %i0
2847 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2851 add %l7, 64, %l7
2852 subcc %i3, 64, %i3
2854 add %i0, 64, %i0
2860 add %i0, 64, %i0
2865 add %i0, 64, %i0
2870 add %i0, 64, %i0
2875 add %i0, 64, %i0
2880 add %i0, 64, %i0
2885 add %i0, 64, %i0
2888 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2892 add %l7, 64, %l7
2893 subcc %i3, 64, %i3
2895 add %i0, 64, %i0
2896 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2900 add %l7, 64, %l7
2901 subcc %i3, 64, %i3
2903 add %i0, 64, %i0
2904 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2908 add %l7, 64, %l7
2909 subcc %i3, 64, %i3
2911 add %i0, 64, %i0
2917 add %i0, 64, %i0
2922 add %i0, 64, %i0
2927 add %i0, 64, %i0
2932 add %i0, 64, %i0
2937 add %i0, 64, %i0
2942 add %i0, 64, %i0
2945 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2949 add %l7, 64, %l7
2950 subcc %i3, 64, %i3
2952 add %i0, 64, %i0
2953 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2957 add %l7, 64, %l7
2958 subcc %i3, 64, %i3
2960 add %i0, 64, %i0
2961 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2965 add %l7, 64, %l7
2966 subcc %i3, 64, %i3
2968 add %i0, 64, %i0
2974 add %i0, 64, %i0
2979 add %i0, 64, %i0
2984 add %i0, 64, %i0
2989 add %i0, 64, %i0
2994 add %i0, 64, %i0
2999 add %i0, 64, %i0
3002 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3006 add %l7, 64, %l7
3007 subcc %i3, 64, %i3
3009 add %i0, 64, %i0
3010 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3014 add %l7, 64, %l7
3015 subcc %i3, 64, %i3
3017 add %i0, 64, %i0
3018 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3022 add %l7, 64, %l7
3023 subcc %i3, 64, %i3
3025 add %i0, 64, %i0
3031 add %i0, 64, %i0
3036 add %i0, 64, %i0
3041 add %i0, 64, %i0
3046 add %i0, 64, %i0
3051 add %i0, 64, %i0
3056 add %i0, 64, %i0
3059 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3063 add %l7, 64, %l7
3064 subcc %i3, 64, %i3
3066 add %i0, 64, %i0
3067 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3071 add %l7, 64, %l7
3072 subcc %i3, 64, %i3
3074 add %i0, 64, %i0
3075 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3079 add %l7, 64, %l7
3080 subcc %i3, 64, %i3
3082 add %i0, 64, %i0
3088 add %i0, 64, %i0
3093 add %i0, 64, %i0
3098 add %i0, 64, %i0
3103 add %i0, 64, %i0
3108 add %i0, 64, %i0
3113 add %i0, 64, %i0
3297 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
3300 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
3307 add %fp, STACK_BIAS - 257, %o2
3308 and %o2, -64, %o2
3310 add %o2, 64, %o2
3312 add %o2, 64, %o2
3314 add %o2, 64, %o2
3341 mov -1, %o0
3372 add %o0, %o4, %o0 ! start w/last byte
3378 sub %o0, 2, %o0 ! get next byte
3459 ! See if we're single byte aligned. If we are, check the
3460 ! limit for single byte copies. If we're smaller or equal,
3461 ! bounce to the byte for byte copy loop. Otherwise do it in
3468 ! We're single byte aligned.
3475 ! Is HW copy on? If not, do everything byte for byte.
3482 ! go to byte for byte.
3493 ! 8 byte aligned?
3498 ! We're eight byte aligned.
3516 ! Housekeeping for copy loops. Uses same idea as in the byte for
3517 ! byte copy loop above.
3523 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
3525 ! 4 byte aligned?
3548 ! Housekeeping for copy loops. Uses same idea as in the byte
3549 ! for byte copy loop above.
3555 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
3558 ! We're two byte aligned. Check for "smallness"
3581 ! Housekeeping for copy loops. Uses same idea as in the byte
3582 ! for byte copy loop above.
3589 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
3595 ! and bounce back to a non-hw assisted copy. This dispatches
3614 ! Eight byte aligned copies. A steal from the original .small_copyin
3615 ! with modifications. %o2 is number of 8 byte chunks to copy. When
3616 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
3627 ! End of copy loop. Most 8 byte aligned copies end here.
3632 ! Something is left. Do it byte for byte.
3637 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
3647 ! End of copy loop. Most 4 byte aligned copies end here.
3652 ! Something is left. Do it byte for byte.
3657 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
3668 ! End of copy loop. Most 2 byte aligned copies end here.
3673 ! Deal with the last byte
3698 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3702 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3710 cmp %i2, VIS_COPY_THRESHOLD+(64*4)
3719 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3723 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3725 ! save in-use fpregs on stack
3728 add %fp, STACK_BIAS - 257, %o2
3729 and %o2, -64, %o2
3731 add %o2, 64, %o2
3733 add %o2, 64, %o2
3735 add %o2, 64, %o2
3743 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
3766 ! Align Destination on double-word boundary
3773 stb %o4, [%i0-1]
3777 sub %i3, 64, %i3
3781 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
3808 ! %i3 = blocks (length - 64) / 64
3809 ! %i4 = doubles remaining (length - blocks)
3810 sub %i2, 64, %i3
3827 add %l7, 64, %l7
3829 add %l7, 64, %l7
3831 add %l7, 64, %l7
3834 ! switch statement to get us to the right 8 byte blk within a
3835 ! 64 byte block
3869 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3873 add %l7, 64, %l7
3874 subcc %i3, 64, %i3
3876 add %i0, 64, %i0
3877 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3881 add %l7, 64, %l7
3882 subcc %i3, 64, %i3
3884 add %i0, 64, %i0
3885 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3889 add %l7, 64, %l7
3890 subcc %i3, 64, %i3
3892 add %i0, 64, %i0
3898 add %i0, 64, %i0
3903 add %i0, 64, %i0
3908 add %i0, 64, %i0
3913 add %i0, 64, %i0
3918 add %i0, 64, %i0
3923 add %i0, 64, %i0
3926 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3930 add %l7, 64, %l7
3931 subcc %i3, 64, %i3
3933 add %i0, 64, %i0
3934 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3938 add %l7, 64, %l7
3939 subcc %i3, 64, %i3
3941 add %i0, 64, %i0
3942 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3946 add %l7, 64, %l7
3947 subcc %i3, 64, %i3
3949 add %i0, 64, %i0
3954 add %i0, 64, %i0
3959 add %i0, 64, %i0
3964 add %i0, 64, %i0
3969 add %i0, 64, %i0
3974 add %i0, 64, %i0
3979 add %i0, 64, %i0
3981 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3985 add %l7, 64, %l7
3986 subcc %i3, 64, %i3
3988 add %i0, 64, %i0
3989 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3993 add %l7, 64, %l7
3994 subcc %i3, 64, %i3
3996 add %i0, 64, %i0
3997 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4001 add %l7, 64, %l7
4002 subcc %i3, 64, %i3
4004 add %i0, 64, %i0
4010 add %i0, 64, %i0
4015 add %i0, 64, %i0
4020 add %i0, 64, %i0
4025 add %i0, 64, %i0
4030 add %i0, 64, %i0
4035 add %i0, 64, %i0
4038 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4042 add %l7, 64, %l7
4043 subcc %i3, 64, %i3
4045 add %i0, 64, %i0
4046 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4050 add %l7, 64, %l7
4051 subcc %i3, 64, %i3
4053 add %i0, 64, %i0
4054 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4058 add %l7, 64, %l7
4059 subcc %i3, 64, %i3
4061 add %i0, 64, %i0
4067 add %i0, 64, %i0
4072 add %i0, 64, %i0
4077 add %i0, 64, %i0
4082 add %i0, 64, %i0
4087 add %i0, 64, %i0
4092 add %i0, 64, %i0
4095 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4099 add %l7, 64, %l7
4100 subcc %i3, 64, %i3
4102 add %i0, 64, %i0
4103 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4107 add %l7, 64, %l7
4108 subcc %i3, 64, %i3
4110 add %i0, 64, %i0
4111 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4115 add %l7, 64, %l7
4116 subcc %i3, 64, %i3
4118 add %i0, 64, %i0
4124 add %i0, 64, %i0
4129 add %i0, 64, %i0
4134 add %i0, 64, %i0
4139 add %i0, 64, %i0
4144 add %i0, 64, %i0
4149 add %i0, 64, %i0
4152 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4156 add %l7, 64, %l7
4157 subcc %i3, 64, %i3
4159 add %i0, 64, %i0
4160 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4164 add %l7, 64, %l7
4165 subcc %i3, 64, %i3
4167 add %i0, 64, %i0
4168 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4172 add %l7, 64, %l7
4173 subcc %i3, 64, %i3
4175 add %i0, 64, %i0
4181 add %i0, 64, %i0
4186 add %i0, 64, %i0
4191 add %i0, 64, %i0
4196 add %i0, 64, %i0
4201 add %i0, 64, %i0
4206 add %i0, 64, %i0
4209 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4213 add %l7, 64, %l7
4214 subcc %i3, 64, %i3
4216 add %i0, 64, %i0
4217 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4221 add %l7, 64, %l7
4222 subcc %i3, 64, %i3
4224 add %i0, 64, %i0
4225 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4229 add %l7, 64, %l7
4230 subcc %i3, 64, %i3
4232 add %i0, 64, %i0
4238 add %i0, 64, %i0
4243 add %i0, 64, %i0
4248 add %i0, 64, %i0
4253 add %i0, 64, %i0
4258 add %i0, 64, %i0
4263 add %i0, 64, %i0
4266 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4270 add %l7, 64, %l7
4271 subcc %i3, 64, %i3
4273 add %i0, 64, %i0
4274 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4278 add %l7, 64, %l7
4279 subcc %i3, 64, %i3
4281 add %i0, 64, %i0
4282 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4286 add %l7, 64, %l7
4287 subcc %i3, 64, %i3
4289 add %i0, 64, %i0
4295 add %i0, 64, %i0
4300 add %i0, 64, %i0
4305 add %i0, 64, %i0
4310 add %i0, 64, %i0
4315 add %i0, 64, %i0
4320 add %i0, 64, %i0
4495 stb %i4, [%i0 - 1]
4503 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
4506 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
4513 add %fp, STACK_BIAS - 257, %o2
4514 and %o2, -64, %o2
4516 add %o2, 64, %o2
4518 add %o2, 64, %o2
4520 add %o2, 64, %o2
4546 mov -1, %o0
4576 add %o0, %o4, %o0 ! start w/last byte
4582 sub %o0, 2, %o0 ! get next byte
4600 * Copy a block of storage - must not overlap (from + len <= to).
4614 * Copy a block of storage - must not overlap (from + len <= to).
4640 .align 64
4645 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
4652 ! %i0 - start address
4653 ! %i1 - length of region (multiple of 64)
4654 ! %l0 - saved fprs
4655 ! %l1 - pointer to saved %d0 block
4656 ! %l2 - saved curthread->t_lwp
4660 save %sp, -SA(MINFRAME + 2*64), %sp
4662 ! Must be block-aligned
4663 andcc %i0, (64-1), %g0
4672 ! ... and length must be a multiple of 64
4673 andcc %i1, (64-1), %g0
4682 restore %g0, 1, %o0 ! return (1) - did not use block operations
4689 ! save in-use fpregs on stack
4691 add %fp, STACK_BIAS - 65, %l1
4692 and %l1, -64, %l1
4716 stda %d0, [%i0+64]%asi
4726 cmp %i1, 64
4729 andn %i1, (64-1), %i3
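
The last group of fragments is from hwblkclr: it bails out (returning 1, "did not use block operations") unless both the start address and the length are multiples of 64, and otherwise zeroes the region with 64-byte block stores of a cleared register group. A hedged C rendering of that contract, with memset standing in for the stda block stores, the return value on the block path assumed to be 0, and the function name being illustrative:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int
hwblkclr_sketch(void *addr, size_t len)
{
        if (((uintptr_t)addr & 63) != 0 || (len & 63) != 0)
                return (1);     /* did not use block operations */

        for (size_t off = 0; off < len; off += 64)
                memset((uint8_t *)addr + off, 0, 64);   /* one 64-byte store */
        return (0);
}
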