Lines Matching +full:left +full:- +full:aligned

15  * Fast assembler language version of the following C-program for memcpy
16 * which represents the `standard' for the C-library.
26 * } while (--n != 0);
37 * if src & dst aligned on word boundary but not long word boundary,
39 * if src & dst aligned on long word boundary
41 * if src & dst not aligned and length <= SHORTCHECK (SHORTCHECK=14)
59 * finish_long: src/dst aligned on 8 bytes
60 * copy with ldx/stx in 8-way unrolled loop;
61 * copy final 0-63 bytes; exit with dst addr
62 * } else { src/dst aligned; count > MED_MAX
66 * lines from memory. But pre-store first element of each cache line
72 * } else { src/dst not aligned on 8 bytes
73 * if src is word aligned and count < MED_WMAX
74 * move words in 8-way unrolled loop
75 * move final 0-31 bytes; exit with dst addr
77 * use alignaddr/faligndata combined with ldd/std in 8-way
83 * loadx8, falign, block-store, prefetch loop
84 * (only use block-init-store when src/dst on 8 byte boundaries.)
168 #define SHORT_LONG 64 /* max copy for short longword-aligned case */
171 #define MED_UMAX 1024 /* max copy for medium un-aligned case */
172 #define MED_WMAX 1024 /* max copy for medium word-aligned case */
173 #define MED_MAX 1024 /* max copy for medium longword-aligned case */
174 #define ST_CHUNK 24 /* ST_CHUNK - block of values for BIS Store */
175 #define ALIGN_PRE 24 /* distance for aligned prefetch loop */
201 andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned
228 * Special case for handling when src and dest are both long word aligned
250 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16)
252 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16)
253 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8)
254 bgu,pt %xcc, .Lmedl64 ! repeat if at least 64 bytes left
255 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8)
258 ble,pt %xcc, .Lmedl31 ! to skip if 31 or fewer bytes left
266 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
268 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16)
269 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8)
270 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8)
273 ble,pt %xcc, .Lmedl15 ! skip if 15 or fewer bytes left
279 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8)
281 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8)
286 blt,pt %xcc, .Lmedw7 ! skip if 7 or fewer bytes left
293 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) ! and store 8
299 ! DST is 8-byte aligned, src is not
302 bnz,pt %xcc, .Lunalignsetup ! branch to skip if not word aligned
306 * Handle all cases where src and dest are aligned on word
333 EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
335 EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16)
339 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8)
341 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8)
343 bgu,pt %xcc, .Lmedw32 ! repeat if at least 32 bytes left
344 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8)
360 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8)
363 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8)
365 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8)
369 blt,pn %xcc, .Lmedw7 ! skip if 7 or fewer bytes left
375 EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4) ! load 4 bytes
377 EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
380 cmp %o2, 4 ! check for 4 bytes left
381 blt,pn %xcc, .Lsmallleft3 ! skip if 3 or fewer bytes left
388 EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
395 andcc %o0, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned
404 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
413 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
415 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
428 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
430 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
498 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5)
529 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64)
560 andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
601 andcc %o0, 0x3f, %o3 ! is dst 64-byte block aligned?
604 neg %o3 ! bytes until dest is 64 byte aligned
613 ! Src is word aligned
619 EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4
622 EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4)
626 ! Src is half-word aligned
629 sllx %o4, 32, %o5 ! shift left
643 ! Src is byte aligned
668 ! Destination is now block (64-byte) aligned
675 andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
703 EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40)
708 EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40)
715 ! Dest long word aligned, Src not long word aligned
723 andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
764 andncc %o2, 0x20 - 1, %o5
787 EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
797 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4)
801 /* First get dest 8 byte aligned. */
812 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
822 andn %o2, 0x08 - 1, %o5
856 andn %o2, 0x4 - 1, %o5
864 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
875 EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1)
892 EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5)
894 EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5)
895 EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4)
897 EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4)
923 .size FUNC_NAME, .-FUNC_NAME