Lines Matching +full:sub +full:- +full:block
15 * Fast assembler language version of the following C-program for memcpy
16 * which represents the `standard' for the C-library.
26 * } while (--n != 0);
60 * copy with ldx/stx in 8-way unrolled loop;
61 * copy final 0-63 bytes; exit with dst addr
65 * Use BIS (block initializing store) to avoid copying store cache
66 * lines from memory. But pre-store first element of each cache line
74 * move words in 8-way unrolled loop
75 * move final 0-31 bytes; exit with dst addr
77 * use alignaddr/faligndata combined with ldd/std in 8-way
83 * loadx8, falign, block-store, prefetch loop
84 * (only use block-init-store when src/dst on 8 byte boundaries.)
168 #define SHORT_LONG 64 /* max copy for short longword-aligned case */
171 #define MED_UMAX 1024 /* max copy for medium un-aligned case */
172 #define MED_WMAX 1024 /* max copy for medium word-aligned case */
173 #define MED_MAX 1024 /* max copy for medium longword-aligned case */
174 #define ST_CHUNK 24 /* ST_CHUNK - block of values for BIS Store */
205 sub %o2, %o5, %o2
206 sub %o1, %o0, %o1 ! %o1 gets the difference
239 EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64
247 EX_LD(LOAD(ldx, %o1+40, %o3), memcpy_retl_o2_plus_63_24)! a block of 64
250 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16)
252 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16)
253 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8)
255 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8)
261 sub %o2, 32, %o2 ! decrement length count
263 EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_31_24) ! a block of 32
266 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
268 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16)
269 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8)
270 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8)
278 sub %o2, 16, %o2 ! decrease count by 16
279 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8)
281 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8)
293 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) ! and store 8
299 ! DST is 8-byte aligned, src is not
321 EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_31)! move a block of 32
333 EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
335 EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16)
339 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8)
341 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8)
344 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8)
353 EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)! move a block of 16 bytes
360 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8)
363 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8)
365 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8)
375 EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4) ! load 4 bytes
377 EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
388 EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
401 sub %o2, 8, %o2
404 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
410 sub %o2, 16, %o2
413 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
415 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
421 sub %o2, 32, %o2
428 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
430 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
433 ! Using block init store (BIS) instructions to avoid fetching cache
441 andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
456 sub %o0, 8, %o0 ! adjust %o0 for ASI alignment
473 sub %o1, %o4, %o1 ! reset %o1
474 sub %o0, %o4, %o0 ! reset %o0
498 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5)
499 sub %o5, 64, %o5
529 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64)
536 sub %o2, 63, %o2 ! adjust length to allow cc test
556 andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
562 sub %o5, 64, %o5 ! early if necessary
601 andcc %o0, 0x3f, %o3 ! is dst 64-byte block aligned?
603 sub %o3, 64, %o3 ! %o3 will be multiple of 8
605 sub %o2, %o3, %o2 ! update cnt with bytes to be moved
619 EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4
622 EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4)
626 ! Src is half-word aligned
645 sub %o0, %o1, %o0 ! share pointer advance
662 sub %o0, %o1, %o0
668 ! Destination is now block (64 byte aligned)
670 andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
673 sub %o5, 64, %o5 ! end of source buffer
703 EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40)
708 EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40)
722 sub %o5, 8, %o5 ! ensure we do not load past end of src
764 andncc %o2, 0x20 - 1, %o5
766 sub %o2, %o5, %o2
781 sub %o2, %o5, %o2
787 EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
797 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4)
802 sub %g0, %o0, %o3
805 sub %o2, %o3, %o2
812 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
818 sub %g2, %o3, %g2
822 andn %o2, 0x08 - 1, %o5
823 sub %o2, %o5, %o2
856 andn %o2, 0x4 - 1, %o5
857 sub %o2, %o5, %o2
864 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
875 EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1)
884 sub %o2, 3, %o2
892 EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5)
894 EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5)
895 EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4)
897 EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4)
923 .size FUNC_NAME, .-FUNC_NAME