/* xref: /linux/arch/arc/lib/memcpy-archs.S (revision ac506b7f2233b35f17172304255e08cabc072aad) */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source paths of memcpy.
 *
 * SHIFT_1 / SHIFT_2   - opposite-direction shifts of RY by IMM into RX
 *                       (asl/lsr on little endian, lsr/asl on big endian).
 * MERGE_1 / MERGE_2   - left shifts applied to the first bytes read before
 *                       they are OR-ed together; MERGE_2 expands to nothing
 *                       on little endian.
 * EXTRACT_1 / EXTRACT_2 - pull the leftover halfword / byte out of RY.
 *                       Note that the little-endian EXTRACT_1 ignores IMM
 *                       (it masks with 0xFFFF) and the big-endian EXTRACT_2
 *                       ignores IMM (it always shifts right by 8).
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Building blocks of the aligned main loop.
 *
 * LOADX / STOREX      - load/store one unit with address write-back:
 *                       8 bytes (ldd/std) when CONFIG_ARC_HAS_LL64 is set,
 *                       4 bytes (ld/st) otherwise.
 * ZOLSHFT / ZOLAND    - shift and mask that split a byte count into
 *                       "main loop iterations" and "leftover bytes".
 *                       The main loop issues four LOADX/STOREX pairs, i.e.
 *                       32 bytes (shift 5, mask 0x1F) or 16 bytes
 *                       (shift 4, mask 0xF) per iteration.
 * PREFETCH_READ / PREFETCH_WRITE - prefetch at a fixed offset ahead of RX.
 *                       NOTE(review): the offset is applied unconditionally;
 *                       nothing here bounds it by the remaining length, so
 *                       a caller must make sure the prefetched address is
 *                       acceptable to touch (this matters most for the
 *                       write variant).
 */
#ifdef CONFIG_ARC_HAS_LL64
# define PREFETCH_READ(RX)	prefetch    [RX, 56]
# define PREFETCH_WRITE(RX)	prefetchw   [RX, 64]
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define PREFETCH_READ(RX)	prefetch    [RX, 28]
# define PREFETCH_WRITE(RX)	prefetchw   [RX, 32]
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy - copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * In:    r0 = destination, r1 = source, r2 = byte count
 * Out:   r0 is never written, so it still holds the destination pointer
 *        on return; r3 is used as the moving destination pointer instead.
 * Uses:  r3-r10 as scratch (presumably also r11 when LOADX is the 64-bit
 *        ldd form, which fills a register pair - TODO confirm), lp_count
 *        and the status flags.
 *
 * Strategy:
 *   1. count == 0      -> return immediately.
 *   2. count <= 8      -> simple byte loop.
 *   3. otherwise copy single bytes until the destination is 32-bit
 *      aligned, then pick a path from the source alignment:
 *        CASE 0  source aligned: wide LOADX/STOREX loop + byte tail.
 *        CASE 1-3 source off by 1/2/3: aligned word loads, shifted and
 *                merged through a carry kept in r5, aligned word stores,
 *                then the carried bytes and a byte tail.
 *
 * All loops are zero-overhead loops (lpnz) driven by lp_count; a loop is
 * skipped entirely when the preceding flag-setting instruction left Z set.
 *
 * The destination is deliberately NOT prefetched for write.  A prefetchw
 * issued at a fixed offset ahead of the store pointer is not bounded by
 * the copy length, so it can touch cache lines beyond the end of the
 * destination buffer, and one issued at entry would run even for a
 * zero-length copy.  Touching memory the caller did not ask to be written
 * is unsafe (e.g. for a neighbouring buffer that is under DMA).
 */
ENTRY(memcpy)
	prefetch [r1]		; Prefetch the read location
	mov.f	0, r2		; flags only: Z is set when size is zero
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; (delay slot) work on r3, keep r0 as ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; (delay slot) byte count for the small loop;
				; r2 is non-zero here, so Z is clear

;;; Copy single bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03	; r4 = dst & 3, Z when already aligned
	rsub	lp_count, r4, 4	; lp_count = 4 - r4 (flags untouched)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03	; r4 = src & 3, used again at .Lsourceunaligned
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; The lsr.f below sits in the delay slot of the bnz.d above, so it
	;; also runs when the branch is taken; every unaligned path reloads
	;; lp_count before using it.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	PREFETCH_READ (r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND ;Last remaining 31 bytes
.Lsmallchunk:
	;; Entered either by falling through (flags from the and.f above)
	;; or from the size <= 8 test (flags from its delay-slot mov.f).
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3 (1, 2 or 3).  Both branches below test the flags of
	;; this single cmp; neither delay-slot instruction alters them.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; (delay slot) runs for every unaligned case

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; (delay slot) first byte, for CASE 1 and 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; r5 = the three bytes read so far, carried into the loop
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]

	;; out word = carry merged with part of the new word;
	;; the rest of the new word becomes the next carry
	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; Read one halfword to reach 32bit alignment; together with the
	;; delay-slot sub above, r2 is reduced by 2 in total.
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only pre-shift the carry when the loop is going to run
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Flags are still those of the lsr.f above: undo the carry shift
	;; only if the loop ran
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]	; store the carried halfword

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
	;; That byte is already in r5 and r2 is already reduced by 1:
	;; both happened in the delay slots at .Lsourceunaligned.

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only pre-shift the carry when the loop is going to run
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Flags are still those of the lsr.f above: undo the carry shift
	;; only if the loop ran
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]	; store the carried byte

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END(memcpy)