xref: /linux/arch/arc/lib/memcpy-archs.S (revision 75bf465f0bc33e9b776a46d6a1b9b990f5fb7c37)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
51f7e3dc0SClaudiu Zissulescu
#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source copy loops.
 *
 * The loops read aligned 32-bit words from a source that is 1, 2 or 3
 * bytes off a word boundary and keep the not-yet-stored bytes in a
 * "carry" register between iterations.
 *
 *   SHIFT_1(RX,RY,IMM)   RX = RY shifted so that the bytes which complete
 *                        the current output word land next to the carry
 *                        (asl on little-endian, lsr on big-endian).
 *   SHIFT_2(RX,RY,IMM)   RX = the left-over bytes of RY, i.e. the next
 *                        carry (the opposite shift direction of SHIFT_1).
 *   MERGE_1 / MERGE_2    position the halfword (MERGE_1) and the byte
 *                        (MERGE_2) that are OR-ed together to form the
 *                        initial 3-byte carry of the "off by 1" case.
 *                        MERGE_2 expands to nothing on little-endian, so
 *                        the byte stays in the low bits there.
 *   EXTRACT_1 / EXTRACT_2  pull the final halfword / byte out of the
 *                        3-byte carry once the word loop has finished.
 *                        Note that EXTRACT_1 ignores IMM on little-endian
 *                        (it masks with 0xFFFF) and EXTRACT_2 ignores IMM
 *                        on big-endian (it always shifts right by 8).
 *
 * The trailing "; <<" / "; >>" are assembler comments that travel with
 * the macro expansion.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Width-dependent helpers for the fully aligned copy loop.
 *
 *   LOADX / STOREX   post-incrementing (.ab) load / store of one unit:
 *                    8 bytes (ldd/std) when CONFIG_ARC_HAS_LL64 is set,
 *                    4 bytes (ld/st) otherwise.
 *   ZOLSHFT          log2 of the bytes moved by one iteration of the
 *                    aligned loop, which issues four LOADX/STOREX pairs:
 *                    32 bytes -> 5, 16 bytes -> 4.
 *   ZOLAND           mask yielding the byte count left over after that
 *                    loop (0x1F or 0xF respectively).
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif
351f7e3dc0SClaudiu Zissulescu
/*
 * memcpy - copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * In:       r0 = destination, r1 = source, r2 = byte count
 * Out:      r0 = destination, never modified (r3 is the running store
 *           pointer so that the return value survives)
 * Clobbers: r1-r10, lp_count and the status flags.  With
 *           CONFIG_ARC_HAS_LL64 the ldd/std forms operate on 64-bit
 *           register pairs, so r11 is written as well.
 *
 * Outline:
 *   - count == 0          : return immediately
 *   - count <= 8          : plain byte loop
 *   - otherwise           : byte-copy until the destination is 32-bit
 *                           aligned, then pick one of four word loops
 *                           depending on the source offset (0, 1, 2, 3)
 *                           and finish with a byte loop for the tail.
 *
 * All loops are zero-overhead loops (lpnz); a loop is skipped entirely
 * when the Z flag is set at the lpnz.  Branches with the .d suffix
 * execute the instruction that follows them (the delay slot) whether or
 * not the branch is taken; several paths below depend on that.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2		; set flags from the byte count only
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; delay slot: do not clobber ret val, store via r3

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte-loop count, Z clear as size != 0

;;; Byte-copy until the destination pointer is 32bit aligned.
;;; The loop runs 4 - (dest & 3) times and is skipped when dest & 3 == 0
;;; (rsub does not touch the flags, so lpnz still sees the and.f result).
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1, 1]
	sub	r2, r2, 1
	stb.ab	r5, [r3, 1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned
	;; NOTE: the lsr.f below sits in the delay slot of the bnz.d above,
	;; so it also executes when the branch is taken.  That is harmless:
	;; the unaligned paths redo the compare and reload lp_count.

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to loop iterations, unfold x4
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND ; tail: at most 31 bytes with LL64, 15 without
.Lsmallchunk:
	;; Entered either by fall-through (lp_count = tail length) or from
	;; the size <= 8 check (lp_count = whole length).
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1, 1]
	stb.ab	r5, [r3, 1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = source & 3, which is 1, 2 or 3 here.  The flags of this cmp
	;; are consumed by both branches below; the sub in between leaves
	;; them alone.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; delay slot: runs for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; delay slot: runs for off-by-1 and off-by-3

;;; CASE 1: The source is unaligned, off by 1
	;; One byte was read above, which gives 16bit alignment; two more
	;; bytes are read here to reach 32bit alignment.  The three bytes
	;; are merged into r5, which is the carry for the word loop.
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	;; output word = 3 carried bytes + 1 byte of the new word;
	;; the other 3 bytes of the new word become the next carry
	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; Read one halfword to reach 32bit alignment; it is the carry (r5).
	;; r2 was already decremented once in the delay slot of the beq.d.
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Move the carry to the upper half only if the word loop will run.
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Still the flags of the lsr.f above (nothing since then used .f):
	;; bring the carry back down only if it was moved up or produced
	;; by the loop.
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence a single byte is needed to reach 32bit alignment.  That byte
;;; was already read into r5 in the delay slot of the bhi.d, and r2 was
;;; decremented in the delay slot of the beq.d.

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Move the carry to the top byte only if the word loop will run.
	asl.nz	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Same flags as at the lsr.f above; see the off-by-2 case.
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)
220