xref: /linux/arch/arc/lib/memcpy-archs.S (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
4 */
5
6#include <linux/linkage.h>
7
/*
 * Endian-specific building blocks for the unaligned-source paths of memcpy.
 * Each one expands to a single shift/mask instruction (or to nothing).
 *
 *   SHIFT_1 / SHIFT_2      split a freshly loaded word into the part that
 *                          completes the current output word and the part
 *                          carried over to the next one; the two shift
 *                          directions swap between the byte orders
 *   MERGE_1 / MERGE_2      position the halfword and the byte read ahead of
 *                          the loop before they are or-ed into the carry
 *   EXTRACT_1 / EXTRACT_2  pull the leftover halfword and byte out of the
 *                          carry register once the loop is done
 *
 * The text after ';' on a #define line belongs to the macro body: it is
 * pasted into every expansion, where the assembler sees it as a comment.
 */
8#ifdef __LITTLE_ENDIAN__
/*
 * Little endian: MERGE_2 expands to nothing, and EXTRACT_1 ignores IMM
 * (it always masks the low 16 bits).
 */
9# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
10# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
11# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
12# define MERGE_2(RX,RY,IMM)
13# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
14# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
15#else
/*
 * Big endian: both MERGE macros shift left, and EXTRACT_2 ignores IMM
 * (it always shifts right by 8).
 */
16# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
17# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
18# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
19# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
20# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
21# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
22#endif
23
/*
 * Access width of the bulk (fully aligned) copy loop in memcpy.
 *
 *   LOADX / STOREX  load/store with address write-back (.ab): 8 bytes per
 *                   access (ldd/std) when CONFIG_ARC_HAS_LL64 is set,
 *                   4 bytes (ld/st) otherwise
 *   ZOLSHFT         log2 of the bytes moved by one loop iteration, which
 *                   issues four LOADX and four STOREX (32 or 16 bytes)
 *   ZOLAND          mask that yields the bytes left over after those
 *                   iterations (length & 31 or length & 15)
 *
 * NOTE(review): ldd/std presumably operate on an even/odd register pair,
 * so LOADX(r6, ...) would also touch r7 - confirm against the ISA manual.
 */
24#ifdef CONFIG_ARC_HAS_LL64
25# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
26# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
27# define ZOLSHFT		5
28# define ZOLAND			0x1F
29#else
30# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
31# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
32# define ZOLSHFT		4
33# define ZOLAND			0xF
34#endif
35
/*
 * memcpy - copy r2 bytes from the address in r1 to the address in r0
 *
 * In:   r0 = destination, r1 = source, r2 = length in bytes
 * Out:  r0 is never written, so it still holds the destination on return
 * Uses: r1-r10, lp_count and the status flags
 *       NOTE(review): with the ldd/std forms of LOADX/STOREX the odd
 *       partner registers (up to r11) are presumably touched too - confirm.
 *
 * Flow: a zero length returns at once; lengths of 8 or less are copied
 * byte by byte.  Otherwise the destination is first brought to a 4-byte
 * boundary, then one of four paths is picked from the low two bits of the
 * source address:
 *   CASE 0      source aligned too: bulk loop of four LOADX/STOREX pairs
 *   CASE 1/2/3  source off by 1/2/3: aligned words are loaded and
 *               re-packed with shifts, r5 carrying the bytes that have
 *               been read but not stored yet
 * Every path finishes with a byte-wise loop for the remaining tail.
 *
 * An instruction with a .d suffix is followed by a delay-slot instruction
 * that executes whether or not the branch/jump is taken.  lpnz runs the
 * code up to its label lp_count times and is skipped when Z is set.
 */
36ENTRY_CFI(memcpy)
37	mov.f	0, r2		; only sets the flags: Z if the length is zero
38;;; if size is zero, return immediately
39	jz.d	[blink]
40	mov	r3, r0		; delay slot: r3 = running dst, do not clobber ret val in r0
41
42;;; if size <= 8, skip all alignment work and copy bytewise
43	cmp	r2, 8
44	bls.d	@.Lsmallchunk
45	mov.f	lp_count, r2	; delay slot: loop count (and Z clear) for .Lsmallchunk
46
;;; Align the destination: copy 4 - (dst & 3) bytes one at a time.
;;; The loop is skipped when dst & 3 == 0 (Z comes from and.f, rsub
;;; leaves the flags alone).  r2 tracks the bytes still to copy.
47	and.f	r4, r0, 0x03
48	rsub	lp_count, r4, 4
49	lpnz	@.Laligndestination
50	;; LOOP BEGIN
51	ldb.ab	r5, [r1,1]
52	sub	r2, r2, 1
53	stb.ab	r5, [r3,1]
54.Laligndestination:
55
56;;; Check the alignment of the source (r4 = src & 3)
57	and.f	r4, r1, 0x03
58	bnz.d	@.Lsourceunaligned
59
60;;; CASE 0: Both source and destination are 32bit aligned
61;;; Convert len to loop iterations (r2 >> ZOLSHFT), body unfolded x4
;;; NOTE: this lsr.f sits in the delay slot of the bnz.d above, so it also
;;; runs when the branch is taken; the unaligned paths recompute the flags
;;; and lp_count before they rely on them.
62	lsr.f	lp_count, r2, ZOLSHFT
63	lpnz	@.Lcopy32_64bytes
64	;; LOOP START
65	LOADX (r6, r1)
66	LOADX (r8, r1)
67	LOADX (r10, r1)
68	LOADX (r4, r1)
69	STOREX (r6, r3)
70	STOREX (r8, r3)
71	STOREX (r10, r3)
72	STOREX (r4, r3)
73.Lcopy32_64bytes:
74
75	and.f	lp_count, r2, ZOLAND ;Tail: at most 31 bytes (15 without LL64)
;;; Shared bytewise loop.  Reached either from above (count = tail) or
;;; straight from the size <= 8 check (count = whole length).
76.Lsmallchunk:
77	lpnz	@.Lcopyremainingbytes
78	;; LOOP START
79	ldb.ab	r5, [r1,1]
80	stb.ab	r5, [r3,1]
81.Lcopyremainingbytes:
82
83	j	[blink]
84;;; END CASE 0
85
;;; Source is not 32bit aligned: dispatch on r4 = src & 3.
;;; The flags of the cmp survive the first branch and its delay slot
;;; (sub has no .f), so the same compare feeds both beq.d and bhi.d.
86.Lsourceunaligned:
87	cmp	r4, 2
88	beq.d	@.LunalignedOffby2
89	sub	r2, r2, 1	; delay slot, all three cases: one byte accounted for
90
91	bhi.d	@.LunalignedOffby3
92	ldb.ab	r5, [r1, 1]	; delay slot, cases 1 and 3: read that byte into r5
93
94;;; CASE 1: The source is unaligned, off by 1
95	;; r5 already holds 1 byte (16bit alignment reached); read
96	;; 2 more bytes to reach 32bit alignment
97	ldh.ab	r6, [r1, 2]
98	sub	r2, r2, 2
99	;; lp_count = number of 8-byte iterations (2 words, unfold x2)
100	lsr.f	lp_count, r2, 3
	;; Pack the 3 bytes read so far into r5 in memory order: they end up
	;; in the low 24 bits on little endian, the high 24 bits on big endian
101	MERGE_1 (r6, r6, 8)
102	MERGE_2 (r5, r5, 24)
103	or	r5, r5, r6
104
105	;; Both src and dst are aligned
106	lpnz	@.Lcopy8bytes_1
107	;; LOOP START
108	ld.ab	r6, [r1, 4]
109	ld.ab	r8, [r1,4]
110
	;; output word = 3 carried bytes + 1 byte of the new word;
	;; the other 3 bytes of the new word become the next carry
111	SHIFT_1	(r7, r6, 24)
112	or	r7, r7, r5
113	SHIFT_2	(r5, r6, 8)
114
115	SHIFT_1	(r9, r8, 24)
116	or	r9, r9, r5
117	SHIFT_2	(r5, r8, 8)
118
119	st.ab	r7, [r3, 4]
120	st.ab	r9, [r3, 4]
121.Lcopy8bytes_1:
122
	;; Flush the 3 bytes still carried in r5
123	;; Write back the remaining 16bits
124	EXTRACT_1 (r6, r5, 16)
125	sth.ab	r6, [r3, 2]
126	;; Write back the remaining 8bits
127	EXTRACT_2 (r5, r5, 16)
128	stb.ab	r5, [r3, 1]
129
130	and.f	lp_count, r2, 0x07 ;Tail: r2 & 7, at most 7 bytes
131	lpnz	@.Lcopybytewise_1
132	;; LOOP START
133	ldb.ab	r6, [r1,1]
134	stb.ab	r6, [r3,1]
135.Lcopybytewise_1:
136	j	[blink]
137
138.LunalignedOffby2:
139;;; CASE 2: The source is unaligned, off by 2
	;; Read the halfword that brings the source to 32bit alignment; with
	;; the sub in the delay slot of beq.d, r2 drops by 2 in total
140	ldh.ab	r5, [r1, 2]
141	sub	r2, r2, 1
142
143	;; Both src and dst are aligned
144	;; lp_count = number of 8-byte iterations (2 words, unfold x2)
145	lsr.f	lp_count, r2, 3
146#ifdef __BIG_ENDIAN__
	;; Only if the loop will run (Z from the lsr.f above): move the
	;; carried halfword to the upper half, where the loop expects it
147	asl.nz	r5, r5, 16
148#endif
149	lpnz	@.Lcopy8bytes_2
150	;; LOOP START
151	ld.ab	r6, [r1, 4]
152	ld.ab	r8, [r1,4]
153
	;; output word = carried halfword + one half of the new word;
	;; the other half of the new word becomes the next carry
154	SHIFT_1	(r7, r6, 16)
155	or	r7, r7, r5
156	SHIFT_2	(r5, r6, 16)
157
158	SHIFT_1	(r9, r8, 16)
159	or	r9, r9, r5
160	SHIFT_2	(r5, r8, 16)
161
162	st.ab	r7, [r3, 4]
163	st.ab	r9, [r3, 4]
164.Lcopy8bytes_2:
165
166#ifdef __BIG_ENDIAN__
	;; Nothing in the loop body sets the flags, so this tests the same
	;; condition as the asl.nz above: bring the carry back down for sth
167	lsr.nz	r5, r5, 16
168#endif
	;; Flush the carried halfword
169	sth.ab	r5, [r3, 2]
170
171	and.f	lp_count, r2, 0x07 ;Tail: r2 & 7, at most 7 bytes
172	lpnz	@.Lcopybytewise_2
173	;; LOOP START
174	ldb.ab	r6, [r1,1]
175	stb.ab	r6, [r3,1]
176.Lcopybytewise_2:
177	j	[blink]
178
179.LunalignedOffby3:
180;;; CASE 3: The source is unaligned, off by 3
181;;; The one byte needed to reach 32bit alignment is already in r5 (it was read in the delay slot of bhi.d)
182
183	;; Both src and dst are aligned
184	;; lp_count = number of 8-byte iterations (2 words, unfold x2)
185	lsr.f	lp_count, r2, 3
186#ifdef __BIG_ENDIAN__
	;; Only if the loop will run (Z from the lsr.f above): move the
	;; carried byte to the top byte, where the loop expects it
187	asl.ne	r5, r5, 24
188#endif
189	lpnz	@.Lcopy8bytes_3
190	;; LOOP START
191	ld.ab	r6, [r1, 4]
192	ld.ab	r8, [r1,4]
193
	;; output word = 1 carried byte + 3 bytes of the new word;
	;; the remaining byte of the new word becomes the next carry
194	SHIFT_1	(r7, r6, 8)
195	or	r7, r7, r5
196	SHIFT_2	(r5, r6, 24)
197
198	SHIFT_1	(r9, r8, 8)
199	or	r9, r9, r5
200	SHIFT_2	(r5, r8, 24)
201
202	st.ab	r7, [r3, 4]
203	st.ab	r9, [r3, 4]
204.Lcopy8bytes_3:
205
206#ifdef __BIG_ENDIAN__
	;; Flags are unchanged since the lsr.f, so this mirrors the asl.ne
	;; above: bring the carried byte back down for stb
207	lsr.nz	r5, r5, 24
208#endif
	;; Flush the carried byte
209	stb.ab	r5, [r3, 1]
210
211	and.f	lp_count, r2, 0x07 ;Tail: r2 & 7, at most 7 bytes
212	lpnz	@.Lcopybytewise_3
213	;; LOOP START
214	ldb.ab	r6, [r1,1]
215	stb.ab	r6, [r3,1]
216.Lcopybytewise_3:
217	j	[blink]
218
219END_CFI(memcpy)
220