xref: /freebsd/lib/libc/arm/string/memmove.S (revision 5def4c47d4bd90b209b9b4a4ba9faec15846d8fd)
1/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33__FBSDID("$FreeBSD$");
34
35.syntax	unified
36
/*
 * memmove(dst, src, len)
 * bcopy(src, dst, len)		(built instead when _BCOPY is defined)
 *
 * Copy len bytes from src to dst; the two buffers may overlap.
 *
 * In:		r0 = dst, r1 = src, r2 = len.  bcopy receives r0/r1 the
 *		other way round and swaps them on entry.
 * Out:		r0 = dst.
 * Scratch:	r3, r12 and the flags.  lr, r4 and r5 are pushed before
 *		they are used as data registers and popped afterwards.
 *
 * Strategy:
 *  - src == dst: nothing to do.
 *  - |dst - src| >= len: the buffers are disjoint, tail-call memcpy().
 *  - otherwise copy forwards when dst < src and backwards when dst > src,
 *    so that no source byte is overwritten before it has been read.
 *
 * In both directions the destination is first brought to a word boundary
 * with byte copies.  If the source is then word aligned as well, whole
 * words are moved with ldm/stm; if not, aligned source words are loaded
 * and spliced together with shifts.  The shift directions used are the
 * ones for little-endian byte order.
 *
 * To keep the loops down to a "subs/bge" pair, r2 normally holds the
 * remaining byte count minus the size of the chunk about to be attempted;
 * the adds/subs at each stage boundary re-bias it.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (xor swap) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	it	eq
	RETeq		/* Bail now if src/dst are the same */
	ite	cs
	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) there is no overlap ... */
	/*
	 * ... so plain memcpy() will do.  This is a tail call: lr has not
	 * been touched, so memcpy() returns straight to our caller.
	 */
	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards	/* src < dst: copy from the top down */

	/*
	 * Forward copy (src > dst; src == dst has already returned).
	 * The saved r0 is reloaded on exit because r0 is advanced below.
	 */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* r2 + 16 >= 0: 16 or more bytes left */
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4

	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/*
	 * copy the crud byte at a time: one byte always, a second if
	 * r2 >= 2, a third if r2 > 2 (ldrb/strb leave the flags alone)
	 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to the next word (1..3) */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3.  r1 is rounded down to a word boundary and the
	 * word containing the first wanted bytes is preloaded into lr.
	 * Each output word is the unused upper part of the previous source
	 * word joined with the lower part of the next one.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2

	/* source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/* source is 2 bytes past a word boundary */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/* source is 3 bytes past a word boundary */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/*
	 * Backward copy (src < dst).  Both pointers are moved to one past
	 * the end of their buffers and every access pre-decrements, so r0
	 * is back at dst when the last byte has been stored and does not
	 * need to be saved.  lr is only pushed where it is used for data.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* r2 + 16 >= 0: 16 or more bytes left */
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	it	eq
	RETeq			/* done */

	/*
	 * copy the crud byte at a time: one byte always, a second if
	 * r2 >= 2, a third if r2 > 2 (ldrb/strb leave the flags alone)
	 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = bytes down to the word boundary (1..3) */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = (end of source) & 3.  r1 is rounded down to a word boundary
	 * and the word holding the last wanted bytes is preloaded into r3.
	 * Each output word is the unused lower part of the previous source
	 * word joined with the upper part of the next lower one.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2

	/* source end is 3 bytes past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* one past the last byte not yet copied */
	b	.Lmemmove_bl4

	/* source end is 2 bytes past a word boundary */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* one past the last byte not yet copied */
	b	.Lmemmove_bl4

	/* source end is 1 byte past a word boundary */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4	/* less than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* one past the last byte not yet copied */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif
480
481	.section .note.GNU-stack,"",%progbits
482