/* xref: /freebsd/lib/libc/arm/string/memmove.S (revision 783d3ff6d7fae619db8a7990b8a6387de0c677b5) */
/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
.syntax	unified

/*
 * memmove(dst, src, len) -- or, when built with _BCOPY defined,
 * bcopy(src, dst, len), which swaps its first two arguments on entry and
 * then shares the same body.
 *
 * Copies len bytes from src to dst, choosing the copy direction from the
 * relative order of the two pointers so that overlapping buffers are
 * handled: src > dst copies upwards from the start, src < dst copies
 * downwards from the end.
 *
 * Register roles after the optional swap:
 *	r0		destination pointer
 *	r1		source pointer
 *	r2		byte count; kept biased (by -4, -12 or -32, depending
 *			on the stage) so the sign of each subs/adds result
 *			decides whether another chunk of that size remains
 *	r3, r12, lr	data scratch
 *	r4, r5		extra data scratch, saved on the stack around the
 *			loops that use them
 *
 * Return value (r0):
 *	forward path	the original r0 is pushed on entry to the path and
 *			popped together with the return address
 *	backward path	r0 starts at dst + len and is pre-decremented by
 *			every store, so it is back at dst when the copy ends
 *
 * The word-merging loops for a misaligned source use shift directions that
 * correspond to little-endian byte order.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (XOR swap) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Identical pointers: nothing to move, r0 is already the result */
	cmp	r0, r1
	it	eq
	RETeq		/* Bail now if src/dst are the same */

	/*
	 * The flags below still come from "cmp r0, r1", so each subtraction
	 * computes the lower address minus the higher one:
	 * r3 = -|dst - src| modulo 2^32.  The tail branch to memcpy is taken
	 * only when len is (unsigned) greater than that wrapped value; in
	 * every other case the direction-aware code below does the copy.
	 */
	ite	cc
	subcc	r3, r0, r1	/* dst < src: r3 = dst - src (wraps) */
	subcs	r3, r1, r0	/* dst > src: r3 = src - dst (wraps) */
	cmp	r3, r2		/* unsigned compare against len */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards	/* src < dst: copy from the top down */

	/*
	 * Forward copy (src > dst; equality already returned above).
	 * len == 0 needs no special case: it falls through to
	 * .Lmemmove_fl4, which returns without copying.
	 */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* r2 + 16 >= 0: at least 16 left */
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4

	/*
	 * 4..11 bytes left.  The loads and stores leave the flags from this
	 * subs untouched, so exactly one of the two IT blocks executes:
	 * one word for 4..7 bytes, two words for 8..11.  Either way r2 ends
	 * up as remaining - 4.
	 */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = remaining */
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to next word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Destination is word aligned, source is r12 (1..3) bytes past a
	 * word boundary.  Read the source as aligned words and build each
	 * destination word from the tail of the previous source word (kept
	 * in lr) and the head of the next one.  r1 runs ahead as an aligned
	 * pointer and is moved back onto the true source byte position
	 * before the byte-wise tail at .Lmemmove_fl4.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	/* source offset 1: three bytes come from lr, one from the next word */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back to the first unconsumed byte */
	b	.Lmemmove_fl4

	/* source offset 2: two bytes come from lr, two from the next word */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back to the first unconsumed byte */
	b	.Lmemmove_fl4

	/* source offset 3: one byte comes from lr, three from the next word */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back to the first unconsumed byte */
	b	.Lmemmove_fl4

	/*
	 * Backward copy (src < dst).  Both pointers are moved to one past
	 * the end of their buffers and every access pre-decrements, so r0
	 * is back at dst when the copy completes.  Nothing is pushed for
	 * the whole path; lr is saved only around the loops that use it as
	 * a data register, and the path returns with RET.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* r4 and lr carry data below */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* r2 + 16 >= 0: at least 16 left */
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: one word (lt) or two words (ge), as above */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = remaining */
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	/* r12 = (end of dst) & 3 = bytes down to the word boundary below */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Mirror image of the forward case: destination end is word aligned,
	 * source end is r12 (1..3) bytes above a word boundary.  r3 holds
	 * the most recently loaded (higher) source word; each destination
	 * word is built from its low bytes and the high bytes of the next
	 * lower source word.  r1 is moved back up onto the true source byte
	 * position before the byte-wise tail at .Lmemmove_bl4.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	/* source offset 3: three bytes come from r3, one from the word below */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* back to one past the last unconsumed byte */
	b	.Lmemmove_bl4

	/* source offset 2: two bytes come from r3, two from the word below */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* back to one past the last unconsumed byte */
	b	.Lmemmove_bl4

	/* source offset 1: one byte comes from r3, three from the word below */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

	/* moves 16 bytes per iteration, like its siblings */
.Lmemmove_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* back to one past the last unconsumed byte */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

	.section .note.GNU-stack,"",%progbits
