xref: /freebsd/contrib/cortex-strings/src/aarch64/memmove.S (revision f2b7bf8afcfd630e0fbd8417f1ce974de79feaf0)
/* Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 */

/* def_fn f [p2align=0]
   Open the definition of function F: switch to the .text section, align
   to 2^P2ALIGN bytes, make the symbol global, mark it as a function
   symbol and emit its label.  The macro does not emit a .size directive;
   that is left to the code following the function body.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  */
#define dstin	x0
#define src	x1
#define count	x2

/* End pointers and a scratch register.  */
#define srcend	x3
#define dstend	x4
#define tmp1	x5

/* Register pairs that each carry 16 bytes of data in flight.  */
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13

/* The fifth pair is not a fresh pair of registers: it aliases count (x2)
   and tmp1 (x5), so it may only be used once both of those are dead.  */
#define E_l	count
#define E_h	tmp1

/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
   Larger backwards copies are also handled by memcpy.  The only remaining
   case is large forward copies, where the destination starts inside the
   source buffer.  For those the end of the destination is aligned to 16
   bytes, and an unrolled loop processes 64 bytes per iteration.
*/

/* memmove
   In:	 x0 = dstin, x1 = src, x2 = count (bytes).
   Out:	 x0 is never written on the paths handled here, so dstin is
	 returned; on the memcpy path the result is whatever memcpy
	 leaves in x0.
   Uses: x2-x13 and the condition flags are overwritten; sp is not used.

   Handled locally only when count > 96 and (unsigned) dstin - src < count:
   dstin == src returns at once, otherwise the data is copied from the
   highest address downwards.  Every other case branches to memcpy.  */
def_fn memmove, 6
	/* tmp1 = dstin - src.  The ccmp gives the flags of (tmp1 - count)
	   when count > 96 and forces C=1 (nzcv = 2) otherwise, so b.hs is
	   taken when count <= 96 or when tmp1 >= count (unsigned).  */
	sub	tmp1, dstin, src
	cmp	count, 96
	ccmp	tmp1, count, 2, hi
	b.hs	memcpy

	/* Source and destination are the same buffer: nothing to move.  */
	cbz	tmp1, 3f
	add	dstend, dstin, count
	add	srcend, src, count

	/* Align dstend to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.  There are at least 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.  The loop
	   copies 64 bytes per iteration and prefetches one iteration ahead.

	   tmp1 = dstend & 15 is the number of bytes above the aligned end;
	   it is removed from srcend, dstend and count.  The last 16 bytes
	   go through D before D is reloaded as part of the first 64-byte
	   group A..D taken from below the adjusted srcend.  */

	and	tmp1, dstend, 15
	ldp	D_l, D_h, [srcend, -16]
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	/* Skip the loop when at most 128 bytes remain (unsigned compare).  */
	subs	count, count, 128
	b.ls	2f
	/* Padding: with the entry aligned to 64 bytes this puts label 1 at
	   offset 80, a 16-byte boundary (presumably the intent).  */
	nop
1:
	/* Store the 64 bytes loaded on the previous pass and load the next
	   64.  The writeback on the D pair moves dstend and srcend down by
	   64.  Repeat while count was above 64 before the subtraction.  */
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.  The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the start even if
	   there is just 1 byte left.  The first 64 source bytes are loaded
	   into E, A, B and C while the pending A..D group is stored; E
	   reuses count and tmp1, which are no longer needed here.  */
2:
	ldp	E_l, E_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	E_l, E_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
3:	ret

	.size	memmove, . - memmove
