xref: /linux/arch/loongarch/lib/memmove.S (revision 9f2c9170934eace462499ba0bfe042cc72900173)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

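/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * Dispatcher: forward copies (dst < src) are delegated to memcpy,
 * backward copies (src < dst) to rmemcpy. When the two buffers are
 * less than 64 bytes apart, the byte-at-a-time generic routines are
 * used instead of the 64-byte block copies.
 *
 * Roughly equivalent C (illustrative sketch only, not built):
 *
 *	if (dst < src)
 *		return (src - dst < 64) ? __memcpy_generic(dst, src, n)
 *					: memcpy(dst, src, n);
 *	if (src < dst)
 *		return (dst - src < 64) ? __rmemcpy_generic(dst, src, n)
 *					: rmemcpy(dst, src, n);
 *	return dst;			(dst == src, nothing to copy)
 */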
SYM_FUNC_START(memmove)
	blt	a0, a1, 1f	/* dst < src, memcpy */
	blt	a1, a0, 3f	/* src < dst, rmemcpy */
	jr	ra		/* dst == src, return */

	/* if (src - dst) < 64, copy 1 byte at a time */
1:	ori	a3, zero, 64
	sub.d	t0, a1, a0
	blt	t0, a3, 2f
	b	memcpy
2:	b	__memcpy_generic

	/* if (dst - src) < 64, copy 1 byte at a time */
3:	ori	a3, zero, 64
	sub.d	t0, a0, a1
	blt	t0, a3, 4f
	b	rmemcpy
4:	b	__rmemcpy_generic
SYM_FUNC_END(memmove)

EXPORT_SYMBOL(memmove)

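/*
 * void *rmemcpy(void *dst, const void *src, size_t n)
 *
 * Copy n bytes from src to dst, walking backwards from the end of both
 * buffers, which makes it safe for overlapping regions with dst > src.
 * The ALTERNATIVE below selects the implementation for the running CPU:
 * __rmemcpy_fast when hardware unaligned access (CPU_FEATURE_UAL) is
 * available, __rmemcpy_generic otherwise.
 */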
SYM_FUNC_START(rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
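 *
 * Backward, byte-at-a-time copy: works for any alignment and for
 * overlapping buffers with dst > src. Returns dst.
 *
 * Roughly equivalent C (illustrative sketch only, not built):
 *
 *	char *d = (char *)dst + n;
 *	const char *s = (const char *)src + n;
 *
 *	while (n--)
 *		*--d = *--s;
 *	return dst;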
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0		/* save dst for the return value */
	beqz	a2, 2f

	/* point a0/a1 one past the end of dst/src */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

	/* copy backwards, one byte at a time */
1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3		/* return dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
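 *
 * Backward copy in unrolled 64-byte blocks (eight ld.d/st.d pairs per
 * iteration), followed by a byte-wise tail loop for the remainder.
 * Selected only when the CPU supports hardware unaligned access
 * (CPU_FEATURE_UAL). Returns dst.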
 */
SYM_FUNC_START(__rmemcpy_fast)
	move	a3, a0		/* save dst for the return value */
	beqz	a2, 3f

	/* point a0/a1 one past the end of dst/src */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

	ori	a4, zero, 64
	blt	a2, a4, 2f

	/* copy 64 bytes at a time */
1:	ld.d	t0, a1, -8
	ld.d	t1, a1, -16
	ld.d	t2, a1, -24
	ld.d	t3, a1, -32
	ld.d	t4, a1, -40
	ld.d	t5, a1, -48
	ld.d	t6, a1, -56
	ld.d	t7, a1, -64
	st.d	t0, a0, -8
	st.d	t1, a0, -16
	st.d	t2, a0, -24
	st.d	t3, a0, -32
	st.d	t4, a0, -40
	st.d	t5, a0, -48
	st.d	t6, a0, -56
	st.d	t7, a0, -64

	addi.d	a0, a0, -64
	addi.d	a1, a1, -64
	addi.d	a2, a2, -64
	bge	a2, a4, 1b

	beqz	a2, 3f

	/* copy the remaining bytes */
2:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 2b

	/* return */
3:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)