xref: /linux/arch/loongarch/lib/memmove.S (revision 0526b56cbc3c489642bd6a5fe4b718dea7ef0ee8)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4 */
5
6#include <asm/alternative-asm.h>
7#include <asm/asm.h>
8#include <asm/asmmacro.h>
9#include <asm/cpu.h>
10#include <asm/export.h>
11#include <asm/regdef.h>
12
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n (not examined here, passed through to the selected routine)
 *
 * Picks the copy routine from the relative position of dst and src.
 * Both branches are tail branches: ra is not modified, so the selected
 * routine returns straight to memmove's caller.
 *
 * The comparisons are unsigned (bltu) because a0/a1 hold addresses;
 * a signed compare would order an address with bit 63 set below one
 * with bit 63 clear.
 */
SYM_FUNC_START(memmove)
	bltu	a0, a1, memcpy	/* dst < src, memcpy */
	bltu	a1, a0, rmemcpy	/* src < dst, rmemcpy */
	jr	ra		/* dst == src, return (a0 still holds dst) */
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)

EXPORT_SYMBOL(memmove)
21
/*
 * rmemcpy: reverse-copy entry point, reached from memmove when src < dst.
 *
 * Consists of a single branch chosen through ALTERNATIVE:
 * "b __rmemcpy_generic" is the first (default) instruction and
 * "b __rmemcpy_fast" is the replacement tied to CPU_FEATURE_UAL, i.e.
 * CPUs that support hardware unaligned access.
 */
SYM_FUNC_START(rmemcpy)
	ALTERNATIVE	"b __rmemcpy_generic", "b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(rmemcpy)
_ASM_NOKPROBE(rmemcpy)
30
/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * Copies n bytes one at a time, starting with the last byte and working
 * down to the first. Returns dst in a0.
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Scratch: a3 (saved dst), t0 (byte in flight).
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0				/* remember dst for the return value */
	beqz	a2, .Lrgeneric_done		/* n == 0: nothing to copy */

	/* move both cursors to one past the last byte */
	add.d	a1, a1, a2
	add.d	a0, a0, a2

.Lrgeneric_byte:
	/* step down first, then move the byte the cursors now point at */
	addi.d	a1, a1, -1
	addi.d	a0, a0, -1
	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a2, a2, -1
	blt	zero, a2, .Lrgeneric_byte	/* signed test: loop while n > 0 */

.Lrgeneric_done:
	move	a0, a3				/* return the original dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)
56
/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * Copies n bytes from the highest address downwards using 8-byte
 * ld.d/st.d accesses. Source loads, and the two final head/tail stores,
 * may be unaligned. a0 is never written, so dst is the return value.
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Register roles once n > 8 is known:
 *   a2: dst + n (end of the destination)
 *   a3: source cursor, moves downwards
 *   a5: destination cursor, moves downwards, 8-byte aligned
 *   a4: src + chunk size, the bound a3 is compared against
 *   a6: first 8 bytes of the source
 *   a7: last 8 bytes of the source
 *   t0-t7: data in flight
 */
SYM_FUNC_START(__rmemcpy_fast)
	/* n < 9: hand over to __memcpy_small */
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	/*
	 * Fetch the first and last 8 source bytes before any store is
	 * issued; they are written out last and cover whatever the aligned
	 * chunk copies below leave untouched at either end.
	 */
	ld.d	a6, a1, 0
	add.d	a3, a1, a2		/* a3 = src + n */
	ld.d	a7, a3, -8
	add.d	a2, a0, a2		/* a2 = dst + n */

	/*
	 * Round the destination end down to an 8-byte boundary and pull
	 * the source cursor back by the same number of bytes.
	 */
	andi	t1, a2, 7
	sub.d	a5, a2, t1
	sub.d	a3, a3, t1

	/* skip the 64-byte loop unless more than 64 bytes lie above src */
	addi.d	a4, a1, 64
	bgeu	a4, a3, .Lrtail32

	/* 64 bytes per iteration: all eight loads, then all eight stores */
.Lrblock64:
	addi.d	a3, a3, -64
	ld.d	t0, a3, 56
	ld.d	t1, a3, 48
	ld.d	t2, a3, 40
	ld.d	t3, a3, 32
	ld.d	t4, a3, 24
	ld.d	t5, a3, 16
	ld.d	t6, a3, 8
	ld.d	t7, a3, 0
	addi.d	a5, a5, -64
	st.d	t0, a5, 56
	st.d	t1, a5, 48
	st.d	t2, a5, 40
	st.d	t3, a5, 32
	st.d	t4, a5, 24
	st.d	t5, a5, 16
	st.d	t6, a5, 8
	st.d	t7, a5, 0
	bltu	a4, a3, .Lrblock64	/* repeat while a3 - src > 64 */

	/* at most 64 bytes left above src: copy 32 if more than 32 remain */
.Lrtail32:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Lrtail16
	addi.d	a3, a3, -32
	ld.d	t0, a3, 24
	ld.d	t1, a3, 16
	ld.d	t2, a3, 8
	ld.d	t3, a3, 0
	addi.d	a5, a5, -32
	st.d	t0, a5, 24
	st.d	t1, a5, 16
	st.d	t2, a5, 8
	st.d	t3, a5, 0

	/* copy 16 if more than 16 remain */
.Lrtail16:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Lrtail8
	addi.d	a3, a3, -16
	ld.d	t0, a3, 8
	ld.d	t1, a3, 0
	addi.d	a5, a5, -16
	st.d	t0, a5, 8
	st.d	t1, a5, 0

	/* copy 8 if more than 8 remain; the cursors are not needed afterwards */
.Lrtail8:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Lrhead_tail
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

	/*
	 * At most 8 bytes at the start are still uncopied, plus the up to
	 * 7 bytes cut off at the end by the alignment step: the saved head
	 * and tail words cover both.
	 */
.Lrhead_tail:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return, a0 still holds dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
142