xref: /linux/arch/loongarch/lib/memmove.S (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4 */
5
6#include <linux/export.h>
7#include <asm/alternative-asm.h>
8#include <asm/asm.h>
9#include <asm/asmmacro.h>
10#include <asm/cpu.h>
11#include <asm/regdef.h>
12
13.section .noinstr.text, "ax"
14
SYM_FUNC_START(memmove)
	/*
	 * a0: dst, a1: src, a2: n
	 *
	 * Pick the copy routine from the relative position of the two
	 * buffers. a0 is not modified here, so the early return hands
	 * dst back to the caller unchanged.
	 */
	beq	a0, a1, 1f		/* dst == src, nothing to move */
	blt	a0, a1, __memcpy	/* dst < src, hand off to memcpy */
	b	__rmemcpy		/* src < dst, hand off to rmemcpy */
1:	jr	ra
SYM_FUNC_END(memmove)
20SYM_FUNC_ALIAS(__memmove, memmove)
21
22EXPORT_SYMBOL(memmove)
23EXPORT_SYMBOL(__memmove)
24
25_ASM_NOKPROBE(memmove)
26_ASM_NOKPROBE(__memmove)
27
SYM_FUNC_START(__rmemcpy)
	/*
	 * Dispatch stub: the default instruction branches to
	 * __rmemcpy_generic; on CPUs flagged with CPU_FEATURE_UAL
	 * (hardware unaligned access) the alternative branch to
	 * __rmemcpy_fast is used instead.
	 */
	ALTERNATIVE	"b __rmemcpy_generic", "b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
35_ASM_NOKPROBE(__rmemcpy)
36
37/*
38 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
39 *
40 * a0: dst
41 * a1: src
42 * a2: n
43 */
SYM_FUNC_START(__rmemcpy_generic)
	/*
	 * Byte-at-a-time copy that walks from the last byte down to the
	 * first. Only single-byte loads and stores are issued.
	 *
	 * a3 keeps the original dst for the return value,
	 * t0 carries the byte in flight.
	 */
	move	a3, a0
	beqz	a2, .Lgeneric_out	/* n == 0: nothing to copy */

	/* move both cursors one past the end of their buffers */
	add.d	a1, a1, a2
	add.d	a0, a0, a2

.Lgeneric_loop:
	/* step both cursors back, then move one byte */
	addi.d	a1, a1, -1
	addi.d	a0, a0, -1
	ld.b	t0, a1, 0
	addi.d	a2, a2, -1
	st.b	t0, a0, 0
	bgt	a2, zero, .Lgeneric_loop

.Lgeneric_out:
	move	a0, a3			/* return the original dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
61_ASM_NOKPROBE(__rmemcpy_generic)
62
63/*
64 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
65 *
66 * a0: dst
67 * a1: src
68 * a2: n
69 */
SYM_FUNC_START(__rmemcpy_fast)
	/*
	 * Backward (high address to low address) copy using 8-byte
	 * loads and stores.
	 *
	 * Register roles once the setup below has run:
	 *   a0: dst, never written, so it is still dst on return
	 *   a1: src
	 *   a2: dst + n
	 *   a3: source cursor, moves downwards
	 *   a5: destination cursor, 8-byte aligned, moves downwards
	 *   a4: src + <chunk size>, bound used by the size checks
	 *   a6: first 8 source bytes, a7: last 8 source bytes
	 *   t0-t7: data in flight
	 */

	/* n < 9: branch to __memcpy_small (not defined in this file) */
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	/*
	 * Read the first and the last 8 source bytes before anything is
	 * stored; they are written out at the very end and cover whatever
	 * the aligned chunk copies below leave untouched.
	 */
	ld.d	a6, a1, 0
	add.d	a3, a1, a2		/* a3 = src + n */
	add.d	a2, a0, a2		/* a2 = dst + n */
	ld.d	a7, a3, -8

	/*
	 * Round the destination end down to an 8-byte boundary and pull
	 * the source cursor back by the same number of bytes, so both
	 * cursors keep their distance. The (dst + n) & 7 bytes skipped
	 * here are covered by the final store of a7.
	 */
	andi	t1, a2, 7
	sub.d	a5, a2, t1
	sub.d	a3, a3, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Lrem_le64

	/* more than 64 bytes above src: move 64 bytes per iteration */
.Lchunk64:
	addi.d	a3, a3, -64
	ld.d	t0, a3, 56
	ld.d	t1, a3, 48
	ld.d	t2, a3, 40
	ld.d	t3, a3, 32
	ld.d	t4, a3, 24
	ld.d	t5, a3, 16
	ld.d	t6, a3, 8
	ld.d	t7, a3, 0
	addi.d	a5, a5, -64
	st.d	t0, a5, 56
	st.d	t1, a5, 48
	st.d	t2, a5, 40
	st.d	t3, a5, 32
	st.d	t4, a5, 24
	st.d	t5, a5, 16
	st.d	t6, a5, 8
	st.d	t7, a5, 0
	bltu	a4, a3, .Lchunk64

	/* at most 64 bytes left above src: one optional 32-byte step */
.Lrem_le64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Lrem_le32
	addi.d	a3, a3, -32
	ld.d	t0, a3, 24
	ld.d	t1, a3, 16
	ld.d	t2, a3, 8
	ld.d	t3, a3, 0
	addi.d	a5, a5, -32
	st.d	t0, a5, 24
	st.d	t1, a5, 16
	st.d	t2, a5, 8
	st.d	t3, a5, 0

	/* at most 32 bytes left: one optional 16-byte step */
.Lrem_le32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Lrem_le16
	addi.d	a3, a3, -16
	ld.d	t0, a3, 8
	ld.d	t1, a3, 0
	addi.d	a5, a5, -16
	st.d	t0, a5, 8
	st.d	t1, a5, 0

	/* at most 16 bytes left: one optional 8-byte step */
.Lrem_le16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Lrem_le8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

	/*
	 * At most 8 bytes remain at the low end. Write the saved head
	 * and tail words to dst and dst + n - 8.
	 */
.Lrem_le8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* a0 still holds dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
147_ASM_NOKPROBE(__rmemcpy_fast)
148