xref: /linux/arch/loongarch/lib/memmove.S (revision 03c11eb3b16dc0058589751dfd91f254be2be613)
1a275a82dSHuacai Chen/* SPDX-License-Identifier: GPL-2.0 */
2a275a82dSHuacai Chen/*
3a275a82dSHuacai Chen * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4a275a82dSHuacai Chen */
5a275a82dSHuacai Chen
655b46ff9SMasahiro Yamada#include <linux/export.h>
7a275a82dSHuacai Chen#include <asm/alternative-asm.h>
8a275a82dSHuacai Chen#include <asm/asm.h>
9a275a82dSHuacai Chen#include <asm/asmmacro.h>
10a275a82dSHuacai Chen#include <asm/cpu.h>
11a275a82dSHuacai Chen#include <asm/regdef.h>
12a275a82dSHuacai Chen
13*5aa4ac64SQing Zhang.section .noinstr.text, "ax"
14*5aa4ac64SQing Zhang
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n (presumably; it is not examined here, only passed through)
 *
 * Chooses the copy direction from the relative order of dst and src:
 *   dst <  src: branch to __memcpy (defined outside this file; presumably
 *               the forward copy)
 *   dst >  src: branch to __rmemcpy, which copies from the end backwards
 *   dst == src: return immediately with a0 (dst) unchanged
 *
 * Both branches are plain jumps rather than calls: ra is not written here,
 * so the selected routine returns directly to memmove's caller.
 *
 * NOTE(review): blt is a signed comparison. That looks harmless as long as
 * two overlapping buffers never straddle the signed-address boundary; for
 * non-overlapping buffers either direction is correct - confirm.
 */
15a275a82dSHuacai ChenSYM_FUNC_START(memmove)
16*5aa4ac64SQing Zhang	blt	a0, a1, __memcpy	/* dst < src, memcpy */
17*5aa4ac64SQing Zhang	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
18a275a82dSHuacai Chen	jr	ra			/* dst == src, return */
19a275a82dSHuacai ChenSYM_FUNC_END(memmove)
/* __memmove is a second name for the same code as memmove */
20*5aa4ac64SQing ZhangSYM_FUNC_ALIAS(__memmove, memmove)
21a275a82dSHuacai Chen
/* Both names are exported */
22a275a82dSHuacai ChenEXPORT_SYMBOL(memmove)
23*5aa4ac64SQing ZhangEXPORT_SYMBOL(__memmove)
24a275a82dSHuacai Chen
/* Presumably keeps kprobes off both names - confirm against _ASM_NOKPROBE */
25*5aa4ac64SQing Zhang_ASM_NOKPROBE(memmove)
26*5aa4ac64SQing Zhang_ASM_NOKPROBE(__memmove)
27*5aa4ac64SQing Zhang
/*
 * __rmemcpy: entry point for the backward (high-to-low address) copy.
 *
 * Reached from memmove with a0 = dst, a1 = src and a2 untouched. The body
 * is a single unconditional branch: "b __rmemcpy_generic" by default, with
 * "b __rmemcpy_fast" supplied as the ALTERNATIVE for CPU_FEATURE_UAL.
 * Because it is a branch and not a call, the chosen implementation returns
 * directly to the original caller.
 *
 * NOTE(review): when and how the alternative replaces the default is decided
 * by the ALTERNATIVE macro, which is not visible here - presumably it is
 * patched in once at boot.
 */
28*5aa4ac64SQing ZhangSYM_FUNC_START(__rmemcpy)
29a275a82dSHuacai Chen	/*
30a275a82dSHuacai Chen	 * Some CPUs support hardware unaligned access
31a275a82dSHuacai Chen	 */
32a275a82dSHuacai Chen	ALTERNATIVE	"b __rmemcpy_generic", \
33a275a82dSHuacai Chen			"b __rmemcpy_fast", CPU_FEATURE_UAL
34*5aa4ac64SQing ZhangSYM_FUNC_END(__rmemcpy)
35*5aa4ac64SQing Zhang_ASM_NOKPROBE(__rmemcpy)
36a275a82dSHuacai Chen
37a275a82dSHuacai Chen/*
38a275a82dSHuacai Chen * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
39a275a82dSHuacai Chen *
40a275a82dSHuacai Chen * a0: dst
41a275a82dSHuacai Chen * a1: src
42a275a82dSHuacai Chen * a2: n
 *
 * Byte-at-a-time backward copy: starts at the last byte of each buffer and
 * walks down to the first. It uses only ld.b/st.b, so it places no
 * alignment requirement on dst or src.
 *
 * Returns the original dst in a0. Uses a3 and t0 as scratch and leaves
 * a1 and a2 modified.
43a275a82dSHuacai Chen */
44a275a82dSHuacai ChenSYM_FUNC_START(__rmemcpy_generic)
45a275a82dSHuacai Chen	move	a3, a0	/* keep the original dst for the return value */
46a275a82dSHuacai Chen	beqz	a2, 2f	/* n == 0: nothing to copy */
47a275a82dSHuacai Chen
48a275a82dSHuacai Chen	add.d	a0, a0, a2	/* a0 = dst + n, one past the last dst byte */
49a275a82dSHuacai Chen	add.d	a1, a1, a2	/* a1 = src + n, one past the last src byte */
50a275a82dSHuacai Chen
51a275a82dSHuacai Chen1:	ld.b	t0, a1, -1	/* load the byte just below the src cursor */
52a275a82dSHuacai Chen	st.b	t0, a0, -1	/* store it just below the dst cursor */
53a275a82dSHuacai Chen	addi.d	a0, a0, -1	/* step both cursors down by one byte */
54a275a82dSHuacai Chen	addi.d	a1, a1, -1
55a275a82dSHuacai Chen	addi.d	a2, a2, -1	/* one byte fewer left to copy */
56a275a82dSHuacai Chen	bgt	a2, zero, 1b	/* repeat while the remaining count is > 0 */
57a275a82dSHuacai Chen
58a275a82dSHuacai Chen2:	move	a0, a3	/* return the original dst */
59a275a82dSHuacai Chen	jr	ra
60a275a82dSHuacai ChenSYM_FUNC_END(__rmemcpy_generic)
61fcf77d01STiezhu Yang_ASM_NOKPROBE(__rmemcpy_generic)
62a275a82dSHuacai Chen
63a275a82dSHuacai Chen/*
64a275a82dSHuacai Chen * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
65a275a82dSHuacai Chen *
66a275a82dSHuacai Chen * a0: dst
67a275a82dSHuacai Chen * a1: src
68a275a82dSHuacai Chen * a2: n
 *
 * Backward copy in 8-byte units, selected by __rmemcpy when the CPU has
 * CPU_FEATURE_UAL. The ld.d loads may be unaligned.
 *
 * Outline:
 *   - n < 9 is handed off to __memcpy_small (defined outside this file).
 *   - The first and last 8 source bytes are loaded into a6/a7 before any
 *     store is issued.
 *   - The destination end is rounded down to an 8-byte boundary, and the
 *     bulk is copied from high to low addresses in 64/32/16/8-byte steps.
 *     Within each step every load is issued before the first store.
 *   - a6 and a7 are then stored at dst and dst + n - 8. They cover the head
 *     bytes the stepped copy did not reach and the tail bytes skipped by
 *     the alignment, and may overlap bytes already written.
 *
 * a0 is never written in this routine, so it still holds dst on return.
 * Scratch registers used: a2-a7 and t0-t7.
 *
 * NOTE(review): the n < 9 path relies on __memcpy_small being safe for
 * overlapping buffers - confirm where it is defined.
69a275a82dSHuacai Chen */
70a275a82dSHuacai ChenSYM_FUNC_START(__rmemcpy_fast)
718941e93cSWANG Rui	sltui	t0, a2, 9	/* t0 = (n < 9) */
728941e93cSWANG Rui	bnez	t0, __memcpy_small	/* short copies are handled elsewhere */
73a275a82dSHuacai Chen
748941e93cSWANG Rui	add.d	a3, a1, a2	/* a3 = src + n (source end) */
758941e93cSWANG Rui	add.d	a2, a0, a2	/* a2 = dst + n (destination end); n itself is no longer needed */
768941e93cSWANG Rui	ld.d	a6, a1, 0	/* a6 = first 8 source bytes, read before any store */
778941e93cSWANG Rui	ld.d	a7, a3, -8	/* a7 = last 8 source bytes, read before any store */
78a275a82dSHuacai Chen
798941e93cSWANG Rui	/*
	 * Round the destination end down to an 8-byte boundary (a5) and pull
	 * the source end (a3) back by the same t1 bytes, so that every st.d
	 * through a5 below is 8-byte aligned. The t1 tail bytes skipped here
	 * are covered by the final store of a7.
	 */
808941e93cSWANG Rui	andi	t1, a2, 7	/* t1 = bytes by which dst end exceeds an 8-byte boundary */
818941e93cSWANG Rui	sub.d	a3, a3, t1	/* a3 = source cursor (moves downwards) */
828941e93cSWANG Rui	sub.d	a5, a2, t1	/* a5 = aligned destination cursor (moves downwards) */
838941e93cSWANG Rui
848941e93cSWANG Rui	addi.d	a4, a1, 64	/* a4 = src + 64 */
858941e93cSWANG Rui	bgeu	a4, a3, .Llt64	/* skip the loop unless more than 64 bytes lie between src and a3 */
86a275a82dSHuacai Chen
87a275a82dSHuacai Chen	/* copy 64 bytes at a time */
888941e93cSWANG Rui.Lloop64:
	/* all eight loads are issued before the first store */
898941e93cSWANG Rui	ld.d	t0, a3, -8
908941e93cSWANG Rui	ld.d	t1, a3, -16
918941e93cSWANG Rui	ld.d	t2, a3, -24
928941e93cSWANG Rui	ld.d	t3, a3, -32
938941e93cSWANG Rui	ld.d	t4, a3, -40
948941e93cSWANG Rui	ld.d	t5, a3, -48
958941e93cSWANG Rui	ld.d	t6, a3, -56
968941e93cSWANG Rui	ld.d	t7, a3, -64
978941e93cSWANG Rui	addi.d	a3, a3, -64	/* move the source cursor down 64 bytes */
988941e93cSWANG Rui	st.d	t0, a5, -8
998941e93cSWANG Rui	st.d	t1, a5, -16
1008941e93cSWANG Rui	st.d	t2, a5, -24
1018941e93cSWANG Rui	st.d	t3, a5, -32
1028941e93cSWANG Rui	st.d	t4, a5, -40
1038941e93cSWANG Rui	st.d	t5, a5, -48
1048941e93cSWANG Rui	st.d	t6, a5, -56
1058941e93cSWANG Rui	st.d	t7, a5, -64
1068941e93cSWANG Rui	addi.d	a5, a5, -64	/* move the destination cursor down 64 bytes */
1078941e93cSWANG Rui	bltu	a4, a3, .Lloop64	/* loop while a3 is still above src + 64 */
108a275a82dSHuacai Chen
109a275a82dSHuacai Chen	/* copy the remaining bytes */
1108941e93cSWANG Rui.Llt64:
1118941e93cSWANG Rui	addi.d	a4, a1, 32	/* a4 = src + 32 */
1128941e93cSWANG Rui	bgeu	a4, a3, .Llt32	/* 32 bytes or fewer left above src: skip this step */
1138941e93cSWANG Rui	ld.d	t0, a3, -8
1148941e93cSWANG Rui	ld.d	t1, a3, -16
1158941e93cSWANG Rui	ld.d	t2, a3, -24
1168941e93cSWANG Rui	ld.d	t3, a3, -32
1178941e93cSWANG Rui	addi.d	a3, a3, -32
1188941e93cSWANG Rui	st.d	t0, a5, -8
1198941e93cSWANG Rui	st.d	t1, a5, -16
1208941e93cSWANG Rui	st.d	t2, a5, -24
1218941e93cSWANG Rui	st.d	t3, a5, -32
1228941e93cSWANG Rui	addi.d	a5, a5, -32
1238941e93cSWANG Rui
1248941e93cSWANG Rui.Llt32:
1258941e93cSWANG Rui	addi.d	a4, a1, 16	/* a4 = src + 16 */
1268941e93cSWANG Rui	bgeu	a4, a3, .Llt16	/* 16 bytes or fewer left above src: skip this step */
1278941e93cSWANG Rui	ld.d	t0, a3, -8
1288941e93cSWANG Rui	ld.d	t1, a3, -16
1298941e93cSWANG Rui	addi.d	a3, a3, -16
1308941e93cSWANG Rui	st.d	t0, a5, -8
1318941e93cSWANG Rui	st.d	t1, a5, -16
1328941e93cSWANG Rui	addi.d	a5, a5, -16
1338941e93cSWANG Rui
1348941e93cSWANG Rui.Llt16:
1358941e93cSWANG Rui	addi.d	a4, a1, 8	/* a4 = src + 8 */
1368941e93cSWANG Rui	bgeu	a4, a3, .Llt8	/* 8 bytes or fewer left above src: the a6 store covers them */
	/* last stepped copy; the cursors are not updated because nothing below reads them */
1378941e93cSWANG Rui	ld.d	t0, a3, -8
1388941e93cSWANG Rui	st.d	t0, a5, -8
1398941e93cSWANG Rui
1408941e93cSWANG Rui.Llt8:
1418941e93cSWANG Rui	st.d	a6, a0, 0	/* head: the saved first 8 source bytes go to dst */
1428941e93cSWANG Rui	st.d	a7, a2, -8	/* tail: the saved last 8 source bytes go to dst + n - 8 */
143a275a82dSHuacai Chen
144a275a82dSHuacai Chen	/* return */
145a275a82dSHuacai Chen	jr	ra
146a275a82dSHuacai ChenSYM_FUNC_END(__rmemcpy_fast)
147fcf77d01STiezhu Yang_ASM_NOKPROBE(__rmemcpy_fast)
148