xref: /linux/arch/loongarch/lib/memcpy.S (revision 0e685c3e7158d35626d6d76b9f859eae806d87fa)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5
6#include <asm/alternative-asm.h>
7#include <asm/asm.h>
8#include <asm/asmmacro.h>
9#include <asm/cpu.h>
10#include <asm/export.h>
11#include <asm/regdef.h>
12
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * Dispatch stub: the body is a single branch, so a0-a2 reach the chosen
 * implementation unmodified.  ALTERNATIVE is given "b __memcpy_generic"
 * as the default instruction and "b __memcpy_fast" as the replacement
 * keyed on CPU_FEATURE_UAL (CPUs with hardware unaligned access).
 */
SYM_FUNC_START(memcpy)
	ALTERNATIVE	"b __memcpy_generic", "b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
_ASM_NOKPROBE(memcpy)

EXPORT_SYMBOL(memcpy)
23
/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * Byte-at-a-time copy: only single-byte loads and stores are issued.
 *
 * a0: dst (advanced while copying, restored from a3 before returning)
 * a1: src (advanced while copying)
 * a2: n, the number of bytes still to copy (unsigned)
 *
 * Returns the original dst in a0.  Scratch registers: a3, t0.
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

	/* a2 is non-zero whenever the loop body is entered */
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	/* n is a size_t: loop until it reaches zero, not while "signed > 0" */
	bnez	a2, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
46
/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * Copies 64 bytes per iteration with eight doubleword loads followed by
 * eight doubleword stores, then copies whatever is left (fewer than 64
 * bytes) one byte at a time.  Neither pointer is aligned first; memcpy
 * branches here only through its CPU_FEATURE_UAL alternative.
 *
 * a0: dst (advanced while copying, restored from a3 before returning)
 * a1: src (advanced while copying)
 * a2: n, the number of bytes still to copy (unsigned)
 *
 * Returns the original dst in a0.  Scratch registers: a3, a4, t0-t7.
 */
SYM_FUNC_START(__memcpy_fast)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 3f			/* n == 0: nothing to copy */

	ori	a4, zero, 64		/* a4 = chunk size in bytes */
	/* n is a size_t, so every length comparison below is unsigned */
	bltu	a2, a4, 2f		/* fewer than 64 bytes: tail loop only */

	/* copy 64 bytes at a time; a2 >= 64 on every entry to this loop */
1:	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	st.d	t0, a0, 0
	st.d	t1, a0, 8
	st.d	t2, a0, 16
	st.d	t3, a0, 24
	st.d	t4, a0, 32
	st.d	t5, a0, 40
	st.d	t6, a0, 48
	st.d	t7, a0, 56

	addi.d	a0, a0, 64
	addi.d	a1, a1, 64
	addi.d	a2, a2, -64		/* cannot wrap: a2 was >= 64 */
	bgeu	a2, a4, 1b

	beqz	a2, 3f			/* n was a multiple of 64 */

	/* copy the remaining bytes; 1 <= a2 <= 63 on entry */
2:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bnez	a2, 2b

	/* return */
3:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
99