xref: /linux/arch/loongarch/lib/memset.S (revision 4b132aacb0768ac1e652cf517097ea6f237214b9)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

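/*
 * fill_to_64: replicate the low byte of \r0 into all eight bytes of the
 * register by doubling the filled width step by step (8 -> 16 -> 32 -> 64).
 */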
.macro fill_to_64 r0
	bstrins.d \r0, \r0, 15, 8
	bstrins.d \r0, \r0, 31, 16
	bstrins.d \r0, \r0, 63, 32
.endm

.section .noinstr.text, "ax"

SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
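 * Byte-at-a-time fallback, used when the CPU does not support hardware
 * unaligned access (see the ALTERNATIVE in memset above).
 *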
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0
	beqz	a2, 2f

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
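 * Relies on hardware unaligned access: one unaligned head store, aligned
 * runs of 64/32/16/8-byte stores, and an overlapping unaligned tail store.
 * Fills shorter than 9 bytes are dispatched through a jump table.
 *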
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

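	/* fills shorter than 9 bytes go through the jump table at .Lsmall */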
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

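	/* a2 = s + n (end of the region); store 8 head bytes, possibly unaligned */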
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address */
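	/* a3 = (s + 8) & ~7: first 8-byte-aligned address after s; bytes before it are covered by the head store */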
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

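	/* unconditionally store the last 8 bytes; overlap with earlier stores is harmless */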
.Llt8:
	st.d	a1, a2, -8

	/* return */
	jr	ra

	.align	4
.Lsmall:
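	/*
	 * Computed jump: t0 = address of the first 16-byte-aligned stub
	 * below, plus n * 16, so stub number n performs an n-byte fill.
	 */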
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	.align	4
0:	jr	ra

	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	.align	4
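	/* 7 bytes: two overlapping word stores cover offsets 0..6 */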
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

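/*
 * Exclude __memset_fast from objtool's stack frame validation
 * (it contains a computed jump).
 */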
STACK_FRAME_NON_STANDARD __memset_fast
