xref: /linux/arch/loongarch/lib/memset.S (revision be239684b18e1cdcafcf8c7face4a2f562c745ad)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5
6#include <linux/export.h>
7#include <asm/alternative-asm.h>
8#include <asm/asm.h>
9#include <asm/asmmacro.h>
10#include <asm/cpu.h>
11#include <asm/regdef.h>
12
/*
 * fill_to_64 reg
 *
 * Replicate the low byte of \reg into all eight bytes of \reg.
 *
 * Each bstrins.d copies the low (msb - lsb + 1) bits of its source into bits
 * [msb:lsb] of its destination, so the populated width doubles per step:
 * 8 -> 16 -> 32 -> 64 bits. Anything above bit 7 on entry is overwritten.
 * No register other than \reg is touched.
 */
.macro fill_to_64 reg
	bstrins.d	\reg, \reg, 15, 8	/* byte 0      -> byte 1      */
	bstrins.d	\reg, \reg, 31, 16	/* bytes 0..1  -> bytes 2..3  */
	bstrins.d	\reg, \reg, 63, 32	/* bytes 0..3  -> bytes 4..7  */
.endm
18
.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry stub consisting of a single branch selected through ALTERNATIVE:
 * the default instruction goes to __memset_generic, the replacement used
 * for CPU_FEATURE_UAL (hardware unaligned access) goes to __memset_fast.
 *
 * The branch is a plain "b" (no link), so a0-a2 and ra reach the chosen
 * implementation untouched and it returns straight to memset's caller.
 * __memset is an alias for the same code.
 */
SYM_FUNC_START(memset)
	ALTERNATIVE	"b __memset_generic", "b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

/* Export both names and keep both off-limits to kprobes. */
EXPORT_SYMBOL(memset)
_ASM_NOKPROBE(memset)

EXPORT_SYMBOL(__memset)
_ASM_NOKPROBE(__memset)
35
/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill. Only st.b is issued, so no alignment is required
 * of s.
 *
 * In:       a0 = s
 *           a1 = c (only the low byte is stored)
 *           a2 = n
 * Out:      a0 = s
 * Clobbers: a2, a3
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* remember s for the return value */
	beqz	a2, 2f			/* n == 0: nothing to store */

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	/*
	 * n is a size_t, so loop on "not yet zero" instead of a signed
	 * "greater than zero": a count with bit 63 set must not be cut
	 * short after the first byte.
	 */
	bnez	a2, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)
56
/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * In:       a0 = s
 *           a1 = c
 *           a2 = n
 * Out:      a0 = s (a0 is never written)
 * Clobbers: a1, a2, a3, a4, t0
 *
 * n <= 8 is dispatched through a table of 16-byte stubs, one per length.
 * Larger fills store one doubleword at s, then doublewords from the next
 * 8-byte boundary upwards, and finish with one doubleword that ends exactly
 * at s + n. The first and last stores may be unaligned and may overlap
 * their neighbours; every overlapped byte receives the same value.
 *
 * memset only branches here when CPU_FEATURE_UAL is set, which is what
 * makes the unaligned stores acceptable.
 */
SYM_FUNC_START(__memset_fast)
	fill_to_64 a1			/* a1 = fill byte in all 8 byte lanes */

	sltui	t0, a2, 9		/* t0 = (n < 9), unsigned */
	bnez	t0, .Lsmall

	st.d	a1, a0, 0		/* head: bytes [s, s + 8) */
	add.d	a2, a0, a2		/* a2 = end = s + n */

	/* a3 = cursor = first 8-byte boundary strictly above s */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	/* 64 bytes per iteration while cursor < end - 64 */
	addi.d	a4, a2, -64
	bgeu	a3, a4, .Ltail_le64

.Lblock64:
	.irp	off, 0, 8, 16, 24, 32, 40, 48, 56
	st.d	a1, a3, \off
	.endr
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lblock64

	/* here: end - cursor <= 64 */
.Ltail_le64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Ltail_le32
	.irp	off, 0, 8, 16, 24
	st.d	a1, a3, \off
	.endr
	addi.d	a3, a3, 32

	/* here: end - cursor <= 32 */
.Ltail_le32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Ltail_le16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

	/* here: end - cursor <= 16 */
.Ltail_le16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Ltail_le8
	st.d	a1, a3, 0

	/* here: end - cursor <= 8; one store ending at end covers the rest */
.Ltail_le8:
	st.d	a1, a2, -8
	jr	ra

	/*
	 * n in [0, 8]: computed jump into the stub table below.
	 *
	 * pcaddi yields its own address + (4 << 2) = .Lsmall + 16, which is
	 * stub 0 because .Lsmall is 16-byte aligned and this sequence is
	 * exactly four instructions long. Stub k lives at +16 * k, so the
	 * pcaddi must stay first, every stub must fit in four instructions,
	 * and every stub must keep its own ".align 4" (2^4 bytes).
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4		/* a2 = n * 16 = stub offset */
	add.d	t0, t0, a2
	jr	t0

	.align	4
.Lsmall_0:
	jr	ra

	.align	4
.Lsmall_1:
	st.b	a1, a0, 0
	jr	ra

	.align	4
.Lsmall_2:
	st.h	a1, a0, 0
	jr	ra

	.align	4
.Lsmall_3:
	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
.Lsmall_4:
	st.w	a1, a0, 0
	jr	ra

	.align	4
.Lsmall_5:
	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
.Lsmall_6:
	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	.align	4
.Lsmall_7:
	st.w	a1, a0, 0		/* bytes 0..3 */
	st.w	a1, a0, 3		/* bytes 3..6, overlapping byte 3 */
	jr	ra

	.align	4
.Lsmall_8:
	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
169