xref: /linux/arch/loongarch/lib/memset.S (revision 79790b6818e96c58fe2bffee1b418c16e64e7b80)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5a275a82dSHuacai Chen
655b46ff9SMasahiro Yamada#include <linux/export.h>
7a275a82dSHuacai Chen#include <asm/alternative-asm.h>
8a275a82dSHuacai Chen#include <asm/asm.h>
9a275a82dSHuacai Chen#include <asm/asmmacro.h>
10a275a82dSHuacai Chen#include <asm/cpu.h>
11a275a82dSHuacai Chen#include <asm/regdef.h>
12*cb8a2ef0STiezhu Yang#include <asm/unwind_hints.h>
13a275a82dSHuacai Chen
/*
 * fill_to_64 r0
 *
 * Replicate the low byte of \r0 into all eight bytes of \r0, in place.
 *
 * Each bstrins.d copies the already-filled low part of the register into
 * the bit field directly above it, so the filled width doubles per step:
 * 8 -> 16 -> 32 -> 64 bits. Whatever was in bits 63:8 on entry is
 * overwritten.
 */
.macro fill_to_64 r0
	bstrins.d \r0, \r0, 15, 8	/* bits 15:8  <- bits 7:0  */
	bstrins.d \r0, \r0, 31, 16	/* bits 31:16 <- bits 15:0 */
	bstrins.d \r0, \r0, 63, 32	/* bits 63:32 <- bits 31:0 */
.endm
19a275a82dSHuacai Chen
/* Everything below is emitted into the .noinstr.text section. */
.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry stub consisting of a single branch. By default it branches to
 * __memset_generic; the ALTERNATIVE replaces that with a branch to
 * __memset_fast when CPU_FEATURE_UAL is set.
 *
 * The branch is a plain "b" (no link), so ra is left untouched and the
 * selected implementation returns straight to memset's caller.
 *
 * __memset is an alias for the same entry point; both names are exported
 * and both are marked _ASM_NOKPROBE.
 */
SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)
36a275a82dSHuacai Chen
/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill. It uses only st.b, so it makes no alignment
 * demands on s.
 *
 * a0: s	(returned unchanged)
 * a1: c	(only the low byte is stored)
 * a2: n
 *
 * Clobbers: a2, a3
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* keep s for the return value */
	beqz	a2, 2f			/* n == 0: store nothing */

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	/*
	 * n is an unsigned size_t, so loop until the count reaches zero
	 * rather than using a signed "greater than zero" test, which would
	 * stop after a single byte for any n with bit 63 set.
	 */
	bnez	a2, 1b

2:	move	a0, a3			/* return the original s */
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)
57a275a82dSHuacai Chen
/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Fill using stores of up to 8 bytes that may be unaligned; memset only
 * branches here when CPU_FEATURE_UAL is set.
 *
 * a0: s	(never modified, so it is also the return value)
 * a1: c	(low byte is replicated to 64 bits on entry)
 * a2: n	(n >= 9: becomes the end address s + n;
 *		 n <= 8: becomes the jump-table offset n * 16)
 *
 * Clobbers: a1, a2, a3, a4, t0
 *
 * n >= 9:
 *   1. One unaligned st.d at s covers the bytes up to the next 8-byte
 *      boundary.
 *   2. a3 walks upwards from that boundary with aligned st.d stores, in
 *      chunks of 64, then at most one chunk each of 32, 16 and 8 bytes.
 *      A chunk is stored only while a3 is strictly below (end - chunk
 *      size), so no store goes past the end.
 *   3. One unaligned st.d at end - 8 covers whatever is left (at most
 *      8 bytes); it may overlap bytes that were already written.
 *
 * n <= 8:
 *   Computed jump into a table holding one 16-byte slot per value of n.
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	sltui	t0, a2, 9		/* t0 = (n < 9) */
	bnez	t0, .Lsmall

	add.d	a2, a0, a2		/* a2 = end = s + n */
	st.d	a1, a0, 0		/* head: first 8 bytes, maybe unaligned */

	/* align up address */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0	/* a3 = (s + 8) & ~7, so s < a3 <= s + 8 */

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64		/* not more than 64 bytes left */

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
.Llt64:
	/* here end - a3 <= 64 */
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	/* here end - a3 <= 32 */
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	/* here end - a3 <= 16 */
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
	/* tail: the last 8 bytes of the buffer, maybe unaligned */
	st.d	a1, a2, -8

	/* return */
	jr	ra

	/*
	 * Small sizes (n <= 8): jump to table slot n.
	 *
	 * pcaddi yields the address four instructions (16 bytes) past
	 * itself. .Lsmall is 16-byte aligned and this dispatch sequence is
	 * exactly four instructions long, so that address is slot 0. Every
	 * slot starts on a 16-byte boundary (.align 4), which is why n is
	 * scaled by 16. Do not add or remove instructions in the dispatch
	 * sequence or let a slot grow beyond four instructions.
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	/* n == 0 */
	.align	4
0:	jr	ra

	/* n == 1 */
	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	/* n == 3 */
	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	/* n == 5 */
	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6 */
	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/* n == 7: two word stores overlapping at byte 3 cover bytes 0..6 */
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

/*
 * NOTE(review): presumably needed because the computed "jr t0" dispatch in
 * .Lsmall is not something objtool can follow - confirm against the commit
 * that introduced this annotation.
 */
STACK_FRAME_NON_STANDARD __memset_fast
172