/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

/*
 * fill_to_64 r0
 *
 * Replicate the low byte of \r0 into all eight bytes of \r0.
 * Each step copies the already-filled low part into the field above it:
 * 8 -> 16 bits, 16 -> 32 bits, 32 -> 64 bits.
 */
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm

.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry point: a single branch to one of the two implementations below.
 * The default is __memset_generic; the ALTERNATIVE entry substitutes a
 * branch to __memset_fast for CPU_FEATURE_UAL.
 * __memset is an alias of the same symbol.
 */
SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill; uses only byte stores.
 *
 * a0: s (advanced as the write cursor, restored before return)
 * a1: c (only the low byte is stored)
 * a2: n (counted down to zero)
 * a3: saved copy of s, used as the return value
 *
 * Returns s in a0.
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* keep s for the return value */
	beqz	a2, 2f			/* n == 0: nothing to store */

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	/*
	 * NOTE(review): bgt is a signed compare, so a count with the top
	 * bit set leaves the loop after a single byte.
	 */
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Fill using 8-byte stores that may be unaligned and may overlap.
 *
 * a0: s (never written, so it is still s on return)
 * a1: c (expanded to eight copies of its low byte)
 * a2: n on entry; becomes the end address s + n on the n >= 9 path,
 *     or the jump-table byte offset n * 16 on the n <= 8 path
 * a3: 8-byte-aligned write cursor
 * a4: cursor limit for the current chunk size
 * t0: scratch
 *
 * Returns s in a0.
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	/* n <= 8 is handled by the jump table at .Lsmall */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	/*
	 * n >= 9 from here on: a2 = end address.  The head store covers
	 * the first 8 bytes regardless of the alignment of s.
	 */
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0	/* clear the low three bits */

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/*
	 * set the remaining bytes
	 *
	 * Each stage below stores one chunk of 32, 16 or 8 bytes only while
	 * the cursor is still more than that many bytes short of the end.
	 * Whatever is left is covered by the final store at end - 8, which
	 * may overlap bytes that were already written.
	 */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
	st.d	a1, a2, -8

	/* return */
	jr	ra

	/*
	 * n in 0..8: computed jump into a table with one entry per n.
	 * .Lsmall is 16-byte aligned and holds exactly four instructions,
	 * so "pcaddi t0, 4" (PC + 16) yields the address of entry 0.
	 * Every entry starts on its own 16-byte boundary (.align 4), which
	 * matches the n << 4 scaling of the index.  Do not add or remove
	 * instructions here without keeping that layout intact.
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	/* n == 0 */
	.align	4
0:	jr	ra

	/* n == 1 */
	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	/* n == 3 */
	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	/* n == 5 */
	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6 */
	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/* n == 7: two 4-byte stores overlapping by one byte */
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

/* presumably needed because of the computed jump at .Lsmall - confirm */
STACK_FRAME_NON_STANDARD __memset_fast