/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Picks the copy direction from the relative order of dst and src:
 * dst below src branches to __memcpy (defined elsewhere), dst above src
 * branches to __rmemcpy, which copies from the end of the buffers
 * towards the start. When dst == src nothing is copied and a0 (dst) is
 * returned untouched.
 */
SYM_FUNC_START(memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)

/*
 * void *__rmemcpy(void *dst, const void *src, size_t n)
 *
 * Reverse-copy dispatcher: a single branch that is patched through
 * ALTERNATIVE to target __rmemcpy_fast when CPU_FEATURE_UAL is set and
 * __rmemcpy_generic otherwise. Arguments are passed through unchanged.
 */
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Byte-at-a-time copy running from the last byte down to the first.
 * Returns the original dst in a0.
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

	/* point both cursors one past the end of their buffers */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

	/* copy the byte just below each cursor, then step all three down */
1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3			/* return the original dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Reverse copy using 8-byte loads and stores. Register roles after the
 * prologue:
 *   a2: one past the end of dst
 *   a3: source cursor, moving downwards
 *   a4: src + <chunk size>, the lower bound tested before each chunk
 *   a5: destination cursor, moving downwards
 *   a6: first 8 bytes of src, a7: last 8 bytes of src
 * a0 is never written, so the function returns dst.
 */
SYM_FUNC_START(__rmemcpy_fast)
	/* n < 9: hand over to __memcpy_small (defined elsewhere) */
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	/*
	 * Compute both end addresses and load the first and last 8 source
	 * bytes before any store is issued; they are written out at .Llt8.
	 */
	add.d	a3, a1, a2
	add.d	a2, a0, a2
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/*
	 * Round the destination end down to an 8-byte boundary and move
	 * the source cursor back by the same number of bytes.
	 */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64

	/*
	 * copy 64 bytes at a time; all eight loads of a chunk are issued
	 * before its stores
	 */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes: one optional 32-byte chunk ... */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

	/* ... one optional 16-byte chunk ... */
.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

	/* ... and one optional 8-byte chunk */
.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

	/*
	 * Write the first and last 8 bytes saved on entry: they fill the
	 * head left below the cursors and the tail above the rounded-down
	 * destination end.
	 */
.Llt8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)