/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 *
 * LoongArch memcpy: a runtime-patched dispatcher plus three bodies.
 *  - memcpy            dispatch stub, patched by ALTERNATIVE at boot
 *  - __memcpy_generic  byte-at-a-time loop, safe without unaligned access
 *  - __memcpy_small    jump table for n in [0, 8] (UAL path only)
 *  - __memcpy_fast     8/64-byte bulk copy for CPUs with hardware
 *                      unaligned access (CPU_FEATURE_UAL)
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

.section .noinstr.text, "ax"

SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access; on those, the
	 * ALTERNATIVE mechanism patches this branch to the fast body.
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Returns dst (saved in a3 across the loop, restored into a0).
 * Copies one byte per iteration so it never issues an unaligned access.
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* preserve original dst for return */
	beqz	a2, 2f			/* n == 0: nothing to copy */

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3			/* return original dst */
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

	/*
	 * __memcpy_small: n is known to be in [0, 8].  Computed goto into
	 * a table of 32-byte (1 << 5) slots, one slot per size; hence the
	 * ".align 5" on every case and "slli.d a2, a2, 5" for the index.
	 * Falls outside normal call-frame rules, see the
	 * STACK_FRAME_NON_STANDARD annotation at the bottom of the file.
	 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8			/* t0 = address of slot 0 below */
	slli.d	a2, a2, 5		/* byte offset = n * 32 */
	add.d	t0, t0, a2
	jr	t0

	.align	5
0:	jr	ra			/* n == 0 */

	.align	5
1:	ld.b	t0, a1, 0		/* n == 1 */
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0		/* n == 2 */
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0		/* n == 3: half + byte */
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0		/* n == 4 */
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0		/* n == 5: word + byte */
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0		/* n == 6: word + half */
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
7:	ld.w	t0, a1, 0		/* n == 7: two overlapping words */
	ld.w	t1, a1, 3		/* bytes 3..6 overlap bytes 0..3 */
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0		/* n == 8 */
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Requires hardware unaligned access (CPU_FEATURE_UAL).  Strategy:
 * load the first and last 8 bytes up front (a6/a7), copy the middle
 * with aligned-destination 64/32/16/8-byte chunks, then store a6/a7
 * last — the overlapping head/tail stores absorb any misalignment.
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small	/* n < 9: take the jump table */

	add.d	a3, a1, a2		/* a3 = src end */
	add.d	a2, a0, a2		/* a2 = dst end */
	ld.d	a6, a1, 0		/* first 8 bytes, stored at .Llt8 */
	ld.d	a7, a3, -8		/* last 8 bytes, stored at .Llt8 */

	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8		/* t0 = 8 - (dst & 7), in [1, 8] */
	add.d	a1, a1, t0
	add.d	a5, a0, t0		/* a5 = first 8-byte-aligned dst */

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	st.d	a6, a0, 0		/* head: may overlap the aligned copy */
	st.d	a7, a2, -8		/* tail: may overlap the aligned copy */

	/* return */
	jr	ra			/* a0 still holds original dst */
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)

STACK_FRAME_NON_STANDARD __memcpy_small