/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access: on those the
	 * ALTERNATIVE mechanism patches this branch to __memcpy_fast at
	 * boot; CPUs without CPU_FEATURE_UAL keep the byte-wise copy.
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
_ASM_NOKPROBE(memcpy)

EXPORT_SYMBOL(memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* preserve dst for the return value */
	beqz	a2, 2f

1:	ld.b	t0, a1, 0		/* byte-at-a-time copy loop */
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

/*
 * void *__memcpy_small(void *dst, const void *src, size_t n)
 *
 * Jump table for 0 <= n <= 8: every case below is padded to 32 bytes
 * (.align 5), so "pcaddi t0, 8" (PC + 8 instructions, i.e. the address
 * of case 0) plus n << 5 lands exactly on the handler for n bytes.
 *
 * a0: dst
 * a1: src
 * a2: n
 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8
	slli.d	a2, a2, 5
	add.d	t0, t0, a2
	jr	t0

	.align	5
0:	jr	ra

	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	/* 7 bytes: two overlapping 4-byte copies (offsets 0 and 3) */
	.align	5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small	/* n < 9: use the jump table */

	/*
	 * Load the first and last 8 bytes now; storing them at .Llt8
	 * covers both the head bytes skipped while aligning the
	 * destination and any tail shorter than 8 bytes.
	 */
	add.d	a3, a1, a2		/* a3 = src + n */
	add.d	a2, a0, a2		/* a2 = dst + n */
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align up destination address: advance dst/src by 8 - (dst & 7) */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a5, a0, t0

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	st.d	a6, a0, 0		/* first 8 bytes */
	st.d	a7, a2, -8		/* last 8 bytes */

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
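
/*
 * Illustrative sketch, not part of the build: a plain-C rendering of the
 * head/tail trick that __memcpy_fast relies on. It assumes the kernel's
 * get_unaligned()/put_unaligned() helpers (<asm/unaligned.h>) and that
 * unaligned access is legal (CPU_FEATURE_UAL); the function name is made
 * up for illustration. The first and last 8 bytes are loaded up front and
 * stored unconditionally at the end, so the aligned bulk loop never has
 * to special-case a ragged head or tail (valid for n >= 9; smaller sizes
 * take the __memcpy_small jump table):
 *
 *	void *fast_copy_sketch(void *dst, const void *src, size_t n)
 *	{
 *		const u8 *s = src, *end = s + n;
 *		u8 *d = dst;
 *		u64 head = get_unaligned((const u64 *)s);
 *		u64 tail = get_unaligned((const u64 *)(end - 8));
 *		size_t skip = 8 - ((unsigned long)d & 7);	// 1..8
 *
 *		for (s += skip, d += skip; s < end - 8; s += 8, d += 8)
 *			*(u64 *)d = get_unaligned((const u64 *)s);	// d is 8-byte aligned
 *		put_unaligned(head, (u64 *)dst);		// covers the skipped head
 *		put_unaligned(tail, (u64 *)((u8 *)dst + n - 8));	// covers the ragged tail
 *		return dst;
 *	}
 *
 * The assembly additionally unrolls this loop into 64/32/16/8-byte steps,
 * but the coverage argument is identical.
 */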