/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * a0 - void* dst
 * a1 - const void* src
 * a2 - size_t len
 *
 * Returns the original dst in a0.
 *
 * Strategy:
 *   1. Copy 0-7 head bytes (computed jump into the lb/sb run ending at
 *      .Lduff_start) so that dst becomes 8-byte aligned.
 *   2. If src then has the same alignment mod 8 as dst, bulk-copy with
 *      ld/sd at .Lmemcpy8 (64 bytes per iteration when len >= 128).
 *      Otherwise, build each aligned destination dword by merging two
 *      consecutive aligned source dwords with srl/sll (.Llmain).
 *   3. Copy the 0-7 tail bytes with a second computed jump (.Lduff_end).
 *
 * NOTE(review): both computed jumps assume each lb/sb pair occupies
 * exactly 8 bytes of code, i.e. neither instruction is emitted in a
 * compressed form. Base RVC has no compressed lb/sb, but this would
 * need revisiting if Zcb encodings were ever enabled here — confirm
 * against the build flags.
 */
ENTRY(memcpy)
	beqz a2, .Lreturn

	/* diff = (dstv - srcv) & 0b111 */
	sub t0, a0, a1
	andi t0, t0, 0b111

	sltiu t1, a2, 8

	/* we never change a0, because memcpy returns the original dst */
	mv a3, a0

	/* len < 8: head-align would underflow len; do a byte tail copy only */
	bnez t1, .Lend

	/* t1 = (-dst) & 0b111  (bytes needed to 8-align dst; 0..7) */
	neg t1, a0
	andi t1, t1, 0b111

	sub a2, a2, t1

	/*
	 * Duff's device: jump t1 pairs backwards from .Lduff_start.
	 * Each lb/sb pair is 8 bytes of code (hence the slli by 3), and the
	 * pairs are laid out for descending offsets, so entering t1 pairs
	 * early copies exactly bytes [0, t1) of src to dst.
	 */
	la t2, .Lduff_start
	slli t3, t1, 3
	sub t2, t2, t3
	jr t2
	lb t3, 6(a1)
	sb t3, 6(a3)
	lb t3, 5(a1)
	sb t3, 5(a3)
	lb t3, 4(a1)
	sb t3, 4(a3)
	lb t3, 3(a1)
	sb t3, 3(a3)
	lb t3, 2(a1)
	sb t3, 2(a3)
	lb t3, 1(a1)
	sb t3, 1(a3)
	lb t3, 0(a1)
	sb t3, 0(a3)
.Lduff_start:

	add a1, a1, t1
	add a3, a3, t1

	beqz a2, .Lreturn

	/* dst is now 8-aligned; if src is too (diff == 0), take the fast path */
	beqz t0, .Lmemcpy8

	/*
	 * Misaligned case: dst is 8-aligned, src is not.
	 *
	 * a4 - size_t right_shift
	 * a5 - size_t left_shift
	 * a6 - size_t whole (number of dword stores)
	 */

	/* right_shift = (src & 0b111) * 8; nonzero here, since src is unaligned */
	andi a4, a1, 0b111
	slli a4, a4, 3

	/*
	 * left_shift = 64 - right_shift. neg suffices because RV64 sll/srl
	 * use only the low 6 bits of the shift amount, and (-x) mod 64
	 * equals 64 - x for x in 1..63.
	 */
	neg a5, a4

	/* whole = len / 8 */
	srli a6, a2, 3

	/* len = len % 8 */
	andi a2, a2, 0b111

	/* t0 - uint64_t* ptr */

	/*
	 * ptr = src & ~0b111. The aligned load below may read a few bytes
	 * before src, but stays within the aligned dword containing src's
	 * first byte, so it cannot cross into an unmapped page.
	 */
	andi t0, a1, ~0b111

	/* src += whole * 8  (advance src past what the dword loop consumes) */
	slli t1, a6, 3
	add a1, a1, t1

	/*
	 * t1 - uint64_t low
	 * t2 - uint64_t high
	 */

	/* low = *ptr++ */
	ld t1, (t0)
	addi t0, t0, 8

	/* low >>= right_shift  (discard bytes before src in the first dword) */
	srl t1, t1, a4

	beqz a6, .Llmain_skip
.Llmain:
	/* high = *ptr++ */
	ld t2, (t0)
	addi t0, t0, 8

	/* whole-- */
	addi a6, a6, -1

	/* temp = (high << left_shift) | low  (stitch two source dwords) */
	sll t3, t2, a5
	or t3, t3, t1

	/* low = high >> right_shift  (carry the leftover bytes forward) */
	srl t1, t2, a4

	/* *dst++ = temp */
	sd t3, (a3)
	addi a3, a3, 8

	bnez a6, .Llmain

.Llmain_skip:

.Lend:
	/*
	 * Tail copy of a2 (0..7) bytes: computed jump a2 pairs backwards
	 * from .Lduff_end, same 8-bytes-per-pair layout as .Lduff_start.
	 * Falls through to .Lreturn when done.
	 */
	la t1, .Lduff_end
	slli t2, a2, 3
	sub t1, t1, t2
	jr t1
	lb t2, 6(a1)
	sb t2, 6(a3)
	lb t2, 5(a1)
	sb t2, 5(a3)
	lb t2, 4(a1)
	sb t2, 4(a3)
	lb t2, 3(a1)
	sb t2, 3(a3)
	lb t2, 2(a1)
	sb t2, 2(a3)
	lb t2, 1(a1)
	sb t2, 1(a3)
	lb t2, 0(a1)
	sb t2, 0(a3)
.Lduff_end:

.Lreturn:
	ret

/* executed when dst - src is a multiple of 8, with dst already 8-aligned
 * a0 - void* dst
 * a1 - const void* src
 * a2 - size_t len
 */
.Lmemcpy8:

	beqz a2, .Lreturn

	/*
	 * Skip the 64-byte unrolled loop for short copies.
	 * NOTE(review): slti is a signed compare; for len >= 2^63 this
	 * skips the unrolled loop but the dword loop below still copies
	 * correctly, so this is a (theoretical) perf quirk, not a bug.
	 */
	slti t0, a2, 128
	bnez t0, .Llmain8_64_skip

	/* a4 - uint64_t* end_unroll */

	/* end_unroll = dst + len / 64 * 64 */
	andi t0, a2, ~0b111111
	add a4, a3, t0

	/* len = len % 64 */
	andi a2, a2, 0b111111

.Llmain8_64:
	/* 64 bytes per iteration, split into two 32-byte load/store bursts */
	ld t0, 0(a1)
	ld t1, 8(a1)
	ld t2, 16(a1)
	ld t3, 24(a1)
	sd t0, 0(a3)
	sd t1, 8(a3)
	sd t2, 16(a3)
	sd t3, 24(a3)
	ld t0, 32(a1)
	ld t1, 40(a1)
	ld t2, 48(a1)
	ld t3, 56(a1)
	sd t0, 32(a3)
	sd t1, 40(a3)
	sd t2, 48(a3)
	sd t3, 56(a3)
	addi a3, a3, 64
	addi a1, a1, 64
	bne a3, a4, .Llmain8_64
.Llmain8_64_skip:

	beqz a2, .Lreturn

	/* a4 - uint64_t* end_align */

	/* end_align = (dst + len) & ~0b111  (dst is 8-aligned, so this is
	 * dst plus the number of whole dwords left) */
	add a4, a3, a2
	andi a4, a4, ~0b111

	/* len = len % 8 */
	andi a2, a2, 0b111

	beq a3, a4, .Llmain8_skip
.Llmain8:
	ld t0, (a1)
	sd t0, (a3)
	addi a3, a3, 8
	addi a1, a1, 8
	bne a3, a4, .Llmain8
.Llmain8_skip:

	/* copy the final a2 (0..7) bytes via the shared .Lduff_end device */
	la t1, .Lduff_end
	slli t2, a2, 3
	sub t1, t1, t2
	jr t1
END(memcpy)