/*-
 * Copyright (c) 2018 Instituto de Pesquisas Eldorado
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of its contributors may
 *    be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	BLOCK_SIZE_BITS		6
#define	BLOCK_SIZE		(1 << BLOCK_SIZE_BITS)
#define	BLOCK_SIZE_MASK		(BLOCK_SIZE - 1)

/* copies shorter than this take the single-phase path */
#define	MULTI_PHASE_THRESHOLD	512

#ifndef FN_NAME
#ifdef MEMMOVE
#define	FN_NAME	__memmove
WEAK_REFERENCE(__memmove, memmove);
#else
#define	FN_NAME	__bcopy
WEAK_REFERENCE(__bcopy, bcopy);
#endif
#endif

/*
 * r3: dst
 * r4: src
 * r5: len
 */

ENTRY(FN_NAME)
	cmpld	%r3, %r4		/* src == dst? nothing to do */
	beqlr-
	cmpdi	%r5, 0			/* len == 0? nothing to do */
	beqlr-

#ifdef MEMMOVE
	std	%r3, -8(%r1)		/* save dst */
#else	/* bcopy: swap src/dst */
	mr	%r0, %r3
	mr	%r3, %r4
	mr	%r4, %r0
#endif

	cmpldi	%r5, MULTI_PHASE_THRESHOLD
	bge	.Lmulti_phase

	/* align src */
	cmpd	%r4, %r3		/* forward or backward copy? */
	blt	.Lbackward_align

	.align 5
.Lalign:
	andi.	%r0, %r4, 15
	beq	.Lsingle_copy
	lbz	%r0, 0(%r4)
	addi	%r4, %r4, 1
	stb	%r0, 0(%r3)
	addi	%r3, %r3, 1
	addi	%r5, %r5, -1
	cmpdi	%r5, 0
	beq-	.Ldone
	b	.Lalign

.Lbackward_align:
	/* advance src and dst to end (past last byte) */
	add	%r3, %r3, %r5
	add	%r4, %r4, %r5
	.align 5
.Lbackward_align_loop:
	andi.	%r0, %r4, 15
	beq	.Lbackward_single_copy
	lbzu	%r0, -1(%r4)
	addi	%r5, %r5, -1
	stbu	%r0, -1(%r3)
	cmpdi	%r5, 0
	beq-	.Ldone
	b	.Lbackward_align_loop

.Lsingle_copy:
	/* forward copy */
	li	%r0, 1
	li	%r8, 16
	li	%r9, 0
	b	.Lsingle_phase

.Lbackward_single_copy:
	/* backward copy */
	li	%r0, -1
	li	%r8, -16
	li	%r9, -15
	/* point src and dst to last byte */
	addi	%r3, %r3, -1
	addi	%r4, %r4, -1
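
/*
 * Single-phase copy: 16 bytes at a time (two doublewords per iteration),
 * then the remaining (len & 15) bytes one at a time.  r0/r8/r9 were set
 * above to +1/+16/0 for a forward copy or -1/-16/-15 for a backward copy,
 * so the same loops below walk the buffers in either direction.
 *
 * Roughly equivalent C for the forward case (an illustrative sketch only,
 * not part of the build; the asm relies on the hardware handling the
 * possibly unaligned doubleword accesses on dst, since only src was
 * aligned above):
 *
 *	for (n = len >> 4; n != 0; n--, src += 16, dst += 16) {
 *		((uint64_t *)dst)[0] = ((const uint64_t *)src)[0];
 *		((uint64_t *)dst)[1] = ((const uint64_t *)src)[1];
 *	}
 *	for (n = len & 15; n != 0; n--)
 *		*dst++ = *src++;
 */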
.Lsingle_phase:
	srdi.	%r6, %r5, 4		/* number of 16-byte chunks */
	beq	.Lsingle_1

	/* pre-adjustment */
	add	%r3, %r3, %r9
	add	%r4, %r4, %r9

	mtctr	%r6
	.align 5
.Lsingle_16_loop:
	ld	%r6, 0(%r4)
	ld	%r7, 8(%r4)
	add	%r4, %r4, %r8
	std	%r6, 0(%r3)
	std	%r7, 8(%r3)
	add	%r3, %r3, %r8
	bdnz	.Lsingle_16_loop

	/* post-adjustment */
	sub	%r3, %r3, %r9
	sub	%r4, %r4, %r9

.Lsingle_1:
	andi.	%r6, %r5, 0x0f		/* number of trailing bytes */
	beq	.Ldone			/* none? done */

	mtctr	%r6
	.align 5
.Lsingle_1_loop:
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0		/* increment */
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0		/* increment */
	bdnz	.Lsingle_1_loop

.Ldone:
#ifdef MEMMOVE
	ld	%r3, -8(%r1)		/* restore dst */
#endif
	blr
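
/*
 * Multi-phase copy, used once len reaches MULTI_PHASE_THRESHOLD:
 *
 *	phase 1: copy bytes until src is 16-byte aligned
 *	phase 2: copy BLOCK_SIZE (64-byte) blocks, eight doublewords per
 *		 iteration via indexed loads/stores
 *	phase 3: copy the remaining (< BLOCK_SIZE) bytes by jumping back
 *		 into the single-phase loops above
 *
 * The per-phase byte/block counts and the phase-3 increments are staged
 * in scratch slots at negative offsets from %r1, so the same phase 1-3
 * code runs for both copy directions.
 */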
.Lmulti_phase:
	/* set up multi-phase copy parameters */

	/* r7 = bytes before the aligned section of the buffer */
	andi.	%r6, %r4, 15
	subfic	%r7, %r6, 16
	/* r8 = bytes in and after the aligned section of the buffer */
	sub	%r8, %r5, %r7
	/* r9 = bytes after the aligned section of the buffer */
	andi.	%r9, %r8, BLOCK_SIZE_MASK
	/* r10 = BLOCKS in the aligned section of the buffer */
	srdi	%r10, %r8, BLOCK_SIZE_BITS

	/* forward or backward copy? */
	cmpd	%r4, %r3
	blt	.Lbackward_multi_copy

	/* set up forward copy parameters */
	std	%r7, -32(%r1)		/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)		/* BLOCKS to copy in phase 2 */
	std	%r9, -48(%r1)		/* bytes to copy in phase 3 */

	li	%r0, 1			/* increment for phases 1 and 3 */
	li	%r5, BLOCK_SIZE		/* increment for phase 2 */

	/* op offsets for phase 2 */
	li	%r7, 0
	li	%r8, 16
	li	%r9, 32
	li	%r10, 48

	std	%r8, -16(%r1)		/* 16-byte increment (16) */
	std	%r7, -24(%r1)		/* 16-byte pre/post adjustment (0) */

	b	.Lphase1

.Lbackward_multi_copy:
	/* set up backward copy parameters */
	std	%r9, -32(%r1)		/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)		/* BLOCKS to copy in phase 2 */
	std	%r7, -48(%r1)		/* bytes to copy in phase 3 */

	li	%r0, -1			/* increment for phases 1 and 3 */
	add	%r6, %r5, %r0		/* r6 = len - 1 */
	li	%r5, -BLOCK_SIZE	/* increment for phase 2 */
	/* advance src and dst to the last position */
	add	%r3, %r3, %r6
	add	%r4, %r4, %r6

	/* op offsets for phase 2 */
	li	%r7, -15
	li	%r8, -31
	li	%r9, -47
	li	%r10, -63

	add	%r6, %r7, %r0		/* r6 = -16 */
	std	%r6, -16(%r1)		/* 16-byte increment (-16) */
	std	%r7, -24(%r1)		/* 16-byte pre/post adjustment (-15) */

.Lphase1:
	ld	%r6, -32(%r1)		/* bytes to copy in phase 1 */
	cmpldi	%r6, 0			/* r6 == 0? skip phase 1 */
	beq+	.Lphase2

	mtctr	%r6
	.align 5
.Lphase1_loop:
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0		/* phase 1 increment */
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0		/* phase 1 increment */
	bdnz	.Lphase1_loop

.Lphase2:
	ld	%r6, -40(%r1)		/* BLOCKS to copy in phase 2 */
	cmpldi	%r6, 0			/* r6 == 0? skip phase 2 */
	beq	.Lphase3

#ifdef FN_PHASE2
FN_PHASE2
#else
	/* save registers */
	std	%r14, -56(%r1)
	std	%r15, -64(%r1)
	std	%r16, -72(%r1)
	std	%r17, -80(%r1)
	std	%r18, -88(%r1)
	std	%r19, -96(%r1)
	std	%r20, -104(%r1)
	std	%r21, -112(%r1)

	/* r18-r21: offsets of the second doubleword of each 16-byte pair */
	addi	%r18, %r7, 8
	addi	%r19, %r8, 8
	addi	%r20, %r9, 8
	addi	%r21, %r10, 8

	mtctr	%r6
	.align 5
.Lphase2_loop:
	/* copy one BLOCK_SIZE (64-byte) block as eight doublewords */
	ldx	%r14, %r7, %r4
	ldx	%r15, %r18, %r4
	ldx	%r16, %r8, %r4
	ldx	%r17, %r19, %r4
	stdx	%r14, %r7, %r3
	stdx	%r15, %r18, %r3
	stdx	%r16, %r8, %r3
	stdx	%r17, %r19, %r3

	ldx	%r14, %r9, %r4
	ldx	%r15, %r20, %r4
	ldx	%r16, %r10, %r4
	ldx	%r17, %r21, %r4
	stdx	%r14, %r9, %r3
	stdx	%r15, %r20, %r3
	stdx	%r16, %r10, %r3
	stdx	%r17, %r21, %r3

	add	%r4, %r4, %r5		/* phase 2 increment */
	add	%r3, %r3, %r5		/* phase 2 increment */

	bdnz	.Lphase2_loop

	/* restore registers */
	ld	%r14, -56(%r1)
	ld	%r15, -64(%r1)
	ld	%r16, -72(%r1)
	ld	%r17, -80(%r1)
	ld	%r18, -88(%r1)
	ld	%r19, -96(%r1)
	ld	%r20, -104(%r1)
	ld	%r21, -112(%r1)
#endif

.Lphase3:
	/* load registers for transitioning into the single-phase logic */
	ld	%r5, -48(%r1)		/* bytes to copy in phase 3 */
	ld	%r8, -16(%r1)		/* 16-byte increment */
	ld	%r9, -24(%r1)		/* 16-byte pre/post adjustment */
	b	.Lsingle_phase

END(FN_NAME)

	.section .note.GNU-stack,"",%progbits