1e16c1865SLeandro Lupori/*- 2e16c1865SLeandro Lupori * Copyright (c) 2018 Instituto de Pesquisas Eldorado 3e16c1865SLeandro Lupori * All rights reserved. 4e16c1865SLeandro Lupori * 5e16c1865SLeandro Lupori * Redistribution and use in source and binary forms, with or without 6e16c1865SLeandro Lupori * modification, are permitted provided that the following conditions 7e16c1865SLeandro Lupori * are met: 8e16c1865SLeandro Lupori * 1. Redistributions of source code must retain the above copyright 9e16c1865SLeandro Lupori * notice, this list of conditions and the following disclaimer. 10e16c1865SLeandro Lupori * 2. Redistributions in binary form must reproduce the above copyright 11e16c1865SLeandro Lupori * notice, this list of conditions and the following disclaimer in the 12e16c1865SLeandro Lupori * documentation and/or other materials provided with the distribution. 13e16c1865SLeandro Lupori * 3. Neither the name of the author nor the names of its contributors may 14e16c1865SLeandro Lupori * be used to endorse or promote products derived from this software 15e16c1865SLeandro Lupori * 16e16c1865SLeandro Lupori * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17e16c1865SLeandro Lupori * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18e16c1865SLeandro Lupori * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19e16c1865SLeandro Lupori * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20e16c1865SLeandro Lupori * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21e16c1865SLeandro Lupori * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22e16c1865SLeandro Lupori * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23e16c1865SLeandro Lupori * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24e16c1865SLeandro Lupori * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25e16c1865SLeandro Lupori * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26e16c1865SLeandro Lupori * SUCH DAMAGE. 27e16c1865SLeandro Lupori * 28e16c1865SLeandro Lupori */ 29e16c1865SLeandro Lupori 30e16c1865SLeandro Lupori#include <machine/asm.h> 31e16c1865SLeandro Lupori#define BLOCK_SIZE_BITS 6 32e16c1865SLeandro Lupori#define BLOCK_SIZE (1 << BLOCK_SIZE_BITS) 33e16c1865SLeandro Lupori#define BLOCK_SIZE_MASK (BLOCK_SIZE - 1) 34e16c1865SLeandro Lupori 35*2f561284SLeandro Lupori/* Minimum 8 byte alignment, to avoid cache-inhibited alignment faults.*/ 36*2f561284SLeandro Lupori#ifndef ALIGN_MASK 37*2f561284SLeandro Lupori#define ALIGN_MASK 0x7 38*2f561284SLeandro Lupori#endif 39*2f561284SLeandro Lupori 40e16c1865SLeandro Lupori#define MULTI_PHASE_THRESHOLD 512 41e16c1865SLeandro Lupori 42e16c1865SLeandro Lupori#ifndef FN_NAME 43e16c1865SLeandro Lupori#ifdef MEMMOVE 44e16c1865SLeandro Lupori#define FN_NAME __memmove 45e16c1865SLeandro LuporiWEAK_REFERENCE(__memmove, memmove); 46e16c1865SLeandro Lupori#else 47e16c1865SLeandro Lupori#define FN_NAME __bcopy 48e16c1865SLeandro LuporiWEAK_REFERENCE(__bcopy, bcopy); 49e16c1865SLeandro Lupori#endif 50e16c1865SLeandro Lupori#endif 51e16c1865SLeandro Lupori 52e16c1865SLeandro Lupori/* 53e16c1865SLeandro Lupori * r3: dst 54e16c1865SLeandro Lupori * r4: src 55e16c1865SLeandro Lupori * r5: len 56e16c1865SLeandro Lupori */ 57e16c1865SLeandro Lupori 
/*
 * void *memmove(void *dst, const void *src, size_t len)    (MEMMOVE)
 * void bcopy(const void *src, void *dst, size_t len)       (otherwise)
 *
 * PPC64 overlapping-safe copy.  Strategy:
 *   - src == dst or len == 0: return immediately.
 *   - src/dst relatively misaligned (different offsets within ALIGN_MASK):
 *     plain byte-at-a-time copy, forward or backward as overlap requires
 *     (avoids cache-inhibited alignment faults on unaligned doubleword ops).
 *   - len <  MULTI_PHASE_THRESHOLD: align src to 16, then 16-byte chunks
 *     plus a byte tail (".Lsingle_phase").
 *   - len >= MULTI_PHASE_THRESHOLD: three phases — byte copy up to a
 *     16-byte boundary, BLOCK_SIZE (64-byte) unrolled chunks, then the
 *     remainder through the single-phase path.
 *
 * Register use:
 *   r3 = dst, r4 = src, r5 = len (bcopy entry swaps r3/r4)
 *   r0 = +1/-1 byte-loop increment and scratch
 *   r6-r10 = scratch / phase parameters; r14-r21 saved in phase 2
 *
 * Scratch slots at negative offsets off %r1 rely on the ABI-guaranteed
 * area below the stack pointer (PPC64 ELF red zone / protected area) —
 * this is a leaf routine, so no frame is established.
 */
ENTRY(FN_NAME)
	cmpld	%r3, %r4		/* src == dst? nothing to do */
	beqlr-
	cmpdi	%r5, 0			/* len == 0? nothing to do */
	beqlr-

#ifdef MEMMOVE
	std	%r3, -8(%r1)		/* save dst: memmove returns it */
#else /* bcopy: swap src/dst so r3=dst, r4=src below */
	mr	%r0, %r3
	mr	%r3, %r4
	mr	%r4, %r0
#endif

	/*
	 * Relative-alignment check: if dst and src sit at different offsets
	 * within an ALIGN_MASK-sized unit they can never both become
	 * aligned, so fall back to a pure byte copy.  (The CR0 results of
	 * the two andi. are unused; only the cmpd outcome matters.)
	 */
	andi.	%r8, %r3, ALIGN_MASK
	andi.	%r7, %r4, ALIGN_MASK
	cmpd	%r7, %r8
	bne	.Lunaligned

	cmpldi	%r5, MULTI_PHASE_THRESHOLD
	bge	.Lmulti_phase		/* big copy: 3-phase block path */
	b	.Lfast_copy		/* small copy: single-phase path */

.Lunaligned:
	/* forward or backward copy?  (src < dst with overlap => backward) */
	cmpd	%r4, %r3
	blt	.Lbackward_unaligned

	/* forward: increment +1, count = len, reuse the byte-copy loop */
	li	%r0, 1
	mtctr	%r5
	b	.Lsingle_1_loop

.Lbackward_unaligned:
	/* point src and dst at their last byte, decrement through buffer */
	add	%r3, %r3, %r5
	addi	%r3, %r3, -1
	add	%r4, %r4, %r5
	addi	%r4, %r4, -1
	li	%r0, -1
	mtctr	%r5
	b	.Lsingle_1_loop

.Lfast_copy:
	/* align src to 16 bytes; forward or backward copy? */
	cmpd	%r4, %r3
	blt	.Lbackward_align

	.align 5
.Lalign:
	/* copy single bytes forward until src is 16-byte aligned */
	andi.	%r0, %r4, 15
	beq	.Lsingle_copy
	lbz	%r0, 0(%r4)
	addi	%r4, %r4, 1
	stb	%r0, 0(%r3)
	addi	%r3, %r3, 1
	addi	%r5, %r5, -1
	cmpdi	%r5, 0			/* alignment may consume all of len */
	beq-	.Ldone
	b	.Lalign

.Lbackward_align:
	/* advance src and dst to end (past last byte) */
	add	%r3, %r3, %r5
	add	%r4, %r4, %r5
	.align 5
.Lbackward_align_loop:
	/* copy single bytes backward until src is 16-byte aligned */
	andi.	%r0, %r4, 15
	beq	.Lbackward_single_copy
	lbzu	%r0, -1(%r4)		/* pre-decrement load */
	addi	%r5, %r5, -1
	stbu	%r0, -1(%r3)		/* pre-decrement store */
	cmpdi	%r5, 0
	beq-	.Ldone
	b	.Lbackward_align_loop

.Lsingle_copy:
	/* forward copy parameters: +1 byte step, +16 chunk step, 0 adjust */
	li	%r0, 1
	li	%r8, 16
	li	%r9, 0
	b	.Lsingle_phase

.Lbackward_single_copy:
	/* backward copy parameters: -1 byte step, -16 chunk step, -15 adjust */
	li	%r0, -1
	li	%r8, -16
	li	%r9, -15
	/* point src and dst to last byte */
	addi	%r3, %r3, -1
	addi	%r4, %r4, -1

.Lsingle_phase:
	/*
	 * Single-phase copy: CTR counts 16-byte chunks, then the low 4 bits
	 * of len are copied byte by byte.  r8 = +/-16 chunk increment,
	 * r9 = pre/post pointer adjustment (0 forward, -15 backward, so the
	 * ld/std pair at offsets 0/8 covers the right 16 bytes either way).
	 */
	srdi.	%r6, %r5, 4		/* number of 16-byte chunks */
	beq	.Lsingle_1		/* none: straight to byte tail */

	/* pre-adjustment */
	add	%r3, %r3, %r9
	add	%r4, %r4, %r9

	mtctr	%r6
	.align 5
.Lsingle_16_loop:
	ld	%r6, 0(%r4)
	ld	%r7, 8(%r4)
	add	%r4, %r4, %r8
	std	%r6, 0(%r3)
	std	%r7, 8(%r3)
	add	%r3, %r3, %r8
	bdnz	.Lsingle_16_loop

	/* post-adjustment: undo the pre-adjustment */
	sub	%r3, %r3, %r9
	sub	%r4, %r4, %r9

.Lsingle_1:
	andi.	%r6, %r5, 0x0f		/* number of tail bytes */
	beq	.Ldone			/* none? done */

	mtctr	%r6
	.align 5
.Lsingle_1_loop:
	/* byte copy loop; r0 is +1 (forward) or -1 (backward) */
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0
	bdnz	.Lsingle_1_loop

.Ldone:
#ifdef MEMMOVE
	ld	%r3, -8(%r1)		/* restore dst for return value */
#endif
	blr

.Lmulti_phase:
	/*
	 * Set up multi-phase copy parameters.  Note src is already known
	 * to be relatively aligned with dst here, so aligning src in
	 * phase 1 aligns dst too (to the extent ALIGN_MASK guarantees).
	 */

	/* r7 = bytes before the 16-byte-aligned section of the buffer */
	andi.	%r6, %r4, 15
	subfic	%r7, %r6, 16
	/* r8 = bytes in and after the aligned section of the buffer */
	sub	%r8, %r5, %r7
	/* r9 = bytes after the aligned section (tail, < BLOCK_SIZE) */
	andi.	%r9, %r8, BLOCK_SIZE_MASK
	/* r10 = BLOCKS in the aligned section of the buffer */
	srdi	%r10, %r8, BLOCK_SIZE_BITS

	/* forward or backward copy? */
	cmpd	%r4, %r3
	blt	.Lbackward_multi_copy

	/*
	 * Forward copy: phase 1 handles the head (r7 bytes), phase 3 the
	 * tail (r9 bytes).  Parameters parked in red-zone slots because
	 * r5/r7-r10 are repurposed as loop increments/offsets below.
	 */
	std	%r7, -32(%r1)		/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)		/* BLOCKS to copy in phase 2 */
	std	%r9, -48(%r1)		/* bytes to copy in phase 3 */

	li	%r0, 1			/* increment for phases 1 and 3 */
	li	%r5, BLOCK_SIZE		/* increment for phase 2 */

	/* ldx/stdx offsets for the 4x16-byte phase-2 unroll */
	li	%r7, 0
	li	%r8, 16
	li	%r9, 32
	li	%r10, 48

	std	%r8, -16(%r1)		/* 16-byte increment (16) for phase 3 */
	std	%r7, -24(%r1)		/* 16-byte pre/post adjustment (0) */

	b	.Lphase1

.Lbackward_multi_copy:
	/* backward copy: head/tail roles of r7 and r9 are swapped */
	std	%r9, -32(%r1)		/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)		/* BLOCKS to copy in phase 2 */
	std	%r7, -48(%r1)		/* bytes to copy in phase 3 */

	li	%r0, -1			/* increment for phases 1 and 3 */
	add	%r6, %r5, %r0		/* r6 = len - 1 */
	li	%r5, -BLOCK_SIZE	/* increment for phase 2 */
	/* advance src and dst to the last byte */
	add	%r3, %r3, %r6
	add	%r4, %r4, %r6

	/* negative unroll offsets mirroring the forward 0/16/32/48 set */
	li	%r7, -15
	li	%r8, -31
	li	%r9, -47
	li	%r10, -63

	add	%r6, %r7, %r0		/* r6 = -16 */
	std	%r6, -16(%r1)		/* 16-byte increment (-16) for phase 3 */
	std	%r7, -24(%r1)		/* 16-byte pre/post adjustment (-15) */

.Lphase1:
	/* phase 1: byte copy until src reaches a 16-byte boundary */
	ld	%r6, -32(%r1)		/* bytes to copy in phase 1 */
	cmpldi	%r6, 0			/* already aligned? skip phase 1 */
	beq+	.Lphase2

	mtctr	%r6
	.align 5
.Lphase1_loop:
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0		/* phase 1 increment (+/-1) */
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0		/* phase 1 increment (+/-1) */
	bdnz	.Lphase1_loop

.Lphase2:
	/* phase 2: BLOCK_SIZE (64-byte) chunks, 8 doublewords per pass */
	ld	%r6, -40(%r1)		/* BLOCKS to copy in phase 2 */
	cmpldi	%r6, 0			/* no full blocks? skip phase 2 */
	beq	.Lphase3

#ifdef FN_PHASE2
FN_PHASE2				/* caller-supplied block copy (e.g. VSX) */
#else
	/* save callee-saved registers used as extra offset/data regs */
	std	%r14, -56(%r1)
	std	%r15, -64(%r1)
	std	%r16, -72(%r1)
	std	%r17, -80(%r1)
	std	%r18, -88(%r1)
	std	%r19, -96(%r1)
	std	%r20, -104(%r1)
	std	%r21, -112(%r1)

	/* r18-r21 = r7-r10 + 8: offsets of the odd doublewords */
	addi	%r18, %r7, 8
	addi	%r19, %r8, 8
	addi	%r20, %r9, 8
	addi	%r21, %r10, 8

	mtctr	%r6
	.align 5
.Lphase2_loop:
	/* 8 x ld/std per iteration = one BLOCK_SIZE chunk */
	ldx	%r14, %r7, %r4
	ldx	%r15, %r18, %r4
	ldx	%r16, %r8, %r4
	ldx	%r17, %r19, %r4
	stdx	%r14, %r7, %r3
	stdx	%r15, %r18, %r3
	stdx	%r16, %r8, %r3
	stdx	%r17, %r19, %r3

	ldx	%r14, %r9, %r4
	ldx	%r15, %r20, %r4
	ldx	%r16, %r10, %r4
	ldx	%r17, %r21, %r4
	stdx	%r14, %r9, %r3
	stdx	%r15, %r20, %r3
	stdx	%r16, %r10, %r3
	stdx	%r17, %r21, %r3

	add	%r4, %r4, %r5		/* phase 2 increment (+/-BLOCK_SIZE) */
	add	%r3, %r3, %r5		/* phase 2 increment (+/-BLOCK_SIZE) */

	bdnz	.Lphase2_loop

	/* restore callee-saved registers */
	ld	%r14, -56(%r1)
	ld	%r15, -64(%r1)
	ld	%r16, -72(%r1)
	ld	%r17, -80(%r1)
	ld	%r18, -88(%r1)
	ld	%r19, -96(%r1)
	ld	%r20, -104(%r1)
	ld	%r21, -112(%r1)
#endif

.Lphase3:
	/* phase 3: hand the remainder to the single-phase logic */
	ld	%r5, -48(%r1)		/* bytes to copy in phase 3 */
	ld	%r8, -16(%r1)		/* 16-byte increment */
	ld	%r9, -24(%r1)		/* 16-byte pre/post adjustment */
	b	.Lsingle_phase

END(FN_NAME)

	.section .note.GNU-stack,"",%progbits