1*4d2ff233SAndrew Turner/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */ 2*4d2ff233SAndrew Turner 3*4d2ff233SAndrew Turner/* 4*4d2ff233SAndrew Turner * Copyright 2003 Wasabi Systems, Inc. 5*4d2ff233SAndrew Turner * All rights reserved. 6*4d2ff233SAndrew Turner * 7*4d2ff233SAndrew Turner * Written by Steve C. Woodford for Wasabi Systems, Inc. 8*4d2ff233SAndrew Turner * 9*4d2ff233SAndrew Turner * Redistribution and use in source and binary forms, with or without 10*4d2ff233SAndrew Turner * modification, are permitted provided that the following conditions 11*4d2ff233SAndrew Turner * are met: 12*4d2ff233SAndrew Turner * 1. Redistributions of source code must retain the above copyright 13*4d2ff233SAndrew Turner * notice, this list of conditions and the following disclaimer. 14*4d2ff233SAndrew Turner * 2. Redistributions in binary form must reproduce the above copyright 15*4d2ff233SAndrew Turner * notice, this list of conditions and the following disclaimer in the 16*4d2ff233SAndrew Turner * documentation and/or other materials provided with the distribution. 17*4d2ff233SAndrew Turner * 3. All advertising materials mentioning features or use of this software 18*4d2ff233SAndrew Turner * must display the following acknowledgement: 19*4d2ff233SAndrew Turner * This product includes software developed for the NetBSD Project by 20*4d2ff233SAndrew Turner * Wasabi Systems, Inc. 21*4d2ff233SAndrew Turner * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22*4d2ff233SAndrew Turner * or promote products derived from this software without specific prior 23*4d2ff233SAndrew Turner * written permission. 24*4d2ff233SAndrew Turner * 25*4d2ff233SAndrew Turner * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. 
``AS IS'' AND 26*4d2ff233SAndrew Turner * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27*4d2ff233SAndrew Turner * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28*4d2ff233SAndrew Turner * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29*4d2ff233SAndrew Turner * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30*4d2ff233SAndrew Turner * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31*4d2ff233SAndrew Turner * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32*4d2ff233SAndrew Turner * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33*4d2ff233SAndrew Turner * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34*4d2ff233SAndrew Turner * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35*4d2ff233SAndrew Turner * POSSIBILITY OF SUCH DAMAGE. 36*4d2ff233SAndrew Turner */ 372357939bSOlivier Houchard 382357939bSOlivier Houchard#include <machine/asm.h> 39*4d2ff233SAndrew Turner.syntax unified 40*4d2ff233SAndrew Turner 41*4d2ff233SAndrew Turner/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 42*4d2ff233SAndrew TurnerENTRY(memcpy) 43*4d2ff233SAndrew Turner pld [r1] /* Prefetch hint for the start of the source buffer */ 44*4d2ff233SAndrew Turner cmp r2, #0x0c 45*4d2ff233SAndrew Turner ble .Lmemcpy_short /* <= 12 bytes. NOTE(review): ble is a signed condition but len is a size_t; a length with bit 31 set would also branch here and feed the computed jump at .Lmemcpy_short - confirm such lengths cannot occur */ 46*4d2ff233SAndrew Turner mov r3, r0 /* We must not clobber r0 */ 47*4d2ff233SAndrew Turner 48*4d2ff233SAndrew Turner /* Word-align the destination buffer */ 49*4d2ff233SAndrew Turner ands ip, r3, #0x03 /* Already word aligned? 
*/ 50*4d2ff233SAndrew Turner beq .Lmemcpy_wordaligned /* Yup */ 51*4d2ff233SAndrew Turner cmp ip, #0x02 52*4d2ff233SAndrew Turner ldrb ip, [r1], #0x01 53*4d2ff233SAndrew Turner sub r2, r2, #0x01 54*4d2ff233SAndrew Turner strb ip, [r3], #0x01 55*4d2ff233SAndrew Turner ldrble ip, [r1], #0x01 56*4d2ff233SAndrew Turner suble r2, r2, #0x01 57*4d2ff233SAndrew Turner strble ip, [r3], #0x01 58*4d2ff233SAndrew Turner ldrblt ip, [r1], #0x01 59*4d2ff233SAndrew Turner sublt r2, r2, #0x01 60*4d2ff233SAndrew Turner strblt ip, [r3], #0x01 61*4d2ff233SAndrew Turner 62*4d2ff233SAndrew Turner /* Destination buffer is now word aligned */ 63*4d2ff233SAndrew Turner.Lmemcpy_wordaligned: 64*4d2ff233SAndrew Turner ands ip, r1, #0x03 /* Is src also word-aligned? */ 65*4d2ff233SAndrew Turner bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 66*4d2ff233SAndrew Turner 67*4d2ff233SAndrew Turner /* Quad-align the destination buffer */ 68*4d2ff233SAndrew Turner tst r3, #0x07 /* Already quad aligned? */ 69*4d2ff233SAndrew Turner ldrne ip, [r1], #0x04 70*4d2ff233SAndrew Turner stmfd sp!, {r4-r9} /* Free up some registers */ 71*4d2ff233SAndrew Turner subne r2, r2, #0x04 72*4d2ff233SAndrew Turner strne ip, [r3], #0x04 73*4d2ff233SAndrew Turner 74*4d2ff233SAndrew Turner /* Destination buffer quad aligned, source is at least word aligned */ 75*4d2ff233SAndrew Turner subs r2, r2, #0x80 76*4d2ff233SAndrew Turner blt .Lmemcpy_w_lessthan128 77*4d2ff233SAndrew Turner 78*4d2ff233SAndrew Turner /* Copy 128 bytes at a time */ 79*4d2ff233SAndrew Turner.Lmemcpy_w_loop128: 80*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:00-03 */ 81*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:04-07 */ 82*4d2ff233SAndrew Turner pld [r1, #0x18] /* Prefetch 0x20 */ 83*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 /* LD:08-0b */ 84*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 /* LD:0c-0f */ 85*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 /* LD:10-13 */ 86*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 /* LD:14-17 */ 
87*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:00-07 */ 88*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:18-1b */ 89*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:1c-1f */ 90*4d2ff233SAndrew Turner strd r6, [r3], #0x08 /* ST:08-0f */ 91*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 /* LD:20-23 */ 92*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 /* LD:24-27 */ 93*4d2ff233SAndrew Turner pld [r1, #0x18] /* Prefetch 0x40 */ 94*4d2ff233SAndrew Turner strd r8, [r3], #0x08 /* ST:10-17 */ 95*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 /* LD:28-2b */ 96*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 /* LD:2c-2f */ 97*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:18-1f */ 98*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:30-33 */ 99*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:34-37 */ 100*4d2ff233SAndrew Turner strd r6, [r3], #0x08 /* ST:20-27 */ 101*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 /* LD:38-3b */ 102*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 /* LD:3c-3f */ 103*4d2ff233SAndrew Turner strd r8, [r3], #0x08 /* ST:28-2f */ 104*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 /* LD:40-43 */ 105*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 /* LD:44-47 */ 106*4d2ff233SAndrew Turner pld [r1, #0x18] /* Prefetch 0x60 */ 107*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:30-37 */ 108*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:48-4b */ 109*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:4c-4f */ 110*4d2ff233SAndrew Turner strd r6, [r3], #0x08 /* ST:38-3f */ 111*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 /* LD:50-53 */ 112*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 /* LD:54-57 */ 113*4d2ff233SAndrew Turner strd r8, [r3], #0x08 /* ST:40-47 */ 114*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 /* LD:58-5b */ 115*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 /* LD:5c-5f */ 116*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:48-4f */ 117*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:60-63 */ 118*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:64-67 */ 
119*4d2ff233SAndrew Turner pld [r1, #0x18] /* Prefetch 0x80 */ 120*4d2ff233SAndrew Turner strd r6, [r3], #0x08 /* ST:50-57 */ 121*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 /* LD:68-6b */ 122*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 /* LD:6c-6f */ 123*4d2ff233SAndrew Turner strd r8, [r3], #0x08 /* ST:58-5f */ 124*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 /* LD:70-73 */ 125*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 /* LD:74-77 */ 126*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:60-67 */ 127*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 /* LD:78-7b */ 128*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 /* LD:7c-7f */ 129*4d2ff233SAndrew Turner strd r6, [r3], #0x08 /* ST:68-6f */ 130*4d2ff233SAndrew Turner strd r8, [r3], #0x08 /* ST:70-77 */ 131*4d2ff233SAndrew Turner subs r2, r2, #0x80 132*4d2ff233SAndrew Turner strd r4, [r3], #0x08 /* ST:78-7f */ 133*4d2ff233SAndrew Turner bge .Lmemcpy_w_loop128 134*4d2ff233SAndrew Turner 135*4d2ff233SAndrew Turner.Lmemcpy_w_lessthan128: 136*4d2ff233SAndrew Turner adds r2, r2, #0x80 /* Adjust for extra sub */ 137*4d2ff233SAndrew Turner ldmfdeq sp!, {r4-r9} 138*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 139*4d2ff233SAndrew Turner subs r2, r2, #0x20 140*4d2ff233SAndrew Turner blt .Lmemcpy_w_lessthan32 141*4d2ff233SAndrew Turner 142*4d2ff233SAndrew Turner /* Copy 32 bytes at a time */ 143*4d2ff233SAndrew Turner.Lmemcpy_w_loop32: 144*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 145*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 146*4d2ff233SAndrew Turner pld [r1, #0x18] 147*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 148*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 149*4d2ff233SAndrew Turner ldr r8, [r1], #0x04 150*4d2ff233SAndrew Turner ldr r9, [r1], #0x04 151*4d2ff233SAndrew Turner strd r4, [r3], #0x08 152*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 153*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 154*4d2ff233SAndrew Turner strd r6, [r3], #0x08 155*4d2ff233SAndrew Turner strd r8, [r3], #0x08 156*4d2ff233SAndrew Turner subs r2, r2, 
#0x20 157*4d2ff233SAndrew Turner strd r4, [r3], #0x08 158*4d2ff233SAndrew Turner bge .Lmemcpy_w_loop32 159*4d2ff233SAndrew Turner 160*4d2ff233SAndrew Turner.Lmemcpy_w_lessthan32: 161*4d2ff233SAndrew Turner adds r2, r2, #0x20 /* Adjust for extra sub */ 162*4d2ff233SAndrew Turner ldmfdeq sp!, {r4-r9} 163*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 164*4d2ff233SAndrew Turner 165*4d2ff233SAndrew Turner and r4, r2, #0x18 166*4d2ff233SAndrew Turner rsbs r4, r4, #0x18 167*4d2ff233SAndrew Turner addne pc, pc, r4, lsl #1 168*4d2ff233SAndrew Turner nop 169*4d2ff233SAndrew Turner 170*4d2ff233SAndrew Turner /* At least 24 bytes remaining */ 171*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 172*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 173*4d2ff233SAndrew Turner sub r2, r2, #0x08 174*4d2ff233SAndrew Turner strd r4, [r3], #0x08 175*4d2ff233SAndrew Turner 176*4d2ff233SAndrew Turner /* At least 16 bytes remaining */ 177*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 178*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 179*4d2ff233SAndrew Turner sub r2, r2, #0x08 180*4d2ff233SAndrew Turner strd r4, [r3], #0x08 181*4d2ff233SAndrew Turner 182*4d2ff233SAndrew Turner /* At least 8 bytes remaining */ 183*4d2ff233SAndrew Turner ldr r4, [r1], #0x04 184*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 185*4d2ff233SAndrew Turner subs r2, r2, #0x08 186*4d2ff233SAndrew Turner strd r4, [r3], #0x08 187*4d2ff233SAndrew Turner 188*4d2ff233SAndrew Turner /* Less than 8 bytes remaining */ 189*4d2ff233SAndrew Turner ldmfd sp!, {r4-r9} 190*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 191*4d2ff233SAndrew Turner subs r2, r2, #0x04 192*4d2ff233SAndrew Turner ldrge ip, [r1], #0x04 193*4d2ff233SAndrew Turner strge ip, [r3], #0x04 194*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 195*4d2ff233SAndrew Turner addlt r2, r2, #0x04 196*4d2ff233SAndrew Turner ldrb ip, [r1], #0x01 197*4d2ff233SAndrew Turner cmp r2, #0x02 198*4d2ff233SAndrew Turner ldrbge r2, [r1], #0x01 199*4d2ff233SAndrew Turner 
strb ip, [r3], #0x01 200*4d2ff233SAndrew Turner ldrbgt ip, [r1] 201*4d2ff233SAndrew Turner strbge r2, [r3], #0x01 202*4d2ff233SAndrew Turner strbgt ip, [r3] 203*4d2ff233SAndrew Turner bx lr 204*4d2ff233SAndrew Turner 205*4d2ff233SAndrew Turner 206*4d2ff233SAndrew Turner/* 207*4d2ff233SAndrew Turner * At this point, it has not been possible to word align both buffers. 208*4d2ff233SAndrew Turner * The destination buffer is word aligned, but the source buffer is not. 209*4d2ff233SAndrew Turner */ 210*4d2ff233SAndrew Turner.Lmemcpy_bad_align: 211*4d2ff233SAndrew Turner stmfd sp!, {r4-r7} 212*4d2ff233SAndrew Turner bic r1, r1, #0x03 213*4d2ff233SAndrew Turner cmp ip, #2 214*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 215*4d2ff233SAndrew Turner bgt .Lmemcpy_bad3 216*4d2ff233SAndrew Turner beq .Lmemcpy_bad2 217*4d2ff233SAndrew Turner b .Lmemcpy_bad1 218*4d2ff233SAndrew Turner 219*4d2ff233SAndrew Turner.Lmemcpy_bad1_loop16: 220*4d2ff233SAndrew Turner mov r4, ip, lsr #8 221*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 222*4d2ff233SAndrew Turner pld [r1, #0x018] 223*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 224*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 225*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 226*4d2ff233SAndrew Turner orr r4, r4, r5, lsl #24 227*4d2ff233SAndrew Turner mov r5, r5, lsr #8 228*4d2ff233SAndrew Turner orr r5, r5, r6, lsl #24 229*4d2ff233SAndrew Turner mov r6, r6, lsr #8 230*4d2ff233SAndrew Turner orr r6, r6, r7, lsl #24 231*4d2ff233SAndrew Turner mov r7, r7, lsr #8 232*4d2ff233SAndrew Turner orr r7, r7, ip, lsl #24 233*4d2ff233SAndrew Turner str r4, [r3], #0x04 234*4d2ff233SAndrew Turner str r5, [r3], #0x04 235*4d2ff233SAndrew Turner str r6, [r3], #0x04 236*4d2ff233SAndrew Turner str r7, [r3], #0x04 237*4d2ff233SAndrew Turner.Lmemcpy_bad1: 238*4d2ff233SAndrew Turner subs r2, r2, #0x10 239*4d2ff233SAndrew Turner bge .Lmemcpy_bad1_loop16 240*4d2ff233SAndrew Turner 241*4d2ff233SAndrew Turner adds r2, r2, #0x10 242*4d2ff233SAndrew Turner ldmfdeq sp!, {r4-r7} 
243*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 244*4d2ff233SAndrew Turner subs r2, r2, #0x04 245*4d2ff233SAndrew Turner sublt r1, r1, #0x03 246*4d2ff233SAndrew Turner blt .Lmemcpy_bad_done 247*4d2ff233SAndrew Turner 248*4d2ff233SAndrew Turner.Lmemcpy_bad1_loop4: 249*4d2ff233SAndrew Turner mov r4, ip, lsr #8 250*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 251*4d2ff233SAndrew Turner subs r2, r2, #0x04 252*4d2ff233SAndrew Turner orr r4, r4, ip, lsl #24 253*4d2ff233SAndrew Turner str r4, [r3], #0x04 254*4d2ff233SAndrew Turner bge .Lmemcpy_bad1_loop4 255*4d2ff233SAndrew Turner sub r1, r1, #0x03 256*4d2ff233SAndrew Turner b .Lmemcpy_bad_done 257*4d2ff233SAndrew Turner 258*4d2ff233SAndrew Turner.Lmemcpy_bad2_loop16: 259*4d2ff233SAndrew Turner mov r4, ip, lsr #16 260*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 261*4d2ff233SAndrew Turner pld [r1, #0x018] 262*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 263*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 264*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 265*4d2ff233SAndrew Turner orr r4, r4, r5, lsl #16 266*4d2ff233SAndrew Turner mov r5, r5, lsr #16 267*4d2ff233SAndrew Turner orr r5, r5, r6, lsl #16 268*4d2ff233SAndrew Turner mov r6, r6, lsr #16 269*4d2ff233SAndrew Turner orr r6, r6, r7, lsl #16 270*4d2ff233SAndrew Turner mov r7, r7, lsr #16 271*4d2ff233SAndrew Turner orr r7, r7, ip, lsl #16 272*4d2ff233SAndrew Turner str r4, [r3], #0x04 273*4d2ff233SAndrew Turner str r5, [r3], #0x04 274*4d2ff233SAndrew Turner str r6, [r3], #0x04 275*4d2ff233SAndrew Turner str r7, [r3], #0x04 276*4d2ff233SAndrew Turner.Lmemcpy_bad2: 277*4d2ff233SAndrew Turner subs r2, r2, #0x10 278*4d2ff233SAndrew Turner bge .Lmemcpy_bad2_loop16 279*4d2ff233SAndrew Turner 280*4d2ff233SAndrew Turner adds r2, r2, #0x10 281*4d2ff233SAndrew Turner ldmfdeq sp!, {r4-r7} 282*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 283*4d2ff233SAndrew Turner subs r2, r2, #0x04 284*4d2ff233SAndrew Turner sublt r1, r1, #0x02 285*4d2ff233SAndrew Turner blt 
.Lmemcpy_bad_done 286*4d2ff233SAndrew Turner 287*4d2ff233SAndrew Turner.Lmemcpy_bad2_loop4: 288*4d2ff233SAndrew Turner mov r4, ip, lsr #16 289*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 290*4d2ff233SAndrew Turner subs r2, r2, #0x04 291*4d2ff233SAndrew Turner orr r4, r4, ip, lsl #16 292*4d2ff233SAndrew Turner str r4, [r3], #0x04 293*4d2ff233SAndrew Turner bge .Lmemcpy_bad2_loop4 294*4d2ff233SAndrew Turner sub r1, r1, #0x02 295*4d2ff233SAndrew Turner b .Lmemcpy_bad_done 296*4d2ff233SAndrew Turner 297*4d2ff233SAndrew Turner.Lmemcpy_bad3_loop16: 298*4d2ff233SAndrew Turner mov r4, ip, lsr #24 299*4d2ff233SAndrew Turner ldr r5, [r1], #0x04 300*4d2ff233SAndrew Turner pld [r1, #0x018] 301*4d2ff233SAndrew Turner ldr r6, [r1], #0x04 302*4d2ff233SAndrew Turner ldr r7, [r1], #0x04 303*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 304*4d2ff233SAndrew Turner orr r4, r4, r5, lsl #8 305*4d2ff233SAndrew Turner mov r5, r5, lsr #24 306*4d2ff233SAndrew Turner orr r5, r5, r6, lsl #8 307*4d2ff233SAndrew Turner mov r6, r6, lsr #24 308*4d2ff233SAndrew Turner orr r6, r6, r7, lsl #8 309*4d2ff233SAndrew Turner mov r7, r7, lsr #24 310*4d2ff233SAndrew Turner orr r7, r7, ip, lsl #8 311*4d2ff233SAndrew Turner str r4, [r3], #0x04 312*4d2ff233SAndrew Turner str r5, [r3], #0x04 313*4d2ff233SAndrew Turner str r6, [r3], #0x04 314*4d2ff233SAndrew Turner str r7, [r3], #0x04 315*4d2ff233SAndrew Turner.Lmemcpy_bad3: 316*4d2ff233SAndrew Turner subs r2, r2, #0x10 317*4d2ff233SAndrew Turner bge .Lmemcpy_bad3_loop16 318*4d2ff233SAndrew Turner 319*4d2ff233SAndrew Turner adds r2, r2, #0x10 320*4d2ff233SAndrew Turner ldmfdeq sp!, {r4-r7} 321*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 322*4d2ff233SAndrew Turner subs r2, r2, #0x04 323*4d2ff233SAndrew Turner sublt r1, r1, #0x01 324*4d2ff233SAndrew Turner blt .Lmemcpy_bad_done 325*4d2ff233SAndrew Turner 326*4d2ff233SAndrew Turner.Lmemcpy_bad3_loop4: 327*4d2ff233SAndrew Turner mov r4, ip, lsr #24 328*4d2ff233SAndrew Turner ldr ip, [r1], #0x04 
329*4d2ff233SAndrew Turner subs r2, r2, #0x04 330*4d2ff233SAndrew Turner orr r4, r4, ip, lsl #8 331*4d2ff233SAndrew Turner str r4, [r3], #0x04 332*4d2ff233SAndrew Turner bge .Lmemcpy_bad3_loop4 333*4d2ff233SAndrew Turner sub r1, r1, #0x01 /* Rewind r1 to the first unconsumed source byte */ 334*4d2ff233SAndrew Turner 335*4d2ff233SAndrew Turner.Lmemcpy_bad_done: 336*4d2ff233SAndrew Turner ldmfd sp!, {r4-r7} 337*4d2ff233SAndrew Turner adds r2, r2, #0x04 /* Adjust for extra sub */ 338*4d2ff233SAndrew Turner bxeq lr /* Return now if done */ 339*4d2ff233SAndrew Turner ldrb ip, [r1], #0x01 /* 1 to 3 trailing bytes remain */ 340*4d2ff233SAndrew Turner cmp r2, #0x02 341*4d2ff233SAndrew Turner ldrbge r2, [r1], #0x01 342*4d2ff233SAndrew Turner strb ip, [r3], #0x01 343*4d2ff233SAndrew Turner ldrbgt ip, [r1] 344*4d2ff233SAndrew Turner strbge r2, [r3], #0x01 345*4d2ff233SAndrew Turner strbgt ip, [r3] 346*4d2ff233SAndrew Turner bx lr 347*4d2ff233SAndrew Turner 348*4d2ff233SAndrew Turner 349*4d2ff233SAndrew Turner/* 350*4d2ff233SAndrew Turner * Handle short copies (12 bytes or fewer), possibly misaligned. 351*4d2ff233SAndrew Turner * Some of these are *very* common, thanks to the network stack, 352*4d2ff233SAndrew Turner * and so are handled specially. 
353*4d2ff233SAndrew Turner */ 354*4d2ff233SAndrew Turner.Lmemcpy_short: 355*4d2ff233SAndrew Turner#ifndef _STANDALONE 356*4d2ff233SAndrew Turner add pc, pc, r2, lsl #2 /* Jump to branch-table entry number len */ 357*4d2ff233SAndrew Turner nop 358*4d2ff233SAndrew Turner bx lr /* 0x00 */ 359*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x01 */ 360*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x02 */ 361*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x03 */ 362*4d2ff233SAndrew Turner b .Lmemcpy_4 /* 0x04 */ 363*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x05 */ 364*4d2ff233SAndrew Turner b .Lmemcpy_6 /* 0x06 */ 365*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x07 */ 366*4d2ff233SAndrew Turner b .Lmemcpy_8 /* 0x08 */ 367*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x09 */ 368*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x0a */ 369*4d2ff233SAndrew Turner b .Lmemcpy_bytewise /* 0x0b */ 370*4d2ff233SAndrew Turner b .Lmemcpy_c /* 0x0c */ 371c81b12e0SWarner Losh#endif 372*4d2ff233SAndrew Turner.Lmemcpy_bytewise: 373*4d2ff233SAndrew Turner mov r3, r0 /* We must not clobber r0 */ 374*4d2ff233SAndrew Turner ldrb ip, [r1], #0x01 /* NOTE(review): first byte is loaded before len is tested; with _STANDALONE defined the table above is omitted, so len == 0 lands here and the subs below wraps - confirm */ 375*4d2ff233SAndrew Turner1: subs r2, r2, #0x01 376*4d2ff233SAndrew Turner strb ip, [r3], #0x01 377*4d2ff233SAndrew Turner ldrbne ip, [r1], #0x01 378*4d2ff233SAndrew Turner bne 1b 379*4d2ff233SAndrew Turner bx lr 380*4d2ff233SAndrew Turner 381*4d2ff233SAndrew Turner#ifndef _STANDALONE 382*4d2ff233SAndrew Turner/****************************************************************************** 383*4d2ff233SAndrew Turner * Special case for 4 byte copies 384*4d2ff233SAndrew Turner */ 385*4d2ff233SAndrew Turner#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 386*4d2ff233SAndrew Turner#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 387*4d2ff233SAndrew Turner LMEMCPY_4_PAD 388*4d2ff233SAndrew Turner.Lmemcpy_4: 389*4d2ff233SAndrew Turner and r2, r1, #0x03 /* r2 = src & 3 */ 390*4d2ff233SAndrew Turner orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 391*4d2ff233SAndrew Turner ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3) */ 392*4d2ff233SAndrew Turner sub r3, pc, #0x14 /* r3 = address of .Lmemcpy_4, assuming pc reads as this instruction + 8 */ 
393*4d2ff233SAndrew Turner addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 394*4d2ff233SAndrew Turner 395*4d2ff233SAndrew Turner/* 396*4d2ff233SAndrew Turner * 0000: dst is 32-bit aligned, src is 32-bit aligned 397*4d2ff233SAndrew Turner */ 398*4d2ff233SAndrew Turner ldr r2, [r1] 399*4d2ff233SAndrew Turner str r2, [r0] 400*4d2ff233SAndrew Turner bx lr 401*4d2ff233SAndrew Turner LMEMCPY_4_PAD 402*4d2ff233SAndrew Turner 403*4d2ff233SAndrew Turner/* 404*4d2ff233SAndrew Turner * 0001: dst is 32-bit aligned, src is 8-bit aligned 405*4d2ff233SAndrew Turner */ 406*4d2ff233SAndrew Turner ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 407*4d2ff233SAndrew Turner ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 408*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* r3 = .210 */ 409*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 410*4d2ff233SAndrew Turner str r3, [r0] 411*4d2ff233SAndrew Turner bx lr 412*4d2ff233SAndrew Turner LMEMCPY_4_PAD 413*4d2ff233SAndrew Turner 414*4d2ff233SAndrew Turner/* 415*4d2ff233SAndrew Turner * 0010: dst is 32-bit aligned, src is 16-bit aligned 416*4d2ff233SAndrew Turner */ 417*4d2ff233SAndrew Turner ldrh r3, [r1, #0x02] 418*4d2ff233SAndrew Turner ldrh r2, [r1] 419*4d2ff233SAndrew Turner orr r3, r2, r3, lsl #16 420*4d2ff233SAndrew Turner str r3, [r0] 421*4d2ff233SAndrew Turner bx lr 422*4d2ff233SAndrew Turner LMEMCPY_4_PAD 423*4d2ff233SAndrew Turner 424*4d2ff233SAndrew Turner/* 425*4d2ff233SAndrew Turner * 0011: dst is 32-bit aligned, src is 8-bit aligned 426*4d2ff233SAndrew Turner */ 427*4d2ff233SAndrew Turner ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 428*4d2ff233SAndrew Turner ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 429*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...0 */ 430*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 431*4d2ff233SAndrew Turner str r3, [r0] 432*4d2ff233SAndrew Turner bx lr 433*4d2ff233SAndrew Turner LMEMCPY_4_PAD 434*4d2ff233SAndrew Turner 435*4d2ff233SAndrew Turner/* 
436*4d2ff233SAndrew Turner * 0100: dst is 8-bit aligned, src is 32-bit aligned 437*4d2ff233SAndrew Turner */ 438*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 439*4d2ff233SAndrew Turner strb r2, [r0] 440*4d2ff233SAndrew Turner mov r3, r2, lsr #8 441*4d2ff233SAndrew Turner mov r1, r2, lsr #24 442*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 443*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 444*4d2ff233SAndrew Turner bx lr 445*4d2ff233SAndrew Turner LMEMCPY_4_PAD 446*4d2ff233SAndrew Turner 447*4d2ff233SAndrew Turner/* 448*4d2ff233SAndrew Turner * 0101: dst is 8-bit aligned, src is 8-bit aligned 449*4d2ff233SAndrew Turner */ 450*4d2ff233SAndrew Turner ldrb r2, [r1] 451*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 452*4d2ff233SAndrew Turner ldrb r1, [r1, #0x03] 453*4d2ff233SAndrew Turner strb r2, [r0] 454*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 455*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 456*4d2ff233SAndrew Turner bx lr 457*4d2ff233SAndrew Turner LMEMCPY_4_PAD 458*4d2ff233SAndrew Turner 459*4d2ff233SAndrew Turner/* 460*4d2ff233SAndrew Turner * 0110: dst is 8-bit aligned, src is 16-bit aligned 461*4d2ff233SAndrew Turner */ 462*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 463*4d2ff233SAndrew Turner ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 464*4d2ff233SAndrew Turner strb r2, [r0] 465*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...1 */ 466*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = .321 */ 467*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* r3 = ...3 */ 468*4d2ff233SAndrew Turner strh r2, [r0, #0x01] 469*4d2ff233SAndrew Turner strb r3, [r0, #0x03] 470*4d2ff233SAndrew Turner bx lr 471*4d2ff233SAndrew Turner LMEMCPY_4_PAD 472*4d2ff233SAndrew Turner 473*4d2ff233SAndrew Turner/* 474*4d2ff233SAndrew Turner * 0111: dst is 8-bit aligned, src is 8-bit aligned 475*4d2ff233SAndrew Turner */ 476*4d2ff233SAndrew Turner ldrb r2, [r1] 477*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 478*4d2ff233SAndrew Turner ldrb r1, [r1, #0x03] 
479*4d2ff233SAndrew Turner strb r2, [r0] 480*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 481*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 482*4d2ff233SAndrew Turner bx lr 483*4d2ff233SAndrew Turner LMEMCPY_4_PAD 484*4d2ff233SAndrew Turner 485*4d2ff233SAndrew Turner/* 486*4d2ff233SAndrew Turner * 1000: dst is 16-bit aligned, src is 32-bit aligned 487*4d2ff233SAndrew Turner */ 488*4d2ff233SAndrew Turner ldr r2, [r1] 489*4d2ff233SAndrew Turner strh r2, [r0] 490*4d2ff233SAndrew Turner mov r3, r2, lsr #16 491*4d2ff233SAndrew Turner strh r3, [r0, #0x02] 492*4d2ff233SAndrew Turner bx lr 493*4d2ff233SAndrew Turner LMEMCPY_4_PAD 494*4d2ff233SAndrew Turner 495*4d2ff233SAndrew Turner/* 496*4d2ff233SAndrew Turner * 1001: dst is 16-bit aligned, src is 8-bit aligned 497*4d2ff233SAndrew Turner */ 498*4d2ff233SAndrew Turner ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 499*4d2ff233SAndrew Turner ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 500*4d2ff233SAndrew Turner mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 501*4d2ff233SAndrew Turner strh r1, [r0] 502*4d2ff233SAndrew Turner mov r2, r2, lsr #24 /* r2 = ...2 */ 503*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 504*4d2ff233SAndrew Turner strh r2, [r0, #0x02] 505*4d2ff233SAndrew Turner bx lr 506*4d2ff233SAndrew Turner LMEMCPY_4_PAD 507*4d2ff233SAndrew Turner 508*4d2ff233SAndrew Turner/* 509*4d2ff233SAndrew Turner * 1010: dst is 16-bit aligned, src is 16-bit aligned 510*4d2ff233SAndrew Turner */ 511*4d2ff233SAndrew Turner ldrh r2, [r1] 512*4d2ff233SAndrew Turner ldrh r3, [r1, #0x02] 513*4d2ff233SAndrew Turner strh r2, [r0] 514*4d2ff233SAndrew Turner strh r3, [r0, #0x02] 515*4d2ff233SAndrew Turner bx lr 516*4d2ff233SAndrew Turner LMEMCPY_4_PAD 517*4d2ff233SAndrew Turner 518*4d2ff233SAndrew Turner/* 519*4d2ff233SAndrew Turner * 1011: dst is 16-bit aligned, src is 8-bit aligned 520*4d2ff233SAndrew Turner */ 521*4d2ff233SAndrew Turner ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 522*4d2ff233SAndrew 
Turner ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 523*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 524*4d2ff233SAndrew Turner strh r1, [r0, #0x02] 525*4d2ff233SAndrew Turner mov r3, r3, lsl #8 /* r3 = 321. */ 526*4d2ff233SAndrew Turner orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 527*4d2ff233SAndrew Turner strh r3, [r0] 528*4d2ff233SAndrew Turner bx lr 529*4d2ff233SAndrew Turner LMEMCPY_4_PAD 530*4d2ff233SAndrew Turner 531*4d2ff233SAndrew Turner/* 532*4d2ff233SAndrew Turner * 1100: dst is 8-bit aligned, src is 32-bit aligned 533*4d2ff233SAndrew Turner */ 534*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 535*4d2ff233SAndrew Turner strb r2, [r0] 536*4d2ff233SAndrew Turner mov r3, r2, lsr #8 537*4d2ff233SAndrew Turner mov r1, r2, lsr #24 538*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 539*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 540*4d2ff233SAndrew Turner bx lr 541*4d2ff233SAndrew Turner LMEMCPY_4_PAD 542*4d2ff233SAndrew Turner 543*4d2ff233SAndrew Turner/* 544*4d2ff233SAndrew Turner * 1101: dst is 8-bit aligned, src is 8-bit aligned 545*4d2ff233SAndrew Turner */ 546*4d2ff233SAndrew Turner ldrb r2, [r1] 547*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 548*4d2ff233SAndrew Turner ldrb r1, [r1, #0x03] 549*4d2ff233SAndrew Turner strb r2, [r0] 550*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 551*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 552*4d2ff233SAndrew Turner bx lr 553*4d2ff233SAndrew Turner LMEMCPY_4_PAD 554*4d2ff233SAndrew Turner 555*4d2ff233SAndrew Turner/* 556*4d2ff233SAndrew Turner * 1110: dst is 8-bit aligned, src is 16-bit aligned 557*4d2ff233SAndrew Turner */ 558*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 559*4d2ff233SAndrew Turner ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 560*4d2ff233SAndrew Turner strb r2, [r0] 561*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...1 */ 562*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = .321 */ 563*4d2ff233SAndrew Turner strh 
r2, [r0, #0x01] 564*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* r3 = ...3 */ 565*4d2ff233SAndrew Turner strb r3, [r0, #0x03] 566*4d2ff233SAndrew Turner bx lr 567*4d2ff233SAndrew Turner LMEMCPY_4_PAD 568*4d2ff233SAndrew Turner 569*4d2ff233SAndrew Turner/* 570*4d2ff233SAndrew Turner * 1111: dst is 8-bit aligned, src is 8-bit aligned 571*4d2ff233SAndrew Turner */ 572*4d2ff233SAndrew Turner ldrb r2, [r1] 573*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 574*4d2ff233SAndrew Turner ldrb r1, [r1, #0x03] 575*4d2ff233SAndrew Turner strb r2, [r0] 576*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 577*4d2ff233SAndrew Turner strb r1, [r0, #0x03] 578*4d2ff233SAndrew Turner bx lr 579*4d2ff233SAndrew Turner LMEMCPY_4_PAD 580*4d2ff233SAndrew Turner 581*4d2ff233SAndrew Turner 582*4d2ff233SAndrew Turner/****************************************************************************** 583*4d2ff233SAndrew Turner * Special case for 6 byte copies 584*4d2ff233SAndrew Turner */ 585*4d2ff233SAndrew Turner#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 586*4d2ff233SAndrew Turner#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 587*4d2ff233SAndrew Turner LMEMCPY_6_PAD 588*4d2ff233SAndrew Turner.Lmemcpy_6: 589*4d2ff233SAndrew Turner and r2, r1, #0x03 590*4d2ff233SAndrew Turner orr r2, r2, r0, lsl #2 591*4d2ff233SAndrew Turner ands r2, r2, #0x0f 592*4d2ff233SAndrew Turner sub r3, pc, #0x14 593*4d2ff233SAndrew Turner addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 594*4d2ff233SAndrew Turner 595*4d2ff233SAndrew Turner/* 596*4d2ff233SAndrew Turner * 0000: dst is 32-bit aligned, src is 32-bit aligned 597*4d2ff233SAndrew Turner */ 598*4d2ff233SAndrew Turner ldr r2, [r1] 599*4d2ff233SAndrew Turner ldrh r3, [r1, #0x04] 600*4d2ff233SAndrew Turner str r2, [r0] 601*4d2ff233SAndrew Turner strh r3, [r0, #0x04] 602*4d2ff233SAndrew Turner bx lr 603*4d2ff233SAndrew Turner LMEMCPY_6_PAD 604*4d2ff233SAndrew Turner 605*4d2ff233SAndrew Turner/* 606*4d2ff233SAndrew Turner * 0001: dst is 32-bit aligned, src is 8-bit aligned 607*4d2ff233SAndrew 
Turner */ 608*4d2ff233SAndrew Turner ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 609*4d2ff233SAndrew Turner ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 610*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = .210 */ 611*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 612*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 613*4d2ff233SAndrew Turner str r2, [r0] 614*4d2ff233SAndrew Turner strh r3, [r0, #0x04] 615*4d2ff233SAndrew Turner bx lr 616*4d2ff233SAndrew Turner LMEMCPY_6_PAD 617*4d2ff233SAndrew Turner 618*4d2ff233SAndrew Turner/* 619*4d2ff233SAndrew Turner * 0010: dst is 32-bit aligned, src is 16-bit aligned 620*4d2ff233SAndrew Turner */ 621*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 622*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 623*4d2ff233SAndrew Turner mov r1, r3, lsr #16 /* r1 = ..54 */ 624*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 625*4d2ff233SAndrew Turner str r2, [r0] 626*4d2ff233SAndrew Turner strh r1, [r0, #0x04] 627*4d2ff233SAndrew Turner bx lr 628*4d2ff233SAndrew Turner LMEMCPY_6_PAD 629*4d2ff233SAndrew Turner 630*4d2ff233SAndrew Turner/* 631*4d2ff233SAndrew Turner * 0011: dst is 32-bit aligned, src is 8-bit aligned 632*4d2ff233SAndrew Turner */ 633*4d2ff233SAndrew Turner ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 634*4d2ff233SAndrew Turner ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 635*4d2ff233SAndrew Turner ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 636*4d2ff233SAndrew Turner mov r2, r2, lsr #24 /* r2 = ...0 */ 637*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 638*4d2ff233SAndrew Turner mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 639*4d2ff233SAndrew Turner orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 640*4d2ff233SAndrew Turner str r2, [r0] 641*4d2ff233SAndrew Turner strh r1, [r0, #0x04] 642*4d2ff233SAndrew Turner bx lr 643*4d2ff233SAndrew Turner LMEMCPY_6_PAD 644*4d2ff233SAndrew Turner 645*4d2ff233SAndrew Turner/* 646*4d2ff233SAndrew Turner * 0100: dst is 8-bit aligned, src is 32-bit aligned 647*4d2ff233SAndrew Turner */ 648*4d2ff233SAndrew Turner ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 649*4d2ff233SAndrew Turner ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 650*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 651*4d2ff233SAndrew Turner strh r1, [r0, #0x01] 652*4d2ff233SAndrew Turner strb r3, [r0] 653*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...3 */ 654*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #8 /* r3 = .543 */ 655*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...5 */ 656*4d2ff233SAndrew Turner strh r3, [r0, #0x03] 657*4d2ff233SAndrew Turner strb r2, [r0, #0x05] 658*4d2ff233SAndrew Turner bx lr 659*4d2ff233SAndrew Turner LMEMCPY_6_PAD 660*4d2ff233SAndrew Turner 661*4d2ff233SAndrew Turner/* 662*4d2ff233SAndrew Turner * 0101: dst is 8-bit aligned, src is 8-bit aligned 663*4d2ff233SAndrew Turner */ 664*4d2ff233SAndrew Turner ldrb r2, [r1] 665*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 666*4d2ff233SAndrew Turner ldrh ip, [r1, #0x03] 667*4d2ff233SAndrew Turner ldrb r1, [r1, #0x05] 668*4d2ff233SAndrew Turner strb r2, [r0] 669*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 670*4d2ff233SAndrew Turner strh ip, [r0, #0x03] 671*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 672*4d2ff233SAndrew Turner bx lr 673*4d2ff233SAndrew Turner LMEMCPY_6_PAD 674*4d2ff233SAndrew Turner 675*4d2ff233SAndrew Turner/* 676*4d2ff233SAndrew Turner * 0110: dst is 8-bit aligned, src is 16-bit aligned 677*4d2ff233SAndrew Turner */ 678*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 679*4d2ff233SAndrew Turner ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 
*/ 680*4d2ff233SAndrew Turner strb r2, [r0] 681*4d2ff233SAndrew Turner mov r3, r1, lsr #24 682*4d2ff233SAndrew Turner strb r3, [r0, #0x05] 683*4d2ff233SAndrew Turner mov r3, r1, lsr #8 /* r3 = .543 */ 684*4d2ff233SAndrew Turner strh r3, [r0, #0x03] 685*4d2ff233SAndrew Turner mov r3, r2, lsr #8 /* r3 = ...1 */ 686*4d2ff233SAndrew Turner orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 687*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 688*4d2ff233SAndrew Turner bx lr 689*4d2ff233SAndrew Turner LMEMCPY_6_PAD 690*4d2ff233SAndrew Turner 691*4d2ff233SAndrew Turner/* 692*4d2ff233SAndrew Turner * 0111: dst is 8-bit aligned, src is 8-bit aligned 693*4d2ff233SAndrew Turner */ 694*4d2ff233SAndrew Turner ldrb r2, [r1] 695*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 696*4d2ff233SAndrew Turner ldrh ip, [r1, #0x03] 697*4d2ff233SAndrew Turner ldrb r1, [r1, #0x05] 698*4d2ff233SAndrew Turner strb r2, [r0] 699*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 700*4d2ff233SAndrew Turner strh ip, [r0, #0x03] 701*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 702*4d2ff233SAndrew Turner bx lr 703*4d2ff233SAndrew Turner LMEMCPY_6_PAD 704*4d2ff233SAndrew Turner 705*4d2ff233SAndrew Turner/* 706*4d2ff233SAndrew Turner * 1000: dst is 16-bit aligned, src is 32-bit aligned 707*4d2ff233SAndrew Turner */ 708*4d2ff233SAndrew Turner ldrh r2, [r1, #0x04] /* r2 = ..54 */ 709*4d2ff233SAndrew Turner ldr r3, [r1] /* r3 = 3210 */ 710*4d2ff233SAndrew Turner mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 711*4d2ff233SAndrew Turner orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 712*4d2ff233SAndrew Turner strh r3, [r0] 713*4d2ff233SAndrew Turner str r2, [r0, #0x02] 714*4d2ff233SAndrew Turner bx lr 715*4d2ff233SAndrew Turner LMEMCPY_6_PAD 716*4d2ff233SAndrew Turner 717*4d2ff233SAndrew Turner/* 718*4d2ff233SAndrew Turner * 1001: dst is 16-bit aligned, src is 8-bit aligned 719*4d2ff233SAndrew Turner */ 720*4d2ff233SAndrew Turner ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 721*4d2ff233SAndrew Turner ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 722*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 723*4d2ff233SAndrew Turner mov r2, r2, lsl #8 /* r2 = 543. */ 724*4d2ff233SAndrew Turner orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 725*4d2ff233SAndrew Turner strh r1, [r0] 726*4d2ff233SAndrew Turner str r2, [r0, #0x02] 727*4d2ff233SAndrew Turner bx lr 728*4d2ff233SAndrew Turner LMEMCPY_6_PAD 729*4d2ff233SAndrew Turner 730*4d2ff233SAndrew Turner/* 731*4d2ff233SAndrew Turner * 1010: dst is 16-bit aligned, src is 16-bit aligned 732*4d2ff233SAndrew Turner */ 733*4d2ff233SAndrew Turner ldrh r2, [r1] 734*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] 735*4d2ff233SAndrew Turner strh r2, [r0] 736*4d2ff233SAndrew Turner str r3, [r0, #0x02] 737*4d2ff233SAndrew Turner bx lr 738*4d2ff233SAndrew Turner LMEMCPY_6_PAD 739*4d2ff233SAndrew Turner 740*4d2ff233SAndrew Turner/* 741*4d2ff233SAndrew Turner * 1011: dst is 16-bit aligned, src is 8-bit aligned 742*4d2ff233SAndrew Turner */ 743*4d2ff233SAndrew Turner ldrb r3, [r1] /* r3 = ...0 */ 744*4d2ff233SAndrew Turner ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 745*4d2ff233SAndrew Turner ldrb r1, [r1, #0x05] /* r1 = ...5 */ 746*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 747*4d2ff233SAndrew Turner mov r1, r1, lsl #24 /* r1 = 5... 
*/ 748*4d2ff233SAndrew Turner orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 749*4d2ff233SAndrew Turner strh r3, [r0] 750*4d2ff233SAndrew Turner str r1, [r0, #0x02] 751*4d2ff233SAndrew Turner bx lr 752*4d2ff233SAndrew Turner LMEMCPY_6_PAD 753*4d2ff233SAndrew Turner 754*4d2ff233SAndrew Turner/* 755*4d2ff233SAndrew Turner * 1100: dst is 8-bit aligned, src is 32-bit aligned 756*4d2ff233SAndrew Turner */ 757*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 758*4d2ff233SAndrew Turner ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 759*4d2ff233SAndrew Turner strb r2, [r0] 760*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = .321 */ 761*4d2ff233SAndrew Turner orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 762*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = ...5 */ 763*4d2ff233SAndrew Turner str r2, [r0, #0x01] 764*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 765*4d2ff233SAndrew Turner bx lr 766*4d2ff233SAndrew Turner LMEMCPY_6_PAD 767*4d2ff233SAndrew Turner 768*4d2ff233SAndrew Turner/* 769*4d2ff233SAndrew Turner * 1101: dst is 8-bit aligned, src is 8-bit aligned 770*4d2ff233SAndrew Turner */ 771*4d2ff233SAndrew Turner ldrb r2, [r1] 772*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 773*4d2ff233SAndrew Turner ldrh ip, [r1, #0x03] 774*4d2ff233SAndrew Turner ldrb r1, [r1, #0x05] 775*4d2ff233SAndrew Turner strb r2, [r0] 776*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 777*4d2ff233SAndrew Turner strh ip, [r0, #0x03] 778*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 779*4d2ff233SAndrew Turner bx lr 780*4d2ff233SAndrew Turner LMEMCPY_6_PAD 781*4d2ff233SAndrew Turner 782*4d2ff233SAndrew Turner/* 783*4d2ff233SAndrew Turner * 1110: dst is 8-bit aligned, src is 16-bit aligned 784*4d2ff233SAndrew Turner */ 785*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 786*4d2ff233SAndrew Turner ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 787*4d2ff233SAndrew Turner strb r2, [r0] 788*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...1 */ 789*4d2ff233SAndrew 
Turner orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 790*4d2ff233SAndrew Turner mov r1, r1, lsr #24 /* r1 = ...5 */ 791*4d2ff233SAndrew Turner str r2, [r0, #0x01] 792*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 793*4d2ff233SAndrew Turner bx lr 794*4d2ff233SAndrew Turner LMEMCPY_6_PAD 795*4d2ff233SAndrew Turner 796*4d2ff233SAndrew Turner/* 797*4d2ff233SAndrew Turner * 1111: dst is 8-bit aligned, src is 8-bit aligned 798*4d2ff233SAndrew Turner */ 799*4d2ff233SAndrew Turner ldrb r2, [r1] 800*4d2ff233SAndrew Turner ldr r3, [r1, #0x01] 801*4d2ff233SAndrew Turner ldrb r1, [r1, #0x05] 802*4d2ff233SAndrew Turner strb r2, [r0] 803*4d2ff233SAndrew Turner str r3, [r0, #0x01] 804*4d2ff233SAndrew Turner strb r1, [r0, #0x05] 805*4d2ff233SAndrew Turner bx lr 806*4d2ff233SAndrew Turner LMEMCPY_6_PAD 807*4d2ff233SAndrew Turner 808*4d2ff233SAndrew Turner 809*4d2ff233SAndrew Turner/****************************************************************************** 810*4d2ff233SAndrew Turner * Special case for 8 byte copies 811*4d2ff233SAndrew Turner * * On entry r0 = dst and r1 = src. The code below is a table of sixteen * 64-byte slots, one per combination of (dst & 3, src & 3); each slot * copies the 8 bytes with accesses suited to that alignment pair and * returns with bx lr. Slot size and the position of the dispatch * instructions are load-bearing: do not insert instructions without * re-checking the pc offset and the slot size. */ 812*4d2ff233SAndrew Turner#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 813*4d2ff233SAndrew Turner#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 814*4d2ff233SAndrew Turner LMEMCPY_8_PAD 815*4d2ff233SAndrew Turner.Lmemcpy_8: 816*4d2ff233SAndrew Turner and r2, r1, #0x03 /* r2 = src & 3 */ 817*4d2ff233SAndrew Turner orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 818*4d2ff233SAndrew Turner ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z set when both are word aligned */ 819*4d2ff233SAndrew Turner sub r3, pc, #0x14 /* r3 = .Lmemcpy_8: this is the 4th instruction (offset 0x0c) and pc reads as this instruction + 8, assuming ARM-state 4-byte encodings */ 820*4d2ff233SAndrew Turner addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* index != 0: jump to slot .Lmemcpy_8 + index * 64; index == 0 falls through into case 0000 */ 821*4d2ff233SAndrew Turner 822*4d2ff233SAndrew Turner/* 823*4d2ff233SAndrew Turner * 0000: dst is 32-bit aligned, src is 32-bit aligned 824*4d2ff233SAndrew Turner */ 825*4d2ff233SAndrew Turner ldr r2, [r1] 826*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] 827*4d2ff233SAndrew Turner str r2, [r0] 828*4d2ff233SAndrew Turner str r3, [r0, #0x04] 829*4d2ff233SAndrew Turner bx lr 830*4d2ff233SAndrew Turner LMEMCPY_8_PAD 831*4d2ff233SAndrew Turner 832*4d2ff233SAndrew Turner/* 833*4d2ff233SAndrew
Turner * 0001: dst is 32-bit aligned, src is 8-bit aligned 834*4d2ff233SAndrew Turner */ 835*4d2ff233SAndrew Turner ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 836*4d2ff233SAndrew Turner ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 837*4d2ff233SAndrew Turner ldrb r1, [r1, #0x07] /* r1 = ...7 */ 838*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* r3 = .210 */ 839*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 840*4d2ff233SAndrew Turner mov r1, r1, lsl #24 /* r1 = 7... */ 841*4d2ff233SAndrew Turner orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 842*4d2ff233SAndrew Turner str r3, [r0] 843*4d2ff233SAndrew Turner str r2, [r0, #0x04] 844*4d2ff233SAndrew Turner bx lr 845*4d2ff233SAndrew Turner LMEMCPY_8_PAD 846*4d2ff233SAndrew Turner 847*4d2ff233SAndrew Turner/* 848*4d2ff233SAndrew Turner * 0010: dst is 32-bit aligned, src is 16-bit aligned 849*4d2ff233SAndrew Turner */ 850*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 851*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 852*4d2ff233SAndrew Turner ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 853*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 854*4d2ff233SAndrew Turner mov r3, r3, lsr #16 /* r3 = ..54 */ 855*4d2ff233SAndrew Turner orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 856*4d2ff233SAndrew Turner str r2, [r0] 857*4d2ff233SAndrew Turner str r3, [r0, #0x04] 858*4d2ff233SAndrew Turner bx lr 859*4d2ff233SAndrew Turner LMEMCPY_8_PAD 860*4d2ff233SAndrew Turner 861*4d2ff233SAndrew Turner/* 862*4d2ff233SAndrew Turner * 0011: dst is 32-bit aligned, src is 8-bit aligned 863*4d2ff233SAndrew Turner */ 864*4d2ff233SAndrew Turner ldrb r3, [r1] /* r3 = ...0 */ 865*4d2ff233SAndrew Turner ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 866*4d2ff233SAndrew Turner ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 867*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 868*4d2ff233SAndrew Turner mov r2, r2, lsr #24 /* r2 = ...4 */ 
869*4d2ff233SAndrew Turner orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 870*4d2ff233SAndrew Turner str r3, [r0] 871*4d2ff233SAndrew Turner str r2, [r0, #0x04] 872*4d2ff233SAndrew Turner bx lr 873*4d2ff233SAndrew Turner LMEMCPY_8_PAD 874*4d2ff233SAndrew Turner 875*4d2ff233SAndrew Turner/* 876*4d2ff233SAndrew Turner * 0100: dst is 8-bit aligned, src is 32-bit aligned 877*4d2ff233SAndrew Turner */ 878*4d2ff233SAndrew Turner ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 879*4d2ff233SAndrew Turner ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 880*4d2ff233SAndrew Turner strb r3, [r0] 881*4d2ff233SAndrew Turner mov r1, r2, lsr #24 /* r1 = ...7 */ 882*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 883*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* r1 = .321 */ 884*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...3 */ 885*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 886*4d2ff233SAndrew Turner strh r1, [r0, #0x01] 887*4d2ff233SAndrew Turner str r3, [r0, #0x03] 888*4d2ff233SAndrew Turner bx lr 889*4d2ff233SAndrew Turner LMEMCPY_8_PAD 890*4d2ff233SAndrew Turner 891*4d2ff233SAndrew Turner/* 892*4d2ff233SAndrew Turner * 0101: dst is 8-bit aligned, src is 8-bit aligned 893*4d2ff233SAndrew Turner */ 894*4d2ff233SAndrew Turner ldrb r2, [r1] 895*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 896*4d2ff233SAndrew Turner ldr ip, [r1, #0x03] 897*4d2ff233SAndrew Turner ldrb r1, [r1, #0x07] 898*4d2ff233SAndrew Turner strb r2, [r0] 899*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 900*4d2ff233SAndrew Turner str ip, [r0, #0x03] 901*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 902*4d2ff233SAndrew Turner bx lr 903*4d2ff233SAndrew Turner LMEMCPY_8_PAD 904*4d2ff233SAndrew Turner 905*4d2ff233SAndrew Turner/* 906*4d2ff233SAndrew Turner * 0110: dst is 8-bit aligned, src is 16-bit aligned 907*4d2ff233SAndrew Turner */ 908*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 909*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 910*4d2ff233SAndrew 
Turner ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 911*4d2ff233SAndrew Turner strb r2, [r0] /* 0 */ 912*4d2ff233SAndrew Turner mov ip, r1, lsr #8 /* ip = ...7 */ 913*4d2ff233SAndrew Turner strb ip, [r0, #0x07] /* 7 */ 914*4d2ff233SAndrew Turner mov ip, r2, lsr #8 /* ip = ...1 */ 915*4d2ff233SAndrew Turner orr ip, ip, r3, lsl #8 /* ip = 4321 */ 916*4d2ff233SAndrew Turner mov r3, r3, lsr #8 /* r3 = .543 */ 917*4d2ff233SAndrew Turner orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 918*4d2ff233SAndrew Turner strh ip, [r0, #0x01] 919*4d2ff233SAndrew Turner str r3, [r0, #0x03] 920*4d2ff233SAndrew Turner bx lr 921*4d2ff233SAndrew Turner LMEMCPY_8_PAD 922*4d2ff233SAndrew Turner 923*4d2ff233SAndrew Turner/* 924*4d2ff233SAndrew Turner * 0111: dst is 8-bit aligned, src is 8-bit aligned 925*4d2ff233SAndrew Turner */ 926*4d2ff233SAndrew Turner ldrb r3, [r1] /* r3 = ...0 */ 927*4d2ff233SAndrew Turner ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 928*4d2ff233SAndrew Turner ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 929*4d2ff233SAndrew Turner ldrb r1, [r1, #0x07] /* r1 = ...7 */ 930*4d2ff233SAndrew Turner strb r3, [r0] 931*4d2ff233SAndrew Turner mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 932*4d2ff233SAndrew Turner strh ip, [r0, #0x01] 933*4d2ff233SAndrew Turner orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 934*4d2ff233SAndrew Turner str r2, [r0, #0x03] 935*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 936*4d2ff233SAndrew Turner bx lr 937*4d2ff233SAndrew Turner LMEMCPY_8_PAD 938*4d2ff233SAndrew Turner 939*4d2ff233SAndrew Turner/* 940*4d2ff233SAndrew Turner * 1000: dst is 16-bit aligned, src is 32-bit aligned 941*4d2ff233SAndrew Turner */ 942*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 943*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 944*4d2ff233SAndrew Turner mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 945*4d2ff233SAndrew Turner strh r2, [r0] 946*4d2ff233SAndrew Turner orr r2, r1, r3, lsl #16 /* r2 = 
5432 */ 947*4d2ff233SAndrew Turner mov r3, r3, lsr #16 /* r3 = ..76 */ 948*4d2ff233SAndrew Turner str r2, [r0, #0x02] 949*4d2ff233SAndrew Turner strh r3, [r0, #0x06] 950*4d2ff233SAndrew Turner bx lr 951*4d2ff233SAndrew Turner LMEMCPY_8_PAD 952*4d2ff233SAndrew Turner 953*4d2ff233SAndrew Turner/* 954*4d2ff233SAndrew Turner * 1001: dst is 16-bit aligned, src is 8-bit aligned 955*4d2ff233SAndrew Turner */ 956*4d2ff233SAndrew Turner ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 957*4d2ff233SAndrew Turner ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 958*4d2ff233SAndrew Turner ldrb ip, [r1, #0x07] /* ip = ...7 */ 959*4d2ff233SAndrew Turner mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 960*4d2ff233SAndrew Turner strh r1, [r0] 961*4d2ff233SAndrew Turner mov r1, r2, lsr #24 /* r1 = ...2 */ 962*4d2ff233SAndrew Turner orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 963*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...6 */ 964*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 965*4d2ff233SAndrew Turner str r1, [r0, #0x02] 966*4d2ff233SAndrew Turner strh r3, [r0, #0x06] 967*4d2ff233SAndrew Turner bx lr 968*4d2ff233SAndrew Turner LMEMCPY_8_PAD 969*4d2ff233SAndrew Turner 970*4d2ff233SAndrew Turner/* 971*4d2ff233SAndrew Turner * 1010: dst is 16-bit aligned, src is 16-bit aligned 972*4d2ff233SAndrew Turner */ 973*4d2ff233SAndrew Turner ldrh r2, [r1] 974*4d2ff233SAndrew Turner ldr ip, [r1, #0x02] 975*4d2ff233SAndrew Turner ldrh r3, [r1, #0x06] 976*4d2ff233SAndrew Turner strh r2, [r0] 977*4d2ff233SAndrew Turner str ip, [r0, #0x02] 978*4d2ff233SAndrew Turner strh r3, [r0, #0x06] 979*4d2ff233SAndrew Turner bx lr 980*4d2ff233SAndrew Turner LMEMCPY_8_PAD 981*4d2ff233SAndrew Turner 982*4d2ff233SAndrew Turner/* 983*4d2ff233SAndrew Turner * 1011: dst is 16-bit aligned, src is 8-bit aligned 984*4d2ff233SAndrew Turner */ 985*4d2ff233SAndrew Turner ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 986*4d2ff233SAndrew Turner ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 
4321 */ 987*4d2ff233SAndrew Turner ldrb ip, [r1] /* ip = ...0 */ 988*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 989*4d2ff233SAndrew Turner strh r1, [r0, #0x06] 990*4d2ff233SAndrew Turner mov r3, r3, lsl #24 /* r3 = 5... */ 991*4d2ff233SAndrew Turner orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 992*4d2ff233SAndrew Turner orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 993*4d2ff233SAndrew Turner str r3, [r0, #0x02] 994*4d2ff233SAndrew Turner strh r2, [r0] 995*4d2ff233SAndrew Turner bx lr 996*4d2ff233SAndrew Turner LMEMCPY_8_PAD 997*4d2ff233SAndrew Turner 998*4d2ff233SAndrew Turner/* 999*4d2ff233SAndrew Turner * 1100: dst is 8-bit aligned, src is 32-bit aligned 1000*4d2ff233SAndrew Turner */ 1001*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1002*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1003*4d2ff233SAndrew Turner mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1004*4d2ff233SAndrew Turner strh r1, [r0, #0x05] 1005*4d2ff233SAndrew Turner strb r2, [r0] 1006*4d2ff233SAndrew Turner mov r1, r3, lsr #24 /* r1 = ...7 */ 1007*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 1008*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = .321 */ 1009*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1010*4d2ff233SAndrew Turner str r2, [r0, #0x01] 1011*4d2ff233SAndrew Turner bx lr 1012*4d2ff233SAndrew Turner LMEMCPY_8_PAD 1013*4d2ff233SAndrew Turner 1014*4d2ff233SAndrew Turner/* 1015*4d2ff233SAndrew Turner * 1101: dst is 8-bit aligned, src is 8-bit aligned 1016*4d2ff233SAndrew Turner */ 1017*4d2ff233SAndrew Turner ldrb r3, [r1] /* r3 = ...0 */ 1018*4d2ff233SAndrew Turner ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1019*4d2ff233SAndrew Turner ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1020*4d2ff233SAndrew Turner ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1021*4d2ff233SAndrew Turner strb r3, [r0] 1022*4d2ff233SAndrew Turner mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1023*4d2ff233SAndrew 
Turner strh r3, [r0, #0x05] 1024*4d2ff233SAndrew Turner orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1025*4d2ff233SAndrew Turner str r2, [r0, #0x01] 1026*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 1027*4d2ff233SAndrew Turner bx lr 1028*4d2ff233SAndrew Turner LMEMCPY_8_PAD 1029*4d2ff233SAndrew Turner 1030*4d2ff233SAndrew Turner/* 1031*4d2ff233SAndrew Turner * 1110: dst is 8-bit aligned, src is 16-bit aligned 1032*4d2ff233SAndrew Turner */ 1033*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1034*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1035*4d2ff233SAndrew Turner ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1036*4d2ff233SAndrew Turner strb r2, [r0] 1037*4d2ff233SAndrew Turner mov ip, r2, lsr #8 /* ip = ...1 */ 1038*4d2ff233SAndrew Turner orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1039*4d2ff233SAndrew Turner mov r2, r1, lsr #8 /* r2 = ...7 */ 1040*4d2ff233SAndrew Turner strb r2, [r0, #0x07] 1041*4d2ff233SAndrew Turner mov r1, r1, lsl #8 /* r1 = .76. 
*/ 1042*4d2ff233SAndrew Turner orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1043*4d2ff233SAndrew Turner str ip, [r0, #0x01] 1044*4d2ff233SAndrew Turner strh r1, [r0, #0x05] 1045*4d2ff233SAndrew Turner bx lr 1046*4d2ff233SAndrew Turner LMEMCPY_8_PAD 1047*4d2ff233SAndrew Turner 1048*4d2ff233SAndrew Turner/* 1049*4d2ff233SAndrew Turner * 1111: dst is 8-bit aligned, src is 8-bit aligned 1050*4d2ff233SAndrew Turner */ 1051*4d2ff233SAndrew Turner ldrb r2, [r1] 1052*4d2ff233SAndrew Turner ldr ip, [r1, #0x01] 1053*4d2ff233SAndrew Turner ldrh r3, [r1, #0x05] 1054*4d2ff233SAndrew Turner ldrb r1, [r1, #0x07] 1055*4d2ff233SAndrew Turner strb r2, [r0] 1056*4d2ff233SAndrew Turner str ip, [r0, #0x01] 1057*4d2ff233SAndrew Turner strh r3, [r0, #0x05] 1058*4d2ff233SAndrew Turner strb r1, [r0, #0x07] 1059*4d2ff233SAndrew Turner bx lr 1060*4d2ff233SAndrew Turner LMEMCPY_8_PAD 1061*4d2ff233SAndrew Turner 1062*4d2ff233SAndrew Turner/****************************************************************************** 1063*4d2ff233SAndrew Turner * Special case for 12 byte copies 1064*4d2ff233SAndrew Turner * * On entry r0 = dst and r1 = src. The code below is a table of * 128-byte slots, one per combination of (dst & 3, src & 3); each slot * copies the 12 bytes with accesses suited to that alignment pair and * returns with bx lr. Slot size and the position of the dispatch * instructions are load-bearing: do not insert instructions without * re-checking the pc offset and the slot size. */ 1065*4d2ff233SAndrew Turner#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1066*4d2ff233SAndrew Turner#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1067*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1068*4d2ff233SAndrew Turner.Lmemcpy_c: 1069*4d2ff233SAndrew Turner and r2, r1, #0x03 /* r2 = src & 3 */ 1070*4d2ff233SAndrew Turner orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 1071*4d2ff233SAndrew Turner ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z set when both are word aligned */ 1072*4d2ff233SAndrew Turner sub r3, pc, #0x14 /* r3 = .Lmemcpy_c: this is the 4th instruction (offset 0x0c) and pc reads as this instruction + 8, assuming ARM-state 4-byte encodings */ 1073*4d2ff233SAndrew Turner addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* index != 0: jump to slot .Lmemcpy_c + index * 128; index == 0 falls through into case 0000 */ 1074*4d2ff233SAndrew Turner 1075*4d2ff233SAndrew Turner/* 1076*4d2ff233SAndrew Turner * 0000: dst is 32-bit aligned, src is 32-bit aligned 1077*4d2ff233SAndrew Turner */ 1078*4d2ff233SAndrew Turner ldr r2, [r1] 1079*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] 1080*4d2ff233SAndrew Turner ldr r1, [r1, #0x08] 1081*4d2ff233SAndrew Turner str r2, [r0] 1082*4d2ff233SAndrew Turner str r3, [r0, #0x04] 1083*4d2ff233SAndrew Turner str r1,
[r0, #0x08] 1084*4d2ff233SAndrew Turner bx lr 1085*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1086*4d2ff233SAndrew Turner 1087*4d2ff233SAndrew Turner/* 1088*4d2ff233SAndrew Turner * 0001: dst is 32-bit aligned, src is 8-bit aligned 1089*4d2ff233SAndrew Turner */ 1090*4d2ff233SAndrew Turner ldrb r2, [r1, #0xb] /* r2 = ...B */ 1091*4d2ff233SAndrew Turner ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1092*4d2ff233SAndrew Turner ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1093*4d2ff233SAndrew Turner ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1094*4d2ff233SAndrew Turner mov r2, r2, lsl #24 /* r2 = B... */ 1095*4d2ff233SAndrew Turner orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1096*4d2ff233SAndrew Turner str r2, [r0, #0x08] 1097*4d2ff233SAndrew Turner mov r2, ip, lsl #24 /* r2 = 7... */ 1098*4d2ff233SAndrew Turner orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1099*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = .210 */ 1100*4d2ff233SAndrew Turner orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1101*4d2ff233SAndrew Turner str r2, [r0, #0x04] 1102*4d2ff233SAndrew Turner str r1, [r0] 1103*4d2ff233SAndrew Turner bx lr 1104*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1105*4d2ff233SAndrew Turner 1106*4d2ff233SAndrew Turner/* 1107*4d2ff233SAndrew Turner * 0010: dst is 32-bit aligned, src is 16-bit aligned 1108*4d2ff233SAndrew Turner */ 1109*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1110*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1111*4d2ff233SAndrew Turner ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1112*4d2ff233SAndrew Turner ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1113*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1114*4d2ff233SAndrew Turner str r2, [r0] 1115*4d2ff233SAndrew Turner mov r3, r3, lsr #16 /* r3 = ..54 */ 1116*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1117*4d2ff233SAndrew Turner mov r1, r1, lsl #16 /* r1 = BA.. 
*/ 1118*4d2ff233SAndrew Turner orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1119*4d2ff233SAndrew Turner str r3, [r0, #0x04] 1120*4d2ff233SAndrew Turner str r1, [r0, #0x08] 1121*4d2ff233SAndrew Turner bx lr 1122*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1123*4d2ff233SAndrew Turner 1124*4d2ff233SAndrew Turner/* 1125*4d2ff233SAndrew Turner * 0011: dst is 32-bit aligned, src is 8-bit aligned 1126*4d2ff233SAndrew Turner */ 1127*4d2ff233SAndrew Turner ldrb r2, [r1] /* r2 = ...0 */ 1128*4d2ff233SAndrew Turner ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1129*4d2ff233SAndrew Turner ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1130*4d2ff233SAndrew Turner ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1131*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1132*4d2ff233SAndrew Turner str r2, [r0] 1133*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...4 */ 1134*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1135*4d2ff233SAndrew Turner mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1136*4d2ff233SAndrew Turner orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1137*4d2ff233SAndrew Turner str r3, [r0, #0x04] 1138*4d2ff233SAndrew Turner str r1, [r0, #0x08] 1139*4d2ff233SAndrew Turner bx lr 1140*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1141*4d2ff233SAndrew Turner 1142*4d2ff233SAndrew Turner/* 1143*4d2ff233SAndrew Turner * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1144*4d2ff233SAndrew Turner */ 1145*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1146*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1147*4d2ff233SAndrew Turner ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1148*4d2ff233SAndrew Turner mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1149*4d2ff233SAndrew Turner strh r1, [r0, #0x01] /* dst & 3 == 1, so dst + 1 is halfword aligned; stores bytes 1-2 */ 1150*4d2ff233SAndrew Turner strb r2, [r0] 1151*4d2ff233SAndrew Turner mov r1, r2, lsr #24 /* r1 = ...3 */ 1152*4d2ff233SAndrew Turner orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 1153*4d2ff233SAndrew Turner mov r1, r3, lsr #24 /* r1 = ...7 */ 1154*4d2ff233SAndrew Turner orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1155*4d2ff233SAndrew Turner mov ip, ip, lsr #24 /* ip = ...B */ 1156*4d2ff233SAndrew Turner str r2, [r0, #0x03] /* dst + 3 is word aligned; stores bytes 3-6 */ 1157*4d2ff233SAndrew Turner str r1, [r0, #0x07] /* stores bytes 7-A */ 1158*4d2ff233SAndrew Turner strb ip, [r0, #0x0b] 1159*4d2ff233SAndrew Turner bx lr 1160*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1161*4d2ff233SAndrew Turner 1162*4d2ff233SAndrew Turner/* 1163*4d2ff233SAndrew Turner * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1164*4d2ff233SAndrew Turner */ 1165*4d2ff233SAndrew Turner ldrb r2, [r1] 1166*4d2ff233SAndrew Turner ldrh r3, [r1, #0x01] 1167*4d2ff233SAndrew Turner ldr ip, [r1, #0x03] 1168*4d2ff233SAndrew Turner strb r2, [r0] 1169*4d2ff233SAndrew Turner ldr r2, [r1, #0x07] 1170*4d2ff233SAndrew Turner ldrb r1, [r1, #0x0b] 1171*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 1172*4d2ff233SAndrew Turner str ip, [r0, #0x03] 1173*4d2ff233SAndrew Turner str r2, [r0, #0x07]
1174*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1175*4d2ff233SAndrew Turner bx lr 1176*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1177*4d2ff233SAndrew Turner 1178*4d2ff233SAndrew Turner/* 1179*4d2ff233SAndrew Turner * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1180*4d2ff233SAndrew Turner */ 1181*4d2ff233SAndrew Turner ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1182*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1183*4d2ff233SAndrew Turner ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1184*4d2ff233SAndrew Turner ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1185*4d2ff233SAndrew Turner strb r2, [r0] 1186*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...1 */ 1187*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1188*4d2ff233SAndrew Turner strh r2, [r0, #0x01] 1189*4d2ff233SAndrew Turner mov r2, r3, lsr #8 /* r2 = .543 */ 1190*4d2ff233SAndrew Turner orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1191*4d2ff233SAndrew Turner mov r2, ip, lsr #8 /* r2 = .987 */ 1192*4d2ff233SAndrew Turner orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1193*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = ...B */ 1194*4d2ff233SAndrew Turner str r3, [r0, #0x03] 1195*4d2ff233SAndrew Turner str r2, [r0, #0x07] 1196*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1197*4d2ff233SAndrew Turner bx lr 1198*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1199*4d2ff233SAndrew Turner 1200*4d2ff233SAndrew Turner/* 1201*4d2ff233SAndrew Turner * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 1202*4d2ff233SAndrew Turner */ 1203*4d2ff233SAndrew Turner ldrb r2, [r1] 1204*4d2ff233SAndrew Turner ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1205*4d2ff233SAndrew Turner ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1206*4d2ff233SAndrew Turner ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1207*4d2ff233SAndrew Turner strb r2, [r0] 1208*4d2ff233SAndrew Turner strh r3, [r0, #0x01] 1209*4d2ff233SAndrew Turner mov r3, r3, lsr #16 /* r3 = 
..43 */ 1210*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1211*4d2ff233SAndrew Turner mov ip, ip, lsr #16 /* ip = ..87 */ 1212*4d2ff233SAndrew Turner orr ip, ip, r1, lsl #16 /* ip = A987 */ 1213*4d2ff233SAndrew Turner mov r1, r1, lsr #16 /* r1 = ..xB */ 1214*4d2ff233SAndrew Turner str r3, [r0, #0x03] 1215*4d2ff233SAndrew Turner str ip, [r0, #0x07] 1216*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1217*4d2ff233SAndrew Turner bx lr 1218*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1219*4d2ff233SAndrew Turner 1220*4d2ff233SAndrew Turner/* 1221*4d2ff233SAndrew Turner * 1000: dst is 16-bit aligned, src is 32-bit aligned 1222*4d2ff233SAndrew Turner */ 1223*4d2ff233SAndrew Turner ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1224*4d2ff233SAndrew Turner ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1225*4d2ff233SAndrew Turner ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1226*4d2ff233SAndrew Turner mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1227*4d2ff233SAndrew Turner strh ip, [r0] 1228*4d2ff233SAndrew Turner orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1229*4d2ff233SAndrew Turner mov r3, r3, lsr #16 /* r3 = ..76 */ 1230*4d2ff233SAndrew Turner orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1231*4d2ff233SAndrew Turner mov r2, r2, lsr #16 /* r2 = ..BA */ 1232*4d2ff233SAndrew Turner str r1, [r0, #0x02] 1233*4d2ff233SAndrew Turner str r3, [r0, #0x06] 1234*4d2ff233SAndrew Turner strh r2, [r0, #0x0a] 1235*4d2ff233SAndrew Turner bx lr 1236*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1237*4d2ff233SAndrew Turner 1238*4d2ff233SAndrew Turner/* 1239*4d2ff233SAndrew Turner * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1240*4d2ff233SAndrew Turner */ 1241*4d2ff233SAndrew Turner ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1242*4d2ff233SAndrew Turner ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1243*4d2ff233SAndrew Turner mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1244*4d2ff233SAndrew Turner strh ip, [r0] 1245*4d2ff233SAndrew Turner ldr ip, 
[r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1246*4d2ff233SAndrew Turner ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1247*4d2ff233SAndrew Turner mov r2, r2, lsr #24 /* r2 = ...2 */ 1248*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1249*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...6 */ 1250*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1251*4d2ff233SAndrew Turner mov r1, r1, lsl #8 /* r1 = ..B. */ 1252*4d2ff233SAndrew Turner orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1253*4d2ff233SAndrew Turner str r2, [r0, #0x02] 1254*4d2ff233SAndrew Turner str r3, [r0, #0x06] 1255*4d2ff233SAndrew Turner strh r1, [r0, #0x0a] 1256*4d2ff233SAndrew Turner bx lr 1257*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1258*4d2ff233SAndrew Turner 1259*4d2ff233SAndrew Turner/* 1260*4d2ff233SAndrew Turner * 1010: dst is 16-bit aligned, src is 16-bit aligned 1261*4d2ff233SAndrew Turner */ 1262*4d2ff233SAndrew Turner ldrh r2, [r1] 1263*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] 1264*4d2ff233SAndrew Turner ldr ip, [r1, #0x06] 1265*4d2ff233SAndrew Turner ldrh r1, [r1, #0x0a] 1266*4d2ff233SAndrew Turner strh r2, [r0] 1267*4d2ff233SAndrew Turner str r3, [r0, #0x02] 1268*4d2ff233SAndrew Turner str ip, [r0, #0x06] 1269*4d2ff233SAndrew Turner strh r1, [r0, #0x0a] 1270*4d2ff233SAndrew Turner bx lr 1271*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1272*4d2ff233SAndrew Turner 1273*4d2ff233SAndrew Turner/* 1274*4d2ff233SAndrew Turner * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1275*4d2ff233SAndrew Turner */ 1276*4d2ff233SAndrew Turner ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1277*4d2ff233SAndrew Turner ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1278*4d2ff233SAndrew Turner mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1279*4d2ff233SAndrew Turner strh ip, [r0, #0x0a] 1280*4d2ff233SAndrew Turner ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1281*4d2ff233SAndrew Turner ldrb r1, [r1] /* r1 = ...0 */ 1282*4d2ff233SAndrew Turner mov r2, r2, lsl #24 /* 
r2 = 9... */ 1283*4d2ff233SAndrew Turner orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1284*4d2ff233SAndrew Turner mov r3, r3, lsl #24 /* r3 = 5... */ 1285*4d2ff233SAndrew Turner orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1286*4d2ff233SAndrew Turner orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1287*4d2ff233SAndrew Turner str r2, [r0, #0x06] 1288*4d2ff233SAndrew Turner str r3, [r0, #0x02] 1289*4d2ff233SAndrew Turner strh r1, [r0] 1290*4d2ff233SAndrew Turner bx lr 1291*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1292*4d2ff233SAndrew Turner 1293*4d2ff233SAndrew Turner/* 1294*4d2ff233SAndrew Turner * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1295*4d2ff233SAndrew Turner */ 1296*4d2ff233SAndrew Turner ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1297*4d2ff233SAndrew Turner ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1298*4d2ff233SAndrew Turner ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1299*4d2ff233SAndrew Turner strb r2, [r0] 1300*4d2ff233SAndrew Turner mov r3, r2, lsr #8 /* r3 = .321 */ 1301*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 1302*4d2ff233SAndrew Turner str r3, [r0, #0x01] 1303*4d2ff233SAndrew Turner mov r3, ip, lsr #8 /* r3 = .765 */ 1304*4d2ff233SAndrew Turner orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 1305*4d2ff233SAndrew Turner str r3, [r0, #0x05] 1306*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = .BA9 */ 1307*4d2ff233SAndrew Turner strh r1, [r0, #0x09] 1308*4d2ff233SAndrew Turner mov r1, r1, lsr #16 /* r1 = ...B */ 1309*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1310*4d2ff233SAndrew Turner bx lr 1311*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1312*4d2ff233SAndrew Turner 1313*4d2ff233SAndrew Turner/* 1314*4d2ff233SAndrew Turner * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 1315*4d2ff233SAndrew Turner */ 1316*4d2ff233SAndrew Turner ldrb r2, [r1, #0x0b] /* r2 = ...B */ 1317*4d2ff233SAndrew Turner ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 1318*4d2ff233SAndrew Turner ldr ip, [r1, #0x03] /* BE:ip = 3456 
LE:ip = 6543 */ 1319*4d2ff233SAndrew Turner ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1320*4d2ff233SAndrew Turner strb r2, [r0, #0x0b] 1321*4d2ff233SAndrew Turner mov r2, r3, lsr #16 /* r2 = ..A9 */ 1322*4d2ff233SAndrew Turner strh r2, [r0, #0x09] 1323*4d2ff233SAndrew Turner mov r3, r3, lsl #16 /* r3 = 87.. */ 1324*4d2ff233SAndrew Turner orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 1325*4d2ff233SAndrew Turner mov ip, ip, lsl #16 /* ip = 43.. */ 1326*4d2ff233SAndrew Turner orr ip, ip, r1, lsr #16 /* ip = 4321 */ 1327*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = .210 */ 1328*4d2ff233SAndrew Turner str r3, [r0, #0x05] 1329*4d2ff233SAndrew Turner str ip, [r0, #0x01] 1330*4d2ff233SAndrew Turner strb r1, [r0] 1331*4d2ff233SAndrew Turner bx lr 1332*4d2ff233SAndrew Turner LMEMCPY_C_PAD 1333*4d2ff233SAndrew Turner 1334*4d2ff233SAndrew Turner/* 1335*4d2ff233SAndrew Turner * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 1336*4d2ff233SAndrew Turner */ 1337*4d2ff233SAndrew Turner ldrh r2, [r1] /* r2 = ..10 */ 1338*4d2ff233SAndrew Turner ldr r3, [r1, #0x02] /* r3 = 5432 */ 1339*4d2ff233SAndrew Turner ldr ip, [r1, #0x06] /* ip = 9876 */ 1340*4d2ff233SAndrew Turner ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 1341*4d2ff233SAndrew Turner strb r2, [r0] 1342*4d2ff233SAndrew Turner mov r2, r2, lsr #8 /* r2 = ...1 */ 1343*4d2ff233SAndrew Turner orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1344*4d2ff233SAndrew Turner mov r3, r3, lsr #24 /* r3 = ...5 */ 1345*4d2ff233SAndrew Turner orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 1346*4d2ff233SAndrew Turner mov ip, ip, lsr #24 /* ip = ...9 */ 1347*4d2ff233SAndrew Turner orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 1348*4d2ff233SAndrew Turner mov r1, r1, lsr #8 /* r1 = ...B */ 1349*4d2ff233SAndrew Turner str r2, [r0, #0x01] 1350*4d2ff233SAndrew Turner str r3, [r0, #0x05] 1351*4d2ff233SAndrew Turner strh ip, [r0, #0x09] 1352*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1353*4d2ff233SAndrew Turner bx lr 1354*4d2ff233SAndrew Turner LMEMCPY_C_PAD 
1355*4d2ff233SAndrew Turner 1356*4d2ff233SAndrew Turner/* 1357*4d2ff233SAndrew Turner * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 1358*4d2ff233SAndrew Turner */ 1359*4d2ff233SAndrew Turner ldrb r2, [r1] 1360*4d2ff233SAndrew Turner ldr r3, [r1, #0x01] 1361*4d2ff233SAndrew Turner ldr ip, [r1, #0x05] 1362*4d2ff233SAndrew Turner strb r2, [r0] 1363*4d2ff233SAndrew Turner ldrh r2, [r1, #0x09] 1364*4d2ff233SAndrew Turner ldrb r1, [r1, #0x0b] 1365*4d2ff233SAndrew Turner str r3, [r0, #0x01] 1366*4d2ff233SAndrew Turner str ip, [r0, #0x05] 1367*4d2ff233SAndrew Turner strh r2, [r0, #0x09] 1368*4d2ff233SAndrew Turner strb r1, [r0, #0x0b] 1369*4d2ff233SAndrew Turner bx lr 1370*4d2ff233SAndrew Turner#endif /* !_STANDALONE */ 1371*4d2ff233SAndrew TurnerEND(memcpy) 1372*4d2ff233SAndrew Turner 1373*4d2ff233SAndrew Turner .section .note.GNU-stack,"",%progbits 1374