1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */ 2 3/* 4 * Copyright 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Steve C. Woodford for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 
36 */ 37 38#include <machine/asm.h> 39.syntax unified 40 41/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 42ENTRY(memcpy) 43 pld [r1] 44 cmp r2, #0x0c 45 ble .Lmemcpy_short /* <= 12 bytes */ 46 mov r3, r0 /* We must not clobber r0 */ 47 48 /* Word-align the destination buffer */ 49 ands ip, r3, #0x03 /* Already word aligned? */ 50 beq .Lmemcpy_wordaligned /* Yup */ 51 cmp ip, #0x02 52 ldrb ip, [r1], #0x01 53 sub r2, r2, #0x01 54 strb ip, [r3], #0x01 55 ldrble ip, [r1], #0x01 56 suble r2, r2, #0x01 57 strble ip, [r3], #0x01 58 ldrblt ip, [r1], #0x01 59 sublt r2, r2, #0x01 60 strblt ip, [r3], #0x01 61 62 /* Destination buffer is now word aligned */ 63.Lmemcpy_wordaligned: 64 ands ip, r1, #0x03 /* Is src also word-aligned? */ 65 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 66 67 /* Quad-align the destination buffer */ 68 tst r3, #0x07 /* Already quad aligned? */ 69 ldrne ip, [r1], #0x04 70 stmfd sp!, {r4-r9} /* Free up some registers */ 71 subne r2, r2, #0x04 72 strne ip, [r3], #0x04 73 74 /* Destination buffer quad aligned, source is at least word aligned */ 75 subs r2, r2, #0x80 76 blt .Lmemcpy_w_lessthan128 77 78 /* Copy 128 bytes at a time */ 79.Lmemcpy_w_loop128: 80 ldr r4, [r1], #0x04 /* LD:00-03 */ 81 ldr r5, [r1], #0x04 /* LD:04-07 */ 82 pld [r1, #0x18] /* Prefetch 0x20 */ 83 ldr r6, [r1], #0x04 /* LD:08-0b */ 84 ldr r7, [r1], #0x04 /* LD:0c-0f */ 85 ldr r8, [r1], #0x04 /* LD:10-13 */ 86 ldr r9, [r1], #0x04 /* LD:14-17 */ 87 strd r4, [r3], #0x08 /* ST:00-07 */ 88 ldr r4, [r1], #0x04 /* LD:18-1b */ 89 ldr r5, [r1], #0x04 /* LD:1c-1f */ 90 strd r6, [r3], #0x08 /* ST:08-0f */ 91 ldr r6, [r1], #0x04 /* LD:20-23 */ 92 ldr r7, [r1], #0x04 /* LD:24-27 */ 93 pld [r1, #0x18] /* Prefetch 0x40 */ 94 strd r8, [r3], #0x08 /* ST:10-17 */ 95 ldr r8, [r1], #0x04 /* LD:28-2b */ 96 ldr r9, [r1], #0x04 /* LD:2c-2f */ 97 strd r4, [r3], #0x08 /* ST:18-1f */ 98 ldr r4, [r1], #0x04 /* LD:30-33 */ 99 ldr r5, [r1], #0x04 /* LD:34-37 */ 100 strd 
r6, [r3], #0x08 /* ST:20-27 */ 101 ldr r6, [r1], #0x04 /* LD:38-3b */ 102 ldr r7, [r1], #0x04 /* LD:3c-3f */ 103 strd r8, [r3], #0x08 /* ST:28-2f */ 104 ldr r8, [r1], #0x04 /* LD:40-43 */ 105 ldr r9, [r1], #0x04 /* LD:44-47 */ 106 pld [r1, #0x18] /* Prefetch 0x60 */ 107 strd r4, [r3], #0x08 /* ST:30-37 */ 108 ldr r4, [r1], #0x04 /* LD:48-4b */ 109 ldr r5, [r1], #0x04 /* LD:4c-4f */ 110 strd r6, [r3], #0x08 /* ST:38-3f */ 111 ldr r6, [r1], #0x04 /* LD:50-53 */ 112 ldr r7, [r1], #0x04 /* LD:54-57 */ 113 strd r8, [r3], #0x08 /* ST:40-47 */ 114 ldr r8, [r1], #0x04 /* LD:58-5b */ 115 ldr r9, [r1], #0x04 /* LD:5c-5f */ 116 strd r4, [r3], #0x08 /* ST:48-4f */ 117 ldr r4, [r1], #0x04 /* LD:60-63 */ 118 ldr r5, [r1], #0x04 /* LD:64-67 */ 119 pld [r1, #0x18] /* Prefetch 0x80 */ 120 strd r6, [r3], #0x08 /* ST:50-57 */ 121 ldr r6, [r1], #0x04 /* LD:68-6b */ 122 ldr r7, [r1], #0x04 /* LD:6c-6f */ 123 strd r8, [r3], #0x08 /* ST:58-5f */ 124 ldr r8, [r1], #0x04 /* LD:70-73 */ 125 ldr r9, [r1], #0x04 /* LD:74-77 */ 126 strd r4, [r3], #0x08 /* ST:60-67 */ 127 ldr r4, [r1], #0x04 /* LD:78-7b */ 128 ldr r5, [r1], #0x04 /* LD:7c-7f */ 129 strd r6, [r3], #0x08 /* ST:68-6f */ 130 strd r8, [r3], #0x08 /* ST:70-77 */ 131 subs r2, r2, #0x80 132 strd r4, [r3], #0x08 /* ST:78-7f */ 133 bge .Lmemcpy_w_loop128 134 135.Lmemcpy_w_lessthan128: 136 adds r2, r2, #0x80 /* Adjust for extra sub */ 137 ldmfdeq sp!, {r4-r9} 138 bxeq lr /* Return now if done */ 139 subs r2, r2, #0x20 140 blt .Lmemcpy_w_lessthan32 141 142 /* Copy 32 bytes at a time */ 143.Lmemcpy_w_loop32: 144 ldr r4, [r1], #0x04 145 ldr r5, [r1], #0x04 146 pld [r1, #0x18] 147 ldr r6, [r1], #0x04 148 ldr r7, [r1], #0x04 149 ldr r8, [r1], #0x04 150 ldr r9, [r1], #0x04 151 strd r4, [r3], #0x08 152 ldr r4, [r1], #0x04 153 ldr r5, [r1], #0x04 154 strd r6, [r3], #0x08 155 strd r8, [r3], #0x08 156 subs r2, r2, #0x20 157 strd r4, [r3], #0x08 158 bge .Lmemcpy_w_loop32 159 160.Lmemcpy_w_lessthan32: 161 adds r2, r2, #0x20 /* Adjust for extra sub */ 
162 ldmfdeq sp!, {r4-r9} 163 bxeq lr /* Return now if done */ 164 165 and r4, r2, #0x18 166 rsbs r4, r4, #0x18 167 addne pc, pc, r4, lsl #1 168 nop 169 170 /* At least 24 bytes remaining */ 171 ldr r4, [r1], #0x04 172 ldr r5, [r1], #0x04 173 sub r2, r2, #0x08 174 strd r4, [r3], #0x08 175 176 /* At least 16 bytes remaining */ 177 ldr r4, [r1], #0x04 178 ldr r5, [r1], #0x04 179 sub r2, r2, #0x08 180 strd r4, [r3], #0x08 181 182 /* At least 8 bytes remaining */ 183 ldr r4, [r1], #0x04 184 ldr r5, [r1], #0x04 185 subs r2, r2, #0x08 186 strd r4, [r3], #0x08 187 188 /* Less than 8 bytes remaining */ 189 ldmfd sp!, {r4-r9} 190 bxeq lr /* Return now if done */ 191 subs r2, r2, #0x04 192 ldrge ip, [r1], #0x04 193 strge ip, [r3], #0x04 194 bxeq lr /* Return now if done */ 195 addlt r2, r2, #0x04 196 ldrb ip, [r1], #0x01 197 cmp r2, #0x02 198 ldrbge r2, [r1], #0x01 199 strb ip, [r3], #0x01 200 ldrbgt ip, [r1] 201 strbge r2, [r3], #0x01 202 strbgt ip, [r3] 203 bx lr 204 205 206/* 207 * At this point, it has not been possible to word align both buffers. 208 * The destination buffer is word aligned, but the source buffer is not. 
209 */ 210.Lmemcpy_bad_align: 211 stmfd sp!, {r4-r7} 212 bic r1, r1, #0x03 213 cmp ip, #2 214 ldr ip, [r1], #0x04 215 bgt .Lmemcpy_bad3 216 beq .Lmemcpy_bad2 217 b .Lmemcpy_bad1 218 219.Lmemcpy_bad1_loop16: 220 mov r4, ip, lsr #8 221 ldr r5, [r1], #0x04 222 pld [r1, #0x018] 223 ldr r6, [r1], #0x04 224 ldr r7, [r1], #0x04 225 ldr ip, [r1], #0x04 226 orr r4, r4, r5, lsl #24 227 mov r5, r5, lsr #8 228 orr r5, r5, r6, lsl #24 229 mov r6, r6, lsr #8 230 orr r6, r6, r7, lsl #24 231 mov r7, r7, lsr #8 232 orr r7, r7, ip, lsl #24 233 str r4, [r3], #0x04 234 str r5, [r3], #0x04 235 str r6, [r3], #0x04 236 str r7, [r3], #0x04 237.Lmemcpy_bad1: 238 subs r2, r2, #0x10 239 bge .Lmemcpy_bad1_loop16 240 241 adds r2, r2, #0x10 242 ldmfdeq sp!, {r4-r7} 243 bxeq lr /* Return now if done */ 244 subs r2, r2, #0x04 245 sublt r1, r1, #0x03 246 blt .Lmemcpy_bad_done 247 248.Lmemcpy_bad1_loop4: 249 mov r4, ip, lsr #8 250 ldr ip, [r1], #0x04 251 subs r2, r2, #0x04 252 orr r4, r4, ip, lsl #24 253 str r4, [r3], #0x04 254 bge .Lmemcpy_bad1_loop4 255 sub r1, r1, #0x03 256 b .Lmemcpy_bad_done 257 258.Lmemcpy_bad2_loop16: 259 mov r4, ip, lsr #16 260 ldr r5, [r1], #0x04 261 pld [r1, #0x018] 262 ldr r6, [r1], #0x04 263 ldr r7, [r1], #0x04 264 ldr ip, [r1], #0x04 265 orr r4, r4, r5, lsl #16 266 mov r5, r5, lsr #16 267 orr r5, r5, r6, lsl #16 268 mov r6, r6, lsr #16 269 orr r6, r6, r7, lsl #16 270 mov r7, r7, lsr #16 271 orr r7, r7, ip, lsl #16 272 str r4, [r3], #0x04 273 str r5, [r3], #0x04 274 str r6, [r3], #0x04 275 str r7, [r3], #0x04 276.Lmemcpy_bad2: 277 subs r2, r2, #0x10 278 bge .Lmemcpy_bad2_loop16 279 280 adds r2, r2, #0x10 281 ldmfdeq sp!, {r4-r7} 282 bxeq lr /* Return now if done */ 283 subs r2, r2, #0x04 284 sublt r1, r1, #0x02 285 blt .Lmemcpy_bad_done 286 287.Lmemcpy_bad2_loop4: 288 mov r4, ip, lsr #16 289 ldr ip, [r1], #0x04 290 subs r2, r2, #0x04 291 orr r4, r4, ip, lsl #16 292 str r4, [r3], #0x04 293 bge .Lmemcpy_bad2_loop4 294 sub r1, r1, #0x02 295 b .Lmemcpy_bad_done 296 
297.Lmemcpy_bad3_loop16: 298 mov r4, ip, lsr #24 299 ldr r5, [r1], #0x04 300 pld [r1, #0x018] 301 ldr r6, [r1], #0x04 302 ldr r7, [r1], #0x04 303 ldr ip, [r1], #0x04 304 orr r4, r4, r5, lsl #8 305 mov r5, r5, lsr #24 306 orr r5, r5, r6, lsl #8 307 mov r6, r6, lsr #24 308 orr r6, r6, r7, lsl #8 309 mov r7, r7, lsr #24 310 orr r7, r7, ip, lsl #8 311 str r4, [r3], #0x04 312 str r5, [r3], #0x04 313 str r6, [r3], #0x04 314 str r7, [r3], #0x04 315.Lmemcpy_bad3: 316 subs r2, r2, #0x10 317 bge .Lmemcpy_bad3_loop16 318 319 adds r2, r2, #0x10 320 ldmfdeq sp!, {r4-r7} 321 bxeq lr /* Return now if done */ 322 subs r2, r2, #0x04 323 sublt r1, r1, #0x01 324 blt .Lmemcpy_bad_done 325 326.Lmemcpy_bad3_loop4: 327 mov r4, ip, lsr #24 328 ldr ip, [r1], #0x04 329 subs r2, r2, #0x04 330 orr r4, r4, ip, lsl #8 331 str r4, [r3], #0x04 332 bge .Lmemcpy_bad3_loop4 333 sub r1, r1, #0x01 334 335.Lmemcpy_bad_done: 336 ldmfd sp!, {r4-r7} 337 adds r2, r2, #0x04 338 bxeq lr 339 ldrb ip, [r1], #0x01 340 cmp r2, #0x02 341 ldrbge r2, [r1], #0x01 342 strb ip, [r3], #0x01 343 ldrbgt ip, [r1] 344 strbge r2, [r3], #0x01 345 strbgt ip, [r3] 346 bx lr 347 348 349/* 350 * Handle short copies (less than 16 bytes), possibly misaligned. 351 * Some of these are *very* common, thanks to the network stack, 352 * and so are handled specially. 
353 */ 354.Lmemcpy_short: 355#ifndef _STANDALONE 356 add pc, pc, r2, lsl #2 357 nop 358 bx lr /* 0x00 */ 359 b .Lmemcpy_bytewise /* 0x01 */ 360 b .Lmemcpy_bytewise /* 0x02 */ 361 b .Lmemcpy_bytewise /* 0x03 */ 362 b .Lmemcpy_4 /* 0x04 */ 363 b .Lmemcpy_bytewise /* 0x05 */ 364 b .Lmemcpy_6 /* 0x06 */ 365 b .Lmemcpy_bytewise /* 0x07 */ 366 b .Lmemcpy_8 /* 0x08 */ 367 b .Lmemcpy_bytewise /* 0x09 */ 368 b .Lmemcpy_bytewise /* 0x0a */ 369 b .Lmemcpy_bytewise /* 0x0b */ 370 b .Lmemcpy_c /* 0x0c */ 371#endif 372.Lmemcpy_bytewise: 373 mov r3, r0 /* We must not clobber r0 */ 374 ldrb ip, [r1], #0x01 3751: subs r2, r2, #0x01 376 strb ip, [r3], #0x01 377 ldrbne ip, [r1], #0x01 378 bne 1b 379 bx lr 380 381#ifndef _STANDALONE 382/****************************************************************************** 383 * Special case for 4 byte copies 384 */ 385#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 386#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 387 LMEMCPY_4_PAD 388.Lmemcpy_4: 389 and r2, r1, #0x03 390 orr r2, r2, r0, lsl #2 391 ands r2, r2, #0x0f 392 sub r3, pc, #0x14 393 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 394 395/* 396 * 0000: dst is 32-bit aligned, src is 32-bit aligned 397 */ 398 ldr r2, [r1] 399 str r2, [r0] 400 bx lr 401 LMEMCPY_4_PAD 402 403/* 404 * 0001: dst is 32-bit aligned, src is 8-bit aligned 405 */ 406 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 407 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 408 mov r3, r3, lsr #8 /* r3 = .210 */ 409 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 410 str r3, [r0] 411 bx lr 412 LMEMCPY_4_PAD 413 414/* 415 * 0010: dst is 32-bit aligned, src is 16-bit aligned 416 */ 417 ldrh r3, [r1, #0x02] 418 ldrh r2, [r1] 419 orr r3, r2, r3, lsl #16 420 str r3, [r0] 421 bx lr 422 LMEMCPY_4_PAD 423 424/* 425 * 0011: dst is 32-bit aligned, src is 8-bit aligned 426 */ 427 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 428 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 429 mov r3, r3, lsr #24 /* r3 = ...0 */ 430 orr r3, r3, r2, lsl #8 /* r3 = 
3210 */ 431 str r3, [r0] 432 bx lr 433 LMEMCPY_4_PAD 434 435/* 436 * 0100: dst is 8-bit aligned, src is 32-bit aligned 437 */ 438 ldr r2, [r1] 439 strb r2, [r0] 440 mov r3, r2, lsr #8 441 mov r1, r2, lsr #24 442 strb r1, [r0, #0x03] 443 strh r3, [r0, #0x01] 444 bx lr 445 LMEMCPY_4_PAD 446 447/* 448 * 0101: dst is 8-bit aligned, src is 8-bit aligned 449 */ 450 ldrb r2, [r1] 451 ldrh r3, [r1, #0x01] 452 ldrb r1, [r1, #0x03] 453 strb r2, [r0] 454 strh r3, [r0, #0x01] 455 strb r1, [r0, #0x03] 456 bx lr 457 LMEMCPY_4_PAD 458 459/* 460 * 0110: dst is 8-bit aligned, src is 16-bit aligned 461 */ 462 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 463 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 464 strb r2, [r0] 465 mov r2, r2, lsr #8 /* r2 = ...1 */ 466 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 467 mov r3, r3, lsr #8 /* r3 = ...3 */ 468 strh r2, [r0, #0x01] 469 strb r3, [r0, #0x03] 470 bx lr 471 LMEMCPY_4_PAD 472 473/* 474 * 0111: dst is 8-bit aligned, src is 8-bit aligned 475 */ 476 ldrb r2, [r1] 477 ldrh r3, [r1, #0x01] 478 ldrb r1, [r1, #0x03] 479 strb r2, [r0] 480 strh r3, [r0, #0x01] 481 strb r1, [r0, #0x03] 482 bx lr 483 LMEMCPY_4_PAD 484 485/* 486 * 1000: dst is 16-bit aligned, src is 32-bit aligned 487 */ 488 ldr r2, [r1] 489 strh r2, [r0] 490 mov r3, r2, lsr #16 491 strh r3, [r0, #0x02] 492 bx lr 493 LMEMCPY_4_PAD 494 495/* 496 * 1001: dst is 16-bit aligned, src is 8-bit aligned 497 */ 498 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 499 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 500 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 501 strh r1, [r0] 502 mov r2, r2, lsr #24 /* r2 = ...2 */ 503 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 504 strh r2, [r0, #0x02] 505 bx lr 506 LMEMCPY_4_PAD 507 508/* 509 * 1010: dst is 16-bit aligned, src is 16-bit aligned 510 */ 511 ldrh r2, [r1] 512 ldrh r3, [r1, #0x02] 513 strh r2, [r0] 514 strh r3, [r0, #0x02] 515 bx lr 516 LMEMCPY_4_PAD 517 518/* 519 * 1011: dst is 16-bit aligned, src is 8-bit aligned 520 */ 521 ldr 
r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 522 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 523 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 524 strh r1, [r0, #0x02] 525 mov r3, r3, lsl #8 /* r3 = 321. */ 526 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 527 strh r3, [r0] 528 bx lr 529 LMEMCPY_4_PAD 530 531/* 532 * 1100: dst is 8-bit aligned, src is 32-bit aligned 533 */ 534 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 535 strb r2, [r0] 536 mov r3, r2, lsr #8 537 mov r1, r2, lsr #24 538 strh r3, [r0, #0x01] 539 strb r1, [r0, #0x03] 540 bx lr 541 LMEMCPY_4_PAD 542 543/* 544 * 1101: dst is 8-bit aligned, src is 8-bit aligned 545 */ 546 ldrb r2, [r1] 547 ldrh r3, [r1, #0x01] 548 ldrb r1, [r1, #0x03] 549 strb r2, [r0] 550 strh r3, [r0, #0x01] 551 strb r1, [r0, #0x03] 552 bx lr 553 LMEMCPY_4_PAD 554 555/* 556 * 1110: dst is 8-bit aligned, src is 16-bit aligned 557 */ 558 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 559 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 560 strb r2, [r0] 561 mov r2, r2, lsr #8 /* r2 = ...1 */ 562 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 563 strh r2, [r0, #0x01] 564 mov r3, r3, lsr #8 /* r3 = ...3 */ 565 strb r3, [r0, #0x03] 566 bx lr 567 LMEMCPY_4_PAD 568 569/* 570 * 1111: dst is 8-bit aligned, src is 8-bit aligned 571 */ 572 ldrb r2, [r1] 573 ldrh r3, [r1, #0x01] 574 ldrb r1, [r1, #0x03] 575 strb r2, [r0] 576 strh r3, [r0, #0x01] 577 strb r1, [r0, #0x03] 578 bx lr 579 LMEMCPY_4_PAD 580 581 582/****************************************************************************** 583 * Special case for 6 byte copies 584 */ 585#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 586#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 587 LMEMCPY_6_PAD 588.Lmemcpy_6: 589 and r2, r1, #0x03 590 orr r2, r2, r0, lsl #2 591 ands r2, r2, #0x0f 592 sub r3, pc, #0x14 593 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 594 595/* 596 * 0000: dst is 32-bit aligned, src is 32-bit aligned 597 */ 598 ldr r2, [r1] 599 ldrh r3, [r1, #0x04] 600 str r2, [r0] 601 strh r3, [r0, #0x04] 602 
bx lr 603 LMEMCPY_6_PAD 604 605/* 606 * 0001: dst is 32-bit aligned, src is 8-bit aligned 607 */ 608 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 609 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 610 mov r2, r2, lsr #8 /* r2 = .210 */ 611 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 612 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 613 str r2, [r0] 614 strh r3, [r0, #0x04] 615 bx lr 616 LMEMCPY_6_PAD 617 618/* 619 * 0010: dst is 32-bit aligned, src is 16-bit aligned 620 */ 621 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 622 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 623 mov r1, r3, lsr #16 /* r1 = ..54 */ 624 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 625 str r2, [r0] 626 strh r1, [r0, #0x04] 627 bx lr 628 LMEMCPY_6_PAD 629 630/* 631 * 0011: dst is 32-bit aligned, src is 8-bit aligned 632 */ 633 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 634 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 635 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 636 mov r2, r2, lsr #24 /* r2 = ...0 */ 637 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 638 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 639 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 640 str r2, [r0] 641 strh r1, [r0, #0x04] 642 bx lr 643 LMEMCPY_6_PAD 644 645/* 646 * 0100: dst is 8-bit aligned, src is 32-bit aligned 647 */ 648 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 649 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 650 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 651 strh r1, [r0, #0x01] 652 strb r3, [r0] 653 mov r3, r3, lsr #24 /* r3 = ...3 */ 654 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 655 mov r2, r2, lsr #8 /* r2 = ...5 */ 656 strh r3, [r0, #0x03] 657 strb r2, [r0, #0x05] 658 bx lr 659 LMEMCPY_6_PAD 660 661/* 662 * 0101: dst is 8-bit aligned, src is 8-bit aligned 663 */ 664 ldrb r2, [r1] 665 ldrh r3, [r1, #0x01] 666 ldrh ip, [r1, #0x03] 667 ldrb r1, [r1, #0x05] 668 strb r2, [r0] 669 strh r3, [r0, #0x01] 670 strh ip, [r0, #0x03] 671 strb r1, [r0, #0x05] 672 bx lr 673 LMEMCPY_6_PAD 674 675/* 676 * 0110: dst is 8-bit aligned, src is 16-bit aligned 677 */ 678 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 679 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 680 strb r2, [r0] 681 mov r3, r1, lsr #24 682 strb r3, [r0, #0x05] 683 mov r3, r1, lsr #8 /* r3 = .543 */ 684 strh r3, [r0, #0x03] 685 mov r3, r2, lsr #8 /* r3 = ...1 */ 686 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 687 strh r3, [r0, #0x01] 688 bx lr 689 LMEMCPY_6_PAD 690 691/* 692 * 0111: dst is 8-bit aligned, src is 8-bit aligned 693 */ 694 ldrb r2, [r1] 695 ldrh r3, [r1, #0x01] 696 ldrh ip, [r1, #0x03] 697 ldrb r1, [r1, #0x05] 698 strb r2, [r0] 699 strh r3, [r0, #0x01] 700 strh ip, [r0, #0x03] 701 strb r1, [r0, #0x05] 702 bx lr 703 LMEMCPY_6_PAD 704 705/* 706 * 1000: dst is 16-bit aligned, src is 32-bit aligned 707 */ 708 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 709 ldr r3, [r1] /* r3 = 3210 */ 710 mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 711 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 712 strh r3, [r0] 713 str r2, [r0, #0x02] 714 bx lr 715 LMEMCPY_6_PAD 716 717/* 718 * 1001: dst is 16-bit aligned, src is 8-bit aligned 719 */ 720 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 721 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 722 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 723 mov r2, r2, lsl #8 /* r2 = 543. */ 724 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 725 strh r1, [r0] 726 str r2, [r0, #0x02] 727 bx lr 728 LMEMCPY_6_PAD 729 730/* 731 * 1010: dst is 16-bit aligned, src is 16-bit aligned 732 */ 733 ldrh r2, [r1] 734 ldr r3, [r1, #0x02] 735 strh r2, [r0] 736 str r3, [r0, #0x02] 737 bx lr 738 LMEMCPY_6_PAD 739 740/* 741 * 1011: dst is 16-bit aligned, src is 8-bit aligned 742 */ 743 ldrb r3, [r1] /* r3 = ...0 */ 744 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 745 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 746 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 747 mov r1, r1, lsl #24 /* r1 = 5... */ 748 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 749 strh r3, [r0] 750 str r1, [r0, #0x02] 751 bx lr 752 LMEMCPY_6_PAD 753 754/* 755 * 1100: dst is 8-bit aligned, src is 32-bit aligned 756 */ 757 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 758 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 759 strb r2, [r0] 760 mov r2, r2, lsr #8 /* r2 = .321 */ 761 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 762 mov r1, r1, lsr #8 /* r1 = ...5 */ 763 str r2, [r0, #0x01] 764 strb r1, [r0, #0x05] 765 bx lr 766 LMEMCPY_6_PAD 767 768/* 769 * 1101: dst is 8-bit aligned, src is 8-bit aligned 770 */ 771 ldrb r2, [r1] 772 ldrh r3, [r1, #0x01] 773 ldrh ip, [r1, #0x03] 774 ldrb r1, [r1, #0x05] 775 strb r2, [r0] 776 strh r3, [r0, #0x01] 777 strh ip, [r0, #0x03] 778 strb r1, [r0, #0x05] 779 bx lr 780 LMEMCPY_6_PAD 781 782/* 783 * 1110: dst is 8-bit aligned, src is 16-bit aligned 784 */ 785 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 786 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 787 strb r2, [r0] 788 mov r2, r2, lsr #8 /* r2 
= ...1 */ 789 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 790 mov r1, r1, lsr #24 /* r1 = ...5 */ 791 str r2, [r0, #0x01] 792 strb r1, [r0, #0x05] 793 bx lr 794 LMEMCPY_6_PAD 795 796/* 797 * 1111: dst is 8-bit aligned, src is 8-bit aligned 798 */ 799 ldrb r2, [r1] 800 ldr r3, [r1, #0x01] 801 ldrb r1, [r1, #0x05] 802 strb r2, [r0] 803 str r3, [r0, #0x01] 804 strb r1, [r0, #0x05] 805 bx lr 806 LMEMCPY_6_PAD 807 808 809/****************************************************************************** 810 * Special case for 8 byte copies 811 */ 812#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 813#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 814 LMEMCPY_8_PAD 815.Lmemcpy_8: 816 and r2, r1, #0x03 817 orr r2, r2, r0, lsl #2 818 ands r2, r2, #0x0f 819 sub r3, pc, #0x14 820 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 821 822/* 823 * 0000: dst is 32-bit aligned, src is 32-bit aligned 824 */ 825 ldr r2, [r1] 826 ldr r3, [r1, #0x04] 827 str r2, [r0] 828 str r3, [r0, #0x04] 829 bx lr 830 LMEMCPY_8_PAD 831 832/* 833 * 0001: dst is 32-bit aligned, src is 8-bit aligned 834 */ 835 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 836 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 837 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 838 mov r3, r3, lsr #8 /* r3 = .210 */ 839 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 840 mov r1, r1, lsl #24 /* r1 = 7... 
*/ 841 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 842 str r3, [r0] 843 str r2, [r0, #0x04] 844 bx lr 845 LMEMCPY_8_PAD 846 847/* 848 * 0010: dst is 32-bit aligned, src is 16-bit aligned 849 */ 850 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 851 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 852 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 853 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 854 mov r3, r3, lsr #16 /* r3 = ..54 */ 855 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 856 str r2, [r0] 857 str r3, [r0, #0x04] 858 bx lr 859 LMEMCPY_8_PAD 860 861/* 862 * 0011: dst is 32-bit aligned, src is 8-bit aligned 863 */ 864 ldrb r3, [r1] /* r3 = ...0 */ 865 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 866 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 867 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 868 mov r2, r2, lsr #24 /* r2 = ...4 */ 869 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 870 str r3, [r0] 871 str r2, [r0, #0x04] 872 bx lr 873 LMEMCPY_8_PAD 874 875/* 876 * 0100: dst is 8-bit aligned, src is 32-bit aligned 877 */ 878 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 879 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 880 strb r3, [r0] 881 mov r1, r2, lsr #24 /* r1 = ...7 */ 882 strb r1, [r0, #0x07] 883 mov r1, r3, lsr #8 /* r1 = .321 */ 884 mov r3, r3, lsr #24 /* r3 = ...3 */ 885 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 886 strh r1, [r0, #0x01] 887 str r3, [r0, #0x03] 888 bx lr 889 LMEMCPY_8_PAD 890 891/* 892 * 0101: dst is 8-bit aligned, src is 8-bit aligned 893 */ 894 ldrb r2, [r1] 895 ldrh r3, [r1, #0x01] 896 ldr ip, [r1, #0x03] 897 ldrb r1, [r1, #0x07] 898 strb r2, [r0] 899 strh r3, [r0, #0x01] 900 str ip, [r0, #0x03] 901 strb r1, [r0, #0x07] 902 bx lr 903 LMEMCPY_8_PAD 904 905/* 906 * 0110: dst is 8-bit aligned, src is 16-bit aligned 907 */ 908 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 909 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 910 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 911 strb r2, [r0] /* 0 */ 912 mov ip, r1, lsr #8 /* ip = 
...7 */ 913 strb ip, [r0, #0x07] /* 7 */ 914 mov ip, r2, lsr #8 /* ip = ...1 */ 915 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 916 mov r3, r3, lsr #8 /* r3 = .543 */ 917 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 918 strh ip, [r0, #0x01] 919 str r3, [r0, #0x03] 920 bx lr 921 LMEMCPY_8_PAD 922 923/* 924 * 0111: dst is 8-bit aligned, src is 8-bit aligned 925 */ 926 ldrb r3, [r1] /* r3 = ...0 */ 927 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 928 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 929 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 930 strb r3, [r0] 931 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 932 strh ip, [r0, #0x01] 933 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 934 str r2, [r0, #0x03] 935 strb r1, [r0, #0x07] 936 bx lr 937 LMEMCPY_8_PAD 938 939/* 940 * 1000: dst is 16-bit aligned, src is 32-bit aligned 941 */ 942 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 943 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 944 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 945 strh r2, [r0] 946 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 947 mov r3, r3, lsr #16 /* r3 = ..76 */ 948 str r2, [r0, #0x02] 949 strh r3, [r0, #0x06] 950 bx lr 951 LMEMCPY_8_PAD 952 953/* 954 * 1001: dst is 16-bit aligned, src is 8-bit aligned 955 */ 956 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 957 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 958 ldrb ip, [r1, #0x07] /* ip = ...7 */ 959 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 960 strh r1, [r0] 961 mov r1, r2, lsr #24 /* r1 = ...2 */ 962 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 963 mov r3, r3, lsr #24 /* r3 = ...6 */ 964 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 965 str r1, [r0, #0x02] 966 strh r3, [r0, #0x06] 967 bx lr 968 LMEMCPY_8_PAD 969 970/* 971 * 1010: dst is 16-bit aligned, src is 16-bit aligned 972 */ 973 ldrh r2, [r1] 974 ldr ip, [r1, #0x02] 975 ldrh r3, [r1, #0x06] 976 strh r2, [r0] 977 str ip, [r0, #0x02] 978 strh r3, [r0, #0x06] 979 bx lr 980 LMEMCPY_8_PAD 981 982/* 983 * 1011: dst is 16-bit aligned, 
src is 8-bit aligned 984 */ 985 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 986 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 987 ldrb ip, [r1] /* ip = ...0 */ 988 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 989 strh r1, [r0, #0x06] 990 mov r3, r3, lsl #24 /* r3 = 5... */ 991 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 992 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 993 str r3, [r0, #0x02] 994 strh r2, [r0] 995 bx lr 996 LMEMCPY_8_PAD 997 998/* 999 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1000 */ 1001 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1002 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1003 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1004 strh r1, [r0, #0x05] 1005 strb r2, [r0] 1006 mov r1, r3, lsr #24 /* r1 = ...7 */ 1007 strb r1, [r0, #0x07] 1008 mov r2, r2, lsr #8 /* r2 = .321 */ 1009 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1010 str r2, [r0, #0x01] 1011 bx lr 1012 LMEMCPY_8_PAD 1013 1014/* 1015 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1016 */ 1017 ldrb r3, [r1] /* r3 = ...0 */ 1018 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1019 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1020 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1021 strb r3, [r0] 1022 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1023 strh r3, [r0, #0x05] 1024 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1025 str r2, [r0, #0x01] 1026 strb r1, [r0, #0x07] 1027 bx lr 1028 LMEMCPY_8_PAD 1029 1030/* 1031 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1032 */ 1033 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1034 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1035 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1036 strb r2, [r0] 1037 mov ip, r2, lsr #8 /* ip = ...1 */ 1038 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1039 mov r2, r1, lsr #8 /* r2 = ...7 */ 1040 strb r2, [r0, #0x07] 1041 mov r1, r1, lsl #8 /* r1 = .76. 
*/ 1042 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1043 str ip, [r0, #0x01] 1044 strh r1, [r0, #0x05] 1045 bx lr 1046 LMEMCPY_8_PAD 1047 1048/* 1049 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1050 */ 1051 ldrb r2, [r1] 1052 ldr ip, [r1, #0x01] 1053 ldrh r3, [r1, #0x05] 1054 ldrb r1, [r1, #0x07] 1055 strb r2, [r0] 1056 str ip, [r0, #0x01] 1057 strh r3, [r0, #0x05] 1058 strb r1, [r0, #0x07] 1059 bx lr 1060 LMEMCPY_8_PAD 1061 1062/****************************************************************************** 1063 * Special case for 12 byte copies 1064 */ 1065#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1066#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1067 LMEMCPY_C_PAD 1068.Lmemcpy_c: 1069 and r2, r1, #0x03 1070 orr r2, r2, r0, lsl #2 1071 ands r2, r2, #0x0f 1072 sub r3, pc, #0x14 1073 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 1074 1075/* 1076 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1077 */ 1078 ldr r2, [r1] 1079 ldr r3, [r1, #0x04] 1080 ldr r1, [r1, #0x08] 1081 str r2, [r0] 1082 str r3, [r0, #0x04] 1083 str r1, [r0, #0x08] 1084 bx lr 1085 LMEMCPY_C_PAD 1086 1087/* 1088 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1089 */ 1090 ldrb r2, [r1, #0xb] /* r2 = ...B */ 1091 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1092 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1093 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1094 mov r2, r2, lsl #24 /* r2 = B... */ 1095 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1096 str r2, [r0, #0x08] 1097 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 1098 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1099 mov r1, r1, lsr #8 /* r1 = .210 */ 1100 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1101 str r2, [r0, #0x04] 1102 str r1, [r0] 1103 bx lr 1104 LMEMCPY_C_PAD 1105 1106/* 1107 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1108 */ 1109 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1110 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1111 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1112 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1113 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1114 str r2, [r0] 1115 mov r3, r3, lsr #16 /* r3 = ..54 */ 1116 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1117 mov r1, r1, lsl #16 /* r1 = BA.. */ 1118 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1119 str r3, [r0, #0x04] 1120 str r1, [r0, #0x08] 1121 bx lr 1122 LMEMCPY_C_PAD 1123 1124/* 1125 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1126 */ 1127 ldrb r2, [r1] /* r2 = ...0 */ 1128 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1129 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1130 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1131 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1132 str r2, [r0] 1133 mov r3, r3, lsr #24 /* r3 = ...4 */ 1134 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1135 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1136 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1137 str r3, [r0, #0x04] 1138 str r1, [r0, #0x08] 1139 bx lr 1140 LMEMCPY_C_PAD 1141 1142/* 1143 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1144 */ 1145 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1146 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1147 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1148 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1149 strh r1, [r0, #0x01] 1150 strb r2, [r0] 1151 mov r1, r2, lsr #24 /* r1 = ...3 */ 1152 orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 1153 mov r1, r3, lsr #24 /* r1 = ...7 */ 1154 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1155 mov ip, ip, lsr #24 /* ip = ...B */ 1156 str r2, [r0, #0x03] 1157 str r1, [r0, #0x07] 1158 strb ip, [r0, #0x0b] 1159 bx lr 1160 LMEMCPY_C_PAD 1161 1162/* 1163 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1164 */ 1165 ldrb r2, [r1] 1166 ldrh r3, [r1, #0x01] 1167 ldr ip, [r1, #0x03] 1168 strb r2, [r0] 1169 ldr r2, [r1, #0x07] 1170 ldrb r1, [r1, #0x0b] 1171 strh r3, [r0, #0x01] 1172 str ip, [r0, #0x03] 1173 str r2, [r0, #0x07] 1174 strb r1, [r0, #0x0b] 1175 bx lr 1176 LMEMCPY_C_PAD 1177 1178/* 1179 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1180 */ 1181 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1182 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1183 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1184 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1185 strb r2, [r0] 1186 mov r2, r2, lsr #8 /* r2 = ...1 */ 1187 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1188 strh r2, [r0, #0x01] 1189 mov r2, r3, lsr #8 /* r2 = .543 */ 1190 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1191 mov r2, ip, lsr #8 /* r2 = .987 */ 1192 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1193 mov r1, r1, lsr #8 /* r1 = ...B */ 1194 str r3, [r0, #0x03] 1195 str r2, [r0, #0x07] 1196 strb r1, [r0, #0x0b] 1197 bx lr 1198 LMEMCPY_C_PAD 1199 1200/* 1201 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 
3) 1202 */ 1203 ldrb r2, [r1] 1204 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1205 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1206 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1207 strb r2, [r0] 1208 strh r3, [r0, #0x01] 1209 mov r3, r3, lsr #16 /* r3 = ..43 */ 1210 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1211 mov ip, ip, lsr #16 /* ip = ..87 */ 1212 orr ip, ip, r1, lsl #16 /* ip = A987 */ 1213 mov r1, r1, lsr #16 /* r1 = ..xB */ 1214 str r3, [r0, #0x03] 1215 str ip, [r0, #0x07] 1216 strb r1, [r0, #0x0b] 1217 bx lr 1218 LMEMCPY_C_PAD 1219 1220/* 1221 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1222 */ 1223 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1224 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1225 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1226 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1227 strh ip, [r0] 1228 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1229 mov r3, r3, lsr #16 /* r3 = ..76 */ 1230 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1231 mov r2, r2, lsr #16 /* r2 = ..BA */ 1232 str r1, [r0, #0x02] 1233 str r3, [r0, #0x06] 1234 strh r2, [r0, #0x0a] 1235 bx lr 1236 LMEMCPY_C_PAD 1237 1238/* 1239 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1240 */ 1241 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1242 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1243 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1244 strh ip, [r0] 1245 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1246 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1247 mov r2, r2, lsr #24 /* r2 = ...2 */ 1248 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1249 mov r3, r3, lsr #24 /* r3 = ...6 */ 1250 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1251 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 1252 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1253 str r2, [r0, #0x02] 1254 str r3, [r0, #0x06] 1255 strh r1, [r0, #0x0a] 1256 bx lr 1257 LMEMCPY_C_PAD 1258 1259/* 1260 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1261 */ 1262 ldrh r2, [r1] 1263 ldr r3, [r1, #0x02] 1264 ldr ip, [r1, #0x06] 1265 ldrh r1, [r1, #0x0a] 1266 strh r2, [r0] 1267 str r3, [r0, #0x02] 1268 str ip, [r0, #0x06] 1269 strh r1, [r0, #0x0a] 1270 bx lr 1271 LMEMCPY_C_PAD 1272 1273/* 1274 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1275 */ 1276 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1277 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1278 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1279 strh ip, [r0, #0x0a] 1280 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1281 ldrb r1, [r1] /* r1 = ...0 */ 1282 mov r2, r2, lsl #24 /* r2 = 9... */ 1283 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1284 mov r3, r3, lsl #24 /* r3 = 5... */ 1285 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1286 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1287 str r2, [r0, #0x06] 1288 str r3, [r0, #0x02] 1289 strh r1, [r0] 1290 bx lr 1291 LMEMCPY_C_PAD 1292 1293/* 1294 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1295 */ 1296 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1297 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1298 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1299 strb r2, [r0] 1300 mov r3, r2, lsr #8 /* r3 = .321 */ 1301 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 1302 str r3, [r0, #0x01] 1303 mov r3, ip, lsr #8 /* r3 = .765 */ 1304 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 1305 str r3, [r0, #0x05] 1306 mov r1, r1, lsr #8 /* r1 = .BA9 */ 1307 strh r1, [r0, #0x09] 1308 mov r1, r1, lsr #16 /* r1 = ...B */ 1309 strb r1, [r0, #0x0b] 1310 bx lr 1311 LMEMCPY_C_PAD 1312 1313/* 1314 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 1315 */ 1316 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 1317 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 1318 ldr ip, 
[r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1319 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1320 strb r2, [r0, #0x0b] 1321 mov r2, r3, lsr #16 /* r2 = ..A9 */ 1322 strh r2, [r0, #0x09] 1323 mov r3, r3, lsl #16 /* r3 = 87.. */ 1324 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 1325 mov ip, ip, lsl #16 /* ip = 43.. */ 1326 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 1327 mov r1, r1, lsr #8 /* r1 = .210 */ 1328 str r3, [r0, #0x05] 1329 str ip, [r0, #0x01] 1330 strb r1, [r0] 1331 bx lr 1332 LMEMCPY_C_PAD 1333 1334/* 1335 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 1336 */ 1337 ldrh r2, [r1] /* r2 = ..10 */ 1338 ldr r3, [r1, #0x02] /* r3 = 5432 */ 1339 ldr ip, [r1, #0x06] /* ip = 9876 */ 1340 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 1341 strb r2, [r0] 1342 mov r2, r2, lsr #8 /* r2 = ...1 */ 1343 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1344 mov r3, r3, lsr #24 /* r3 = ...5 */ 1345 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 1346 mov ip, ip, lsr #24 /* ip = ...9 */ 1347 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 1348 mov r1, r1, lsr #8 /* r1 = ...B */ 1349 str r2, [r0, #0x01] 1350 str r3, [r0, #0x05] 1351 strh ip, [r0, #0x09] 1352 strb r1, [r0, #0x0b] 1353 bx lr 1354 LMEMCPY_C_PAD 1355 1356/* 1357 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 1358 */ 1359 ldrb r2, [r1] 1360 ldr r3, [r1, #0x01] 1361 ldr ip, [r1, #0x05] 1362 strb r2, [r0] 1363 ldrh r2, [r1, #0x09] 1364 ldrb r1, [r1, #0x0b] 1365 str r3, [r0, #0x01] 1366 str ip, [r0, #0x05] 1367 strh r2, [r0, #0x09] 1368 strb r1, [r0, #0x0b] 1369 bx lr 1370#endif /* !_STANDALONE */ 1371END(memcpy) 1372 1373 .section .note.GNU-stack,"",%progbits 1374