1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */ 2 3/* 4 * Copyright 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Steve C. Woodford for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 
36 */ 37 38#include <machine/asm.h> 39__FBSDID("$FreeBSD$"); 40 41.syntax unified 42 43/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 44ENTRY(memcpy) 45 pld [r1] 46 cmp r2, #0x0c 47 ble .Lmemcpy_short /* <= 12 bytes */ 48 mov r3, r0 /* We must not clobber r0 */ 49 50 /* Word-align the destination buffer */ 51 ands ip, r3, #0x03 /* Already word aligned? */ 52 beq .Lmemcpy_wordaligned /* Yup */ 53 cmp ip, #0x02 54 ldrb ip, [r1], #0x01 55 sub r2, r2, #0x01 56 strb ip, [r3], #0x01 57 ldrble ip, [r1], #0x01 58 suble r2, r2, #0x01 59 strble ip, [r3], #0x01 60 ldrblt ip, [r1], #0x01 61 sublt r2, r2, #0x01 62 strblt ip, [r3], #0x01 63 64 /* Destination buffer is now word aligned */ 65.Lmemcpy_wordaligned: 66 ands ip, r1, #0x03 /* Is src also word-aligned? */ 67 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 68 69 /* Quad-align the destination buffer */ 70 tst r3, #0x07 /* Already quad aligned? */ 71 ldrne ip, [r1], #0x04 72 stmfd sp!, {r4-r9} /* Free up some registers */ 73 subne r2, r2, #0x04 74 strne ip, [r3], #0x04 75 76 /* Destination buffer quad aligned, source is at least word aligned */ 77 subs r2, r2, #0x80 78 blt .Lmemcpy_w_lessthan128 79 80 /* Copy 128 bytes at a time */ 81.Lmemcpy_w_loop128: 82 ldr r4, [r1], #0x04 /* LD:00-03 */ 83 ldr r5, [r1], #0x04 /* LD:04-07 */ 84 pld [r1, #0x18] /* Prefetch 0x20 */ 85 ldr r6, [r1], #0x04 /* LD:08-0b */ 86 ldr r7, [r1], #0x04 /* LD:0c-0f */ 87 ldr r8, [r1], #0x04 /* LD:10-13 */ 88 ldr r9, [r1], #0x04 /* LD:14-17 */ 89 strd r4, [r3], #0x08 /* ST:00-07 */ 90 ldr r4, [r1], #0x04 /* LD:18-1b */ 91 ldr r5, [r1], #0x04 /* LD:1c-1f */ 92 strd r6, [r3], #0x08 /* ST:08-0f */ 93 ldr r6, [r1], #0x04 /* LD:20-23 */ 94 ldr r7, [r1], #0x04 /* LD:24-27 */ 95 pld [r1, #0x18] /* Prefetch 0x40 */ 96 strd r8, [r3], #0x08 /* ST:10-17 */ 97 ldr r8, [r1], #0x04 /* LD:28-2b */ 98 ldr r9, [r1], #0x04 /* LD:2c-2f */ 99 strd r4, [r3], #0x08 /* ST:18-1f */ 100 ldr r4, [r1], #0x04 /* LD:30-33 */ 101 ldr r5, [r1], 
#0x04 /* LD:34-37 */ 102 strd r6, [r3], #0x08 /* ST:20-27 */ 103 ldr r6, [r1], #0x04 /* LD:38-3b */ 104 ldr r7, [r1], #0x04 /* LD:3c-3f */ 105 strd r8, [r3], #0x08 /* ST:28-2f */ 106 ldr r8, [r1], #0x04 /* LD:40-43 */ 107 ldr r9, [r1], #0x04 /* LD:44-47 */ 108 pld [r1, #0x18] /* Prefetch 0x60 */ 109 strd r4, [r3], #0x08 /* ST:30-37 */ 110 ldr r4, [r1], #0x04 /* LD:48-4b */ 111 ldr r5, [r1], #0x04 /* LD:4c-4f */ 112 strd r6, [r3], #0x08 /* ST:38-3f */ 113 ldr r6, [r1], #0x04 /* LD:50-53 */ 114 ldr r7, [r1], #0x04 /* LD:54-57 */ 115 strd r8, [r3], #0x08 /* ST:40-47 */ 116 ldr r8, [r1], #0x04 /* LD:58-5b */ 117 ldr r9, [r1], #0x04 /* LD:5c-5f */ 118 strd r4, [r3], #0x08 /* ST:48-4f */ 119 ldr r4, [r1], #0x04 /* LD:60-63 */ 120 ldr r5, [r1], #0x04 /* LD:64-67 */ 121 pld [r1, #0x18] /* Prefetch 0x80 */ 122 strd r6, [r3], #0x08 /* ST:50-57 */ 123 ldr r6, [r1], #0x04 /* LD:68-6b */ 124 ldr r7, [r1], #0x04 /* LD:6c-6f */ 125 strd r8, [r3], #0x08 /* ST:58-5f */ 126 ldr r8, [r1], #0x04 /* LD:70-73 */ 127 ldr r9, [r1], #0x04 /* LD:74-77 */ 128 strd r4, [r3], #0x08 /* ST:60-67 */ 129 ldr r4, [r1], #0x04 /* LD:78-7b */ 130 ldr r5, [r1], #0x04 /* LD:7c-7f */ 131 strd r6, [r3], #0x08 /* ST:68-6f */ 132 strd r8, [r3], #0x08 /* ST:70-77 */ 133 subs r2, r2, #0x80 134 strd r4, [r3], #0x08 /* ST:78-7f */ 135 bge .Lmemcpy_w_loop128 136 137.Lmemcpy_w_lessthan128: 138 adds r2, r2, #0x80 /* Adjust for extra sub */ 139 ldmfdeq sp!, {r4-r9} 140 bxeq lr /* Return now if done */ 141 subs r2, r2, #0x20 142 blt .Lmemcpy_w_lessthan32 143 144 /* Copy 32 bytes at a time */ 145.Lmemcpy_w_loop32: 146 ldr r4, [r1], #0x04 147 ldr r5, [r1], #0x04 148 pld [r1, #0x18] 149 ldr r6, [r1], #0x04 150 ldr r7, [r1], #0x04 151 ldr r8, [r1], #0x04 152 ldr r9, [r1], #0x04 153 strd r4, [r3], #0x08 154 ldr r4, [r1], #0x04 155 ldr r5, [r1], #0x04 156 strd r6, [r3], #0x08 157 strd r8, [r3], #0x08 158 subs r2, r2, #0x20 159 strd r4, [r3], #0x08 160 bge .Lmemcpy_w_loop32 161 162.Lmemcpy_w_lessthan32: 163 adds r2, r2, 
#0x20 /* Adjust for extra sub */ 164 ldmfdeq sp!, {r4-r9} 165 bxeq lr /* Return now if done */ 166 167 and r4, r2, #0x18 168 rsbs r4, r4, #0x18 169 addne pc, pc, r4, lsl #1 170 nop 171 172 /* At least 24 bytes remaining */ 173 ldr r4, [r1], #0x04 174 ldr r5, [r1], #0x04 175 sub r2, r2, #0x08 176 strd r4, [r3], #0x08 177 178 /* At least 16 bytes remaining */ 179 ldr r4, [r1], #0x04 180 ldr r5, [r1], #0x04 181 sub r2, r2, #0x08 182 strd r4, [r3], #0x08 183 184 /* At least 8 bytes remaining */ 185 ldr r4, [r1], #0x04 186 ldr r5, [r1], #0x04 187 subs r2, r2, #0x08 188 strd r4, [r3], #0x08 189 190 /* Less than 8 bytes remaining */ 191 ldmfd sp!, {r4-r9} 192 bxeq lr /* Return now if done */ 193 subs r2, r2, #0x04 194 ldrge ip, [r1], #0x04 195 strge ip, [r3], #0x04 196 bxeq lr /* Return now if done */ 197 addlt r2, r2, #0x04 198 ldrb ip, [r1], #0x01 199 cmp r2, #0x02 200 ldrbge r2, [r1], #0x01 201 strb ip, [r3], #0x01 202 ldrbgt ip, [r1] 203 strbge r2, [r3], #0x01 204 strbgt ip, [r3] 205 bx lr 206 207 208/* 209 * At this point, it has not been possible to word align both buffers. 210 * The destination buffer is word aligned, but the source buffer is not. 
211 */ 212.Lmemcpy_bad_align: 213 stmfd sp!, {r4-r7} 214 bic r1, r1, #0x03 215 cmp ip, #2 216 ldr ip, [r1], #0x04 217 bgt .Lmemcpy_bad3 218 beq .Lmemcpy_bad2 219 b .Lmemcpy_bad1 220 221.Lmemcpy_bad1_loop16: 222 mov r4, ip, lsr #8 223 ldr r5, [r1], #0x04 224 pld [r1, #0x018] 225 ldr r6, [r1], #0x04 226 ldr r7, [r1], #0x04 227 ldr ip, [r1], #0x04 228 orr r4, r4, r5, lsl #24 229 mov r5, r5, lsr #8 230 orr r5, r5, r6, lsl #24 231 mov r6, r6, lsr #8 232 orr r6, r6, r7, lsl #24 233 mov r7, r7, lsr #8 234 orr r7, r7, ip, lsl #24 235 str r4, [r3], #0x04 236 str r5, [r3], #0x04 237 str r6, [r3], #0x04 238 str r7, [r3], #0x04 239.Lmemcpy_bad1: 240 subs r2, r2, #0x10 241 bge .Lmemcpy_bad1_loop16 242 243 adds r2, r2, #0x10 244 ldmfdeq sp!, {r4-r7} 245 bxeq lr /* Return now if done */ 246 subs r2, r2, #0x04 247 sublt r1, r1, #0x03 248 blt .Lmemcpy_bad_done 249 250.Lmemcpy_bad1_loop4: 251 mov r4, ip, lsr #8 252 ldr ip, [r1], #0x04 253 subs r2, r2, #0x04 254 orr r4, r4, ip, lsl #24 255 str r4, [r3], #0x04 256 bge .Lmemcpy_bad1_loop4 257 sub r1, r1, #0x03 258 b .Lmemcpy_bad_done 259 260.Lmemcpy_bad2_loop16: 261 mov r4, ip, lsr #16 262 ldr r5, [r1], #0x04 263 pld [r1, #0x018] 264 ldr r6, [r1], #0x04 265 ldr r7, [r1], #0x04 266 ldr ip, [r1], #0x04 267 orr r4, r4, r5, lsl #16 268 mov r5, r5, lsr #16 269 orr r5, r5, r6, lsl #16 270 mov r6, r6, lsr #16 271 orr r6, r6, r7, lsl #16 272 mov r7, r7, lsr #16 273 orr r7, r7, ip, lsl #16 274 str r4, [r3], #0x04 275 str r5, [r3], #0x04 276 str r6, [r3], #0x04 277 str r7, [r3], #0x04 278.Lmemcpy_bad2: 279 subs r2, r2, #0x10 280 bge .Lmemcpy_bad2_loop16 281 282 adds r2, r2, #0x10 283 ldmfdeq sp!, {r4-r7} 284 bxeq lr /* Return now if done */ 285 subs r2, r2, #0x04 286 sublt r1, r1, #0x02 287 blt .Lmemcpy_bad_done 288 289.Lmemcpy_bad2_loop4: 290 mov r4, ip, lsr #16 291 ldr ip, [r1], #0x04 292 subs r2, r2, #0x04 293 orr r4, r4, ip, lsl #16 294 str r4, [r3], #0x04 295 bge .Lmemcpy_bad2_loop4 296 sub r1, r1, #0x02 297 b .Lmemcpy_bad_done 298 
299.Lmemcpy_bad3_loop16: 300 mov r4, ip, lsr #24 301 ldr r5, [r1], #0x04 302 pld [r1, #0x018] 303 ldr r6, [r1], #0x04 304 ldr r7, [r1], #0x04 305 ldr ip, [r1], #0x04 306 orr r4, r4, r5, lsl #8 307 mov r5, r5, lsr #24 308 orr r5, r5, r6, lsl #8 309 mov r6, r6, lsr #24 310 orr r6, r6, r7, lsl #8 311 mov r7, r7, lsr #24 312 orr r7, r7, ip, lsl #8 313 str r4, [r3], #0x04 314 str r5, [r3], #0x04 315 str r6, [r3], #0x04 316 str r7, [r3], #0x04 317.Lmemcpy_bad3: 318 subs r2, r2, #0x10 319 bge .Lmemcpy_bad3_loop16 320 321 adds r2, r2, #0x10 322 ldmfdeq sp!, {r4-r7} 323 bxeq lr /* Return now if done */ 324 subs r2, r2, #0x04 325 sublt r1, r1, #0x01 326 blt .Lmemcpy_bad_done 327 328.Lmemcpy_bad3_loop4: 329 mov r4, ip, lsr #24 330 ldr ip, [r1], #0x04 331 subs r2, r2, #0x04 332 orr r4, r4, ip, lsl #8 333 str r4, [r3], #0x04 334 bge .Lmemcpy_bad3_loop4 335 sub r1, r1, #0x01 336 337.Lmemcpy_bad_done: 338 ldmfd sp!, {r4-r7} 339 adds r2, r2, #0x04 340 bxeq lr 341 ldrb ip, [r1], #0x01 342 cmp r2, #0x02 343 ldrbge r2, [r1], #0x01 344 strb ip, [r3], #0x01 345 ldrbgt ip, [r1] 346 strbge r2, [r3], #0x01 347 strbgt ip, [r3] 348 bx lr 349 350 351/* 352 * Handle short copies (12 bytes or fewer), possibly misaligned. 353 * Some of these are *very* common, thanks to the network stack, 354 * and so are handled specially. 
355 */ 356.Lmemcpy_short: 357#ifndef _STANDALONE 358 add pc, pc, r2, lsl #2 359 nop 360 bx lr /* 0x00 */ 361 b .Lmemcpy_bytewise /* 0x01 */ 362 b .Lmemcpy_bytewise /* 0x02 */ 363 b .Lmemcpy_bytewise /* 0x03 */ 364 b .Lmemcpy_4 /* 0x04 */ 365 b .Lmemcpy_bytewise /* 0x05 */ 366 b .Lmemcpy_6 /* 0x06 */ 367 b .Lmemcpy_bytewise /* 0x07 */ 368 b .Lmemcpy_8 /* 0x08 */ 369 b .Lmemcpy_bytewise /* 0x09 */ 370 b .Lmemcpy_bytewise /* 0x0a */ 371 b .Lmemcpy_bytewise /* 0x0b */ 372 b .Lmemcpy_c /* 0x0c */ 373#endif 374.Lmemcpy_bytewise: 375 mov r3, r0 /* We must not clobber r0 */ 376 ldrb ip, [r1], #0x01 3771: subs r2, r2, #0x01 378 strb ip, [r3], #0x01 379 ldrbne ip, [r1], #0x01 380 bne 1b 381 bx lr 382 383#ifndef _STANDALONE 384/****************************************************************************** 385 * Special case for 4 byte copies 386 */ 387#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 388#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 389 LMEMCPY_4_PAD 390.Lmemcpy_4: 391 and r2, r1, #0x03 392 orr r2, r2, r0, lsl #2 393 ands r2, r2, #0x0f 394 sub r3, pc, #0x14 395 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 396 397/* 398 * 0000: dst is 32-bit aligned, src is 32-bit aligned 399 */ 400 ldr r2, [r1] 401 str r2, [r0] 402 bx lr 403 LMEMCPY_4_PAD 404 405/* 406 * 0001: dst is 32-bit aligned, src is 8-bit aligned 407 */ 408 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 409 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 410 mov r3, r3, lsr #8 /* r3 = .210 */ 411 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 412 str r3, [r0] 413 bx lr 414 LMEMCPY_4_PAD 415 416/* 417 * 0010: dst is 32-bit aligned, src is 16-bit aligned 418 */ 419 ldrh r3, [r1, #0x02] 420 ldrh r2, [r1] 421 orr r3, r2, r3, lsl #16 422 str r3, [r0] 423 bx lr 424 LMEMCPY_4_PAD 425 426/* 427 * 0011: dst is 32-bit aligned, src is 8-bit aligned 428 */ 429 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 430 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 431 mov r3, r3, lsr #24 /* r3 = ...0 */ 432 orr r3, r3, r2, lsl #8 /* r3 = 
3210 */ 433 str r3, [r0] 434 bx lr 435 LMEMCPY_4_PAD 436 437/* 438 * 0100: dst is 8-bit aligned, src is 32-bit aligned 439 */ 440 ldr r2, [r1] 441 strb r2, [r0] 442 mov r3, r2, lsr #8 443 mov r1, r2, lsr #24 444 strb r1, [r0, #0x03] 445 strh r3, [r0, #0x01] 446 bx lr 447 LMEMCPY_4_PAD 448 449/* 450 * 0101: dst is 8-bit aligned, src is 8-bit aligned 451 */ 452 ldrb r2, [r1] 453 ldrh r3, [r1, #0x01] 454 ldrb r1, [r1, #0x03] 455 strb r2, [r0] 456 strh r3, [r0, #0x01] 457 strb r1, [r0, #0x03] 458 bx lr 459 LMEMCPY_4_PAD 460 461/* 462 * 0110: dst is 8-bit aligned, src is 16-bit aligned 463 */ 464 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 465 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 466 strb r2, [r0] 467 mov r2, r2, lsr #8 /* r2 = ...1 */ 468 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 469 mov r3, r3, lsr #8 /* r3 = ...3 */ 470 strh r2, [r0, #0x01] 471 strb r3, [r0, #0x03] 472 bx lr 473 LMEMCPY_4_PAD 474 475/* 476 * 0111: dst is 8-bit aligned, src is 8-bit aligned 477 */ 478 ldrb r2, [r1] 479 ldrh r3, [r1, #0x01] 480 ldrb r1, [r1, #0x03] 481 strb r2, [r0] 482 strh r3, [r0, #0x01] 483 strb r1, [r0, #0x03] 484 bx lr 485 LMEMCPY_4_PAD 486 487/* 488 * 1000: dst is 16-bit aligned, src is 32-bit aligned 489 */ 490 ldr r2, [r1] 491 strh r2, [r0] 492 mov r3, r2, lsr #16 493 strh r3, [r0, #0x02] 494 bx lr 495 LMEMCPY_4_PAD 496 497/* 498 * 1001: dst is 16-bit aligned, src is 8-bit aligned 499 */ 500 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 501 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 502 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 503 strh r1, [r0] 504 mov r2, r2, lsr #24 /* r2 = ...2 */ 505 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 506 strh r2, [r0, #0x02] 507 bx lr 508 LMEMCPY_4_PAD 509 510/* 511 * 1010: dst is 16-bit aligned, src is 16-bit aligned 512 */ 513 ldrh r2, [r1] 514 ldrh r3, [r1, #0x02] 515 strh r2, [r0] 516 strh r3, [r0, #0x02] 517 bx lr 518 LMEMCPY_4_PAD 519 520/* 521 * 1011: dst is 16-bit aligned, src is 8-bit aligned 522 */ 523 ldr 
r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 524 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 525 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 526 strh r1, [r0, #0x02] 527 mov r3, r3, lsl #8 /* r3 = 321. */ 528 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 529 strh r3, [r0] 530 bx lr 531 LMEMCPY_4_PAD 532 533/* 534 * 1100: dst is 8-bit aligned, src is 32-bit aligned 535 */ 536 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 537 strb r2, [r0] 538 mov r3, r2, lsr #8 539 mov r1, r2, lsr #24 540 strh r3, [r0, #0x01] 541 strb r1, [r0, #0x03] 542 bx lr 543 LMEMCPY_4_PAD 544 545/* 546 * 1101: dst is 8-bit aligned, src is 8-bit aligned 547 */ 548 ldrb r2, [r1] 549 ldrh r3, [r1, #0x01] 550 ldrb r1, [r1, #0x03] 551 strb r2, [r0] 552 strh r3, [r0, #0x01] 553 strb r1, [r0, #0x03] 554 bx lr 555 LMEMCPY_4_PAD 556 557/* 558 * 1110: dst is 8-bit aligned, src is 16-bit aligned 559 */ 560 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 561 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 562 strb r2, [r0] 563 mov r2, r2, lsr #8 /* r2 = ...1 */ 564 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 565 strh r2, [r0, #0x01] 566 mov r3, r3, lsr #8 /* r3 = ...3 */ 567 strb r3, [r0, #0x03] 568 bx lr 569 LMEMCPY_4_PAD 570 571/* 572 * 1111: dst is 8-bit aligned, src is 8-bit aligned 573 */ 574 ldrb r2, [r1] 575 ldrh r3, [r1, #0x01] 576 ldrb r1, [r1, #0x03] 577 strb r2, [r0] 578 strh r3, [r0, #0x01] 579 strb r1, [r0, #0x03] 580 bx lr 581 LMEMCPY_4_PAD 582 583 584/****************************************************************************** 585 * Special case for 6 byte copies 586 */ 587#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 588#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 589 LMEMCPY_6_PAD 590.Lmemcpy_6: 591 and r2, r1, #0x03 592 orr r2, r2, r0, lsl #2 593 ands r2, r2, #0x0f 594 sub r3, pc, #0x14 595 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 596 597/* 598 * 0000: dst is 32-bit aligned, src is 32-bit aligned 599 */ 600 ldr r2, [r1] 601 ldrh r3, [r1, #0x04] 602 str r2, [r0] 603 strh r3, [r0, #0x04] 604 
bx lr 605 LMEMCPY_6_PAD 606 607/* 608 * 0001: dst is 32-bit aligned, src is 8-bit aligned 609 */ 610 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 611 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 612 mov r2, r2, lsr #8 /* r2 = .210 */ 613 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 614 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 615 str r2, [r0] 616 strh r3, [r0, #0x04] 617 bx lr 618 LMEMCPY_6_PAD 619 620/* 621 * 0010: dst is 32-bit aligned, src is 16-bit aligned 622 */ 623 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 624 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 625 mov r1, r3, lsr #16 /* r1 = ..54 */ 626 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 627 str r2, [r0] 628 strh r1, [r0, #0x04] 629 bx lr 630 LMEMCPY_6_PAD 631 632/* 633 * 0011: dst is 32-bit aligned, src is 8-bit aligned 634 */ 635 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 636 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 637 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 638 mov r2, r2, lsr #24 /* r2 = ...0 */ 639 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 640 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 641 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 642 str r2, [r0] 643 strh r1, [r0, #0x04] 644 bx lr 645 LMEMCPY_6_PAD 646 647/* 648 * 0100: dst is 8-bit aligned, src is 32-bit aligned 649 */ 650 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 651 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 652 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 653 strh r1, [r0, #0x01] 654 strb r3, [r0] 655 mov r3, r3, lsr #24 /* r3 = ...3 */ 656 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 657 mov r2, r2, lsr #8 /* r2 = ...5 */ 658 strh r3, [r0, #0x03] 659 strb r2, [r0, #0x05] 660 bx lr 661 LMEMCPY_6_PAD 662 663/* 664 * 0101: dst is 8-bit aligned, src is 8-bit aligned 665 */ 666 ldrb r2, [r1] 667 ldrh r3, [r1, #0x01] 668 ldrh ip, [r1, #0x03] 669 ldrb r1, [r1, #0x05] 670 strb r2, [r0] 671 strh r3, [r0, #0x01] 672 strh ip, [r0, #0x03] 673 strb r1, [r0, #0x05] 674 bx lr 675 LMEMCPY_6_PAD 676 677/* 678 * 0110: dst is 8-bit aligned, src is 16-bit aligned 679 */ 680 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 681 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 682 strb r2, [r0] 683 mov r3, r1, lsr #24 684 strb r3, [r0, #0x05] 685 mov r3, r1, lsr #8 /* r3 = .543 */ 686 strh r3, [r0, #0x03] 687 mov r3, r2, lsr #8 /* r3 = ...1 */ 688 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 689 strh r3, [r0, #0x01] 690 bx lr 691 LMEMCPY_6_PAD 692 693/* 694 * 0111: dst is 8-bit aligned, src is 8-bit aligned 695 */ 696 ldrb r2, [r1] 697 ldrh r3, [r1, #0x01] 698 ldrh ip, [r1, #0x03] 699 ldrb r1, [r1, #0x05] 700 strb r2, [r0] 701 strh r3, [r0, #0x01] 702 strh ip, [r0, #0x03] 703 strb r1, [r0, #0x05] 704 bx lr 705 LMEMCPY_6_PAD 706 707/* 708 * 1000: dst is 16-bit aligned, src is 32-bit aligned 709 */ 710 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 711 ldr r3, [r1] /* r3 = 3210 */ 712 mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 713 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 714 strh r3, [r0] 715 str r2, [r0, #0x02] 716 bx lr 717 LMEMCPY_6_PAD 718 719/* 720 * 1001: dst is 16-bit aligned, src is 8-bit aligned 721 */ 722 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 723 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 724 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 725 mov r2, r2, lsl #8 /* r2 = 543. */ 726 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 727 strh r1, [r0] 728 str r2, [r0, #0x02] 729 bx lr 730 LMEMCPY_6_PAD 731 732/* 733 * 1010: dst is 16-bit aligned, src is 16-bit aligned 734 */ 735 ldrh r2, [r1] 736 ldr r3, [r1, #0x02] 737 strh r2, [r0] 738 str r3, [r0, #0x02] 739 bx lr 740 LMEMCPY_6_PAD 741 742/* 743 * 1011: dst is 16-bit aligned, src is 8-bit aligned 744 */ 745 ldrb r3, [r1] /* r3 = ...0 */ 746 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 747 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 748 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 749 mov r1, r1, lsl #24 /* r1 = 5... */ 750 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 751 strh r3, [r0] 752 str r1, [r0, #0x02] 753 bx lr 754 LMEMCPY_6_PAD 755 756/* 757 * 1100: dst is 8-bit aligned, src is 32-bit aligned 758 */ 759 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 760 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 761 strb r2, [r0] 762 mov r2, r2, lsr #8 /* r2 = .321 */ 763 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 764 mov r1, r1, lsr #8 /* r1 = ...5 */ 765 str r2, [r0, #0x01] 766 strb r1, [r0, #0x05] 767 bx lr 768 LMEMCPY_6_PAD 769 770/* 771 * 1101: dst is 8-bit aligned, src is 8-bit aligned 772 */ 773 ldrb r2, [r1] 774 ldrh r3, [r1, #0x01] 775 ldrh ip, [r1, #0x03] 776 ldrb r1, [r1, #0x05] 777 strb r2, [r0] 778 strh r3, [r0, #0x01] 779 strh ip, [r0, #0x03] 780 strb r1, [r0, #0x05] 781 bx lr 782 LMEMCPY_6_PAD 783 784/* 785 * 1110: dst is 8-bit aligned, src is 16-bit aligned 786 */ 787 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 788 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 789 strb r2, [r0] 790 mov r2, r2, lsr #8 /* r2 
= ...1 */ 791 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 792 mov r1, r1, lsr #24 /* r1 = ...5 */ 793 str r2, [r0, #0x01] 794 strb r1, [r0, #0x05] 795 bx lr 796 LMEMCPY_6_PAD 797 798/* 799 * 1111: dst is 8-bit aligned, src is 8-bit aligned 800 */ 801 ldrb r2, [r1] 802 ldr r3, [r1, #0x01] 803 ldrb r1, [r1, #0x05] 804 strb r2, [r0] 805 str r3, [r0, #0x01] 806 strb r1, [r0, #0x05] 807 bx lr 808 LMEMCPY_6_PAD 809 810 811/****************************************************************************** 812 * Special case for 8 byte copies 813 */ 814#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 815#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 816 LMEMCPY_8_PAD 817.Lmemcpy_8: 818 and r2, r1, #0x03 819 orr r2, r2, r0, lsl #2 820 ands r2, r2, #0x0f 821 sub r3, pc, #0x14 822 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 823 824/* 825 * 0000: dst is 32-bit aligned, src is 32-bit aligned 826 */ 827 ldr r2, [r1] 828 ldr r3, [r1, #0x04] 829 str r2, [r0] 830 str r3, [r0, #0x04] 831 bx lr 832 LMEMCPY_8_PAD 833 834/* 835 * 0001: dst is 32-bit aligned, src is 8-bit aligned 836 */ 837 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 838 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 839 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 840 mov r3, r3, lsr #8 /* r3 = .210 */ 841 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 842 mov r1, r1, lsl #24 /* r1 = 7... 
*/ 843 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 844 str r3, [r0] 845 str r2, [r0, #0x04] 846 bx lr 847 LMEMCPY_8_PAD 848 849/* 850 * 0010: dst is 32-bit aligned, src is 16-bit aligned 851 */ 852 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 853 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 854 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 855 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 856 mov r3, r3, lsr #16 /* r3 = ..54 */ 857 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 858 str r2, [r0] 859 str r3, [r0, #0x04] 860 bx lr 861 LMEMCPY_8_PAD 862 863/* 864 * 0011: dst is 32-bit aligned, src is 8-bit aligned 865 */ 866 ldrb r3, [r1] /* r3 = ...0 */ 867 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 868 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 869 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 870 mov r2, r2, lsr #24 /* r2 = ...4 */ 871 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 872 str r3, [r0] 873 str r2, [r0, #0x04] 874 bx lr 875 LMEMCPY_8_PAD 876 877/* 878 * 0100: dst is 8-bit aligned, src is 32-bit aligned 879 */ 880 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 881 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 882 strb r3, [r0] 883 mov r1, r2, lsr #24 /* r1 = ...7 */ 884 strb r1, [r0, #0x07] 885 mov r1, r3, lsr #8 /* r1 = .321 */ 886 mov r3, r3, lsr #24 /* r3 = ...3 */ 887 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 888 strh r1, [r0, #0x01] 889 str r3, [r0, #0x03] 890 bx lr 891 LMEMCPY_8_PAD 892 893/* 894 * 0101: dst is 8-bit aligned, src is 8-bit aligned 895 */ 896 ldrb r2, [r1] 897 ldrh r3, [r1, #0x01] 898 ldr ip, [r1, #0x03] 899 ldrb r1, [r1, #0x07] 900 strb r2, [r0] 901 strh r3, [r0, #0x01] 902 str ip, [r0, #0x03] 903 strb r1, [r0, #0x07] 904 bx lr 905 LMEMCPY_8_PAD 906 907/* 908 * 0110: dst is 8-bit aligned, src is 16-bit aligned 909 */ 910 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 911 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 912 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 913 strb r2, [r0] /* 0 */ 914 mov ip, r1, lsr #8 /* ip = 
...7 */ 915 strb ip, [r0, #0x07] /* 7 */ 916 mov ip, r2, lsr #8 /* ip = ...1 */ 917 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 918 mov r3, r3, lsr #8 /* r3 = .543 */ 919 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 920 strh ip, [r0, #0x01] 921 str r3, [r0, #0x03] 922 bx lr 923 LMEMCPY_8_PAD 924 925/* 926 * 0111: dst is 8-bit aligned, src is 8-bit aligned 927 */ 928 ldrb r3, [r1] /* r3 = ...0 */ 929 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 930 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 931 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 932 strb r3, [r0] 933 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 934 strh ip, [r0, #0x01] 935 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 936 str r2, [r0, #0x03] 937 strb r1, [r0, #0x07] 938 bx lr 939 LMEMCPY_8_PAD 940 941/* 942 * 1000: dst is 16-bit aligned, src is 32-bit aligned 943 */ 944 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 945 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 946 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 947 strh r2, [r0] 948 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 949 mov r3, r3, lsr #16 /* r3 = ..76 */ 950 str r2, [r0, #0x02] 951 strh r3, [r0, #0x06] 952 bx lr 953 LMEMCPY_8_PAD 954 955/* 956 * 1001: dst is 16-bit aligned, src is 8-bit aligned 957 */ 958 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 959 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 960 ldrb ip, [r1, #0x07] /* ip = ...7 */ 961 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 962 strh r1, [r0] 963 mov r1, r2, lsr #24 /* r1 = ...2 */ 964 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 965 mov r3, r3, lsr #24 /* r3 = ...6 */ 966 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 967 str r1, [r0, #0x02] 968 strh r3, [r0, #0x06] 969 bx lr 970 LMEMCPY_8_PAD 971 972/* 973 * 1010: dst is 16-bit aligned, src is 16-bit aligned 974 */ 975 ldrh r2, [r1] 976 ldr ip, [r1, #0x02] 977 ldrh r3, [r1, #0x06] 978 strh r2, [r0] 979 str ip, [r0, #0x02] 980 strh r3, [r0, #0x06] 981 bx lr 982 LMEMCPY_8_PAD 983 984/* 985 * 1011: dst is 16-bit aligned, 
src is 8-bit aligned 986 */ 987 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 988 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 989 ldrb ip, [r1] /* ip = ...0 */ 990 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 991 strh r1, [r0, #0x06] 992 mov r3, r3, lsl #24 /* r3 = 5... */ 993 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 994 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 995 str r3, [r0, #0x02] 996 strh r2, [r0] 997 bx lr 998 LMEMCPY_8_PAD 999 1000/* 1001 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1002 */ 1003 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1004 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1005 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1006 strh r1, [r0, #0x05] 1007 strb r2, [r0] 1008 mov r1, r3, lsr #24 /* r1 = ...7 */ 1009 strb r1, [r0, #0x07] 1010 mov r2, r2, lsr #8 /* r2 = .321 */ 1011 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1012 str r2, [r0, #0x01] 1013 bx lr 1014 LMEMCPY_8_PAD 1015 1016/* 1017 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1018 */ 1019 ldrb r3, [r1] /* r3 = ...0 */ 1020 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1021 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1022 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1023 strb r3, [r0] 1024 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1025 strh r3, [r0, #0x05] 1026 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1027 str r2, [r0, #0x01] 1028 strb r1, [r0, #0x07] 1029 bx lr 1030 LMEMCPY_8_PAD 1031 1032/* 1033 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1034 */ 1035 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1036 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1037 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1038 strb r2, [r0] 1039 mov ip, r2, lsr #8 /* ip = ...1 */ 1040 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1041 mov r2, r1, lsr #8 /* r2 = ...7 */ 1042 strb r2, [r0, #0x07] 1043 mov r1, r1, lsl #8 /* r1 = .76. 
*/ 1044 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1045 str ip, [r0, #0x01] 1046 strh r1, [r0, #0x05] 1047 bx lr 1048 LMEMCPY_8_PAD 1049 1050/* 1051 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1052 */ 1053 ldrb r2, [r1] 1054 ldr ip, [r1, #0x01] 1055 ldrh r3, [r1, #0x05] 1056 ldrb r1, [r1, #0x07] 1057 strb r2, [r0] 1058 str ip, [r0, #0x01] 1059 strh r3, [r0, #0x05] 1060 strb r1, [r0, #0x07] 1061 bx lr 1062 LMEMCPY_8_PAD 1063 1064/****************************************************************************** 1065 * Special case for 12 byte copies 1066 */ 1067#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1068#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1069 LMEMCPY_C_PAD 1070.Lmemcpy_c: 1071 and r2, r1, #0x03 1072 orr r2, r2, r0, lsl #2 1073 ands r2, r2, #0x0f 1074 sub r3, pc, #0x14 1075 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 1076 1077/* 1078 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1079 */ 1080 ldr r2, [r1] 1081 ldr r3, [r1, #0x04] 1082 ldr r1, [r1, #0x08] 1083 str r2, [r0] 1084 str r3, [r0, #0x04] 1085 str r1, [r0, #0x08] 1086 bx lr 1087 LMEMCPY_C_PAD 1088 1089/* 1090 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1091 */ 1092 ldrb r2, [r1, #0xb] /* r2 = ...B */ 1093 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1094 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1095 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1096 mov r2, r2, lsl #24 /* r2 = B... */ 1097 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1098 str r2, [r0, #0x08] 1099 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 1100 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1101 mov r1, r1, lsr #8 /* r1 = .210 */ 1102 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1103 str r2, [r0, #0x04] 1104 str r1, [r0] 1105 bx lr 1106 LMEMCPY_C_PAD 1107 1108/* 1109 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1110 */ 1111 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1112 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1113 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1114 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1115 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1116 str r2, [r0] 1117 mov r3, r3, lsr #16 /* r3 = ..54 */ 1118 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1119 mov r1, r1, lsl #16 /* r1 = BA.. */ 1120 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1121 str r3, [r0, #0x04] 1122 str r1, [r0, #0x08] 1123 bx lr 1124 LMEMCPY_C_PAD 1125 1126/* 1127 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1128 */ 1129 ldrb r2, [r1] /* r2 = ...0 */ 1130 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1131 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1132 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1133 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1134 str r2, [r0] 1135 mov r3, r3, lsr #24 /* r3 = ...4 */ 1136 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1137 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1138 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1139 str r3, [r0, #0x04] 1140 str r1, [r0, #0x08] 1141 bx lr 1142 LMEMCPY_C_PAD 1143 1144/* 1145 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1146 */ 1147 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1148 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1149 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1150 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1151 strh r1, [r0, #0x01] 1152 strb r2, [r0] 1153 mov r1, r2, lsr #24 /* r1 = ...3 */ 1154 orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 1155 mov r1, r3, lsr #24 /* r1 = ...7 */ 1156 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1157 mov ip, ip, lsr #24 /* ip = ...B */ 1158 str r2, [r0, #0x03] 1159 str r1, [r0, #0x07] 1160 strb ip, [r0, #0x0b] 1161 bx lr 1162 LMEMCPY_C_PAD 1163 1164/* 1165 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1166 */ 1167 ldrb r2, [r1] 1168 ldrh r3, [r1, #0x01] 1169 ldr ip, [r1, #0x03] 1170 strb r2, [r0] 1171 ldr r2, [r1, #0x07] 1172 ldrb r1, [r1, #0x0b] 1173 strh r3, [r0, #0x01] 1174 str ip, [r0, #0x03] 1175 str r2, [r0, #0x07] 1176 strb r1, [r0, #0x0b] 1177 bx lr 1178 LMEMCPY_C_PAD 1179 1180/* 1181 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1182 */ 1183 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1184 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1185 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1186 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1187 strb r2, [r0] 1188 mov r2, r2, lsr #8 /* r2 = ...1 */ 1189 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1190 strh r2, [r0, #0x01] 1191 mov r2, r3, lsr #8 /* r2 = .543 */ 1192 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1193 mov r2, ip, lsr #8 /* r2 = .987 */ 1194 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1195 mov r1, r1, lsr #8 /* r1 = ...B */ 1196 str r3, [r0, #0x03] 1197 str r2, [r0, #0x07] 1198 strb r1, [r0, #0x0b] 1199 bx lr 1200 LMEMCPY_C_PAD 1201 1202/* 1203 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 
3) 1204 */ 1205 ldrb r2, [r1] 1206 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1207 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1208 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1209 strb r2, [r0] 1210 strh r3, [r0, #0x01] 1211 mov r3, r3, lsr #16 /* r3 = ..43 */ 1212 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1213 mov ip, ip, lsr #16 /* ip = ..87 */ 1214 orr ip, ip, r1, lsl #16 /* ip = A987 */ 1215 mov r1, r1, lsr #16 /* r1 = ..xB */ 1216 str r3, [r0, #0x03] 1217 str ip, [r0, #0x07] 1218 strb r1, [r0, #0x0b] 1219 bx lr 1220 LMEMCPY_C_PAD 1221 1222/* 1223 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1224 */ 1225 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1226 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1227 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1228 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1229 strh ip, [r0] 1230 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1231 mov r3, r3, lsr #16 /* r3 = ..76 */ 1232 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1233 mov r2, r2, lsr #16 /* r2 = ..BA */ 1234 str r1, [r0, #0x02] 1235 str r3, [r0, #0x06] 1236 strh r2, [r0, #0x0a] 1237 bx lr 1238 LMEMCPY_C_PAD 1239 1240/* 1241 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1242 */ 1243 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1244 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1245 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1246 strh ip, [r0] 1247 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1248 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1249 mov r2, r2, lsr #24 /* r2 = ...2 */ 1250 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1251 mov r3, r3, lsr #24 /* r3 = ...6 */ 1252 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1253 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 1254 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1255 str r2, [r0, #0x02] 1256 str r3, [r0, #0x06] 1257 strh r1, [r0, #0x0a] 1258 bx lr 1259 LMEMCPY_C_PAD 1260 1261/* 1262 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1263 */ 1264 ldrh r2, [r1] 1265 ldr r3, [r1, #0x02] 1266 ldr ip, [r1, #0x06] 1267 ldrh r1, [r1, #0x0a] 1268 strh r2, [r0] 1269 str r3, [r0, #0x02] 1270 str ip, [r0, #0x06] 1271 strh r1, [r0, #0x0a] 1272 bx lr 1273 LMEMCPY_C_PAD 1274 1275/* 1276 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1277 */ 1278 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1279 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1280 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1281 strh ip, [r0, #0x0a] 1282 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1283 ldrb r1, [r1] /* r1 = ...0 */ 1284 mov r2, r2, lsl #24 /* r2 = 9... */ 1285 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1286 mov r3, r3, lsl #24 /* r3 = 5... */ 1287 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1288 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1289 str r2, [r0, #0x06] 1290 str r3, [r0, #0x02] 1291 strh r1, [r0] 1292 bx lr 1293 LMEMCPY_C_PAD 1294 1295/* 1296 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1297 */ 1298 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1299 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1300 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1301 strb r2, [r0] 1302 mov r3, r2, lsr #8 /* r3 = .321 */ 1303 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 1304 str r3, [r0, #0x01] 1305 mov r3, ip, lsr #8 /* r3 = .765 */ 1306 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 1307 str r3, [r0, #0x05] 1308 mov r1, r1, lsr #8 /* r1 = .BA9 */ 1309 strh r1, [r0, #0x09] 1310 mov r1, r1, lsr #16 /* r1 = ...B */ 1311 strb r1, [r0, #0x0b] 1312 bx lr 1313 LMEMCPY_C_PAD 1314 1315/* 1316 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 1317 */ 1318 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 1319 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 1320 ldr ip, 
[r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1321 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1322 strb r2, [r0, #0x0b] 1323 mov r2, r3, lsr #16 /* r2 = ..A9 */ 1324 strh r2, [r0, #0x09] 1325 mov r3, r3, lsl #16 /* r3 = 87.. */ 1326 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 1327 mov ip, ip, lsl #16 /* ip = 43.. */ 1328 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 1329 mov r1, r1, lsr #8 /* r1 = .210 */ 1330 str r3, [r0, #0x05] 1331 str ip, [r0, #0x01] 1332 strb r1, [r0] 1333 bx lr 1334 LMEMCPY_C_PAD 1335 1336/* 1337 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 1338 */ 1339 ldrh r2, [r1] /* r2 = ..10 */ 1340 ldr r3, [r1, #0x02] /* r3 = 5432 */ 1341 ldr ip, [r1, #0x06] /* ip = 9876 */ 1342 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 1343 strb r2, [r0] 1344 mov r2, r2, lsr #8 /* r2 = ...1 */ 1345 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1346 mov r3, r3, lsr #24 /* r3 = ...5 */ 1347 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 1348 mov ip, ip, lsr #24 /* ip = ...9 */ 1349 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 1350 mov r1, r1, lsr #8 /* r1 = ...B */ 1351 str r2, [r0, #0x01] 1352 str r3, [r0, #0x05] 1353 strh ip, [r0, #0x09] 1354 strb r1, [r0, #0x0b] 1355 bx lr 1356 LMEMCPY_C_PAD 1357 1358/* 1359 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 1360 */ 1361 ldrb r2, [r1] 1362 ldr r3, [r1, #0x01] 1363 ldr ip, [r1, #0x05] 1364 strb r2, [r0] 1365 ldrh r2, [r1, #0x09] 1366 ldrb r1, [r1, #0x0b] 1367 str r3, [r0, #0x01] 1368 str ip, [r0, #0x05] 1369 strh r2, [r0, #0x09] 1370 strb r1, [r0, #0x0b] 1371 bx lr 1372#endif /* !_STANDALONE */ 1373END(memcpy) 1374 1375 .section .note.GNU-stack,"",%progbits 1376