/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

.syntax	unified

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
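	/*
	 * Rough C-like sketch of the dispatch below (illustrative only,
	 * not assembled):
	 *
	 *	if (dst == src)
	 *		return (dst);			// nothing to copy
	 *	if (the regions do not overlap)
	 *		return (memcpy(dst, src, len));	// plain copy is safe
	 *	if (dst < src)
	 *		copy forwards;			// low bytes first
	 *	else
	 *		copy backwards;			// high bytes first
	 *
	 * Either direction reads each overlapping byte before it is
	 * overwritten, which is what distinguishes memmove from memcpy.
	 */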
	cmp	r0, r1
	it	eq
	RETeq			/* Bail now if src/dst are the same */
	ite	cc
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards

	itt	eq
	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
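	/*
	 * The source address is 1, 2 or 3 bytes past a word boundary
	 * (r12 = src & 3).  The code below rounds the source pointer down
	 * to a word boundary and builds each destination word from two
	 * adjacent source words with shift-and-OR, so every load and
	 * store stays word aligned.  The shift amounts differ between the
	 * three cases and between little- and big-endian (__ARMEB__)
	 * builds.
	 */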
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
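	/* big-endian: fill the low three bytes of the output word from
	   the top three bytes of the word just loaded */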
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
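	/*
	 * Backwards mirror of the unaligned-source code above: r12 again
	 * holds src & 3, the source pointer is rounded down to a word
	 * boundary, and each destination word is merged from two adjacent
	 * source words while both pointers walk downwards.
	 */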
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

	.section .note.GNU-stack,"",%progbits