/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

.syntax unified

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * Overlap-safe memory copy for 32-bit ARM (AAPCS register usage):
 *   In:   r0 = dst, r1 = src, r2 = len (bytes)
 *   Out:  r0 = dst (memmove); bcopy discards the return value
 *   Uses: r3, r12, lr as scratch; r4/r5 are saved/restored on the
 *         stack around the wide inner loops before being borrowed.
 *
 * Strategy:
 *   - If src/dst do not overlap, tail-call memcpy (the fast path).
 *   - Otherwise copy forwards when src > dst, backwards when src < dst,
 *     so the overlapping region is never clobbered before it is read.
 *   - Each direction aligns the destination first, then the source,
 *     then blats 32/16/12/4 bytes at a time with ldm/stm; unaligned
 *     sources are handled by word loads plus shift-and-merge, with
 *     separate big-endian (__ARMEB__) and little-endian shift pairs.
 *
 * NOTE: many conditional instructions below (ldrbge/strbgt/ldmiage...)
 * rely on flags set by an earlier cmp/subs several instructions back;
 * instruction order in those runs is load-bearing.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (3-eor swap) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: tail-call memcpy */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy high-to-low */

	/*
	 * Quick abort for len=0.
	 * NOTE(review): flags here are from "cmp r1, r0" and the equal
	 * case already returned above, so this eq pair looks unreachable;
	 * retained as-is — confirm before removing.
	 */
	moveq	r0, #0
	RETeq

	/* ---- forward (low-to-high) copy: src > dst ---- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* from here on, r2 = len - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* sets flags for r2 + 16 */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; flags gate loop entry */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* exactly 4..7 bytes left: one word */
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}		/* 8..11 bytes left: two words */
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time (r2 = 1, 2 or 3 here) */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2			/* dispatch on src misalignment (1/2/3) */
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src offset 1: carry 3 bytes forward, merge 1 byte from next word */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* restore true (unaligned) src pointer */
	b	.Lmemmove_fl4

	/* src offset 2: carry 2 bytes forward, merge 2 from next word */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* restore true (unaligned) src pointer */
	b	.Lmemmove_fl4

	/* src offset 3: carry 1 byte forward, merge 3 from next word */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* restore true (unaligned) src pointer */
	b	.Lmemmove_fl4

	/* ---- backward (high-to-low) copy: src < dst ---- */
.Lmemmove_backwards:
	add	r1, r1, r2		/* point both pointers past the end */
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* sets flags for r2 + 16 */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* exactly 4..7 bytes left: one word */
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}		/* 8..11 bytes left: two words */
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time (r2 = 1, 2 or 3 here) */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to strip */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	r3, [r1, #0]		/* prime r3 with the last partial word */
	cmp	r12, #2			/* dispatch on src misalignment (1/2/3) */
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src offset 3, backwards: shifts mirror the forward fsrcul3 case */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore true (unaligned) src pointer */
	b	.Lmemmove_bl4

	/* src offset 2, backwards */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore true (unaligned) src pointer */
	b	.Lmemmove_bl4

	/* src offset 1, backwards */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* NOTE(review): label says loop32 but the body moves 16 bytes per
	 * iteration (subs #0x10), matching the other *loop16 loops. */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore true (unaligned) src pointer */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif