/* $NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $ */

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *   (or, when built with -D_BCOPY:)
 * void bcopy(const void *src, void *dst, size_t len)
 *
 * Overlap-safe copy for 32-bit ARM.
 *
 * Register contract on entry (after the bcopy swap, if any):
 *   r0 = dst, r1 = src, r2 = len; returns original dst in r0.
 * Scratch: r3, r12 (ip), lr; r4 (and r5 in the unaligned paths) are
 * saved/restored on the stack before use.
 *
 * Strategy:
 *   - If the buffers do not overlap, tail-call memcpy and let it do the work.
 *   - Otherwise copy forwards when src > dst, backwards when dst > src,
 *     so overlapping bytes are never clobbered before they are read.
 *   - Each direction first word-aligns the destination with byte copies,
 *     then either runs word/ldm-stm block loops (aligned source) or
 *     stitches destination words out of shifted source words (unaligned
 *     source), then mops up the final 0-3 bytes one at a time.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (3-eor swap, no temp) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: plain memcpy */

	/* Determine copy direction: dst > src must copy backwards */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	/*
	 * NOTE(review): eq cannot hold here -- the src == dst case already
	 * returned above -- so this pair looks vestigial; left untouched.
	 */
	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	/* ---------------- forward copy (src > dst) ---------------- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* ge iff at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; ge enables loop */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	/* 4-11 bytes left: one word if < 8, two words otherwise */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done: restore dst and return */

	/* copy the crud byte at a time (flags from cmp drive 2nd/3rd copy) */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies (1, 2 or 3 of them) */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Source is r12 bytes past a word boundary (r12 = 1, 2 or 3).
	 * Read whole words from the word-aligned address and build each
	 * destination word from the tail of one source word orr'd with the
	 * head of the next, shifted by 8*r12 bits.  lr carries the current
	 * partial word between iterations.  Endianness flips which shift
	 * direction extracts the "tail" vs the "head".
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* prime lr with the first word */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3	/* misalignment 3 */
	beq	.Lmemmove_fsrcul2	/* misalignment 2 */
	/* fall through: misalignment 1 */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* rewind src to the true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* rewind src to the true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* rewind src to the true byte position */
	b	.Lmemmove_fl4

	/* ---------------- backward copy (dst > src) ----------------
	 * Mirror image of the forward path: start at the buffer ends and
	 * walk down with ldmdb/stmdb and negative pre-indexed byte copies.
	 * No {r0, lr} save here -- r0 is only advanced by r2 and ends back
	 * at the original dst, which is the return value.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2		/* point one past the end of src */
	add	r0, r0, r2		/* point one past the end of dst */
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* ge iff at least 16 bytes remain */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time (flags from cmp drive 2nd/3rd copy) */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2		/* r12 = dst & 3 = bytes above alignment */

	/* align destination with byte copies (1, 2 or 3 of them) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Backward analogue of .Lmemmove_fsrcul: r3 carries the current
	 * partial word downward, ldmdb fetches the next lower four words,
	 * and each output word is stitched from adjacent source words
	 * shifted by 8*r12 bits (shift directions mirror the forward path).
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	r3, [r1, #0]		/* prime r3 with the boundary word */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1	/* misalignment 1 */
	beq	.Lmemmove_bsrcul2	/* misalignment 2 */
	/* fall through: misalignment 3 */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore src to the true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore src to the true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/*
	 * NOTE(review): despite the "loop32" name this loop moves 16 bytes
	 * per iteration, like its siblings above; the label is historical.
	 */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore src to the true byte position */
	b	.Lmemmove_bl4