/* $NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $ */

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * ARM (32-bit, pre-UAL syntax) memmove()/bcopy().
 *
 * Register contract on entry:
 *	r0 = dst, r1 = src, r2 = len (bytes)
 * (When built with _BCOPY defined this file provides bcopy(), whose
 * first two arguments arrive reversed and are swapped on entry.)
 *
 * memmove() returns the original dst pointer in r0.
 * Scratch: r3, r12, lr, flags; r4/r5 are pushed/popped around the
 * wide-copy loops that borrow them.
 *
 * Non-overlapping copies are handed off to memcpy().  Overlapping
 * copies are done forwards when dst < src and backwards otherwise.
 * For word-misaligned sources the "fsrcul"/"bsrcul" paths read whole
 * words and splice them together with shift/orr pairs; the shift
 * amounts differ for big-endian (__ARMEB__) and little-endian builds.
 */

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (3-eor swap) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: tail-call memcpy */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy backwards */

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* sets GE iff at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo bias; GE iff >= 12 bytes left */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* exactly 4..7 bytes: copy one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8..11 bytes: copy two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* GE -> >=2 bytes, GT -> 3 bytes */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore saved dst into r0, return */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* word-align src; r12 = src & 3 */
	ldr	lr, [r1], #4		/* prime lr with first partial word */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src is 1 byte past a word boundary: splice with 8/24-bit shifts */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* step src back to true byte position */
	b	.Lmemmove_fl4

	/* src is 2 bytes past a word boundary: splice with 16-bit shifts */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* step src back to true byte position */
	b	.Lmemmove_fl4

	/* src is 3 bytes past a word boundary: splice with 24/8-bit shifts */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* step src back to true byte position */
	b	.Lmemmove_fl4

	/* dst > src with overlap: copy from the top end downwards */
.Lmemmove_backwards:
	add	r1, r1, r2		/* point src/dst one past the end */
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* sets GE iff at least 16 bytes remain */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* undo bias; GE iff >= 12 bytes left */
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* exactly 4..7 bytes: copy one word */
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}		/* 8..11 bytes: copy two words */
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* GE -> >=2 bytes, GT -> 3 bytes */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to align */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* word-align src; r12 = src & 3 */
	ldr	r3, [r1, #0]		/* prime r3 with last partial word */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src offset 3: splice backwards with 8/24-bit shifts */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* step src forward to true byte position */
	b	.Lmemmove_bl4

	/* src offset 2: splice backwards with 16-bit shifts */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* step src forward to true byte position */
	b	.Lmemmove_bl4

	/* src offset 1: splice backwards with 24/8-bit shifts */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* step src forward to true byte position */
	b	.Lmemmove_bl4