/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

.syntax	unified

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
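	/*
	 * Overlap handling: if the distance between dst (r0) and src (r1)
	 * is at least the length (r2), the regions cannot overlap and the
	 * whole job is handed off to memcpy.  Otherwise we copy forwards
	 * when dst < src and backwards (from the top of both buffers) when
	 * dst > src, so overlapping bytes are read before they are
	 * overwritten.
	 */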
	/* Do the buffers overlap? */
	cmp	r0, r1
	it	eq
	RETeq			/* Bail now if src/dst are the same */
	ite	cc
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards

	itt	eq
	moveq	r0, #0		/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}	/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4	/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12	/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32	/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}	/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}	/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
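	/*
	 * The source is not word aligned relative to the (now aligned)
	 * destination.  Words are fetched from the rounded-down source
	 * address and each destination word is assembled from two adjacent
	 * source words, shifted and OR-ed together by 8, 16 or 24 bits
	 * depending on the misalignment, so the inner loops still move one
	 * or four words per iteration.
	 */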
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

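	/*
	 * Backwards copy: used when the destination lies above an
	 * overlapping source.  Both pointers are advanced to the end of
	 * their buffers and the copy proceeds downwards, mirroring the
	 * forward path: 32-byte ldmdb/stmdb bursts, then 16- and 12-byte
	 * tails, word copies, and finally bytes.
	 */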
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4	/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12	/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14	/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

	.section .note.GNU-stack,"",%progbits