/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
.syntax	unified

/*
 * void *memmove(void *dst, const void *src, size_t len)
 * void  bcopy(void *src, void *dst, size_t len)	(when _BCOPY is defined)
 *
 * Register roles after the optional bcopy argument swap:
 *	r0  = destination pointer (advanced while copying)
 *	r1  = source pointer (advanced while copying)
 *	r2  = byte count, kept biased by the size of the next chunk so that
 *	      the sign flag tells whether another chunk fits
 *	r3, r12, lr (and r4/r5 where pushed) = data scratch
 *
 * Return value (memmove): the original dst in r0.  The forward path saves
 * r0 on the stack and reloads it; the backward path starts at dst + len
 * and walks down by exactly len bytes, so r0 ends up equal to dst again.
 *
 * Strategy:
 *	- src == dst: return immediately.
 *	- regions do not overlap (|dst - src| >= len): tail-call memcpy.
 *	- src > dst: copy forwards  (.Lmemmove_f* labels).
 *	- src < dst: copy backwards (.Lmemmove_b* labels).
 *
 * The shift/merge loops used for a misaligned source combine neighbouring
 * words with lsr/lsl pairs in the order required by little-endian byte
 * layout; this file carries no big-endian variant.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (xor swap, no scratch) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	it	eq
	RETeq			/* Bail now if src/dst are the same */
	/*
	 * r3 = |dst - src|.  The plain sub instructions leave the flags from
	 * the cmp above untouched, so both arms of the IT block test the
	 * same comparison.
	 */
	ite	cs
	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) there is no overlap ... */
	/*
	 * ... so the plain copy routine is safe: tail-call it.  memcpy also
	 * returns dst in r0.
	 * NOTE(review): assumes memcpy is an independent implementation and
	 * does not itself branch back into memmove - confirm for this libc.
	 */
	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/*
	 * Determine copy direction.  src == dst was handled above, so the
	 * only outcomes are "src below dst" (copy backwards) or "src above
	 * dst" (fall through and copy forwards).
	 */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards

	/* ---------------- forward copy (src > dst) ---------------- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* at least 16 of the 32 still left? */
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; ge <=> 12+ left */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore dst as return value */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to the next word */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3.  Round src down to a word boundary, preload that
	 * word into lr, then build every destination word out of two
	 * neighbouring source words with a shift pair chosen by r12.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	/* source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back to the true unaligned src */
	b	.Lmemmove_fl4

	/* source is 2 bytes past a word boundary */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back to the true unaligned src */
	b	.Lmemmove_fl4

	/* source is 3 bytes past a word boundary */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back to the true unaligned src */
	b	.Lmemmove_fl4

	/* ---------------- backward copy (src < dst) ---------------- */
	/*
	 * Both pointers are moved one past the end of their buffers and the
	 * copy walks downwards.  lr is not saved on entry to this path; it
	 * is pushed only around the loops that use it as scratch.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
	/* r12 = (dst end) & 3 = bytes to step down to a word boundary */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = (src end) & 3.  Round the end pointer down to a word
	 * boundary, preload that word into r3, then build every destination
	 * word out of two neighbouring source words while walking downwards.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	/* source end is 3 bytes past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* back to the true unaligned src */
	b	.Lmemmove_bl4

	/* source end is 2 bytes past a word boundary */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* back to the true unaligned src */
	b	.Lmemmove_bl4

	/* source end is 1 byte past a word boundary */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* 16 bytes per pass, like the other bsrcul*loop16 loops */
.Lmemmove_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* back to the true unaligned src */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

	.section .note.GNU-stack,"",%progbits