1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
#include "assym.inc"

	.syntax	unified		/* UAL syntax: needed for the strdge/ldmiage/eorsne forms below */

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
do_memset:	/* secondary entry: expects r0 = dst, r1 = count, r3 = fill byte */
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0			/* ip walks the buffer; r0 is preserved as the return value */
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
	tst	ip, #0x04		/* Quad-align for armv5e */
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data so r2:r3 can feed strd */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
	strdge	r2, [ip], #0x08		/* 16 conditional 8-byte stores = 128 bytes */
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop128
	RETeq				/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop32
	RETeq				/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	RETeq				/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq				/* Zero length so just exit */

	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET				/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach word alignment (1..3) */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq				/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET				/* Exit */
END(memset)

/*
 * memcmp: compare two blocks of memory byte-for-byte.
 *
 * On entry:
 *	r0 - first buffer
 *	r1 - second buffer
 *	r2 - length in bytes
 * On exit:
 *	r0 - 0 if the blocks are equal, otherwise the difference
 *	     (byte1 - byte2) of the first mismatching byte pair
 *
 * len == 6 is dispatched to a hand-scheduled special case below.
 */
ENTRY(memcmp)
	mov	ip, r0			/* free r0 for the return value */
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1		/* r3 = differing address bits (flags: EQ if identical) */
	RETeq				/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned.  Do it the slow way */

	/* Word-align the addresses, if necessary */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03		/* r3 = (r1 - 1) & 3 = leading bytes to compare */
	add	r3, r3, r3, lsl #1	/* r3 *= 3 */
	addne	pc, pc, r3, lsl #3	/* computed skip into the 24-byte (6-insn) rungs below; pc reads as .+8 */
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq				/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results.  Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne				/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne				/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne				/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne				/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne				/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(memcmp)

/*
 * memmove: copy len bytes between possibly-overlapping buffers.
 *
 * On entry:
 *	r0 - destination
 *	r1 - source
 *	r2 - length in bytes
 * On exit:
 *	r0 - destination
 *
 * Copies backwards (from the high addresses down) when src < dst, so
 * overlapping regions are handled safely in both directions.
 *
 * NOTE(review): the bcc below intends to tail-call memcpy when the
 * buffers do not overlap, but with cmp r0, r1 setting the flags the
 * conditional subtractions appear to compute dst-src (resp. src-dst)
 * in exactly the ordering that underflows, leaving a huge unsigned r3,
 * so the fast path may never be taken -- confirm against upstream.
 * The fall-through path is correct either way.
 */
ENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq				/* Bail now if src/dst are the same */
	subcc	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* round src down; r12 = src misalignment (1..3) */
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	/* merge 4 words shifted by 1 byte into 4 aligned output words */
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* undo the rounding for the byte-wise tail */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	/* merge 4 words shifted by 2 bytes into 4 aligned output words */
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* undo the rounding for the byte-wise tail */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	/* merge 4 words shifted by 3 bytes into 4 aligned output words */
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* undo the rounding for the byte-wise tail */
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	/* overlapping with dst above src: copy from the high end down */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq				/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* round src down; r12 = src misalignment (1..3) */
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	/* backwards variant of the 3-byte-shift merge loop */
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* undo the rounding for the byte-wise tail */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	/* backwards variant of the 2-byte-shift merge loop */
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
685 subs r2, r2, #4 686 bge .Lmemmove_bsrcul2loop4 687 688.Lmemmove_bsrcul2l4: 689 add r1, r1, #2 690 b .Lmemmove_bl4 691 692.Lmemmove_bsrcul1: 693 cmp r2, #0x0c 694 blt .Lmemmove_bsrcul1loop4 695 sub r2, r2, #0x0c 696 stmdb sp!, {r4, r5, lr} 697 698.Lmemmove_bsrcul1loop32: 699 mov lr, r3, lsl #24 700 ldmdb r1!, {r3-r5, r12} 701 orr lr, lr, r12, lsr #8 702 mov r12, r12, lsl #24 703 orr r12, r12, r5, lsr #8 704 mov r5, r5, lsl #24 705 orr r5, r5, r4, lsr #8 706 mov r4, r4, lsl #24 707 orr r4, r4, r3, lsr #8 708 stmdb r0!, {r4, r5, r12, lr} 709 subs r2, r2, #0x10 710 bge .Lmemmove_bsrcul1loop32 711 ldmia sp!, {r4, r5, lr} 712 adds r2, r2, #0x0c 713 blt .Lmemmove_bsrcul1l4 714 715.Lmemmove_bsrcul1loop4: 716 mov r12, r3, lsl #24 717 ldr r3, [r1, #-4]! 718 orr r12, r12, r3, lsr #8 719 str r12, [r0, #-4]! 720 subs r2, r2, #4 721 bge .Lmemmove_bsrcul1loop4 722 723.Lmemmove_bsrcul1l4: 724 add r1, r1, #1 725 b .Lmemmove_bl4 726END(memmove) 727 728/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 729ENTRY(memcpy) 730 pld [r1] 731 cmp r2, #0x0c 732 ble .Lmemcpy_short /* <= 12 bytes */ 733 mov r3, r0 /* We must not clobber r0 */ 734 735 /* Word-align the destination buffer */ 736 ands ip, r3, #0x03 /* Already word aligned? */ 737 beq .Lmemcpy_wordaligned /* Yup */ 738 cmp ip, #0x02 739 ldrb ip, [r1], #0x01 740 sub r2, r2, #0x01 741 strb ip, [r3], #0x01 742 ldrble ip, [r1], #0x01 743 suble r2, r2, #0x01 744 strble ip, [r3], #0x01 745 ldrblt ip, [r1], #0x01 746 sublt r2, r2, #0x01 747 strblt ip, [r3], #0x01 748 749 /* Destination buffer is now word aligned */ 750.Lmemcpy_wordaligned: 751 ands ip, r1, #0x03 /* Is src also word-aligned? */ 752 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 753 754 /* Quad-align the destination buffer */ 755 tst r3, #0x07 /* Already quad aligned? 
*/ 756 ldrne ip, [r1], #0x04 757 stmfd sp!, {r4-r9} /* Free up some registers */ 758 subne r2, r2, #0x04 759 strne ip, [r3], #0x04 760 761 /* Destination buffer quad aligned, source is at least word aligned */ 762 subs r2, r2, #0x80 763 blt .Lmemcpy_w_lessthan128 764 765 /* Copy 128 bytes at a time */ 766.Lmemcpy_w_loop128: 767 ldr r4, [r1], #0x04 /* LD:00-03 */ 768 ldr r5, [r1], #0x04 /* LD:04-07 */ 769 pld [r1, #0x18] /* Prefetch 0x20 */ 770 ldr r6, [r1], #0x04 /* LD:08-0b */ 771 ldr r7, [r1], #0x04 /* LD:0c-0f */ 772 ldr r8, [r1], #0x04 /* LD:10-13 */ 773 ldr r9, [r1], #0x04 /* LD:14-17 */ 774 strd r4, [r3], #0x08 /* ST:00-07 */ 775 ldr r4, [r1], #0x04 /* LD:18-1b */ 776 ldr r5, [r1], #0x04 /* LD:1c-1f */ 777 strd r6, [r3], #0x08 /* ST:08-0f */ 778 ldr r6, [r1], #0x04 /* LD:20-23 */ 779 ldr r7, [r1], #0x04 /* LD:24-27 */ 780 pld [r1, #0x18] /* Prefetch 0x40 */ 781 strd r8, [r3], #0x08 /* ST:10-17 */ 782 ldr r8, [r1], #0x04 /* LD:28-2b */ 783 ldr r9, [r1], #0x04 /* LD:2c-2f */ 784 strd r4, [r3], #0x08 /* ST:18-1f */ 785 ldr r4, [r1], #0x04 /* LD:30-33 */ 786 ldr r5, [r1], #0x04 /* LD:34-37 */ 787 strd r6, [r3], #0x08 /* ST:20-27 */ 788 ldr r6, [r1], #0x04 /* LD:38-3b */ 789 ldr r7, [r1], #0x04 /* LD:3c-3f */ 790 strd r8, [r3], #0x08 /* ST:28-2f */ 791 ldr r8, [r1], #0x04 /* LD:40-43 */ 792 ldr r9, [r1], #0x04 /* LD:44-47 */ 793 pld [r1, #0x18] /* Prefetch 0x60 */ 794 strd r4, [r3], #0x08 /* ST:30-37 */ 795 ldr r4, [r1], #0x04 /* LD:48-4b */ 796 ldr r5, [r1], #0x04 /* LD:4c-4f */ 797 strd r6, [r3], #0x08 /* ST:38-3f */ 798 ldr r6, [r1], #0x04 /* LD:50-53 */ 799 ldr r7, [r1], #0x04 /* LD:54-57 */ 800 strd r8, [r3], #0x08 /* ST:40-47 */ 801 ldr r8, [r1], #0x04 /* LD:58-5b */ 802 ldr r9, [r1], #0x04 /* LD:5c-5f */ 803 strd r4, [r3], #0x08 /* ST:48-4f */ 804 ldr r4, [r1], #0x04 /* LD:60-63 */ 805 ldr r5, [r1], #0x04 /* LD:64-67 */ 806 pld [r1, #0x18] /* Prefetch 0x80 */ 807 strd r6, [r3], #0x08 /* ST:50-57 */ 808 ldr r6, [r1], #0x04 /* LD:68-6b */ 809 ldr r7, [r1], 
#0x04 /* LD:6c-6f */ 810 strd r8, [r3], #0x08 /* ST:58-5f */ 811 ldr r8, [r1], #0x04 /* LD:70-73 */ 812 ldr r9, [r1], #0x04 /* LD:74-77 */ 813 strd r4, [r3], #0x08 /* ST:60-67 */ 814 ldr r4, [r1], #0x04 /* LD:78-7b */ 815 ldr r5, [r1], #0x04 /* LD:7c-7f */ 816 strd r6, [r3], #0x08 /* ST:68-6f */ 817 strd r8, [r3], #0x08 /* ST:70-77 */ 818 subs r2, r2, #0x80 819 strd r4, [r3], #0x08 /* ST:78-7f */ 820 bge .Lmemcpy_w_loop128 821 822.Lmemcpy_w_lessthan128: 823 adds r2, r2, #0x80 /* Adjust for extra sub */ 824 ldmfdeq sp!, {r4-r9} 825 RETeq /* Return now if done */ 826 subs r2, r2, #0x20 827 blt .Lmemcpy_w_lessthan32 828 829 /* Copy 32 bytes at a time */ 830.Lmemcpy_w_loop32: 831 ldr r4, [r1], #0x04 832 ldr r5, [r1], #0x04 833 pld [r1, #0x18] 834 ldr r6, [r1], #0x04 835 ldr r7, [r1], #0x04 836 ldr r8, [r1], #0x04 837 ldr r9, [r1], #0x04 838 strd r4, [r3], #0x08 839 ldr r4, [r1], #0x04 840 ldr r5, [r1], #0x04 841 strd r6, [r3], #0x08 842 strd r8, [r3], #0x08 843 subs r2, r2, #0x20 844 strd r4, [r3], #0x08 845 bge .Lmemcpy_w_loop32 846 847.Lmemcpy_w_lessthan32: 848 adds r2, r2, #0x20 /* Adjust for extra sub */ 849 ldmfdeq sp!, {r4-r9} 850 RETeq /* Return now if done */ 851 852 and r4, r2, #0x18 853 rsbs r4, r4, #0x18 854 addne pc, pc, r4, lsl #1 855 nop 856 857 /* At least 24 bytes remaining */ 858 ldr r4, [r1], #0x04 859 ldr r5, [r1], #0x04 860 sub r2, r2, #0x08 861 strd r4, [r3], #0x08 862 863 /* At least 16 bytes remaining */ 864 ldr r4, [r1], #0x04 865 ldr r5, [r1], #0x04 866 sub r2, r2, #0x08 867 strd r4, [r3], #0x08 868 869 /* At least 8 bytes remaining */ 870 ldr r4, [r1], #0x04 871 ldr r5, [r1], #0x04 872 subs r2, r2, #0x08 873 strd r4, [r3], #0x08 874 875 /* Less than 8 bytes remaining */ 876 ldmfd sp!, {r4-r9} 877 RETeq /* Return now if done */ 878 subs r2, r2, #0x04 879 ldrge ip, [r1], #0x04 880 strge ip, [r3], #0x04 881 RETeq /* Return now if done */ 882 addlt r2, r2, #0x04 883 ldrb ip, [r1], #0x01 884 cmp r2, #0x02 885 ldrbge r2, [r1], #0x01 886 strb ip, 
[r3], #0x01 887 ldrbgt ip, [r1] 888 strbge r2, [r3], #0x01 889 strbgt ip, [r3] 890 RET 891/* Place a literal pool here for the above ldr instructions to use */ 892.ltorg 893 894 895/* 896 * At this point, it has not been possible to word align both buffers. 897 * The destination buffer is word aligned, but the source buffer is not. 898 */ 899.Lmemcpy_bad_align: 900 stmfd sp!, {r4-r7} 901 bic r1, r1, #0x03 902 cmp ip, #2 903 ldr ip, [r1], #0x04 904 bgt .Lmemcpy_bad3 905 beq .Lmemcpy_bad2 906 b .Lmemcpy_bad1 907 908.Lmemcpy_bad1_loop16: 909 mov r4, ip, lsr #8 910 ldr r5, [r1], #0x04 911 pld [r1, #0x018] 912 ldr r6, [r1], #0x04 913 ldr r7, [r1], #0x04 914 ldr ip, [r1], #0x04 915 orr r4, r4, r5, lsl #24 916 mov r5, r5, lsr #8 917 orr r5, r5, r6, lsl #24 918 mov r6, r6, lsr #8 919 orr r6, r6, r7, lsl #24 920 mov r7, r7, lsr #8 921 orr r7, r7, ip, lsl #24 922 str r4, [r3], #0x04 923 str r5, [r3], #0x04 924 str r6, [r3], #0x04 925 str r7, [r3], #0x04 926.Lmemcpy_bad1: 927 subs r2, r2, #0x10 928 bge .Lmemcpy_bad1_loop16 929 930 adds r2, r2, #0x10 931 ldmfdeq sp!, {r4-r7} 932 RETeq /* Return now if done */ 933 subs r2, r2, #0x04 934 sublt r1, r1, #0x03 935 blt .Lmemcpy_bad_done 936 937.Lmemcpy_bad1_loop4: 938 mov r4, ip, lsr #8 939 ldr ip, [r1], #0x04 940 subs r2, r2, #0x04 941 orr r4, r4, ip, lsl #24 942 str r4, [r3], #0x04 943 bge .Lmemcpy_bad1_loop4 944 sub r1, r1, #0x03 945 b .Lmemcpy_bad_done 946 947.Lmemcpy_bad2_loop16: 948 mov r4, ip, lsr #16 949 ldr r5, [r1], #0x04 950 pld [r1, #0x018] 951 ldr r6, [r1], #0x04 952 ldr r7, [r1], #0x04 953 ldr ip, [r1], #0x04 954 orr r4, r4, r5, lsl #16 955 mov r5, r5, lsr #16 956 orr r5, r5, r6, lsl #16 957 mov r6, r6, lsr #16 958 orr r6, r6, r7, lsl #16 959 mov r7, r7, lsr #16 960 orr r7, r7, ip, lsl #16 961 str r4, [r3], #0x04 962 str r5, [r3], #0x04 963 str r6, [r3], #0x04 964 str r7, [r3], #0x04 965.Lmemcpy_bad2: 966 subs r2, r2, #0x10 967 bge .Lmemcpy_bad2_loop16 968 969 adds r2, r2, #0x10 970 ldmfdeq sp!, {r4-r7} 971 RETeq /* 
Return now if done */ 972 subs r2, r2, #0x04 973 sublt r1, r1, #0x02 974 blt .Lmemcpy_bad_done 975 976.Lmemcpy_bad2_loop4: 977 mov r4, ip, lsr #16 978 ldr ip, [r1], #0x04 979 subs r2, r2, #0x04 980 orr r4, r4, ip, lsl #16 981 str r4, [r3], #0x04 982 bge .Lmemcpy_bad2_loop4 983 sub r1, r1, #0x02 984 b .Lmemcpy_bad_done 985 986.Lmemcpy_bad3_loop16: 987 mov r4, ip, lsr #24 988 ldr r5, [r1], #0x04 989 pld [r1, #0x018] 990 ldr r6, [r1], #0x04 991 ldr r7, [r1], #0x04 992 ldr ip, [r1], #0x04 993 orr r4, r4, r5, lsl #8 994 mov r5, r5, lsr #24 995 orr r5, r5, r6, lsl #8 996 mov r6, r6, lsr #24 997 orr r6, r6, r7, lsl #8 998 mov r7, r7, lsr #24 999 orr r7, r7, ip, lsl #8 1000 str r4, [r3], #0x04 1001 str r5, [r3], #0x04 1002 str r6, [r3], #0x04 1003 str r7, [r3], #0x04 1004.Lmemcpy_bad3: 1005 subs r2, r2, #0x10 1006 bge .Lmemcpy_bad3_loop16 1007 1008 adds r2, r2, #0x10 1009 ldmfdeq sp!, {r4-r7} 1010 RETeq /* Return now if done */ 1011 subs r2, r2, #0x04 1012 sublt r1, r1, #0x01 1013 blt .Lmemcpy_bad_done 1014 1015.Lmemcpy_bad3_loop4: 1016 mov r4, ip, lsr #24 1017 ldr ip, [r1], #0x04 1018 subs r2, r2, #0x04 1019 orr r4, r4, ip, lsl #8 1020 str r4, [r3], #0x04 1021 bge .Lmemcpy_bad3_loop4 1022 sub r1, r1, #0x01 1023 1024.Lmemcpy_bad_done: 1025 ldmfd sp!, {r4-r7} 1026 adds r2, r2, #0x04 1027 RETeq 1028 ldrb ip, [r1], #0x01 1029 cmp r2, #0x02 1030 ldrbge r2, [r1], #0x01 1031 strb ip, [r3], #0x01 1032 ldrbgt ip, [r1] 1033 strbge r2, [r3], #0x01 1034 strbgt ip, [r3] 1035 RET 1036 1037 1038/* 1039 * Handle short copies (less than 16 bytes), possibly misaligned. 1040 * Some of these are *very* common, thanks to the network stack, 1041 * and so are handled specially. 
 */
/*
 * Small-copy dispatcher: r2 = byte count (0..0x0c here).  "add pc, pc, r2,
 * lsl #2" reads pc two instructions ahead, so count 0 lands on the RET below
 * and count N on the N'th branch of the table.  Counts with no specialised
 * handler fall back to the byte-at-a-time loop.
 * NOTE(review): entry code that routes short copies here is above this chunk;
 * r0 = dst, r1 = src, r2 = len is assumed from the table construction below.
 */
.Lmemcpy_short:
	add	pc, pc, r2, lsl #2
	nop
	RET				/* 0x00 */
	b	.Lmemcpy_bytewise	/* 0x01 */
	b	.Lmemcpy_bytewise	/* 0x02 */
	b	.Lmemcpy_bytewise	/* 0x03 */
	b	.Lmemcpy_4		/* 0x04 */
	b	.Lmemcpy_bytewise	/* 0x05 */
	b	.Lmemcpy_6		/* 0x06 */
	b	.Lmemcpy_bytewise	/* 0x07 */
	b	.Lmemcpy_8		/* 0x08 */
	b	.Lmemcpy_bytewise	/* 0x09 */
	b	.Lmemcpy_bytewise	/* 0x0a */
	b	.Lmemcpy_bytewise	/* 0x0b */
	b	.Lmemcpy_c		/* 0x0c */

/* Fallback: copy r2 bytes one at a time.  memcpy must return dst in r0,
 * hence the copy into r3 before post-incrementing stores. */
.Lmemcpy_bytewise:
	mov	r3, r0		/* We must not clobber r0 */
	ldrb	ip, [r1], #0x01
1:	subs	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrbne	ip, [r1], #0x01
	bne	1b
	RET

/******************************************************************************
 * Special case for 4 byte copies
 *
 * Dispatch idiom (same for the 6/8/12-byte variants below): build a 4-bit
 * selector r2 = (dst & 3) << 2 | (src & 3), i.e. high two bits = dst
 * alignment, low two bits = src alignment.  "sub r3, pc, #0x14" backs pc
 * (which reads 8 bytes ahead) up over the five dispatcher instructions,
 * leaving r3 = .Lmemcpy_4; each case block is padded to exactly
 * 2^LMEMCPY_4_LOG2 bytes, so case i lives at r3 + (i << LOG2).  Selector 0
 * (both 32-bit aligned) falls straight through.  The case bodies merge
 * unaligned words with lsr/lsl/orr; the BE:/LE: comments show the byte
 * layout per endianness, digits naming source bytes 0..3 (..B for the
 * larger copies), '.' = zero, 'x' = don't-care.
 */
#define LMEMCPY_4_LOG2	6	/* 64 bytes */
#define LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
	LMEMCPY_4_PAD
.Lmemcpy_4:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	str	r2, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r3, [r1, #0x02]
	ldrh	r2, [r1]
	orr	r3, r2, r3, lsl #16
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
	mov	r3, r3, lsr #24		/* r3 = ...0 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0, #0x03]
	strh	r3, [r0, #0x01]
	RET
	LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	mov	r3, r3, lsr #8		/* r3 = ...3 */
	strh	r2, [r0, #0x01]
	strb	r3, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	strh	r2, [r0]
	mov	r3, r2, lsr #16
	strh	r3, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
	strh	r2, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldrh	r3, [r1, #0x02]
	strh	r2, [r0]
	strh	r3, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
	strh	r1, [r0, #0x02]
	mov	r3, r3, lsl #8		/* r3 = 321. */
	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
	strh	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsr #8		/* r3 = ...3 */
	strb	r3, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD


/******************************************************************************
 * Special case for 6 byte copies
 * (same selector/jump-table scheme as .Lmemcpy_4 above)
 */
#define LMEMCPY_6_LOG2	6	/* 64 bytes */
#define LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
	LMEMCPY_6_PAD
.Lmemcpy_6:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldrh	r3, [r1, #0x04]
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
	mov	r2, r2, lsr #8		/* r2 = .210 */
	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	mov	r1, r3, lsr #16		/* r1 = ..54 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
	mov	r2, r2, lsr #24		/* r2 = ...0 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	mov	r1, r1, lsl #8		/* r1 = xx5. */
	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
	strb	r3, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
	mov	r2, r2, lsr #8		/* r2 = ...5 */
	strh	r3, [r0, #0x03]
	strb	r2, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
	strb	r2, [r0]
	mov	r3, r1, lsr #24
	strb	r3, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .543 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsr #8		/* r3 = ...1 */
	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
	strh	r3, [r0, #0x01]
	RET
	LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
	ldr	r3, [r1]		/* r3 = 3210 */
	mov	r2, r2, lsl #16		/* r2 = 54.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
	strh	r3, [r0]
	str	r2, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	mov	r2, r2, lsl #8		/* r2 = 543. */
	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
	strh	r1, [r0]
	str	r2, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 5... */
	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
	strh	r3, [r0]
	str	r1, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
	mov	r1, r1, lsr #8		/* r1 = ...5 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
	mov	r1, r1, lsr #24		/* r1 = ...5 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	str	r3, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD


/******************************************************************************
 * Special case for 8 byte copies
 * (same selector/jump-table scheme as .Lmemcpy_4 above)
 */
#define LMEMCPY_8_LOG2	6	/* 64 bytes */
#define LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
	LMEMCPY_8_PAD
.Lmemcpy_8:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 7... */
	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 * (same selector/jump-table scheme as .Lmemcpy_4, but the longer case
 * bodies need 128-byte slots)
 */
#define LMEMCPY_C_LOG2	7	/* 128 bytes */
#define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
END(memcpy)