/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and  use in source and binary  forms, with or
 * without  modification,  are   permitted  provided  that  the
 * following conditions are met:
 *
 * + Redistributions  of source  code  must  retain  the  above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following disclaimer.
 *
 * + Redistributions  in binary  form must reproduce  the above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following  disclaimer in  the  documentation and/or  other
 *   materials provided with the distribution.
 *
 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
 *   names  of  its contributors  may  be  used  to endorse  or
 *   promote  products  derived   from  this  software  without
 *   specific prior written permission.
 *
 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is  licensee's responsibility  to comply with  any export
 * regulations applicable in licensee's jurisdiction.
 */

	.file	"strcpy.s"

#include "SYS.h"
#include "cache.h"

/*
 * LABEL(s) builds a file-local label by pasting `s' onto the prefix
 * `.strcpy' through an empty comment (traditional-cpp token pasting).
 */
#define	LABEL(s) .strcpy/**/s

/*
 * strcpy(dst, src)		built when USE_AS_STRNCPY is not defined
 * strncpy(dst, src, n)		built when USE_AS_STRNCPY is defined
 *
 * Syntax:  AT&T, run through the C preprocessor.
 *
 * On entry:
 *	%rdi	destination pointer
 *	%rsi	source pointer
 *	%rdx	byte count n (strncpy build only)
 *
 * On return:
 *	%rax	%rdi as passed in; when USE_AS_STPCPY is also defined,
 *		%rdi plus the offset reached in %rdx instead.
 *
 * Register roles inside the routine:
 *	%rdx	byte offset shared by source and destination
 *	%r11	bytes still allowed by n (strncpy build only)
 *	%rax	current 8 source bytes (or current byte in %al/%ah)
 *	%rcx	the constant 0xfefefefefefefeff used by the NUL test
 *	%r8	scratch: alignment counter, NUL-test result, saved
 *		return value on the zero-fill path
 *	%r10	scratch: borrow mask (0 or -1) for the NUL test
 *	%r9	offset threshold loaded from .amd64cache1half and
 *		.amd64cache2half (defined outside this file;
 *		presumably cache-size derived -- confirm in cache.h)
 *
 * Only caller-saved registers are written and the stack is not used.
 *
 * Phases:
 *	alignloop   byte copy until the source address is 8-byte aligned
 *	8loop       eight unrolled 8-byte steps
 *	64loop      64 bytes per iteration while the offset (compared
 *		    before the final advance) is <= the first threshold
 *	preloop     as above with prefetchnta, while the offset is below
 *		    the second threshold
 *	NTloop      64 bytes per iteration using movnti stores
 *	tail        byte-by-byte copy of the 8 bytes held in %rax
 *	exit        (strncpy) zero-fill the rest of the n bytes
 *
 * Every 8-byte step runs the same word-at-a-time NUL test on %rax and
 * branches to the tail when it reports a zero byte.
 *
 * NOTE(review): the strncpy count is tested with signed conditions
 * (jl/jle), so a count with the top bit set is treated as already
 * exhausted.  Confirm callers never pass such a count.
 */
#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
#else
	ENTRY(strcpy)			/* (char *, const char *) */
#endif

#ifdef USE_AS_STRNCPY
	test	%rdx, %rdx		/* (char *, const char *, size_t) */
	mov	%rdx, %r11		/* %r11 = remaining count */
	jz	LABEL(exitn)		/* early exit */
#endif

	xor	%edx, %edx		/* offset = 0 */

LABEL(aligntry):
	mov	%rsi, %r8		/* align by source */
	and	$7, %r8
	jz	LABEL(alignafter)

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes to next boundary) */

	.p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	mov	(%rsi, %rdx), %al	/* load next source byte */
	test	%al, %al		/* check if character a NUL */
	mov	%al, (%rdi, %rdx)	/* mov leaves the flags intact */
	jz	LABEL(exit)

	inc	%edx
	inc	%r8			/* counts up to zero */
	jnz	LABEL(alignloop)

#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* must check remaining size */
	jz	LABEL(exitn)		/* If we've already done, exit */
#endif

	.p2align 4

LABEL(alignafter):

LABEL(8try):
	mov	$0xfefefefefefefeff, %rcx

LABEL(8):				/* 8-byte */
	mov	(%rsi, %rdx), %rax

LABEL(8loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11		/* tail adds the 8 back */
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8		/* word-at-a-time NUL test ... */
	add	%rax, %r8
	sbb	%r10, %r10		/* %r10 = -carry */

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)		/* ... NUL reported: finish bytewise */

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax	/* preload the next 8 bytes */
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

LABEL(8after):

LABEL(64try):
	mov	_sref_(.amd64cache1half), %r9	/* first offset threshold */

LABEL(64):				/* 64-byte */

	.p2align 4

LABEL(64loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jbe below */

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	lea	8 (%rdx), %rdx		/* advance without touching flags */

	jbe	LABEL(64loop)

LABEL(64after):

LABEL(pretry):
	mov	_sref_(.amd64cache2half), %r9	/* second offset threshold */

LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jb below */

	mov	%rax, (%rdi, %rdx)
	prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
	lea	8 (%rdx), %rdx		/* advance without touching flags */

	jb	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(NTtry):
	mfence

LABEL(NT):				/* 64-byte NT */

	.p2align 4

/*
 * Every exit from this loop goes through NTtail so that the mfence
 * there orders the movnti stores before the routine returns.  This
 * includes the strncpy count-exhaustion exits, which would otherwise
 * reach the return without any fence after non-temporal stores.
 */
LABEL(NTloop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)		/* fence, then bytewise tail */
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 768 + 8 (%rsi, %rdx)
	add	$8, %rdx

	jmp	LABEL(NTloop)

	.p2align 4

LABEL(NTtail):
	mfence				/* order the movnti stores above */

	.p2align 4

LABEL(NTafter):

LABEL(tailtry):

LABEL(tail):				/* 1-byte tail */
#ifdef USE_AS_STRNCPY
	add	$8, %r11		/* undo the last sub $8 */
#endif

	.p2align 4

/*
 * Copies the bytes held in %rax one at a time, low byte first, four
 * bytes per pass (two from %al/%ah, shift right 16, two more).
 */
LABEL(tailloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	shr	$16, %rax

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	shr	$16, %rax
	inc	%rdx

	jmp	LABEL(tailloop)

	.p2align 4

LABEL(tailafter):

/*
 * Reached after the terminating NUL has been stored at (%rdi, %rdx).
 * In the strncpy build %r11 holds the bytes of n still unwritten; they
 * are zero-filled here.  In the strcpy build this falls into exitn.
 */
LABEL(exit):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* anything left to pad? */
	mov	%r11, %rcx

#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %r8
#else
	mov	%rdi, %r8		/* keep the return value */
#endif

	jz	2f

	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	shr	$3, %rcx		/* %rcx = whole quadwords to clear */
	lea	1 (%rdi, %rdx), %rdi	/* first byte after the NUL; flags kept */
	jz	1f			/* tests the shr result */

	rep	stosq

1:
	mov	%r11d, %ecx
	and	$7, %ecx		/* leftover 0..7 bytes */
	jz	2f

	.p2align 4,, 3

3:
	dec	%ecx
	mov	%al, (%rdi, %rcx)	/* clear from the top down */
	jnz	3b

	.p2align 4,, 3

2:
	mov	%r8, %rax
	ret

#endif

	.p2align 4

LABEL(exitn):
#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#else
	mov	%rdi, %rax
#endif

	ret

#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)		/* (char *, const char *) */
#endif