/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and  use in source and binary  forms, with or
 * without  modification,  are   permitted  provided  that  the
 * following conditions are met:
 *
 * + Redistributions  of source  code  must  retain  the  above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following disclaimer.
 *
 * + Redistributions  in binary  form must reproduce  the above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following  disclaimer in  the  documentation and/or  other
 *   materials provided with the distribution.
 *
 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
 *   names  of  its contributors  may  be  used  to endorse  or
 *   promote  products  derived   from  this  software  without
 *   specific prior written permission.
 *
 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
 * INCIDENTAL,  SPECIAL,  EXEMPLARY, OR  CONSEQUENTIAL  DAMAGES
 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
 * OF  THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is  licensee's responsibility  to comply with  any export
 * regulations applicable in licensee's jurisdiction.
 */

	.file	"strcpy.s"

#include "SYS.h"
#include "cache.h"

#define LABEL(s) .strcpy/**/s

/*
 * strcpy  (char *dst, const char *src)
 * strncpy (char *dst, const char *src, size_t n)   -- USE_AS_STRNCPY
 *
 * Syntax: AT&T, preprocessed with cpp.
 *
 * Register roles, as used by the code below:
 *	%rdi	destination base
 *	%rsi	source base
 *	%rdx	on entry: n (strncpy only); afterwards the running
 *		byte offset applied to both %rsi and %rdi
 *	%rax	current 8-byte source word (%al/%ah in the byte loops)
 *	%rcx	constant 0xfefefefefefefeff for the NUL-in-word test
 *	%r8	scratch (alignment counter, then NUL-test result;
 *		return value holder in the strncpy exit path)
 *	%r9	offset limit loaded from .amd64cache1half/.amd64cache2half
 *		(defined outside this file; presumably cache-size derived)
 *	%r10	scratch (carry mask in the NUL test)
 *	%r11	strncpy only: number of bytes that may still be written
 *
 * Returns %rax = dst, or dst + offset when USE_AS_STPCPY is defined.
 *
 * Phases: byte copy until the source is 8-byte aligned, then 8-byte
 * word copy (8 unrolled words, then a loop bounded by
 * .amd64cache1half, then a prefetching loop bounded by
 * .amd64cache2half, then a non-temporal store loop), and finally a
 * byte tail that finishes the word held in %rax.  In strncpy mode the
 * exit path zero-fills the %r11 bytes that remain after the NUL.
 */

#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
#else
	ENTRY(strcpy)				/* (char *, const char *) */
#endif

#ifdef USE_AS_STRNCPY
	test	%rdx, %rdx		/* (char *, const char *, size_t) */
	mov	%rdx, %r11		/* %r11 = n; mov keeps the flags */
	jz	LABEL(exitn)		/* early exit */
#endif

	xor	%edx, %edx		/* offset = 0 */

LABEL(aligntry):
	mov	%rsi, %r8		/* align by source */
	and	$7, %r8
	jz	LABEL(alignafter)

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes up to alignment) */

	.p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	mov	(%rsi, %rdx), %al	/* load next source byte */
	test	%al, %al		/* check if character a NUL */
	mov	%al, (%rdi, %rdx)	/* store it (NUL included) */
	jz	LABEL(exit)

	inc	%edx
	inc	%r8
	jnz	LABEL(alignloop)

	.p2align 4

LABEL(alignafter):

LABEL(8try):
	mov	$0xfefefefefefefeff, %rcx

#ifdef USE_AS_STRNCPY
	/*
	 * The alignment loop may have consumed the whole count.  Leave
	 * before the word load below so that no source byte beyond the
	 * n-byte limit is read.
	 */
	test	%r11, %r11
	jz	LABEL(exitn)
#endif

LABEL(8):				/* 8-byte */
	mov	(%rsi, %rdx), %rax

	/*
	 * Every unrolled step below has the same shape:
	 *
	 *  - strncpy only: charge 8 bytes against %r11.  jle (not jl)
	 *    sends the final 1..8 bytes through the byte tail, so the
	 *    read-ahead load at the end of the step only executes when
	 *    at least one byte of the next word is still wanted.
	 *  - NUL test: %r8 = ((%rax + %rcx) ^ %rax | %rcx) + carry.
	 *    This is zero exactly when no byte of %rax is zero; any
	 *    other value goes to the byte tail.
	 *  - store the word, load the next one, advance the offset.
	 */

LABEL(8loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10		/* %r10 = -carry */

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

LABEL(8after):

LABEL(64try):
	mov	_sref_(.amd64cache1half), %r9	/* offset limit for 64loop */

LABEL(64):				/* 64-byte */

	.p2align 4

LABEL(64loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jbe below */

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	lea	8 (%rdx), %rdx		/* lea: advance without touching flags */

	jbe	LABEL(64loop)

LABEL(64after):

LABEL(pretry):
	mov	_sref_(.amd64cache2half), %r9	/* offset limit for preloop */

LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jb below */

	mov	%rax, (%rdi, %rdx)
	prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
	lea	8 (%rdx), %rdx		/* lea: advance without touching flags */

	jb	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(NTtry):
	mfence

LABEL(NT):				/* 64-byte NT */

	.p2align 4

	/*
	 * Same step shape as above, but words are written with movnti.
	 * Both ways out of this loop (NUL found, or strncpy count nearly
	 * exhausted) go through NTtail so that the fence there is always
	 * executed after the non-temporal stores.
	 */

LABEL(NTloop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 768 + 8 (%rsi, %rdx)
	add	$8, %rdx

	jmp	LABEL(NTloop)

	.p2align 4

LABEL(NTtail):
	mfence				/* fence after the movnti stores */

	.p2align 4

LABEL(NTafter):

LABEL(tailtry):

	/*
	 * Byte tail.  %rax holds the word that stopped the word loop;
	 * it is copied one byte at a time (low byte first, two bytes
	 * per 16-bit shift) until a NUL is stored or, for strncpy, the
	 * count runs out.
	 */

LABEL(tail):				/* 1-byte tail */
#ifdef USE_AS_STRNCPY
	add	$8, %r11		/* undo the sub done before the jump here */
#endif

	.p2align 4

LABEL(tailloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	shr	$16, %rax

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	shr	$16, %rax
	inc	%rdx

	jmp	LABEL(tailloop)

	.p2align 4

LABEL(tailafter):

	/*
	 * Reached after the terminating NUL has been stored at offset
	 * %rdx.  For strncpy, %r11 is the number of bytes still to be
	 * written; they are filled with zeros starting at dst+%rdx+1.
	 * For plain strcpy this label falls straight through to exitn.
	 */

LABEL(exit):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* flags survive the mov/lea below */
	mov	%r11, %rcx

#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %r8
#else
	mov	%rdi, %r8		/* return value, %rdi is reused below */
#endif

	jz	2f			/* nothing left to pad */

	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	shr	$3, %rcx		/* %rcx = number of whole 8-byte words */
	lea	1 (%rdi, %rdx), %rdi	/* first byte after the NUL */
	jz	1f			/* ZF from shr: no whole words */

	rep	stosq

1:
	mov	%r11d, %ecx
	and	$7, %ecx		/* leftover bytes, 0..7 */
	jz	2f

	 .p2align 4,, 3

3:
	dec	%ecx
	mov	%al, (%rdi, %rcx)	/* mov keeps ZF from dec for jnz */
	jnz	3b

	 .p2align 4,, 3

2:
	mov	%r8, %rax
	ret

#endif

	.p2align 4

	/*
	 * Common return: strcpy after the NUL, strncpy when the count
	 * ran out before (or without) padding.
	 */

LABEL(exitn):
#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#else
	mov	%rdi, %rax
#endif

	ret

#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)			/* (char *, const char *) */
#endif