/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions are met:
 *
 * + Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the
 * following disclaimer.
 *
 * + Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the
 * following disclaimer in the documentation and/or other
 * materials provided with the distribution.
 *
 * + Neither the name of Advanced Micro Devices, Inc. nor the
 * names of its contributors may be used to endorse or
 * promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES,
 * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is licensee's responsibility to comply with any export
 * regulations applicable in licensee's jurisdiction.
 */

	.ident	"%Z%%M% %I% %E% SMI"

	.file	"%M%"

#include "SYS.h"
#include "cache.h"

#define LABEL(s) .strcpy/**/s

/*
 * strcpy (char *dst, const char *src)
 * strncpy (char *dst, const char *src, size_t n)   [USE_AS_STRNCPY]
 *
 * AT&T syntax, preprocessed with cpp.  ENTRY, SET_SIZE and _sref_ are
 * expected to come from the included headers.
 *
 * In:	%rdi = dst, %rsi = src, %rdx = n (strncpy build only)
 * Out:	%rax = dst (or dst + index when USE_AS_STPCPY is defined)
 *
 * Register roles inside the routine:
 *	%rdx	byte index applied to both src and dst
 *	%rax	current 8 source bytes (or one byte in the align loop)
 *	%rcx	constant 0xfefefefefefefeff used by the NUL detector
 *	%r8	scratch (alignment counter, detector result, saved return)
 *	%r10	scratch (0 / -1 from the detector's carry)
 *	%r9	size threshold loaded from .amd64cache1half/.amd64cache2half
 *		(defined outside this file - presumably half the L1/L2
 *		cache size; confirm in cache.h)
 *	%r11	strncpy only: bytes of n not yet consumed (handled as a
 *		signed value, so n above INT64_MAX is not supported)
 *
 * Flow: byte copy until src is 8-byte aligned, then 8 bytes at a time in
 * three unrolled stages (plain stores, plain stores with prefetch,
 * non-temporal stores) selected by how far %rdx has advanced, then a
 * byte-wise tail that finishes the word held in %rax.
 *
 * NUL detector used by every 8-byte step:
 *	%r8 = %rax + %rcx	(%rcx == -0x0101010101010101)
 *	%r10 = -carry
 *	%r8 = ((%r8 ^ %rax) | %rcx) - %r10
 * A non-zero result sends the word to the byte-wise tail, which is the
 * only place a NUL is actually acted upon.
 *
 * strncpy specifics: the count check before each 8-byte step leaves the
 * fast path as soon as 8 or fewer bytes remain (jle), so the following
 * word of src is never preloaded once the count is used up; the tail
 * copies the final bytes one at a time.
 */

#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
#else
	ENTRY(strcpy)			/* (char *, const char *) */
#endif

#ifdef USE_AS_STRNCPY
	test	%rdx, %rdx		/* (char *, const char *, size_t) */
	mov	%rdx, %r11		/* mov keeps the flags from test */
	jz	LABEL(exitn)		/* early exit */
#endif

	xor	%edx, %edx		/* index = 0 */

LABEL(aligntry):
	mov	%rsi, %r8		/* align by source */
	and	$7, %r8
	jz	LABEL(alignafter)

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes to next boundary) */

	.p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	mov	(%rsi, %rdx), %al	/* load next source byte */
	test	%al, %al		/* check if character a NUL */
	mov	%al, (%rdi, %rdx)	/* store it (flags unchanged) */
	jz	LABEL(exit)

	inc	%edx
	inc	%r8
	jnz	LABEL(alignloop)

	.p2align 4

LABEL(alignafter):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* count used up while aligning: */
	jz	LABEL(exitn)		/* do not load a word beyond n */
#endif

LABEL(8try):
	mov	$0xfefefefefefefeff, %rcx

LABEL(8):				/* 8-byte */
	mov	(%rsi, %rdx), %rax

LABEL(8loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)		/* <= 8 left: finish byte-wise */
#endif

	mov	%rcx, %r8		/* NUL detector, see header */
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax	/* preload next word */
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

LABEL(8after):

LABEL(64try):
	mov	_sref_(.amd64cache1half), %r9	/* limit for the 64loop stage */

LABEL(64):				/* 64-byte */

	.p2align 4

LABEL(64loop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jbe below */

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	lea	8 (%rdx), %rdx		/* index += 8 without touching flags */

	jbe	LABEL(64loop)

LABEL(64after):

LABEL(pretry):
	mov	_sref_(.amd64cache2half), %r9	/* limit for the prefetch stage */

LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags consumed by jb below */

	mov	%rax, (%rdi, %rdx)
	prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
	lea	8 (%rdx), %rdx		/* index += 8 without touching flags */

	jb	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(NTtry):
	mfence				/* fence before the movnti stage */

LABEL(NT):				/* 64-byte NT */

	.p2align 4

LABEL(NTloop):
	/*
	 * Every exit from this loop goes through NTtail so that the
	 * mfence there always follows the movnti stores.
	 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 768 + 8 (%rsi, %rdx)
	add	$8, %rdx

	jmp	LABEL(NTloop)

	.p2align 4

LABEL(NTtail):
	mfence				/* order the movnti stores */

	.p2align 4

LABEL(NTafter):

LABEL(tailtry):

LABEL(tail):				/* 1-byte tail */
#ifdef USE_AS_STRNCPY
	add	$8, %r11		/* undo the last 8-byte count step */
#endif

	.p2align 4

	/*
	 * Copies the word held in %rax one byte at a time, low byte
	 * first: %al, %ah, shift right by 16, repeat.  Leaves through
	 * exit on a NUL byte or (strncpy) through exitn when the count
	 * runs out.
	 */
LABEL(tailloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	shr	$16, %rax

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	shr	$16, %rax
	inc	%rdx

	jmp	LABEL(tailloop)

	.p2align 4

LABEL(tailafter):

	/*
	 * NUL has been stored at dst + %rdx.  For strncpy, %r11 holds the
	 * number of bytes of n still unwritten; they are zero-filled here.
	 * For strcpy this label simply falls through to exitn.
	 */
LABEL(exit):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* flags survive mov/lea until jz */
	mov	%r11, %rcx

#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %r8
#else
	mov	%rdi, %r8		/* return value */
#endif

	jz	2f			/* nothing left to pad */

	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	shr	$3, %rcx		/* whole quadwords to clear */
	lea	1 (%rdi, %rdx), %rdi	/* first byte after the NUL */
	jz	1f			/* flags from shr: no quadwords */

	rep	stosq

1:
	mov	%r11d, %ecx
	and	$7, %ecx		/* leftover bytes after the quadwords */
	jz	2f

	.p2align 4,, 3

3:
	dec	%ecx
	mov	%al, (%rdi, %rcx)	/* clear from the top down */
	jnz	3b

	.p2align 4,, 3

2:
	mov	%r8, %rax
	ret

#endif

	.p2align 4

LABEL(exitn):
#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#else
	mov	%rdi, %rax
#endif

	ret

#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)		/* (char *, const char *) */
#endif