/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions are met:
 *
 * + Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the
 * following disclaimer.
 *
 * + Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the
 * following disclaimer in the documentation and/or other
 * materials provided with the distribution.
 *
 * + Neither the name of Advanced Micro Devices, Inc. nor the
 * names of its contributors may be used to endorse or
 * promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES,
 * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is licensee's responsibility to comply with any export
 * regulations applicable in licensee's jurisdiction.
 */

        .ident  "%Z%%M% %I% %E% SMI"

        .file   "%M%"

#include "SYS.h"
#include "cache.h"

#define LABEL(s) .strcpy/**/s

/*
 * strcpy  (char *dst, const char *src)
 * strncpy (char *dst, const char *src, size_t n)    with USE_AS_STRNCPY
 *
 * Syntax: AT&T, preprocessed by cpp.
 *
 * Registers as used by the code below:
 *   %rdi        dst (never modified before the exit paths)
 *   %rsi        src
 *   %rdx        on entry n (strncpy only); afterwards the byte offset that
 *               is applied to both src and dst
 *   %rax        current 8-byte chunk of the source (%al in the byte loops);
 *               return value on exit
 *   %rcx        constant 0xfefefefefefefeff used by the NUL-byte test
 *   %r8, %r10   scratch for the NUL-byte test
 *   %r9         offset threshold loaded from .amd64cache1half and
 *               .amd64cache2half (presumably cache-size derived values
 *               provided via cache.h; confirm there)
 *   %r11        bytes that may still be copied (strncpy only)
 *
 * Return value in %rax: dst, or dst plus the final offset when
 * USE_AS_STPCPY is defined.
 *
 * Phases:
 *   align    byte copy until the source address is 8-byte aligned
 *   8loop    eight unrolled 8-byte steps
 *   64loop   unrolled 8-byte steps while offset <= .amd64cache1half
 *   preloop  the same with prefetchnta, while offset < .amd64cache2half
 *   NTloop   the same with non-temporal (movnti) stores
 *   tail     byte copy of the chunk held in %rax, further chunks are
 *            consumed by shifting %rax
 *
 * NUL-byte test used in every 8-byte step:
 *   %r8  = %rax + %rcx          (carry out recorded as %r10 = -CF)
 *   %r8  = (%r8 ^ %rax) | %rcx
 *   %r8 -= %r10
 * %r8 ends up zero only when none of the eight bytes in %rax is NUL.
 * A non-zero result sends control to the byte-wise tail, which locates
 * the NUL.
 *
 * Each 8-byte step loads the following source chunk before that chunk has
 * been checked against the strncpy limit.
 */

#ifdef USE_AS_STRNCPY
        ENTRY(strncpy)
#else
        ENTRY(strcpy)                   /* (char *, const char *) */
#endif

#ifdef USE_AS_STRNCPY
        test    %rdx, %rdx              /* (char *, const char *, size_t) */
        mov     %rdx, %r11              /* mov leaves the flags from test intact */
        jz      LABEL(exitn)            /* early exit */
#endif

        xor     %edx, %edx              /* offset = 0 */

LABEL(aligntry):
        mov     %rsi, %r8               /* align by source */
        and     $7, %r8
        jz      LABEL(alignafter)

LABEL(align):                           /* 8-byte align */
        sub     $8, %r8                 /* %r8 = -(bytes needed to reach alignment) */

        .p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
        dec     %r11
        jl      LABEL(exitn)
#endif

        mov     (%rsi, %rdx), %al       /* load next source byte */
        test    %al, %al                /* check if character a NUL */
        mov     %al, (%rdi, %rdx)       /* store it; flags from test survive */
        jz      LABEL(exit)

        inc     %edx
        inc     %r8                     /* counts up to zero */
        jnz     LABEL(alignloop)

        .p2align 4

LABEL(alignafter):

LABEL(8try):
        mov     $0xfefefefefefefeff, %rcx

LABEL(8):                               /* 8-byte */
        mov     (%rsi, %rdx), %rax      /* first aligned chunk */

LABEL(8loop):
#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

LABEL(8after):

LABEL(64try):
        mov     _sref_(.amd64cache1half), %r9

LABEL(64):                              /* 64-byte */

        .p2align 4

LABEL(64loop):
#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        cmp     %r9, %rdx               /* compare before the offset moves on */

        mov     %rax, (%rdi, %rdx)      /* mov and lea keep the cmp flags */
        mov     8 (%rsi, %rdx), %rax
        lea     8 (%rdx), %rdx

        jbe     LABEL(64loop)

LABEL(64after):

LABEL(pretry):
        mov     _sref_(.amd64cache2half), %r9

LABEL(pre):                             /* 64-byte prefetch */

        .p2align 4

LABEL(preloop):
#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        cmp     %r9, %rdx               /* compare before the offset moves on */

        mov     %rax, (%rdi, %rdx)
        prefetchnta 512 + 8 (%rdi, %rdx)        /* 3DNow: use prefetchw */
        mov     8 (%rsi, %rdx), %rax
        prefetchnta 512 + 8 (%rsi, %rdx)        /* 3DNow: use prefetch */
        lea     8 (%rdx), %rdx

        jb      LABEL(preloop)

        .p2align 4

LABEL(preafter):

LABEL(NTtry):
        mfence

LABEL(NT):                              /* 64-byte NT */

        .p2align 4

/*
 * Every way out of this loop goes through NTtail so that the mfence there
 * follows the movnti stores before control reaches the tail code.
 */
LABEL(NTloop):
#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)           /* limit reached: fence, then tail */
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

#ifdef USE_AS_STRNCPY
        sub     $8, %r11
        jl      LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        prefetchnta 768 + 8 (%rsi, %rdx)
        add     $8, %rdx

        jmp     LABEL(NTloop)

        .p2align 4

LABEL(NTtail):
        mfence                          /* order the movnti stores issued above */

        .p2align 4

LABEL(NTafter):

LABEL(tailtry):

LABEL(tail):                            /* 1-byte tail */
#ifdef USE_AS_STRNCPY
        add     $8, %r11                /* undo the sub $8 of the step that led here */
#endif

        .p2align 4

/*
 * Copy the chunk in %rax one byte at a time, %al then %ah, shifting two
 * further bytes down after each pair.
 */
LABEL(tailloop):
#ifdef USE_AS_STRNCPY
        dec     %r11
        jl      LABEL(exitn)
#endif

        test    %al, %al
        mov     %al, (%rdi, %rdx)
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
        dec     %r11
        jl      LABEL(exitn)

        mov     %ah, %al
#endif

        test    %ah, %ah
        mov     %ah, (%rdi, %rdx)
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
        dec     %r11
        jl      LABEL(exitn)
#endif

        shr     $16, %rax

        test    %al, %al
        mov     %al, (%rdi, %rdx)
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
        dec     %r11
        jl      LABEL(exitn)

        mov     %ah, %al
#endif

        test    %ah, %ah
        mov     %ah, (%rdi, %rdx)
        jz      LABEL(exit)

        shr     $16, %rax
        inc     %rdx

        jmp     LABEL(tailloop)

        .p2align 4

LABEL(tailafter):

/*
 * NUL has been stored at offset %rdx.  For strncpy, %r11 holds the number
 * of bytes that still have to be zero-filled after it.
 */
LABEL(exit):
#ifdef USE_AS_STRNCPY
        test    %r11, %r11
        mov     %r11, %rcx

#ifdef USE_AS_STPCPY
        lea     (%rdi, %rdx), %r8       /* return value: address of the NUL */
#else
        mov     %rdi, %r8               /* return value: dst */
#endif

        jz      2f                      /* nothing left to pad */

        xor     %eax, %eax              /* bzero () would do too, but usually there are only a handful of bytes left */
        shr     $3, %rcx                /* number of whole 8-byte words to clear */
        lea     1 (%rdi, %rdx), %rdi    /* first byte after the NUL; lea keeps ZF from shr */
        jz      1f

        rep stosq

1:
        mov     %r11d, %ecx
        and     $7, %ecx                /* leftover bytes after the 8-byte words */
        jz      2f

        .p2align 4,, 3

3:
        dec     %ecx
        mov     %al, (%rdi, %rcx)       /* clear from the last leftover byte down to the first */
        jnz     3b

        .p2align 4,, 3

2:
        mov     %r8, %rax
        ret

#endif

        .p2align 4

/*
 * Return without padding: strcpy after the NUL was stored, strncpy when
 * the limit ran out (or was zero on entry).
 */
LABEL(exitn):
#ifdef USE_AS_STPCPY
        lea     (%rdi, %rdx), %rax
#else
        mov     %rdi, %rax
#endif

        ret

#ifdef USE_AS_STRNCPY
        SET_SIZE(strncpy)
#else
        SET_SIZE(strcpy)                /* (char *, const char *) */
#endif