/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, preprocessed by cpp.
   The helper macros below are only supplied when this file is not being
   included with USE_AS_STRCAT defined, and each one can be overridden by
   defining it beforehand.  */
#ifndef USE_AS_STRCAT

# ifndef STRCPY
#  define STRCPY	strcpy
# endif

# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

# ifndef ENTRY
#  define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

# ifndef END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

#endif

/* JMPTBL(I, B) expands to the difference I - B.
   BRANCH_TO_JMPTBL_ENTRY loads the address of TABLE into %r11,
   sign-extends the 32-bit entry at TABLE[INDEX * SCALE] into %rcx, adds
   the table address and jumps there.  It overwrites %r11 and %rcx.  */
#define JMPTBL(I, B)	I - B
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	lea	TABLE(%rip), %r11;	\
	movslq	(%r11, INDEX, SCALE), %rcx;	\
	lea	(%r11, %rcx), %rcx;	\
	jmp	*%rcx

/* Entry sequence, assembled only without USE_AS_STRCAT.
   Register roles as used by the code below:
     %rsi   address the bytes are loaded from
     %rdi   address the bytes are stored to
     %rax   set to the incoming %rdi unless USE_AS_STPCPY is defined
     %r8    USE_AS_STRNCPY only: count taken from %rdx
     %rcx   source misalignment, later the running chunk offset
     %rdx   bitmask of zero bytes produced by pmovmskb
     %xmm0  all-zero comparand on every fall-through path
   NOTE(review): the mapping of %rdi/%rsi/%rdx to (dst, src, n) is
   presumably the System V AMD64 argument order - confirm with callers.  */
#ifndef USE_AS_STRCAT

# define RETURN	ret

.text
ENTRY (STRCPY)
# ifdef USE_AS_STRNCPY
	mov	%rdx, %r8
	test	%r8, %r8
	jz	L(ExitZero)		/* zero count: handled elsewhere */
# endif
	mov	%rsi, %rcx
# ifndef USE_AS_STPCPY
	mov	%rdi, %rax		/* keep the incoming %rdi as the result */
# endif

#endif
	/* Offset of the source inside its 64-byte block; an offset of at
	   most 32 takes the L(SourceStringAlignmentLess32) path.  */
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(SourceStringAlignmentLess32)

	/* Round %rsi down to 16 bytes and keep the misalignment in %rcx.  */
	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	/* Zero-byte mask of the first aligned chunk; the shift drops the
	   mask bits of the bytes that precede the original source.  */
	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx
#ifdef USE_AS_STRNCPY
	/* %r10 = 16 (or 17) minus the misalignment, compared with %r8.  */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	mov	$16, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
# else
	mov	$17, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
# endif
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	/* Same check for the following aligned chunk.  */
	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
#ifdef USE_AS_STRNCPY
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	/* Neither chunk held a zero byte: copy the first 16 source bytes
	   from their original, unaligned position.  */
	movdqu	(%rsi, %rcx), %xmm1
	movdqu	%xmm1, (%rdi)

/* Source address alignment != destination address alignment:
   aligned loads from the source, unaligned stores to the destination.  */
	.p2align 4
L(Unalign16Both):
	/* Bias %rdi by the misalignment so that (%rsi, %rcx) and
	   (%rdi, %rcx) address corresponding bytes.  */
	sub	%rcx, %rdi
#ifdef USE_AS_STRNCPY
	add	%rcx, %r8
#endif
	mov	$16, %rcx

	/* Unrolled steps.  Each one loads the next aligned 16-byte chunk,
	   stores the previously loaded chunk and tests the new chunk for a
	   zero byte.  %xmm0 is all-zero whenever a step falls through,
	   because the preceding mask in %rdx was zero.  */
	movdqa	(%rsi, %rcx), %xmm1
	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$48, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm4
	movdqu	%xmm3, (%rdi, %rcx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm1
	movdqu	%xmm4, (%rdi, %rcx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	/* Store the last chunk of the unrolled part, then round the source
	   position down to a 64-byte boundary.  %rdx receives old %rsi
	   minus new %rsi and is subtracted from %rdi; under USE_AS_STRNCPY
	   %r8 becomes %r8 + %rdx + 128.  */
	movdqu	%xmm3, (%rdi, %rcx)
	mov	%rsi, %rdx
	lea	16(%rsi, %rcx), %rsi
	and	$-0x40, %rsi
	sub	%rsi, %rdx
	sub	%rdx, %rdi
#ifdef USE_AS_STRNCPY
	lea	128(%r8, %rdx), %r8
#endif

	/* 64 bytes per iteration.  %xmm4..%xmm7 keep the four loaded
	   chunks; pminub folds them into %xmm3 so that a zero byte in any
	   chunk yields a zero byte there, which pcmpeqb against the
	   all-zero %xmm0 turns into a mask bit.  */
L(Unaligned64Loop):
	movaps	(%rsi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%rsi), %xmm5
	movaps	32(%rsi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%rsi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(Unaligned64Leave)

	/* Steady state: the stores of the previous four chunks are
	   interleaved with the loads of the next four.  */
L(Unaligned64Loop_start):
	add	$64, %rdi
	add	$64, %rsi
	movdqu	%xmm4, -64(%rdi)
	movaps	(%rsi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%rdi)
	movaps	16(%rsi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%rsi), %xmm3
	movdqu	%xmm6, -32(%rdi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%rdi)
	movaps	48(%rsi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%rdx, %rdx
	jz	L(Unaligned64Loop_start)

	/* A zero byte lies somewhere in %xmm4..%xmm7: test the first two
	   chunks individually (%rdx = mask of chunk 0, %rcx = chunk 1).  */
L(Unaligned64Leave):
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%rcx, %rcx
	jnz	L(CopyFrom1To16BytesUnaligned_16)
/* Continuation of L(Unaligned64Leave): chunks 0 and 1 (%xmm4, %xmm5) held
   no zero byte, so test chunks 2 and 3 (%xmm6, %xmm7).  %xmm0 and %xmm1
   are all-zero here because both earlier masks were zero.  */

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	/* Remaining case: %rdx = index of the lowest set bit in the mask of
	   chunk 3.  Chunks 0..2 are stored whole.  */
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
	movdqu	%xmm6, 32(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	48(%rdi, %rdx), %rax
# endif
	movdqu	%xmm7, 48(%rdi)
	add	$15, %r8
	sub	%rdx, %r8
	lea	49(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$48, %rsi
	add	$48, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

/* Reached from the entry check when (source address & 63) is at most 32.
   The first 32 source bytes are read with two unaligned loads.  */

L(SourceStringAlignmentLess32):
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$16, %r8
# else
	cmp	$17, %r8
# endif
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	/* First 16 bytes hold no zero byte: store them, test the next 16.  */
	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx

#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$32, %r8
# else
	cmp	$33, %r8
# endif
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	/* Recreate the state L(Unalign16Both) expects: misalignment in
	   %rcx, %rsi rounded down to 16 bytes.  */
	and	$15, %rcx
	and	$-16, %rsi

	jmp	L(Unalign16Both)

/*------ End of the main part with loops ------*/

/* Case 1: a zero byte was found (mask in %rdx).  Each stub adjusts the
   pointers, converts the mask to a bit index with bsf and dispatches
   through L(ExitTable) with that index.  */

#if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %rsi
	add	$16, %rdi
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
#endif
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	/* Index = bit index + 16 - misalignment.  */
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Exits of the 64-byte loop.  The _0 / _16 / _32 suffix is the offset of
   the chunk whose mask is non-zero; the chunks before it are stored
   whole from %xmm4..%xmm6.  */

	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
# endif
	movdqu	%xmm4, (%rdi)
	add	$63, %r8
	sub	%rdx, %r8
	lea	1(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	16(%rdi, %rdx), %rax
# endif
	movdqu	%xmm5, 16(%rdi)
	add	$47, %r8
	sub	%rdx, %r8
	lea	17(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$16, %rsi
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%rdx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	32(%rdi, %rdx), %rax
# endif
	movdqu	%xmm6, 32(%rdi)
	add	$31, %r8
	sub	%rdx, %r8
	lea	33(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$32, %rsi
	add	$32, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

#ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
/* Store the chunk held in the named register at (%rdi, %rcx), then
   continue at L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)
# endif

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Case 2: a zero byte was found and the count in %r8 is also close to
   running out.  If the bit index in %rdx is below %r8 (unsigned) the
   L(ExitTable) path is taken, otherwise L(ExitStrncpyTable) is indexed
   with %r8.  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Case 2 or Case 3: the count ran low.  A non-zero mask in %rdx selects
   the matching Case 2 stub; a zero mask (Case 3) dispatches on %r8
   alone.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

#endif

/*------ End of the labels that dispatch 1-16 and 1-32 byte copies ------*/

/* L(ExitN): write exactly N bytes at (%rdi) using straight-line,
   possibly overlapping moves from (%rsi).
   - The labels that store %dh use it as the final byte.  NOTE(review):
     this presumably relies on %rdx still holding the small bsf index
     used for the L(ExitTable) dispatch, so that %dh is zero - confirm
     against the table, which is outside this span.
   - USE_AS_STPCPY: %rax = %rdi + N - 1, the address of the last byte
     written.
   - USE_AS_STRNCPY without USE_AS_STRCAT: N is subtracted from %r8 and
     %rdi is advanced by N; if the remainder is non-zero control goes to
     L(StrncpyFillTailWithZero).  The lea between sub and jnz leaves the
     flags untouched.  */

	.p2align 4
L(Exit1):
	mov	%dh, (%rdi)
#ifdef USE_AS_STPCPY
	lea	(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$1, %r8
	lea	1(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$2, %r8
	lea	2(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit3):
	mov	(%rsi), %cx
	mov	%cx, (%rdi)
	mov	%dh, 2(%rdi)
#ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$3, %r8
	lea	3(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$4, %r8
	lea	4(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit5):
	mov	(%rsi), %ecx
	mov	%dh, 4(%rdi)
	mov	%ecx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$5, %r8
	lea	5(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$6, %r8
	lea	6(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$7, %r8
	lea	7(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$8, %r8
	lea	8(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit9):
	mov	(%rsi), %rcx
	mov	%dh, 8(%rdi)
	mov	%rcx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$9, %r8
	lea	9(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$10, %r8
	lea	10(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$11, %r8
	lea	11(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$12, %r8
	lea	12(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$13, %r8
	lea	13(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$14, %r8
	lea	14(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$15, %r8
	lea	15(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
	lea	16(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	mov	%dh, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$17, %r8
	lea	17(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$18, %r8
	lea	18(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
/* L(Exit19)..L(Exit32): write exactly N bytes at (%rdi) using one 16-byte
   move plus a second, possibly overlapping, move from (%rsi).
   - L(Exit21) and L(Exit25) store %dh as the final byte.  NOTE(review):
     presumably %rdx still holds the small index used for the jump-table
     dispatch, making %dh zero - confirm against the table.
   - USE_AS_STPCPY: %rax = %rdi + N - 1, the address of the last byte
     written.
   - USE_AS_STRNCPY without USE_AS_STRCAT: N is subtracted from %r8 and
     %rdi is advanced by N; a non-zero remainder continues at
     L(StrncpyFillTailWithZero).  The lea between sub and jnz leaves the
     flags untouched.  */
L(Exit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$19, %r8
	lea	19(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$20, %r8
	lea	20(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dh, 20(%rdi)
#ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$21, %r8
	lea	21(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$22, %r8
	lea	22(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$23, %r8
	lea	23(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$24, %r8
	lea	24(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$25, %r8
	lea	25(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$26, %r8
	lea	26(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$27, %r8
	lea	27(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$28, %r8
	lea	28(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$29, %r8
	lea	29(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$30, %r8
	lea	30(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$31, %r8
	lea	31(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$32, %r8
	lea	32(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

#ifdef USE_AS_STRNCPY

/* L(StrncpyExitN): copy exactly N bytes from (%rsi) to (%rdi); every
   byte written comes from the source.
   - USE_AS_STPCPY: %rax = %rdi + N.
   - USE_AS_STRCAT: a zero byte (from %ch, cleared with xor) is stored
     at N(%rdi) after the copy.  */

	.p2align 4
L(StrncpyExit0):
#ifdef USE_AS_STPCPY
	mov	%rdi, %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, (%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit1):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
#ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 1(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 2(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit3):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
#ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 3(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit4):
	/* L(StrncpyExit4)..L(StrncpyExit18) all follow one pattern: copy
	   exactly N bytes from (%rsi) to (%rdi) with one or two, possibly
	   overlapping, moves.  USE_AS_STPCPY sets %rax = %rdi + N;
	   USE_AS_STRCAT then stores a zero byte (from %ch, cleared with
	   xor) at N(%rdi).  */
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 4(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 5(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 6(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 7(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 8(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 9(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 10(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 11(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 12(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 13(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 14(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 15(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 16(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 17(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#endif
/* Tail of L(StrncpyExit18): under USE_AS_STRCAT a zero byte is stored
   after the 18 bytes already written, then the routine returns.  */
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 18(%rdi)
#endif
	RETURN

/* L(StrncpyExit19)..L(StrncpyExit33): copy exactly N bytes from (%rsi)
   to (%rdi) with a 16-byte move plus further, possibly overlapping,
   moves.  USE_AS_STPCPY sets %rax = %rdi + N (L(StrncpyExit33) has no
   such step); USE_AS_STRCAT then stores a zero byte (from %ch, cleared
   with xor) at N(%rdi).  */

	.p2align 4
L(StrncpyExit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 19(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 20(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
#ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 21(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 22(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 23(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 24(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cl, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 25(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 26(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 27(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 28(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 29(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 30(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 31(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	32(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 32(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit33):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cl, 32(%rdi)
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 33(%rdi)
#endif
	RETURN

#ifndef USE_AS_STRCAT

/* L(FillN): store N bytes taken from %rdx at the destination and return.
   L(StrncpyFillTailWithZero) clears %rdx and %xmm0 before it dispatches
   through L(FillTable), so these stores write zeros on that path
   (presumably the only way these labels are reached - confirm against
   L(FillTable)).
   L(Fill3) and L(Fill7) use a single 4- or 8-byte store that starts at
   -1(%rdi), so they also rewrite the byte just before %rdi.  */

	.p2align 4
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	mov	%dl, (%rdi)
	RETURN

	.p2align 4
L(Fill2):
	mov	%dx, (%rdi)
	RETURN

	.p2align 4
L(Fill3):
	mov	%edx, -1(%rdi)
	RETURN

	.p2align 4
L(Fill4):
	mov	%edx, (%rdi)
	RETURN

	.p2align 4
L(Fill5):
	mov	%edx, (%rdi)
	mov	%dl, 4(%rdi)
	RETURN

	.p2align 4
L(Fill6):
	mov	%edx, (%rdi)
	mov	%dx, 4(%rdi)
	RETURN

	.p2align 4
L(Fill7):
	mov	%rdx, -1(%rdi)
	RETURN

	.p2align 4
L(Fill8):
	mov	%rdx, (%rdi)
	RETURN

	.p2align 4
L(Fill9):
	mov	%rdx, (%rdi)
	mov	%dl, 8(%rdi)
	RETURN

	.p2align 4
L(Fill10):
	mov	%rdx, (%rdi)
	mov	%dx, 8(%rdi)
	RETURN

	.p2align 4
L(Fill11):
	mov	%rdx, (%rdi)
	mov	%edx, 7(%rdi)
	RETURN

	.p2align 4
L(Fill12):
	mov	%rdx, (%rdi)
	mov	%edx, 8(%rdi)
	RETURN

	.p2align 4
L(Fill13):
	mov	%rdx, (%rdi)
	mov	%rdx, 5(%rdi)
	RETURN

	.p2align 4
L(Fill14):
	mov	%rdx, (%rdi)
	mov	%rdx, 6(%rdi)
	RETURN

.p2align 4 1662L(Fill15): 1663 movdqu %xmm0, -1(%rdi) 1664 RETURN 1665 1666 .p2align 4 1667L(Fill16): 1668 movdqu %xmm0, (%rdi) 1669 RETURN 1670 1671 .p2align 4 1672L(CopyFrom1To16BytesUnalignedXmm2): 1673 movdqu %xmm2, (%rdi, %rcx) 1674 1675 .p2align 4 1676L(CopyFrom1To16BytesXmmExit): 1677 bsf %rdx, %rdx 1678 add $15, %r8 1679 add %rcx, %rdi 1680#ifdef USE_AS_STPCPY 1681 lea (%rdi, %rdx), %rax 1682#endif 1683 sub %rdx, %r8 1684 lea 1(%rdi, %rdx), %rdi 1685 1686 .p2align 4 1687L(StrncpyFillTailWithZero): 1688 pxor %xmm0, %xmm0 1689 xor %rdx, %rdx 1690 sub $16, %r8 1691 jbe L(StrncpyFillExit) 1692 1693 movdqu %xmm0, (%rdi) 1694 add $16, %rdi 1695 1696 mov %rdi, %rsi 1697 and $0xf, %rsi 1698 sub %rsi, %rdi 1699 add %rsi, %r8 1700 sub $64, %r8 1701 jb L(StrncpyFillLess64) 1702 1703L(StrncpyFillLoopMovdqa): 1704 movdqa %xmm0, (%rdi) 1705 movdqa %xmm0, 16(%rdi) 1706 movdqa %xmm0, 32(%rdi) 1707 movdqa %xmm0, 48(%rdi) 1708 add $64, %rdi 1709 sub $64, %r8 1710 jae L(StrncpyFillLoopMovdqa) 1711 1712L(StrncpyFillLess64): 1713 add $32, %r8 1714 jl L(StrncpyFillLess32) 1715 movdqa %xmm0, (%rdi) 1716 movdqa %xmm0, 16(%rdi) 1717 add $32, %rdi 1718 sub $16, %r8 1719 jl L(StrncpyFillExit) 1720 movdqa %xmm0, (%rdi) 1721 add $16, %rdi 1722 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) 1723 1724L(StrncpyFillLess32): 1725 add $16, %r8 1726 jl L(StrncpyFillExit) 1727 movdqa %xmm0, (%rdi) 1728 add $16, %rdi 1729 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) 1730 1731L(StrncpyFillExit): 1732 add $16, %r8 1733 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) 1734 1735/* end of ifndef USE_AS_STRCAT */ 1736#endif 1737 1738 .p2align 4 1739L(UnalignedLeaveCase2OrCase3): 1740 test %rdx, %rdx 1741 jnz L(Unaligned64LeaveCase2) 1742L(Unaligned64LeaveCase3): 1743 lea 64(%r8), %rcx 1744 and $-16, %rcx 1745 add $48, %r8 1746 jl L(CopyFrom1To16BytesCase3) 1747 movdqu %xmm4, (%rdi) 1748 sub $16, %r8 1749 jb L(CopyFrom1To16BytesCase3) 1750 movdqu %xmm5, 16(%rdi) 1751 sub $16, %r8 1752 jb 
L(CopyFrom1To16BytesCase3) 1753 movdqu %xmm6, 32(%rdi) 1754 sub $16, %r8 1755 jb L(CopyFrom1To16BytesCase3) 1756 movdqu %xmm7, 48(%rdi) 1757#ifdef USE_AS_STPCPY 1758 lea 64(%rdi), %rax 1759#endif 1760#ifdef USE_AS_STRCAT 1761 xor %ch, %ch 1762 movb %ch, 64(%rdi) 1763#endif 1764 RETURN 1765 1766 .p2align 4 1767L(Unaligned64LeaveCase2): 1768 xor %rcx, %rcx 1769 pcmpeqb %xmm4, %xmm0 1770 pmovmskb %xmm0, %rdx 1771 add $48, %r8 1772 jle L(CopyFrom1To16BytesCase2OrCase3) 1773 test %rdx, %rdx 1774#ifndef USE_AS_STRCAT 1775 jnz L(CopyFrom1To16BytesUnalignedXmm4) 1776#else 1777 jnz L(CopyFrom1To16Bytes) 1778#endif 1779 pcmpeqb %xmm5, %xmm0 1780 pmovmskb %xmm0, %rdx 1781 movdqu %xmm4, (%rdi) 1782 add $16, %rcx 1783 sub $16, %r8 1784 jbe L(CopyFrom1To16BytesCase2OrCase3) 1785 test %rdx, %rdx 1786#ifndef USE_AS_STRCAT 1787 jnz L(CopyFrom1To16BytesUnalignedXmm5) 1788#else 1789 jnz L(CopyFrom1To16Bytes) 1790#endif 1791 1792 pcmpeqb %xmm6, %xmm0 1793 pmovmskb %xmm0, %rdx 1794 movdqu %xmm5, 16(%rdi) 1795 add $16, %rcx 1796 sub $16, %r8 1797 jbe L(CopyFrom1To16BytesCase2OrCase3) 1798 test %rdx, %rdx 1799#ifndef USE_AS_STRCAT 1800 jnz L(CopyFrom1To16BytesUnalignedXmm6) 1801#else 1802 jnz L(CopyFrom1To16Bytes) 1803#endif 1804 1805 pcmpeqb %xmm7, %xmm0 1806 pmovmskb %xmm0, %rdx 1807 movdqu %xmm6, 32(%rdi) 1808 lea 16(%rdi, %rcx), %rdi 1809 lea 16(%rsi, %rcx), %rsi 1810 bsf %rdx, %rdx 1811 cmp %r8, %rdx 1812 jb L(CopyFrom1To16BytesExit) 1813 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) 1814 1815 .p2align 4 1816L(ExitZero): 1817#ifndef USE_AS_STRCAT 1818 mov %rdi, %rax 1819#endif 1820 RETURN 1821 1822#endif 1823 1824#ifndef USE_AS_STRCAT 1825END (STRCPY) 1826#else 1827END (STRCAT) 1828#endif 1829 .p2align 4 1830 .section .rodata 1831L(ExitTable): 1832 .int JMPTBL(L(Exit1), L(ExitTable)) 1833 .int JMPTBL(L(Exit2), L(ExitTable)) 1834 .int JMPTBL(L(Exit3), L(ExitTable)) 1835 .int JMPTBL(L(Exit4), L(ExitTable)) 1836 .int JMPTBL(L(Exit5), L(ExitTable)) 1837 .int JMPTBL(L(Exit6), 
L(ExitTable)) 1838 .int JMPTBL(L(Exit7), L(ExitTable)) 1839 .int JMPTBL(L(Exit8), L(ExitTable)) 1840 .int JMPTBL(L(Exit9), L(ExitTable)) 1841 .int JMPTBL(L(Exit10), L(ExitTable)) 1842 .int JMPTBL(L(Exit11), L(ExitTable)) 1843 .int JMPTBL(L(Exit12), L(ExitTable)) 1844 .int JMPTBL(L(Exit13), L(ExitTable)) 1845 .int JMPTBL(L(Exit14), L(ExitTable)) 1846 .int JMPTBL(L(Exit15), L(ExitTable)) 1847 .int JMPTBL(L(Exit16), L(ExitTable)) 1848 .int JMPTBL(L(Exit17), L(ExitTable)) 1849 .int JMPTBL(L(Exit18), L(ExitTable)) 1850 .int JMPTBL(L(Exit19), L(ExitTable)) 1851 .int JMPTBL(L(Exit20), L(ExitTable)) 1852 .int JMPTBL(L(Exit21), L(ExitTable)) 1853 .int JMPTBL(L(Exit22), L(ExitTable)) 1854 .int JMPTBL(L(Exit23), L(ExitTable)) 1855 .int JMPTBL(L(Exit24), L(ExitTable)) 1856 .int JMPTBL(L(Exit25), L(ExitTable)) 1857 .int JMPTBL(L(Exit26), L(ExitTable)) 1858 .int JMPTBL(L(Exit27), L(ExitTable)) 1859 .int JMPTBL(L(Exit28), L(ExitTable)) 1860 .int JMPTBL(L(Exit29), L(ExitTable)) 1861 .int JMPTBL(L(Exit30), L(ExitTable)) 1862 .int JMPTBL(L(Exit31), L(ExitTable)) 1863 .int JMPTBL(L(Exit32), L(ExitTable)) 1864#ifdef USE_AS_STRNCPY 1865L(ExitStrncpyTable): 1866 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable)) 1867 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable)) 1868 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable)) 1869 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable)) 1870 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable)) 1871 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable)) 1872 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable)) 1873 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable)) 1874 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable)) 1875 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable)) 1876 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable)) 1877 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable)) 1878 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable)) 1879 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable)) 1880 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable)) 1881 .int 
JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable)) 1882 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable)) 1883 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable)) 1884 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable)) 1885 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable)) 1886 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable)) 1887 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable)) 1888 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable)) 1889 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable)) 1890 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable)) 1891 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable)) 1892 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable)) 1893 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable)) 1894 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable)) 1895 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable)) 1896 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable)) 1897 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable)) 1898 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable)) 1899 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) 1900# ifndef USE_AS_STRCAT 1901 .p2align 4 1902L(FillTable): 1903 .int JMPTBL(L(Fill0), L(FillTable)) 1904 .int JMPTBL(L(Fill1), L(FillTable)) 1905 .int JMPTBL(L(Fill2), L(FillTable)) 1906 .int JMPTBL(L(Fill3), L(FillTable)) 1907 .int JMPTBL(L(Fill4), L(FillTable)) 1908 .int JMPTBL(L(Fill5), L(FillTable)) 1909 .int JMPTBL(L(Fill6), L(FillTable)) 1910 .int JMPTBL(L(Fill7), L(FillTable)) 1911 .int JMPTBL(L(Fill8), L(FillTable)) 1912 .int JMPTBL(L(Fill9), L(FillTable)) 1913 .int JMPTBL(L(Fill10), L(FillTable)) 1914 .int JMPTBL(L(Fill11), L(FillTable)) 1915 .int JMPTBL(L(Fill12), L(FillTable)) 1916 .int JMPTBL(L(Fill13), L(FillTable)) 1917 .int JMPTBL(L(Fill14), L(FillTable)) 1918 .int JMPTBL(L(Fill15), L(FillTable)) 1919 .int JMPTBL(L(Fill16), L(FillTable)) 1920# endif 1921#endif 1922