xref: /titanic_41/usr/src/lib/libc/amd64/gen/strcpy.s (revision ed5289f91b9bf164dccd6c75398362be77a4478d)
1/*
2 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5
6/*
7 * Copyright (c) 2002 Advanced Micro Devices, Inc.
8 *
9 * All rights reserved.
10 *
11 * Redistribution and  use in source and binary  forms, with or
12 * without  modification,  are   permitted  provided  that  the
13 * following conditions are met:
14 *
15 * + Redistributions  of source  code  must  retain  the  above
16 *   copyright  notice,   this  list  of   conditions  and  the
17 *   following disclaimer.
18 *
19 * + Redistributions  in binary  form must reproduce  the above
20 *   copyright  notice,   this  list  of   conditions  and  the
21 *   following  disclaimer in  the  documentation and/or  other
22 *   materials provided with the distribution.
23 *
24 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
25 *   names  of  its contributors  may  be  used  to endorse  or
26 *   promote  products  derived   from  this  software  without
27 *   specific prior written permission.
28 *
29 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
30 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
32 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
33 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
34 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
35 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
36 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
37 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
38 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
39 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
40 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
41 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
42 * POSSIBILITY OF SUCH DAMAGE.
43 *
44 * It is  licensee's responsibility  to comply with  any export
45 * regulations applicable in licensee's jurisdiction.
46 */
47
48	.file	"strcpy.s"
49
50#include "SYS.h"
51#include "cache.h"
52
/*
 * LABEL(s) builds a file-local label name by pasting `s' onto the
 * ".strcpy" prefix (the empty comment is the token-paste separator).
 */
53#define LABEL(s) .strcpy/**/s
54
/*
 * strcpy(dst, src) / strncpy(dst, src, n) when USE_AS_STRNCPY is defined.
 *
 * Register roles as used by the code below:
 *   %rdi  destination base (never modified before the exit path)
 *   %rsi  source base
 *   %rdx  on entry: n (strncpy only); afterwards: byte offset into both
 *         source and destination
 *   %r11  strncpy only: number of bytes still allowed to be copied
 *   %r8   scratch
 *   %rax  byte / 8-byte word currently being copied
 */
55#ifdef USE_AS_STRNCPY
56	ENTRY(strncpy)
57#else
58	ENTRY(strcpy)                        /* (char *, const char *) */
59#endif
60
61#ifdef USE_AS_STRNCPY
62	test	%rdx, %rdx		/* (char *, const char *, size_t) */
63	mov	%rdx, %r11		/* mov leaves the flags from test intact */
64	jz	LABEL(exitn)		/* early exit */
65#endif
66
67        xor     %edx, %edx		/* offset = 0 (also clears upper half of %rdx) */
68
69LABEL(aligntry):
70        mov     %rsi, %r8		/* align by source */
71        and     $7, %r8			/* %r8 = source address modulo 8 */
72	jz	LABEL(alignafter)	/* source already 8-byte aligned */
73
74LABEL(align):				/* 8-byte align */
75        sub     $8, %r8			/* %r8 = -(bytes up to next 8-byte boundary) */
76
77	.p2align 4
78
/*
 * Copy one byte per iteration until the source address is 8-byte aligned,
 * leaving through LABEL(exit) as soon as a NUL byte has been stored.
 * NOTE(review): the strncpy count check is a signed test (dec + jl), so a
 * count with the top bit set takes the exitn branch immediately.
 */
79LABEL(alignloop):
80#ifdef USE_AS_STRNCPY
81	dec	%r11
82	jl	LABEL(exitn)		/* count exhausted before a NUL was seen */
83#endif
84
85        mov     (%rsi, %rdx), %al       /* load next source byte */
86        test    %al, %al                /* check if character a NUL */
87        mov     %al, (%rdi, %rdx)	/* store it (mov does not disturb the flags) */
88        jz      LABEL(exit)		/* NUL stored: done */
89
90        inc     %edx			/* advance offset */
91        inc     %r8			/* count up towards zero */
92        jnz     LABEL(alignloop)
93
94	.p2align 4
95
/*
 * First 64 bytes: eight straight-line word steps.  Nothing in this file
 * branches back to LABEL(8loop); after the eighth step control falls
 * through to the 64-byte loop.
 *
 * Each step tests the word already loaded in %rax for a zero byte:
 *   %r8  = %rax + 0xfefefefefefefeff   (CF = carry out of the top byte)
 *   %r10 = -CF
 *   %r8  = ((%r8 ^ %rax) | %rcx) - %r10
 * A byte of %rax produces a carry only if it is non-zero, so the final
 * value is zero exactly when no byte of %rax is zero.  Otherwise the
 * byte-wise code at LABEL(tail) finishes the copy.  When the word is
 * clean it is stored and the next word is loaded ahead of time.
 */
96LABEL(alignafter):
97
98LABEL(8try):
99        mov     $0xfefefefefefefeff, %rcx	/* zero-byte detection constant */
100
101LABEL(8):                               /* 8-byte */
102        mov     (%rsi, %rdx), %rax	/* preload the first word */
103
104LABEL(8loop):
/* word 1 of 8 */
105#ifdef USE_AS_STRNCPY
106	sub	$8, %r11		/* fewer than 8 bytes of count left? */
107	jl	LABEL(tail)		/* tail adds the 8 back */
108#endif
109
110        mov     %rcx, %r8
111        add     %rax, %r8		/* sets CF for the sbb below */
112        sbb     %r10, %r10		/* %r10 = 0 or -1 */
113
114        xor     %rax, %r8
115        or      %rcx, %r8
116        sub     %r10, %r8
117        jnz     LABEL(tail)		/* word contains a NUL byte */
118
119        mov     %rax, (%rdi, %rdx)	/* store the clean word */
120        mov     8 (%rsi, %rdx), %rax	/* load the next word */
121        add     $8, %edx		/* 32-bit add: zero-extends into %rdx */
122
/* word 2 of 8 */
123#ifdef USE_AS_STRNCPY
124	sub	$8, %r11
125	jl	LABEL(tail)
126#endif
127
128        mov     %rcx, %r8
129        add     %rax, %r8
130        sbb     %r10, %r10
131
132        xor     %rax, %r8
133        or      %rcx, %r8
134        sub     %r10, %r8
135        jnz     LABEL(tail)
136
137        mov     %rax, (%rdi, %rdx)
138        mov     8 (%rsi, %rdx), %rax
139        add     $8, %edx
140
/* word 3 of 8 */
141#ifdef USE_AS_STRNCPY
142	sub	$8, %r11
143	jl	LABEL(tail)
144#endif
145
146        mov     %rcx, %r8
147        add     %rax, %r8
148        sbb     %r10, %r10
149
150        xor     %rax, %r8
151        or      %rcx, %r8
152        sub     %r10, %r8
153        jnz     LABEL(tail)
154
155        mov     %rax, (%rdi, %rdx)
156        mov     8 (%rsi, %rdx), %rax
157        add     $8, %edx
158
/* word 4 of 8 */
159#ifdef USE_AS_STRNCPY
160	sub	$8, %r11
161	jl	LABEL(tail)
162#endif
163
164        mov     %rcx, %r8
165        add     %rax, %r8
166        sbb     %r10, %r10
167
168        xor     %rax, %r8
169        or      %rcx, %r8
170        sub     %r10, %r8
171        jnz     LABEL(tail)
172
173        mov     %rax, (%rdi, %rdx)
174        mov     8 (%rsi, %rdx), %rax
175        add     $8, %edx
176
/* word 5 of 8 */
177#ifdef USE_AS_STRNCPY
178	sub	$8, %r11
179	jl	LABEL(tail)
180#endif
181
182        mov     %rcx, %r8
183        add     %rax, %r8
184        sbb     %r10, %r10
185
186        xor     %rax, %r8
187        or      %rcx, %r8
188        sub     %r10, %r8
189        jnz     LABEL(tail)
190
191        mov     %rax, (%rdi, %rdx)
192        mov     8 (%rsi, %rdx), %rax
193        add     $8, %edx
194
/* word 6 of 8 */
195#ifdef USE_AS_STRNCPY
196	sub	$8, %r11
197	jl	LABEL(tail)
198#endif
199
200        mov     %rcx, %r8
201        add     %rax, %r8
202        sbb     %r10, %r10
203
204        xor     %rax, %r8
205        or      %rcx, %r8
206        sub     %r10, %r8
207        jnz     LABEL(tail)
208
209        mov     %rax, (%rdi, %rdx)
210        mov     8 (%rsi, %rdx), %rax
211        add     $8, %edx
212
/* word 7 of 8 */
213#ifdef USE_AS_STRNCPY
214	sub	$8, %r11
215	jl	LABEL(tail)
216#endif
217
218        mov     %rcx, %r8
219        add     %rax, %r8
220        sbb     %r10, %r10
221
222        xor     %rax, %r8
223        or      %rcx, %r8
224        sub     %r10, %r8
225        jnz     LABEL(tail)
226
227        mov     %rax, (%rdi, %rdx)
228        mov     8 (%rsi, %rdx), %rax
229        add     $8, %edx
230
/* word 8 of 8 */
231#ifdef USE_AS_STRNCPY
232	sub	$8, %r11
233	jl	LABEL(tail)
234#endif
235
236        mov     %rcx, %r8
237        add     %rax, %r8
238        sbb     %r10, %r10
239
240        xor     %rax, %r8
241        or      %rcx, %r8
242        sub     %r10, %r8
243        jnz     LABEL(tail)
244
245        mov     %rax, (%rdi, %rdx)
246        mov     8 (%rsi, %rdx), %rax
247        add     $8, %edx
248
/*
 * 64-byte loop with plain stores: eight word steps per iteration.
 *
 * On entry %rax already holds the next source word, %rcx the
 * 0xfefefefefefefeff constant and %rdx the current offset.  Every step
 * computes ((%rax + %rcx) ^ %rax | %rcx) + CF, which is zero only when no
 * byte of %rax is zero; a non-zero result (or, for strncpy, fewer than 8
 * bytes of count left in %r11) leaves through LABEL(tail).
 *
 * The loop repeats while the offset, sampled before the last word's
 * increment, is <= %r9 (unsigned).  %r9 is loaded from .amd64cache1half;
 * presumably half of a cache size from cache.h -- confirm there.
 * NOTE(review): the first seven steps advance the offset with a 32-bit
 * add, which clears the upper half of %rdx; this relies on the offset
 * staying below 4 GiB while in this loop.
 */
249LABEL(8after):
250
251LABEL(64try):
252        mov     _sref_(.amd64cache1half), %r9	/* loop limit for the offset */
253
254LABEL(64):				/* 64-byte */
255
256        .p2align 4
257
258LABEL(64loop):
/* word 1 of 8 */
259#ifdef USE_AS_STRNCPY
260	sub	$8, %r11
261	jl	LABEL(tail)
262#endif
263
264        mov     %rcx, %r8
265        add     %rax, %r8
266        sbb     %r10, %r10
267
268        xor     %rax, %r8
269        or      %rcx, %r8
270        sub     %r10, %r8
271        jnz     LABEL(tail)
272
273        mov     %rax, (%rdi, %rdx)
274        mov     8 (%rsi, %rdx), %rax
275        add     $8, %edx
276
/* word 2 of 8 */
277#ifdef USE_AS_STRNCPY
278	sub	$8, %r11
279	jl	LABEL(tail)
280#endif
281
282        mov     %rcx, %r8
283        add     %rax, %r8
284        sbb     %r10, %r10
285
286        xor     %rax, %r8
287        or      %rcx, %r8
288        sub     %r10, %r8
289        jnz     LABEL(tail)
290
291        mov     %rax, (%rdi, %rdx)
292        mov     8 (%rsi, %rdx), %rax
293        add     $8, %edx
294
/* word 3 of 8 */
295#ifdef USE_AS_STRNCPY
296	sub	$8, %r11
297	jl	LABEL(tail)
298#endif
299
300        mov     %rcx, %r8
301        add     %rax, %r8
302        sbb     %r10, %r10
303
304        xor     %rax, %r8
305        or      %rcx, %r8
306        sub     %r10, %r8
307        jnz     LABEL(tail)
308
309        mov     %rax, (%rdi, %rdx)
310        mov     8 (%rsi, %rdx), %rax
311        add     $8, %edx
312
/* word 4 of 8 */
313#ifdef USE_AS_STRNCPY
314	sub	$8, %r11
315	jl	LABEL(tail)
316#endif
317
318        mov     %rcx, %r8
319        add     %rax, %r8
320        sbb     %r10, %r10
321
322        xor     %rax, %r8
323        or      %rcx, %r8
324        sub     %r10, %r8
325        jnz     LABEL(tail)
326
327        mov     %rax, (%rdi, %rdx)
328        mov     8 (%rsi, %rdx), %rax
329        add     $8, %edx
330
/* word 5 of 8 */
331#ifdef USE_AS_STRNCPY
332	sub	$8, %r11
333	jl	LABEL(tail)
334#endif
335
336        mov     %rcx, %r8
337        add     %rax, %r8
338        sbb     %r10, %r10
339
340        xor     %rax, %r8
341        or      %rcx, %r8
342        sub     %r10, %r8
343        jnz     LABEL(tail)
344
345        mov     %rax, (%rdi, %rdx)
346        mov     8 (%rsi, %rdx), %rax
347        add     $8, %edx
348
/* word 6 of 8 */
349#ifdef USE_AS_STRNCPY
350	sub	$8, %r11
351	jl	LABEL(tail)
352#endif
353
354        mov     %rcx, %r8
355        add     %rax, %r8
356        sbb     %r10, %r10
357
358        xor     %rax, %r8
359        or      %rcx, %r8
360        sub     %r10, %r8
361        jnz     LABEL(tail)
362
363        mov     %rax, (%rdi, %rdx)
364        mov     8 (%rsi, %rdx), %rax
365        add     $8, %edx
366
/* word 7 of 8 */
367#ifdef USE_AS_STRNCPY
368	sub	$8, %r11
369	jl	LABEL(tail)
370#endif
371
372        mov     %rcx, %r8
373        add     %rax, %r8
374        sbb     %r10, %r10
375
376        xor     %rax, %r8
377        or      %rcx, %r8
378        sub     %r10, %r8
379        jnz     LABEL(tail)
380
381        mov     %rax, (%rdi, %rdx)
382        mov     8 (%rsi, %rdx), %rax
383        add     $8, %edx
384
/* word 8 of 8, combined with the loop test */
385#ifdef USE_AS_STRNCPY
386	sub	$8, %r11
387	jl	LABEL(tail)
388#endif
389
390        mov     %rcx, %r8
391        add     %rax, %r8
392        sbb     %r10, %r10
393
394        xor     %rax, %r8
395        or      %rcx, %r8
396        sub     %r10, %r8
397        jnz     LABEL(tail)
398
399        cmp     %r9, %rdx		/* flags consumed by the jbe below */
400
401        mov     %rax, (%rdi, %rdx)
402        mov     8 (%rsi, %rdx), %rax
403        lea     8 (%rdx), %rdx		/* offset += 8 without touching the flags */
404
405        jbe     LABEL(64loop)		/* continue while old offset <= %r9 */
406
/*
 * 64-byte loop with software prefetch: same eight word steps per
 * iteration as the previous loop, plus two prefetchnta hints issued with
 * the last word, 512 + 8 bytes ahead of the current offset in both the
 * destination and the source.
 *
 * Every step computes ((%rax + %rcx) ^ %rax | %rcx) + CF, which is zero
 * only when no byte of %rax is zero; a non-zero result (or, for strncpy,
 * fewer than 8 bytes of count left in %r11) leaves through LABEL(tail).
 *
 * The loop repeats while the offset, sampled before the last word's
 * increment, is < %r9 (unsigned).  %r9 is loaded from .amd64cache2half;
 * presumably half of a larger cache size from cache.h -- confirm there.
 * When the limit is reached control falls through to the non-temporal
 * loop that follows.
 */
407LABEL(64after):
408
409LABEL(pretry):
410        mov     _sref_(.amd64cache2half), %r9	/* loop limit for the offset */
411
412LABEL(pre):                              /* 64-byte prefetch */
413
414        .p2align 4
415
416LABEL(preloop):
/* word 1 of 8 */
417#ifdef USE_AS_STRNCPY
418	sub	$8, %r11
419	jl	LABEL(tail)
420#endif
421
422        mov     %rcx, %r8
423        add     %rax, %r8
424        sbb     %r10, %r10
425
426        xor     %rax, %r8
427        or      %rcx, %r8
428        sub     %r10, %r8
429        jnz     LABEL(tail)
430
431        mov     %rax, (%rdi, %rdx)
432        mov     8 (%rsi, %rdx), %rax
433        add     $8, %edx
434
/* word 2 of 8 */
435#ifdef USE_AS_STRNCPY
436	sub	$8, %r11
437	jl	LABEL(tail)
438#endif
439
440        mov     %rcx, %r8
441        add     %rax, %r8
442        sbb     %r10, %r10
443
444        xor     %rax, %r8
445        or      %rcx, %r8
446        sub     %r10, %r8
447        jnz     LABEL(tail)
448
449        mov     %rax, (%rdi, %rdx)
450        mov     8 (%rsi, %rdx), %rax
451        add     $8, %edx
452
/* word 3 of 8 */
453#ifdef USE_AS_STRNCPY
454	sub	$8, %r11
455	jl	LABEL(tail)
456#endif
457
458        mov     %rcx, %r8
459        add     %rax, %r8
460        sbb     %r10, %r10
461
462        xor     %rax, %r8
463        or      %rcx, %r8
464        sub     %r10, %r8
465        jnz     LABEL(tail)
466
467        mov     %rax, (%rdi, %rdx)
468        mov     8 (%rsi, %rdx), %rax
469        add     $8, %edx
470
/* word 4 of 8 */
471#ifdef USE_AS_STRNCPY
472	sub	$8, %r11
473	jl	LABEL(tail)
474#endif
475
476        mov     %rcx, %r8
477        add     %rax, %r8
478        sbb     %r10, %r10
479
480        xor     %rax, %r8
481        or      %rcx, %r8
482        sub     %r10, %r8
483        jnz     LABEL(tail)
484
485        mov     %rax, (%rdi, %rdx)
486        mov     8 (%rsi, %rdx), %rax
487        add     $8, %edx
488
/* word 5 of 8 */
489#ifdef USE_AS_STRNCPY
490	sub	$8, %r11
491	jl	LABEL(tail)
492#endif
493
494        mov     %rcx, %r8
495        add     %rax, %r8
496        sbb     %r10, %r10
497
498        xor     %rax, %r8
499        or      %rcx, %r8
500        sub     %r10, %r8
501        jnz     LABEL(tail)
502
503        mov     %rax, (%rdi, %rdx)
504        mov     8 (%rsi, %rdx), %rax
505        add     $8, %edx
506
/* word 6 of 8 */
507#ifdef USE_AS_STRNCPY
508	sub	$8, %r11
509	jl	LABEL(tail)
510#endif
511
512        mov     %rcx, %r8
513        add     %rax, %r8
514        sbb     %r10, %r10
515
516        xor     %rax, %r8
517        or      %rcx, %r8
518        sub     %r10, %r8
519        jnz     LABEL(tail)
520
521        mov     %rax, (%rdi, %rdx)
522        mov     8 (%rsi, %rdx), %rax
523        add     $8, %edx
524
/* word 7 of 8 */
525#ifdef USE_AS_STRNCPY
526	sub	$8, %r11
527	jl	LABEL(tail)
528#endif
529
530        mov     %rcx, %r8
531        add     %rax, %r8
532        sbb     %r10, %r10
533
534        xor     %rax, %r8
535        or      %rcx, %r8
536        sub     %r10, %r8
537        jnz     LABEL(tail)
538
539        mov     %rax, (%rdi, %rdx)
540        mov     8 (%rsi, %rdx), %rax
541        add     $8, %edx
542
/* word 8 of 8, combined with the prefetches and the loop test */
543#ifdef USE_AS_STRNCPY
544	sub	$8, %r11
545	jl	LABEL(tail)
546#endif
547
548        mov     %rcx, %r8
549        add     %rax, %r8
550        sbb     %r10, %r10
551
552        xor     %rax, %r8
553        or      %rcx, %r8
554        sub     %r10, %r8
555        jnz     LABEL(tail)
556
557        cmp     %r9, %rdx		/* flags consumed by the jb below */
558
559        mov     %rax, (%rdi, %rdx)
560        prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
561        mov     8 (%rsi, %rdx), %rax
562        prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
563        lea     8 (%rdx), %rdx		/* offset += 8 without touching the flags */
564
565        jb	LABEL(preloop)		/* continue while old offset < %r9 */
566
567        .p2align 4
568
/*
 * 64-byte loop with non-temporal stores: eight word steps per iteration,
 * each storing with movnti, plus one prefetchnta hint 768 + 8 bytes
 * ahead in the source issued with the last word.  The loop has no size
 * limit; it ends only when a NUL byte is found or, for strncpy, when
 * fewer than 8 bytes of count remain in %r11.
 *
 * Every step computes ((%rax + %rcx) ^ %rax | %rcx) + CF, which is zero
 * only when no byte of %rax is zero.
 *
 * movnti stores are weakly ordered, so every way out of this loop goes
 * through the mfence at LABEL(NTtail) before the byte-wise tail runs.
 * That includes the strncpy count-exhausted exits, which used to branch
 * straight to LABEL(tail) and so skipped the fence.
 */
569LABEL(preafter):
570
571LABEL(NTtry):
572	mfence
573
574LABEL(NT):				/* 64-byte NT */
575
576        .p2align 4
577
578LABEL(NTloop):
/* word 1 of 8 */
579#ifdef USE_AS_STRNCPY
580	sub	$8, %r11
581	jl	LABEL(NTtail)		/* count nearly exhausted: fence, then tail */
582#endif
583
584        mov     %rcx, %r8
585        add     %rax, %r8
586        sbb     %r10, %r10
587
588        xor     %rax, %r8
589        or      %rcx, %r8
590        sub     %r10, %r8
591        jnz     LABEL(NTtail)		/* word contains a NUL byte */
592
593        movnti  %rax, (%rdi, %rdx)	/* non-temporal store of the clean word */
594        mov     8 (%rsi, %rdx), %rax	/* load the next word */
595        add     $8, %rdx
596
/* word 2 of 8 */
597#ifdef USE_AS_STRNCPY
598	sub	$8, %r11
599	jl	LABEL(NTtail)
600#endif
601
602        mov     %rcx, %r8
603        add     %rax, %r8
604        sbb     %r10, %r10
605
606        xor     %rax, %r8
607        or      %rcx, %r8
608        sub     %r10, %r8
609        jnz     LABEL(NTtail)
610
611        movnti  %rax, (%rdi, %rdx)
612        mov     8 (%rsi, %rdx), %rax
613        add     $8, %rdx
614
/* word 3 of 8 */
615#ifdef USE_AS_STRNCPY
616	sub	$8, %r11
617	jl	LABEL(NTtail)
618#endif
619
620        mov     %rcx, %r8
621        add     %rax, %r8
622        sbb     %r10, %r10
623
624        xor     %rax, %r8
625        or      %rcx, %r8
626        sub     %r10, %r8
627        jnz     LABEL(NTtail)
628
629        movnti  %rax, (%rdi, %rdx)
630        mov     8 (%rsi, %rdx), %rax
631        add     $8, %rdx
632
/* word 4 of 8 */
633#ifdef USE_AS_STRNCPY
634	sub	$8, %r11
635	jl	LABEL(NTtail)
636#endif
637
638        mov     %rcx, %r8
639        add     %rax, %r8
640        sbb     %r10, %r10
641
642        xor     %rax, %r8
643        or      %rcx, %r8
644        sub     %r10, %r8
645        jnz     LABEL(NTtail)
646
647        movnti  %rax, (%rdi, %rdx)
648        mov     8 (%rsi, %rdx), %rax
649        add     $8, %rdx
650
/* word 5 of 8 */
651#ifdef USE_AS_STRNCPY
652	sub	$8, %r11
653	jl	LABEL(NTtail)
654#endif
655
656        mov     %rcx, %r8
657        add     %rax, %r8
658        sbb     %r10, %r10
659
660        xor     %rax, %r8
661        or      %rcx, %r8
662        sub     %r10, %r8
663        jnz     LABEL(NTtail)
664
665        movnti  %rax, (%rdi, %rdx)
666        mov     8 (%rsi, %rdx), %rax
667        add     $8, %rdx
668
/* word 6 of 8 */
669#ifdef USE_AS_STRNCPY
670	sub	$8, %r11
671	jl	LABEL(NTtail)
672#endif
673
674        mov     %rcx, %r8
675        add     %rax, %r8
676        sbb     %r10, %r10
677
678        xor     %rax, %r8
679        or      %rcx, %r8
680        sub     %r10, %r8
681        jnz     LABEL(NTtail)
682
683        movnti  %rax, (%rdi, %rdx)
684        mov     8 (%rsi, %rdx), %rax
685        add     $8, %rdx
686
/* word 7 of 8 */
687#ifdef USE_AS_STRNCPY
688	sub	$8, %r11
689	jl	LABEL(NTtail)
690#endif
691
692        mov     %rcx, %r8
693        add     %rax, %r8
694        sbb     %r10, %r10
695
696        xor     %rax, %r8
697        or      %rcx, %r8
698        sub     %r10, %r8
699        jnz     LABEL(NTtail)
700
701        movnti  %rax, (%rdi, %rdx)
702        mov     8 (%rsi, %rdx), %rax
703        add     $8, %rdx
704
/* word 8 of 8, combined with the source prefetch */
705#ifdef USE_AS_STRNCPY
706	sub	$8, %r11
707	jl	LABEL(NTtail)
708#endif
709
710        mov     %rcx, %r8
711        add     %rax, %r8
712        sbb     %r10, %r10
713
714        xor     %rax, %r8
715        or      %rcx, %r8
716        sub     %r10, %r8
717        jnz     LABEL(NTtail)
718
719        movnti  %rax, (%rdi, %rdx)
720	mov     8 (%rsi, %rdx), %rax
721	prefetchnta 768 + 8 (%rsi, %rdx)
722        add     $8, %rdx
723
724        jmp     LABEL(NTloop)
725
726        .p2align 4
727
/*
 * Common way out of the non-temporal loop: fence the outstanding movnti
 * stores, then fall through to the byte-wise tail.  mfence leaves %r11,
 * %rax and %rdx untouched, so the tail sees exactly the state it would
 * see on a direct branch.
 */
728LABEL(NTtail):
729	mfence
730
731        .p2align 4
732
/*
 * Byte-wise tail.  Entered with %rax holding the source word at offset
 * %rdx, either because that word contains a NUL byte or (strncpy) because
 * fewer than 8 bytes of count remain.  Bytes are peeled off %rax from the
 * least significant end: %al, %ah, shift right by 16, and so on.  Each
 * byte is stored before the NUL test is acted upon, so the terminating
 * NUL itself is copied; LABEL(exit) is then entered with %rdx being the
 * offset of that NUL.
 */
733LABEL(NTafter):
734
735LABEL(tailtry):
736
737LABEL(tail):                             /* 1-byte tail */
738#ifdef USE_AS_STRNCPY
739	add	$8, %r11		/* undo the "sub $8" done before branching here */
740#endif
741
742        .p2align 4
743
744LABEL(tailloop):
745#ifdef USE_AS_STRNCPY
746	dec	%r11
747	jl	LABEL(exitn)		/* count exhausted: return without storing */
748#endif
749
750        test    %al, %al
751        mov     %al, (%rdi, %rdx)	/* mov keeps the flags from test */
752        jz      LABEL(exit)
753
754        inc     %rdx
755
756#ifdef USE_AS_STRNCPY
757	dec	%r11
758	jl	LABEL(exitn)
759
	/* NOTE(review): %al is not read again before the shr below; purpose unclear */
760	mov	%ah, %al
761#endif
762
763        test    %ah, %ah
764        mov     %ah, (%rdi, %rdx)
765        jz      LABEL(exit)
766
767        inc     %rdx
768
769#ifdef USE_AS_STRNCPY
770	dec	%r11
771	jl	LABEL(exitn)
772#endif
773
774        shr     $16, %rax		/* bring the next two bytes into %al / %ah */
775
776        test    %al, %al
777        mov     %al, (%rdi, %rdx)
778        jz      LABEL(exit)
779
780        inc     %rdx
781
782#ifdef USE_AS_STRNCPY
783	dec	%r11
784	jl	LABEL(exitn)
785
	/* NOTE(review): as above, %al is overwritten by the shr before any use */
786	mov	%ah, %al
787#endif
788
789        test    %ah, %ah
790        mov     %ah, (%rdi, %rdx)
791        jz      LABEL(exit)
792
793        shr     $16, %rax		/* next two bytes for the following round */
794        inc     %rdx
795
796        jmp     LABEL(tailloop)
797
798        .p2align 4
799
/*
 * Exit paths.
 *
 * LABEL(exit) is reached after the terminating NUL has been stored at
 * offset %rdx.  In the plain strcpy build it simply falls through to
 * LABEL(exitn).  In the strncpy build %r11 holds the count that is still
 * unused after the NUL; that many zero bytes are written starting right
 * behind the NUL, then the function returns.
 *
 * LABEL(exitn) returns without any padding; in the strncpy build it is
 * the target of the "count exhausted" branches.
 *
 * Return value: the original destination pointer, or destination plus
 * offset when USE_AS_STPCPY is defined.
 */
800LABEL(tailafter):
801
802LABEL(exit):
803#ifdef USE_AS_STRNCPY
804	test	%r11, %r11		/* any count left to pad? (flags used by jz 2f) */
805	mov	%r11, %rcx
806
807#ifdef USE_AS_STPCPY
808        lea     (%rdi, %rdx), %r8	/* return value: address of the stored NUL */
809#else
810        mov     %rdi, %r8		/* return value: destination */
811#endif
812
813	jz	2f			/* mov/lea above left the flags untouched */
814
815	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
816	shr	$3, %rcx		/* number of whole 8-byte words to clear; sets ZF */
817        lea     1 (%rdi, %rdx), %rdi	/* first byte behind the NUL; lea keeps the flags */
818	jz	1f			/* fewer than 8 bytes to clear */
819
	/* advances %rdi past the cleared words; assumes the direction flag is clear -- confirm against the ABI */
820	rep	stosq
821
8221:
823	mov	%r11d, %ecx
824	and	$7, %ecx		/* leftover bytes after the whole words */
825	jz	2f
826
827        .p2align 4,, 3
828
8293:
830	dec	%ecx			/* clear the leftover bytes from the highest index down to 0 */
831	mov	%al, (%rdi, %rcx)
832	jnz	3b			/* flags still those of dec */
833
834        .p2align 4,, 3
835
8362:
837	mov	%r8, %rax
838        ret
839
840#endif
841
842        .p2align 4
843
844LABEL(exitn):
845#ifdef USE_AS_STPCPY
846        lea     (%rdi, %rdx), %rax	/* destination plus current offset */
847#else
848        mov     %rdi, %rax		/* destination */
849#endif
850
851        ret
852
853#ifdef USE_AS_STRNCPY
854	SET_SIZE(strncpy)
855#else
856	SET_SIZE(strcpy)                        /* (char *, const char *) */
857#endif
857#endif
858