/*
 * xref: /titanic_41/usr/src/lib/libc/amd64/gen/strcpy.s
 * (revision 03494a9880d80f834bec10a1e8f0a2f8f7c97bf4)
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and  use in source and binary  forms, with or
 * without  modification,  are   permitted  provided  that  the
 * following conditions are met:
 *
 * + Redistributions  of source  code  must  retain  the  above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following disclaimer.
 *
 * + Redistributions  in binary  form must reproduce  the above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following  disclaimer in  the  documentation and/or  other
 *   materials provided with the distribution.
 *
 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
 *   names  of  its contributors  may  be  used  to endorse  or
 *   promote  products  derived   from  this  software  without
 *   specific prior written permission.
 *
 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is  licensee's responsibility  to comply with  any export
 * regulations applicable in licensee's jurisdiction.
 */

/*
 * File preamble: AT&T-syntax amd64 assembly, run through the C
 * preprocessor before assembly (it uses #include / #ifdef / #define).
 */
	.file	"strcpy.s"

#include "SYS.h"
#include "cache.h"

/*
 * LABEL(s) builds a routine-private label by gluing `s' onto the
 * `.strcpy' prefix.  The empty comment is the pre-ANSI token-pasting
 * idiom; NOTE(review): this presumably relies on the file being
 * preprocessed in traditional (non-ANSI) mode, where the comment is
 * deleted rather than replaced by a space -- confirm against the
 * build flags.
 */
#define LABEL(s) .strcpy/**/s

/*
 * strcpy  (char *dst, const char *src)
 * strncpy (char *dst, const char *src, size_t n)   [USE_AS_STRNCPY]
 *
 * Register roles, as used throughout this routine:
 *   %rdi  dst base (modified only in the strncpy zero-padding code)
 *   %rsi  src base (never modified)
 *   %rdx  byte offset applied to both src and dst, starts at 0
 *   %r11  strncpy only: bytes of `n' still allowed to be written
 *   %rax  source bytes currently being examined / stored
 *   %rcx  0xfefefefefefefeff, constant for the zero-byte test
 *   %r8, %r10  scratch;  %r9  size threshold for the loop stages
 *
 * The routine makes no calls and uses no stack of its own.
 */
#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
	test	%rdx, %rdx		/* (char *, const char *, size_t) */
	mov	%rdx, %r11		/* mov leaves the flags from test intact */
	jz	LABEL(exitn)		/* n == 0: nothing to copy */
#else
	ENTRY(strcpy)			/* (char *, const char *) */
#endif

	xor	%edx, %edx		/* offset = 0 (32-bit write clears all of %rdx) */

/*
 * Alignment stage: copy one byte at a time until src + offset is
 * 8-byte aligned, so that the word loops that follow can use aligned
 * 8-byte loads from the source.
 *
 * %r8 is set to (src & 7) - 8, i.e. minus the number of bytes up to
 * the next 8-byte boundary, and is counted up to zero.  If src is
 * already aligned this stage is skipped entirely.
 */
LABEL(aligntry):
	mov	%rsi, %r8		/* align by source */
	and	$7, %r8
	jz	LABEL(alignafter)	/* already aligned */

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes to boundary) */

	.p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
	dec	%r11			/* account for the byte about to be written */
	jl	LABEL(exitn)		/* n exhausted: return without padding */
#endif

	mov	(%rsi, %rdx), %al	/* load the next source byte */
	test	%al, %al		/* is it the terminating NUL? */
	mov	%al, (%rdi, %rdx)	/* store it either way (mov keeps flags) */
	jz	LABEL(exit)		/* NUL copied: done */

	inc	%edx
	inc	%r8
	jnz	LABEL(alignloop)	/* until the boundary is reached */

#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* must check remaining size */
	jz	LABEL(exitn)		/* n used up exactly at the boundary */
#endif

/*
 * First word stage: eight straight-line 8-byte steps (64 bytes), with
 * no loop back-edge.  src + %rdx is 8-byte aligned when this is reached.
 *
 * Each step performs, in order:
 *   (strncpy) %r11 -= 8; if the result is <= 0 the current word may
 *             only be partly wanted, so go to the byte-wise tail.
 *   zero-byte test on %rax:
 *       %r8  = %rax + 0xfefefefefefefeff     (CF = carry out of bit 63)
 *       %r10 = -CF                           (sbb of a register with itself)
 *       %r8  = ((%r8 ^ %rax) | 0xfefefefefefefeff) - %r10
 *     The xor exposes the carry into the low bit of bytes 1..7; the or
 *     sets every other bit, and subtracting %r10 adds the final
 *     carry-out.  The result is zero only if every byte produced a
 *     carry, i.e. no byte of %rax is zero.  Non-zero => go to tail.
 *   store %rax at dst + offset, load the next source word, offset += 8.
 *
 * `add $8, %edx' is a 32-bit add; the offset is at most 7 + 64 here.
 */
	.p2align 4

LABEL(alignafter):

LABEL(8try):
	mov	$0xfefefefefefefeff, %rcx	/* constant for the zero-byte test */

LABEL(8):				/* 8-byte */
	mov	(%rsi, %rdx), %rax	/* first aligned source word */

LABEL(8loop):
	/* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 8 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

/*
 * Second word stage: a loop of eight 8-byte steps (64 bytes per trip),
 * entered by falling through from the straight-line stage with the
 * next source word already in %rax and the test constant in %rcx.
 *
 * Every step is: (strncpy) %r11 -= 8 and leave for the tail if the
 * result is <= 0; zero-byte test on %rax (result in %r8 is non-zero
 * when %rax contains a zero byte) and leave for the tail if so; store
 * the word, load the next one, advance the offset by 8.
 *
 * The loop repeats while the offset, sampled before the last step's
 * increment, is <= the threshold loaded into %r9.
 * NOTE(review): .amd64cache1half is presumably half the L1 data-cache
 * size; it and _sref_() are defined outside this file -- confirm.
 */
LABEL(8after):

LABEL(64try):
	mov	_sref_(.amd64cache1half), %r9	/* loop-exit threshold */

LABEL(64):				/* 64-byte */

	.p2align 4

LABEL(64loop):
	/* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 8 of 8, plus the loop condition */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags = offset - threshold */

	/* mov and lea do not modify flags, so the cmp result survives */
	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	lea	8 (%rdx), %rdx		/* offset += 8 without touching flags */

	jbe	LABEL(64loop)		/* unsigned: offset <= threshold */

/*
 * Third word stage: same 64-bytes-per-trip loop as the previous stage,
 * but the last step of every trip also issues prefetchnta for the
 * destination and the source 512 + 8 bytes ahead of the current
 * offset.  Entered by falling through with the next source word in
 * %rax and the test constant in %rcx.
 *
 * Every step is: (strncpy) %r11 -= 8 and leave for the tail if the
 * result is <= 0; zero-byte test on %rax (%r8 non-zero when %rax
 * contains a zero byte) and leave for the tail if so; store the word,
 * load the next one, advance the offset by 8.
 *
 * The loop repeats while the offset, sampled before the last step's
 * increment, is strictly below the threshold loaded into %r9.
 * NOTE(review): .amd64cache2half is presumably half the L2 cache
 * size; it and _sref_() are defined outside this file -- confirm.
 */
LABEL(64after):

LABEL(pretry):
	mov	_sref_(.amd64cache2half), %r9	/* loop-exit threshold */

LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
	/* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	mov	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %edx

	/* word 8 of 8, plus prefetch and the loop condition */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(tail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(tail)

	cmp	%r9, %rdx		/* flags = offset - threshold */

	/* nothing between the cmp and the jb below modifies flags */
	mov	%rax, (%rdi, %rdx)
	prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
	lea	8 (%rdx), %rdx		/* offset += 8 without touching flags */

	jb	LABEL(preloop)		/* unsigned: offset < threshold */

/*
 * Fourth word stage: non-temporal copy for the part of the string
 * beyond the previous stage's threshold.  Same 64-bytes-per-trip
 * shape as the earlier loops, but destination words are written with
 * movnti, and the last step prefetches the source 768 + 8 bytes
 * ahead.  The loop has no size bound of its own: it ends only when a
 * zero byte is found or (strncpy) when the count runs out.
 *
 * Every exit from this stage goes through NTtail, which executes
 * mfence before the byte-wise tail, so the weakly-ordered movnti
 * stores are fenced before the routine returns.  The strncpy
 * count-exhausted exits previously branched straight to `tail' and
 * skipped that fence; they now branch to NTtail as well.  NTtail
 * falls through into `tail', so the copy logic is otherwise unchanged.
 *
 * Entered by falling through with the next source word in %rax and
 * the test constant in %rcx; the offset is advanced with 64-bit adds.
 */
	.p2align 4

LABEL(preafter):

LABEL(NTtry):
	mfence				/* fence earlier accesses before the NT stores */

LABEL(NT):				/* 64-byte NT */

	.p2align 4

LABEL(NTloop):
	/* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)		/* count exhausted: fence, then tail */
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	add	$8, %rdx

	/* word 8 of 8, plus source prefetch */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jle	LABEL(NTtail)
#endif

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%r10, %r10

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%r10, %r8
	jnz	LABEL(NTtail)

	movnti	%rax, (%rdi, %rdx)
	mov	8 (%rsi, %rdx), %rax
	prefetchnta 768 + 8 (%rsi, %rdx)
	add	$8, %rdx

	jmp	LABEL(NTloop)

/*
 * Byte-wise tail.
 *
 * NTtail is the exit of the non-temporal stage: it executes mfence and
 * falls through into the common tail.  All other word stages branch
 * directly to `tail'.
 *
 * On entry %rax holds the 8 source bytes that belong at dst + %rdx.
 * They are stored one at a time, lowest byte first: %al, then %ah,
 * then %rax is shifted right by 16 and the pair is repeated.  The
 * loop is left through `exit' as soon as a zero byte has been stored
 * or, in the strncpy build, through `exitn' when %r11 drops below
 * zero.
 *
 * strncpy: every branch to the tail is taken after a `sub $8, %r11'
 * for the word now in %rax; the `add $8' below undoes that so %r11 is
 * again the number of bytes that may still be written, and each byte
 * is then charged individually with `dec'.
 */
	.p2align 4

LABEL(NTtail):
	mfence				/* fence the preceding movnti stores */

	.p2align 4

LABEL(NTafter):

LABEL(tailtry):

LABEL(tail):				/* 1-byte tail */
#ifdef USE_AS_STRNCPY
	add	$8, %r11		/* undo the word-sized decrement */
#endif

	.p2align 4

LABEL(tailloop):
	/* byte 0 of the current pair: %al */
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	test	%al, %al
	mov	%al, (%rdi, %rdx)	/* mov keeps the flags from test */
	jz	LABEL(exit)

	inc	%rdx

	/* byte 1 of the current pair: %ah */
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al		/* the test and store below still use %ah */
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

	/* next pair: shift bytes 2 and 3 down into %al / %ah */
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

	shr	$16, %rax

	test	%al, %al
	mov	%al, (%rdi, %rdx)
	jz	LABEL(exit)

	inc	%rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	mov	%ah, %al		/* the test and store below still use %ah */
#endif

	test	%ah, %ah
	mov	%ah, (%rdi, %rdx)
	jz	LABEL(exit)

	shr	$16, %rax		/* bring the next pair down */
	inc	%rdx

	jmp	LABEL(tailloop)

/*
 * Return paths.
 *
 * exit:  reached right after the terminating NUL has been stored at
 *        dst + %rdx.
 *          strcpy build:  no code is emitted here; control falls
 *                         through to exitn.
 *          strncpy build: the %r11 bytes that follow the NUL are
 *                         zero-filled (rep stosq for %r11 / 8 quad
 *                         words, then a byte loop for %r11 & 7), and
 *                         the routine returns from here.
 * exitn: returns without writing anything more.  In the strncpy build
 *        this is the "n exhausted" (or n == 0) exit.
 *
 * Return value on both paths: dst (%rdi as passed in), or dst + %rdx
 * when USE_AS_STPCPY is defined.
 */
	.p2align 4

LABEL(tailafter):

LABEL(exit):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* any bytes left to pad? */
	mov	%r11, %rcx		/* mov/lea below keep the flags from test */

#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %r8	/* return value: address of the NUL */
#else
	mov	%rdi, %r8		/* return value: dst (%rdi is changed below) */
#endif

	jz	2f			/* nothing to pad */

	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	shr	$3, %rcx		/* quad words to clear; ZF set if none */
	lea	1 (%rdi, %rdx), %rdi	/* first byte after the NUL; lea keeps ZF */
	jz	1f

	/* stores %rcx zero quad words and advances %rdi; assumes DF is clear */
	rep	stosq

1:
	mov	%r11d, %ecx
	and	$7, %ecx		/* leftover bytes after the quad words */
	jz	2f

	.p2align 4,, 3

3:
	dec	%ecx			/* clears bytes from the highest index down to 0 */
	mov	%al, (%rdi, %rcx)	/* mov keeps the flags from dec */
	jnz	3b

	.p2align 4,, 3

2:
	mov	%r8, %rax
	ret

#endif

	.p2align 4

LABEL(exitn):
#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#else
	mov	%rdi, %rax
#endif

	ret

/*
 * Close the function opened by the matching ENTRY() at the top of the
 * routine.  NOTE(review): SET_SIZE comes from the included headers and
 * presumably records the symbol size -- confirm there.
 */
#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)			/* (char *, const char *) */
#endif