xref: /titanic_41/usr/src/lib/libc/amd64/gen/strcpy.s (revision 11a8fa6cb17403e630122ac19b39a323c6e64142)
1/*
2 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5
6/*
7 * Copyright (c) 2002 Advanced Micro Devices, Inc.
8 *
9 * All rights reserved.
10 *
11 * Redistribution and  use in source and binary  forms, with or
12 * without  modification,  are   permitted  provided  that  the
13 * following conditions are met:
14 *
15 * + Redistributions  of source  code  must  retain  the  above
16 *   copyright  notice,   this  list  of   conditions  and  the
17 *   following disclaimer.
18 *
19 * + Redistributions  in binary  form must reproduce  the above
20 *   copyright  notice,   this  list  of   conditions  and  the
21 *   following  disclaimer in  the  documentation and/or  other
22 *   materials provided with the distribution.
23 *
24 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
25 *   names  of  its contributors  may  be  used  to endorse  or
26 *   promote  products  derived   from  this  software  without
27 *   specific prior written permission.
28 *
29 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
30 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
32 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
33 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
34 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
35 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
36 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
37 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
38 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
39 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
40 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
41 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
42 * POSSIBILITY OF SUCH DAMAGE.
43 *
44 * It is  licensee's responsibility  to comply with  any export
45 * regulations applicable in licensee's jurisdiction.
46 */
47
48	.ident	"%Z%%M%	%I%	%E% SMI"
49
50	.file	"%M%"
51
52#include "SYS.h"
53#include "cache.h"
54
/*
 * LABEL(s) builds a routine-local label by gluing s onto the ".strcpy"
 * prefix.  The empty comment between the two pieces serves as the paste
 * operator; this presumably relies on a traditional (pre-ANSI) C
 * preprocessor that deletes comments without leaving a separator --
 * confirm against the build's cpp flags.
 */
55#define LABEL(s) .strcpy/**/s
56
/*
 * Entry point.  Assembled as strncpy when USE_AS_STRNCPY is defined,
 * otherwise as strcpy.
 *
 * Register roles used throughout the routine:
 *   %rdi  destination base (never advanced until the final padding)
 *   %rsi  source base
 *   %rdx  byte index applied to both %rdi and %rsi
 *   %r11  strncpy only: bytes of the count still available
 *   %rax  current byte / 8-byte word of source data
 *   %rcx  NUL-detection constant (set at 8try)
 *   %r8, %r10  scratch;  %r9  loop-switch threshold
 */
57#ifdef USE_AS_STRNCPY
58	ENTRY(strncpy)
59#else
60	ENTRY(strcpy)                        /* (char *, const char *) */
61#endif
62
63#ifdef USE_AS_STRNCPY
	/*
	 * The count arrives in %rdx; it is moved to %r11 because %rdx is
	 * reused as the index.  mov leaves the flags from test intact, so
	 * the jz still sees "count == 0".
	 */
64	test	%rdx, %rdx		/* (char *, const char *, size_t) */
65	mov	%rdx, %r11
66	jz	LABEL(exitn)		/* early exit */
67#endif
68
	/* index = 0 (a 32-bit write also clears the upper half of %rdx) */
69        xor     %edx, %edx
70
/*
 * Alignment phase: copy single bytes until the source address
 * (%rsi + index) is a multiple of 8, so that the word loops below only
 * issue aligned 8-byte loads from the source.  Skipped entirely when
 * %rsi is already aligned.
 */
71LABEL(aligntry):
72        mov     %rsi, %r8		/* align by source */
73        and     $7, %r8
74	jz	LABEL(alignafter)
75
76LABEL(align):				/* 8-byte align */
	/* %r8 = (src & 7) - 8: minus the number of bytes to the boundary */
77        sub     $8, %r8
78
79	.p2align 4
80
81LABEL(alignloop):
82#ifdef USE_AS_STRNCPY
	/* consume one byte of the count; leave once it goes negative */
83	dec	%r11
84	jl	LABEL(exitn)
85#endif
86
87        mov     (%rsi, %rdx), %al       /* load next source byte */
88        test    %al, %al                /* is it the terminating NUL? */
	/* the byte is stored before branching, so the NUL itself is copied */
89        mov     %al, (%rdi, %rdx)
90        jz      LABEL(exit)
91
92        inc     %edx
	/* %r8 counts up toward zero; zero means the source is now aligned */
93        inc     %r8
94        jnz     LABEL(alignloop)
95
/*
 * First 64 bytes, copied as eight straight-line 8-byte steps.
 *
 * NUL detection used by every word step in this routine:
 *   %rcx = 0xfefefefefefefeff, i.e. -0x0101010101010101.
 *   %r8  = %rax + %rcx          subtracts 1 from every byte, with the
 *                               borrows showing up as missing carries
 *   %r10 = -CF                  (sbb of a register with itself)
 *   %r8 ^= %rax                 bits changed by the addition
 *   %r8 |= %rcx                 keep only bit 0 of bytes 1..7
 *   %r8 -= %r10                 adds the final carry back in
 * A carry leaves a byte exactly when that byte is non-zero (given a
 * carry came in), so %r8 ends up zero only if all eight bytes of %rax
 * are non-NUL.  A non-zero result sends the word to the byte-wise tail.
 *
 * Each step stores the checked word, preloads the next one into %rax
 * and advances the index by 8.  In strncpy builds each step first takes
 * 8 from the remaining count and goes to the tail (which adds the 8
 * back) when fewer than 8 bytes are allowed.
 */
96	.p2align 4
97
98LABEL(alignafter):
99
100LABEL(8try):
101        mov     $0xfefefefefefefeff, %rcx
102
103LABEL(8):                               /* 8-byte */
	/* preload the first aligned word */
104        mov     (%rsi, %rdx), %rax
105
106LABEL(8loop):
	/* step 1 of 8 */
107#ifdef USE_AS_STRNCPY
108	sub	$8, %r11
109	jl	LABEL(tail)
110#endif
111
112        mov     %rcx, %r8
113        add     %rax, %r8
114        sbb     %r10, %r10
115
116        xor     %rax, %r8
117        or      %rcx, %r8
118        sub     %r10, %r8
119        jnz     LABEL(tail)
120
121        mov     %rax, (%rdi, %rdx)
122        mov     8 (%rsi, %rdx), %rax
123        add     $8, %edx
124
	/* step 2 of 8 */
125#ifdef USE_AS_STRNCPY
126	sub	$8, %r11
127	jl	LABEL(tail)
128#endif
129
130        mov     %rcx, %r8
131        add     %rax, %r8
132        sbb     %r10, %r10
133
134        xor     %rax, %r8
135        or      %rcx, %r8
136        sub     %r10, %r8
137        jnz     LABEL(tail)
138
139        mov     %rax, (%rdi, %rdx)
140        mov     8 (%rsi, %rdx), %rax
141        add     $8, %edx
142
	/* step 3 of 8 */
143#ifdef USE_AS_STRNCPY
144	sub	$8, %r11
145	jl	LABEL(tail)
146#endif
147
148        mov     %rcx, %r8
149        add     %rax, %r8
150        sbb     %r10, %r10
151
152        xor     %rax, %r8
153        or      %rcx, %r8
154        sub     %r10, %r8
155        jnz     LABEL(tail)
156
157        mov     %rax, (%rdi, %rdx)
158        mov     8 (%rsi, %rdx), %rax
159        add     $8, %edx
160
	/* step 4 of 8 */
161#ifdef USE_AS_STRNCPY
162	sub	$8, %r11
163	jl	LABEL(tail)
164#endif
165
166        mov     %rcx, %r8
167        add     %rax, %r8
168        sbb     %r10, %r10
169
170        xor     %rax, %r8
171        or      %rcx, %r8
172        sub     %r10, %r8
173        jnz     LABEL(tail)
174
175        mov     %rax, (%rdi, %rdx)
176        mov     8 (%rsi, %rdx), %rax
177        add     $8, %edx
178
	/* step 5 of 8 */
179#ifdef USE_AS_STRNCPY
180	sub	$8, %r11
181	jl	LABEL(tail)
182#endif
183
184        mov     %rcx, %r8
185        add     %rax, %r8
186        sbb     %r10, %r10
187
188        xor     %rax, %r8
189        or      %rcx, %r8
190        sub     %r10, %r8
191        jnz     LABEL(tail)
192
193        mov     %rax, (%rdi, %rdx)
194        mov     8 (%rsi, %rdx), %rax
195        add     $8, %edx
196
	/* step 6 of 8 */
197#ifdef USE_AS_STRNCPY
198	sub	$8, %r11
199	jl	LABEL(tail)
200#endif
201
202        mov     %rcx, %r8
203        add     %rax, %r8
204        sbb     %r10, %r10
205
206        xor     %rax, %r8
207        or      %rcx, %r8
208        sub     %r10, %r8
209        jnz     LABEL(tail)
210
211        mov     %rax, (%rdi, %rdx)
212        mov     8 (%rsi, %rdx), %rax
213        add     $8, %edx
214
	/* step 7 of 8 */
215#ifdef USE_AS_STRNCPY
216	sub	$8, %r11
217	jl	LABEL(tail)
218#endif
219
220        mov     %rcx, %r8
221        add     %rax, %r8
222        sbb     %r10, %r10
223
224        xor     %rax, %r8
225        or      %rcx, %r8
226        sub     %r10, %r8
227        jnz     LABEL(tail)
228
229        mov     %rax, (%rdi, %rdx)
230        mov     8 (%rsi, %rdx), %rax
231        add     $8, %edx
232
	/* step 8 of 8 */
233#ifdef USE_AS_STRNCPY
234	sub	$8, %r11
235	jl	LABEL(tail)
236#endif
237
238        mov     %rcx, %r8
239        add     %rax, %r8
240        sbb     %r10, %r10
241
242        xor     %rax, %r8
243        or      %rcx, %r8
244        sub     %r10, %r8
245        jnz     LABEL(tail)
246
247        mov     %rax, (%rdi, %rdx)
248        mov     8 (%rsi, %rdx), %rax
249        add     $8, %edx
250
/*
 * Main loop: 64 bytes per iteration as eight word steps (same NUL test,
 * same strncpy count handling as the first 64 bytes).  Entered with the
 * next source word already loaded in %rax.  The loop repeats while the
 * index, sampled before the last step's increment, is below or equal to
 * the threshold in %r9.
 *
 * NOTE(review): .amd64cache1half and _sref_ are defined outside this
 * file; the name suggests half of the first-level cache size -- confirm
 * against cache.h.
 */
251LABEL(8after):
252
253LABEL(64try):
254        mov     _sref_(.amd64cache1half), %r9
255
256LABEL(64):				/* 64-byte */
257
258        .p2align 4
259
260LABEL(64loop):
	/* step 1 of 8 */
261#ifdef USE_AS_STRNCPY
262	sub	$8, %r11
263	jl	LABEL(tail)
264#endif
265
266        mov     %rcx, %r8
267        add     %rax, %r8
268        sbb     %r10, %r10
269
270        xor     %rax, %r8
271        or      %rcx, %r8
272        sub     %r10, %r8
273        jnz     LABEL(tail)
274
275        mov     %rax, (%rdi, %rdx)
276        mov     8 (%rsi, %rdx), %rax
277        add     $8, %edx
278
	/* step 2 of 8 */
279#ifdef USE_AS_STRNCPY
280	sub	$8, %r11
281	jl	LABEL(tail)
282#endif
283
284        mov     %rcx, %r8
285        add     %rax, %r8
286        sbb     %r10, %r10
287
288        xor     %rax, %r8
289        or      %rcx, %r8
290        sub     %r10, %r8
291        jnz     LABEL(tail)
292
293        mov     %rax, (%rdi, %rdx)
294        mov     8 (%rsi, %rdx), %rax
295        add     $8, %edx
296
	/* step 3 of 8 */
297#ifdef USE_AS_STRNCPY
298	sub	$8, %r11
299	jl	LABEL(tail)
300#endif
301
302        mov     %rcx, %r8
303        add     %rax, %r8
304        sbb     %r10, %r10
305
306        xor     %rax, %r8
307        or      %rcx, %r8
308        sub     %r10, %r8
309        jnz     LABEL(tail)
310
311        mov     %rax, (%rdi, %rdx)
312        mov     8 (%rsi, %rdx), %rax
313        add     $8, %edx
314
	/* step 4 of 8 */
315#ifdef USE_AS_STRNCPY
316	sub	$8, %r11
317	jl	LABEL(tail)
318#endif
319
320        mov     %rcx, %r8
321        add     %rax, %r8
322        sbb     %r10, %r10
323
324        xor     %rax, %r8
325        or      %rcx, %r8
326        sub     %r10, %r8
327        jnz     LABEL(tail)
328
329        mov     %rax, (%rdi, %rdx)
330        mov     8 (%rsi, %rdx), %rax
331        add     $8, %edx
332
	/* step 5 of 8 */
333#ifdef USE_AS_STRNCPY
334	sub	$8, %r11
335	jl	LABEL(tail)
336#endif
337
338        mov     %rcx, %r8
339        add     %rax, %r8
340        sbb     %r10, %r10
341
342        xor     %rax, %r8
343        or      %rcx, %r8
344        sub     %r10, %r8
345        jnz     LABEL(tail)
346
347        mov     %rax, (%rdi, %rdx)
348        mov     8 (%rsi, %rdx), %rax
349        add     $8, %edx
350
	/* step 6 of 8 */
351#ifdef USE_AS_STRNCPY
352	sub	$8, %r11
353	jl	LABEL(tail)
354#endif
355
356        mov     %rcx, %r8
357        add     %rax, %r8
358        sbb     %r10, %r10
359
360        xor     %rax, %r8
361        or      %rcx, %r8
362        sub     %r10, %r8
363        jnz     LABEL(tail)
364
365        mov     %rax, (%rdi, %rdx)
366        mov     8 (%rsi, %rdx), %rax
367        add     $8, %edx
368
	/* step 7 of 8 */
369#ifdef USE_AS_STRNCPY
370	sub	$8, %r11
371	jl	LABEL(tail)
372#endif
373
374        mov     %rcx, %r8
375        add     %rax, %r8
376        sbb     %r10, %r10
377
378        xor     %rax, %r8
379        or      %rcx, %r8
380        sub     %r10, %r8
381        jnz     LABEL(tail)
382
383        mov     %rax, (%rdi, %rdx)
384        mov     8 (%rsi, %rdx), %rax
385        add     $8, %edx
386
	/* step 8 of 8, which also carries the loop test */
387#ifdef USE_AS_STRNCPY
388	sub	$8, %r11
389	jl	LABEL(tail)
390#endif
391
392        mov     %rcx, %r8
393        add     %rax, %r8
394        sbb     %r10, %r10
395
396        xor     %rax, %r8
397        or      %rcx, %r8
398        sub     %r10, %r8
399        jnz     LABEL(tail)
400
	/*
	 * Compare index against the threshold first; the mov and lea that
	 * follow do not modify the flags, so jbe still tests this cmp.
	 */
401        cmp     %r9, %rdx
402
403        mov     %rax, (%rdi, %rdx)
404        mov     8 (%rsi, %rdx), %rax
405        lea     8 (%rdx), %rdx
406
407        jbe     LABEL(64loop)
408
/*
 * Prefetching loop, entered once the index has passed the 64loop
 * threshold.  Identical word steps, but the last step of every 64-byte
 * iteration also issues prefetchnta for both destination and source
 * 512 + 8 bytes ahead of the current index.  Repeats while the index,
 * sampled before the last increment, is strictly below %r9.
 *
 * NOTE(review): .amd64cache2half is defined outside this file; the name
 * suggests half of the second-level cache size -- confirm against
 * cache.h.
 */
409LABEL(64after):
410
411LABEL(pretry):
412        mov     _sref_(.amd64cache2half), %r9
413
414LABEL(pre):                              /* 64-byte prefetch */
415
416        .p2align 4
417
418LABEL(preloop):
	/* step 1 of 8 */
419#ifdef USE_AS_STRNCPY
420	sub	$8, %r11
421	jl	LABEL(tail)
422#endif
423
424        mov     %rcx, %r8
425        add     %rax, %r8
426        sbb     %r10, %r10
427
428        xor     %rax, %r8
429        or      %rcx, %r8
430        sub     %r10, %r8
431        jnz     LABEL(tail)
432
433        mov     %rax, (%rdi, %rdx)
434        mov     8 (%rsi, %rdx), %rax
435        add     $8, %edx
436
	/* step 2 of 8 */
437#ifdef USE_AS_STRNCPY
438	sub	$8, %r11
439	jl	LABEL(tail)
440#endif
441
442        mov     %rcx, %r8
443        add     %rax, %r8
444        sbb     %r10, %r10
445
446        xor     %rax, %r8
447        or      %rcx, %r8
448        sub     %r10, %r8
449        jnz     LABEL(tail)
450
451        mov     %rax, (%rdi, %rdx)
452        mov     8 (%rsi, %rdx), %rax
453        add     $8, %edx
454
	/* step 3 of 8 */
455#ifdef USE_AS_STRNCPY
456	sub	$8, %r11
457	jl	LABEL(tail)
458#endif
459
460        mov     %rcx, %r8
461        add     %rax, %r8
462        sbb     %r10, %r10
463
464        xor     %rax, %r8
465        or      %rcx, %r8
466        sub     %r10, %r8
467        jnz     LABEL(tail)
468
469        mov     %rax, (%rdi, %rdx)
470        mov     8 (%rsi, %rdx), %rax
471        add     $8, %edx
472
	/* step 4 of 8 */
473#ifdef USE_AS_STRNCPY
474	sub	$8, %r11
475	jl	LABEL(tail)
476#endif
477
478        mov     %rcx, %r8
479        add     %rax, %r8
480        sbb     %r10, %r10
481
482        xor     %rax, %r8
483        or      %rcx, %r8
484        sub     %r10, %r8
485        jnz     LABEL(tail)
486
487        mov     %rax, (%rdi, %rdx)
488        mov     8 (%rsi, %rdx), %rax
489        add     $8, %edx
490
	/* step 5 of 8 */
491#ifdef USE_AS_STRNCPY
492	sub	$8, %r11
493	jl	LABEL(tail)
494#endif
495
496        mov     %rcx, %r8
497        add     %rax, %r8
498        sbb     %r10, %r10
499
500        xor     %rax, %r8
501        or      %rcx, %r8
502        sub     %r10, %r8
503        jnz     LABEL(tail)
504
505        mov     %rax, (%rdi, %rdx)
506        mov     8 (%rsi, %rdx), %rax
507        add     $8, %edx
508
	/* step 6 of 8 */
509#ifdef USE_AS_STRNCPY
510	sub	$8, %r11
511	jl	LABEL(tail)
512#endif
513
514        mov     %rcx, %r8
515        add     %rax, %r8
516        sbb     %r10, %r10
517
518        xor     %rax, %r8
519        or      %rcx, %r8
520        sub     %r10, %r8
521        jnz     LABEL(tail)
522
523        mov     %rax, (%rdi, %rdx)
524        mov     8 (%rsi, %rdx), %rax
525        add     $8, %edx
526
	/* step 7 of 8 */
527#ifdef USE_AS_STRNCPY
528	sub	$8, %r11
529	jl	LABEL(tail)
530#endif
531
532        mov     %rcx, %r8
533        add     %rax, %r8
534        sbb     %r10, %r10
535
536        xor     %rax, %r8
537        or      %rcx, %r8
538        sub     %r10, %r8
539        jnz     LABEL(tail)
540
541        mov     %rax, (%rdi, %rdx)
542        mov     8 (%rsi, %rdx), %rax
543        add     $8, %edx
544
	/* step 8 of 8, with the prefetches and the loop test */
545#ifdef USE_AS_STRNCPY
546	sub	$8, %r11
547	jl	LABEL(tail)
548#endif
549
550        mov     %rcx, %r8
551        add     %rax, %r8
552        sbb     %r10, %r10
553
554        xor     %rax, %r8
555        or      %rcx, %r8
556        sub     %r10, %r8
557        jnz     LABEL(tail)
558
	/*
	 * The cmp result must survive until the jb below; mov, prefetchnta
	 * and lea leave the flags untouched.
	 */
559        cmp     %r9, %rdx
560
561        mov     %rax, (%rdi, %rdx)
562        prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
563        mov     8 (%rsi, %rdx), %rax
564        prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
565        lea     8 (%rdx), %rdx
566
567        jb	LABEL(preloop)
568
/*
 * Non-temporal loop, used once the index has passed the preloop
 * threshold.  Same eight word steps per 64 bytes, but the stores are
 * movnti (cache-bypassing) and the source is prefetched 768 + 8 bytes
 * ahead once per iteration.  The loop has no size limit of its own: it
 * ends only when a NUL is detected or, for strncpy, when fewer than 8
 * bytes of the count remain.
 *
 * movnti stores are weakly ordered, so every way out of this loop goes
 * through NTtail, whose mfence orders them before the ordinary stores
 * of the tail and before the routine returns.  The strncpy
 * count-exhausted exits used to branch straight to tail and skipped
 * that fence; they now target NTtail as well.  NTtail falls through
 * into tail, so the copy result is unchanged.
 */
569        .p2align 4
570
571LABEL(preafter):
572
573LABEL(NTtry):
	/* fence the ordinary stores issued so far before switching modes */
574	mfence
575
576LABEL(NT):				/* 64-byte NT */
577
578        .p2align 4
579
580LABEL(NTloop):
	/* step 1 of 8 */
581#ifdef USE_AS_STRNCPY
582	sub	$8, %r11
583	jl	LABEL(NTtail)
584#endif
585
586        mov     %rcx, %r8
587        add     %rax, %r8
588        sbb     %r10, %r10
589
590        xor     %rax, %r8
591        or      %rcx, %r8
592        sub     %r10, %r8
593        jnz     LABEL(NTtail)
594
595        movnti  %rax, (%rdi, %rdx)
596        mov     8 (%rsi, %rdx), %rax
597        add     $8, %rdx
598
	/* step 2 of 8 */
599#ifdef USE_AS_STRNCPY
600	sub	$8, %r11
601	jl	LABEL(NTtail)
602#endif
603
604        mov     %rcx, %r8
605        add     %rax, %r8
606        sbb     %r10, %r10
607
608        xor     %rax, %r8
609        or      %rcx, %r8
610        sub     %r10, %r8
611        jnz     LABEL(NTtail)
612
613        movnti  %rax, (%rdi, %rdx)
614        mov     8 (%rsi, %rdx), %rax
615        add     $8, %rdx
616
	/* step 3 of 8 */
617#ifdef USE_AS_STRNCPY
618	sub	$8, %r11
619	jl	LABEL(NTtail)
620#endif
621
622        mov     %rcx, %r8
623        add     %rax, %r8
624        sbb     %r10, %r10
625
626        xor     %rax, %r8
627        or      %rcx, %r8
628        sub     %r10, %r8
629        jnz     LABEL(NTtail)
630
631        movnti  %rax, (%rdi, %rdx)
632        mov     8 (%rsi, %rdx), %rax
633        add     $8, %rdx
634
	/* step 4 of 8 */
635#ifdef USE_AS_STRNCPY
636	sub	$8, %r11
637	jl	LABEL(NTtail)
638#endif
639
640        mov     %rcx, %r8
641        add     %rax, %r8
642        sbb     %r10, %r10
643
644        xor     %rax, %r8
645        or      %rcx, %r8
646        sub     %r10, %r8
647        jnz     LABEL(NTtail)
648
649        movnti  %rax, (%rdi, %rdx)
650        mov     8 (%rsi, %rdx), %rax
651        add     $8, %rdx
652
	/* step 5 of 8 */
653#ifdef USE_AS_STRNCPY
654	sub	$8, %r11
655	jl	LABEL(NTtail)
656#endif
657
658        mov     %rcx, %r8
659        add     %rax, %r8
660        sbb     %r10, %r10
661
662        xor     %rax, %r8
663        or      %rcx, %r8
664        sub     %r10, %r8
665        jnz     LABEL(NTtail)
666
667        movnti  %rax, (%rdi, %rdx)
668        mov     8 (%rsi, %rdx), %rax
669        add     $8, %rdx
670
	/* step 6 of 8 */
671#ifdef USE_AS_STRNCPY
672	sub	$8, %r11
673	jl	LABEL(NTtail)
674#endif
675
676        mov     %rcx, %r8
677        add     %rax, %r8
678        sbb     %r10, %r10
679
680        xor     %rax, %r8
681        or      %rcx, %r8
682        sub     %r10, %r8
683        jnz     LABEL(NTtail)
684
685        movnti  %rax, (%rdi, %rdx)
686        mov     8 (%rsi, %rdx), %rax
687        add     $8, %rdx
688
	/* step 7 of 8 */
689#ifdef USE_AS_STRNCPY
690	sub	$8, %r11
691	jl	LABEL(NTtail)
692#endif
693
694        mov     %rcx, %r8
695        add     %rax, %r8
696        sbb     %r10, %r10
697
698        xor     %rax, %r8
699        or      %rcx, %r8
700        sub     %r10, %r8
701        jnz     LABEL(NTtail)
702
703        movnti  %rax, (%rdi, %rdx)
704        mov     8 (%rsi, %rdx), %rax
705        add     $8, %rdx
706
	/* step 8 of 8, with the source prefetch */
707#ifdef USE_AS_STRNCPY
708	sub	$8, %r11
709	jl	LABEL(NTtail)
710#endif
711
712        mov     %rcx, %r8
713        add     %rax, %r8
714        sbb     %r10, %r10
715
716        xor     %rax, %r8
717        or      %rcx, %r8
718        sub     %r10, %r8
719        jnz     LABEL(NTtail)
720
721        movnti  %rax, (%rdi, %rdx)
722	mov     8 (%rsi, %rdx), %rax
723	prefetchnta 768 + 8 (%rsi, %rdx)
724        add     $8, %rdx
725
726        jmp     LABEL(NTloop)
727
728        .p2align 4
729
	/*
	 * Common exit of the non-temporal loop.  mfence does not alter the
	 * flags or any register, so the tail sees the same state as it
	 * would from a direct branch.
	 */
730LABEL(NTtail):
731	mfence
732
733        .p2align 4
734
735LABEL(NTafter):
736
/*
 * Byte-wise tail.  Reached with %rax holding a source word that either
 * contains the terminating NUL or (strncpy) may only be partly copied.
 * Bytes are taken from %rax low to high: %al, %ah, then a 16-bit shift
 * exposes the next pair; four bytes are handled per trip round the
 * loop.  Each byte is stored before the NUL branch, so the NUL is
 * copied too.
 */
737LABEL(tailtry):
738
739LABEL(tail):                             /* 1-byte tail */
740#ifdef USE_AS_STRNCPY
	/* give back the 8 that the word step subtracted before branching */
741	add	$8, %r11
742#endif
743
744        .p2align 4
745
746LABEL(tailloop):
	/* byte 0 of the current pair group */
747#ifdef USE_AS_STRNCPY
748	dec	%r11
749	jl	LABEL(exitn)
750#endif
751
752        test    %al, %al
753        mov     %al, (%rdi, %rdx)
754        jz      LABEL(exit)
755
756        inc     %rdx
757
	/* byte 1 */
758#ifdef USE_AS_STRNCPY
759	dec	%r11
760	jl	LABEL(exitn)
761
	/*
	 * NOTE(review): this copy into %al looks redundant -- the following
	 * instructions read %ah directly and the shift discards %al.
	 */
762	mov	%ah, %al
763#endif
764
765        test    %ah, %ah
766        mov     %ah, (%rdi, %rdx)
767        jz      LABEL(exit)
768
769        inc     %rdx
770
	/* byte 2 */
771#ifdef USE_AS_STRNCPY
772	dec	%r11
773	jl	LABEL(exitn)
774#endif
775
776        shr     $16, %rax
777
778        test    %al, %al
779        mov     %al, (%rdi, %rdx)
780        jz      LABEL(exit)
781
782        inc     %rdx
783
	/* byte 3 */
784#ifdef USE_AS_STRNCPY
785	dec	%r11
786	jl	LABEL(exitn)
787
	/* NOTE(review): same apparently redundant copy as for byte 1 */
788	mov	%ah, %al
789#endif
790
791        test    %ah, %ah
792        mov     %ah, (%rdi, %rdx)
793        jz      LABEL(exit)
794
	/* expose the next two bytes and go round again */
795        shr     $16, %rax
796        inc     %rdx
797
798        jmp     LABEL(tailloop)
799
800        .p2align 4
801
802LABEL(tailafter):
803
/*
 * Exit taken after the terminating NUL has been stored; %rdx is the
 * index of that NUL.  For plain strcpy this label simply falls through
 * to exitn.  For strncpy the %r11 bytes of the count that remain are
 * filled with zeros starting just past the NUL, and the routine returns
 * from here.
 */
804LABEL(exit):
805#ifdef USE_AS_STRNCPY
	/* ZF = "nothing left to pad"; mov/lea below keep it for the jz */
806	test	%r11, %r11
807	mov	%r11, %rcx
808
	/* %r8 = return value: address of the NUL (stpcpy flavour) or dst */
809#ifdef USE_AS_STPCPY
810        lea     (%rdi, %rdx), %r8
811#else
812        mov     %rdi, %r8
813#endif
814
815	jz	2f
816
817	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	/* %rcx = number of whole 8-byte words to clear; ZF set if none */
818	shr	$3, %rcx
	/* %rdi = first byte after the NUL (lea leaves the shr flags alone) */
819        lea     1 (%rdi, %rdx), %rdi
820	jz	1f
821
	/*
	 * Store %rcx zero quadwords, advancing %rdi (assumes the direction
	 * flag is clear on entry, as the calling convention presumably
	 * guarantees).
	 */
822	rep	stosq
823
8241:
	/* remaining 0..7 bytes, cleared from the highest offset down */
825	mov	%r11d, %ecx
826	and	$7, %ecx
827	jz	2f
828
829        .p2align 4,, 3
830
8313:
832	dec	%ecx
	/* mov does not touch the flags, so jnz tests the dec above */
833	mov	%al, (%rdi, %rcx)
834	jnz	3b
835
836        .p2align 4,, 3
837
8382:
839	mov	%r8, %rax
840        ret
841
842#endif
843
/*
 * Common return.  Used by strcpy after the NUL has been copied, and by
 * strncpy when the count is zero or runs out before a NUL is seen (no
 * padding is needed in that case).  Returns the destination pointer, or
 * destination + index when built with USE_AS_STPCPY.
 */
844        .p2align 4
845
846LABEL(exitn):
847#ifdef USE_AS_STPCPY
848        lea     (%rdi, %rdx), %rax
849#else
850        mov     %rdi, %rax
851#endif
852
853        ret
854
855#ifdef USE_AS_STRNCPY
856	SET_SIZE(strncpy)
857#else
858	SET_SIZE(strcpy)                        /* (char *, const char *) */
859#endif
860