/* xref: /freebsd/lib/libc/amd64/string/stpcpy.S (revision 734e82fe33aa764367791a7d603b383996c6b40b) */
/*
 * Adapted by Guillaume Morin <guillaume@morinfr.org> from strcpy.S
 * written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>
/*
 * This stpcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary; it then copies by
 * words until it finds a word containing a zero byte, and finally
 * copies by bytes until the end of the string is reached.
 *
 * While this may result in unaligned stores if the source and
 * destination pointers are unaligned with respect to each other,
 * it is still faster than either byte copies or the overhead of
 * an implementation suitable for machines with strict alignment
 * requirements.
 */

/*
 * char *stpcpy(char *dst, const char *src)
 *
 * Copies the NUL-terminated string at src, terminator included, to dst.
 *
 * In:      %rdi = dst, %rsi = src
 * Out:     %rax = address of the NUL byte stored in dst
 * Scratch: %rcx, %rdx, %r8, %r9, flags (%rdi and %rsi are advanced)
 *
 * No stack memory is used and no other function is called.
 */
	.globl	stpcpy,__stpcpy
ENTRY(stpcpy)
__stpcpy:
	/* Per-byte constants for the word-at-a-time zero-byte filter. */
	movabsq $0x0101010101010101,%r8
	movabsq $0x8080808080808080,%r9

	/*
	 * Align source to a word boundary, copying one byte per
	 * iteration.  A NUL met here ends the copy; %rdi has already
	 * been stepped past it, so the result is %rdi - 1.
	 * Consider unrolling loop?
	 */
.Lalign:
	testb	$7,%sil
	je	.Lword_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lalign
	leaq	-1(%rdi),%rax		/* back up onto the NUL just stored */
	ret

	/*
	 * Word loop.  .Lloop stores the word that the filter below has
	 * just passed as NUL-free; the first pass enters at
	 * .Lword_aligned, where nothing is pending yet.
	 */
	.p2align 4
.Lloop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lword_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx		/* 0x00 bytes wrap to 0xff */
	testq	%r9,%rcx		/* any byte with bit 7 set now? */
	je	.Lloop			/* none: the word holds no NUL */

	/*
	 * The filter above is conservative: besides a zero byte it also
	 * fires for any byte in the range 0x81..0xff, so the loop may
	 * exit prematurely.  Copy the word held in %rdx one byte at a
	 * time, lowest byte first, and return to the loop if none of
	 * the bytes in the word equal 0.  %rsi already points at the
	 * next source word.
	 *
	 * For bytes 1-7 the store comes before the test, so on a NUL
	 * %rdi is left pointing at the terminator.
	 */

	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone
	incq	%rdi

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone
	incq	%rdi

	/*
	 * Last byte: advance %rdi before testing so that a non-zero
	 * byte re-enters the word loop with %rdi already past the
	 * whole word; on a NUL the advance is undone.
	 */
	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lword_aligned
	decq	%rdi

.Ldone:
	movq	%rdi,%rax		/* %rdi points at the stored NUL */
	ret
END(stpcpy)

	.section .note.GNU-stack,"",%progbits