/* xref: /freebsd/lib/libc/amd64/string/strcat.S (revision dc60165b73e4c4d829a2cb9fed5cce585e93d9a9) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */
5
#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/* NetBSD RCS id string; compiled out by the #if 0. */
#if 0
	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
#endif
12
/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src (including its terminator)
 * to the end of the NUL-terminated string at dst and returns dst.
 *
 * In:    %rdi = dst, %rsi = src
 * Out:   %rax = dst (the value %rdi had on entry)
 * Uses:  %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags; no stack.
 *
 * Both phases process one 8-byte word at a time once the pointer being
 * read is 8-byte aligned.  A word x is suspected of containing a zero
 * byte when
 *
 *	(x - 0x0101010101010101) & 0x8080808080808080
 *
 * is non-zero.  A zero byte always trips this test (it turns into 0xff),
 * but so does any byte >= 0x81, and a borrow out of a zero byte can
 * flag the byte above it.  The test therefore never misses a NUL but
 * can report false positives, so every hit is rechecked byte by byte.
 *
 * The aligned word loads may read bytes that lie beyond the terminator,
 * but only within the aligned 8-byte word that contains it.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value: original dst */
	movabsq	$0x0101010101010101,%r8		/* 0x01 in every byte */
	movabsq	$0x8080808080808080,%r9		/* bit 7 of every byte */

	/*
	 * Phase 1: find the terminating NUL of dst.
	 *
	 * Align destination to word boundary, checking one byte at a
	 * time; a NUL found here goes straight to the copy phase.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * NOTE(review): GNU as on x86 ELF treats `.align 4' as a 4-byte
	 * boundary; if a 16-byte boundary was intended for this loop,
	 * `.p2align 4' would be needed -- confirm.
	 */
	.align	4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* %rdi now points past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* no byte flagged: next word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * %rdi points 8 bytes past the suspect word, so its bytes are at
	 * -8(%rdi) .. -1(%rdi).  On a hit, %rdi is moved back onto the NUL.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2: copy src, terminator included, to the NUL that %rdi
	 * now points at.
	 *
	 * Align source to a word boundary, copying one byte at a time;
	 * if the terminator is copied here we are done.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/*
	 * Word copy loop, entered at .Lcopy_aligned.  Only %rsi is known
	 * to be aligned; the 8-byte stores through %rdi may be unaligned.
	 * A word is stored whole only after the test found no flagged
	 * byte in it, i.e. it contains no NUL.
	 */
	.align	4
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx		/* test a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The suspect word is in %rdx.  Store it one byte at a time, low
	 * byte first, shifting the next byte into %dl, and stop right
	 * after the NUL has been stored.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false positive: all 8 bytes copied, go on */

.Ldone:
	ret			/* %rax still holds the original dst */
END(strcat)
167