xref: /illumos-gate/usr/src/lib/libc/amd64/gen/strcat.S (revision 79492562b32b5e6bc03e14ad2b51f986335f3709)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26	.file	"strcat.s"
27
28/
29/ strcat(s1, s2)
30/
31/ Concatenates s2 on the end of s1.  s1's space must be large enough.
32/ Returns s1.
33/
34/ Fast assembly language version of the following C-program strcat
35/ which represents the `standard' for the C-library.
36/
37/	char *
38/	strcat(char *s1, const char *s2)
39/	{
40/		char	*os1 = s1;
41/
42/		while (*s1++)
43/			;
44/		--s1;
45/		while (*s1++ = *s2++)
46/			;
47/		return (os1);
48/	}
49/
/ In this assembly language version, the following expression is used
/ to check if a 32-bit word data contains a null byte or not:
/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
/ If the above expression generates a value other than 0x80808080,
/ that means the 32-bit word data contains a null byte.
/
/ The above has been extended for 64-bit support: the code below applies
/ the same test to a quadword, using the constants 0x7f7f7f7f7f7f7f7f
/ and 0x8080808080808080.
57/
58
59#include "SYS.h"
60
/
/ char *strcat(char *s1, const char *s2)
/
/ On entry %rdi = s1 and %rsi = s2; on return %rax = s1.
/
/ Register usage:
/	%rax	saved copy of s1 (the return value)
/	%rdi	cursor into the destination string
/	%rsi	cursor into the source string
/	%rdx	quadword (or byte, in %dl) most recently loaded
/	%rcx	scratch for the null-byte test
/	%r8	0x7f7f7f7f7f7f7f7f
/	%r9	0x8080808080808080
/
/ No stack is used.  Quadword loads are issued only from addresses that
/ are a multiple of 8; the quadword store into the destination is not
/ required to be aligned.
/
	ENTRY(strcat)	/* (char *s1, char *s2) */
	movq	%rdi, %rax			/ return value is the original s1
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ low seven bits of every byte
	movabsq	$0x8080808080808080, %r9	/ high bit of every byte

	/ Phase 1: advance %rdi to the terminating null byte of s1.
	testq	$7, %rdi		/ is s1 off a quadword boundary?
	jnz	.Ldst_byte		/ yes: step by bytes until aligned
	.align	4
.Ldst_qword:
	movq	(%rdi), %rdx		/ %rdx = next 8 bytes of s1
	addq	$8, %rdi		/ advance now; undone below on a hit
	movq	%rdx, %rcx
	andq	%r8, %rcx		/ clear bit 7 of each byte
	addq	%r8, %rcx		/ bit 7 set where low 7 bits nonzero
	orq	%rdx, %rcx		/ bit 7 set where the byte is nonzero
	andq	%r9, %rcx		/ keep only the per-byte bit 7 flags
	cmpq	%r9, %rcx		/ all eight flags set means no null
	je	.Ldst_qword		/ no null yet: scan next quadword
	subq	$8, %rdi		/ back up to the quadword with the null
.Ldst_byte:
	cmpb	$0, (%rdi)		/ is this byte the end of s1?
	je	.Lappend		/ yes: %rdi is where s2 gets copied
	incq	%rdi
	testq	$7, %rdi		/ reached a quadword boundary?
	jnz	.Ldst_byte		/ not yet: keep stepping by bytes
	jmp	.Ldst_qword		/ aligned: resume the quadword scan
	.align	4
.Lappend:
	/ Phase 2: copy s2, including its null byte, to (%rdi).
	testq	$7, %rsi		/ is s2 off a quadword boundary?
	jnz	.Lsrc_byte		/ yes: copy bytes until aligned
	.align	4
.Lsrc_qword:
	movq	(%rsi), %rdx		/ %rdx = next 8 bytes of s2
	movq	%rdx, %rcx
	andq	%r8, %rcx		/ same null-byte test as in phase 1
	addq	%r8, %rcx
	orq	%rdx, %rcx
	andq	%r9, %rcx
	cmpq	%r9, %rcx		/ any flag clear means a null byte
	jne	.Lsrc_byte		/ null inside: finish it by bytes
	movq	%rdx, (%rdi)		/ no null: store all 8 bytes at once
	addq	$8, %rsi		/ both cursors move past the quadword
	addq	$8, %rdi
	jmp	.Lsrc_qword
	.align	4
.Lsrc_byte:
	movb	(%rsi), %dl		/ %dl = next byte of s2
	movb	%dl, (%rdi)		/ store it; the null is copied too
	testb	%dl, %dl		/ was that the terminating null?
	je	.Ldone			/ yes: concatenation is complete
	incq	%rsi
	incq	%rdi
	testq	$7, %rsi		/ is the source quadword aligned now?
	jnz	.Lsrc_byte		/ no: copy another byte
	jmp	.Lsrc_qword		/ yes: switch to quadword copies
	.align	4
.Ldone:
	ret
	SET_SIZE(strcat)
124