xref: /titanic_52/usr/src/lib/libc/amd64/gen/strcat.s (revision 18c2aff776a775d34a4c9893a4c72e0434d68e36)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/
32/ strcat(s1, s2)
33/
34/ Concatenates s2 on the end of s1.  s1's space must be large enough.
35/ Returns s1.
36/
37/ Fast assembly language version of the following C-program strcat
38/ which represents the `standard' for the C-library.
39/
40/	char *
41/	strcat(char *s1, const char *s2)
42/	{
43/		char	*os1 = s1;
44/
45/		while (*s1++)
46/			;
47/		--s1;
48/		while (*s1++ = *s2++)
49/			;
50/		return (os1);
51/	}
52/
53/ In this assembly language version, the following expression is used
54/ to check if a 32-bit word data contains a null byte or not:
55/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
56/ If the above expression generates a value other than 0x80808080,
57/ that means the 32-bit word data contains a null byte.
58/
59/ The above has been extended for 64-bit support.
60/
61
62#include "SYS.h"
63
/
/ Register roles (System V AMD64: s1 arrives in %rdi, s2 in %rsi):
/	%rax	value handed back to the caller (the original s1)
/	%rdi	cursor into the destination string
/	%rsi	cursor into the source string
/	%rdx	quadword just loaded (%dl holds the byte in the byte loops)
/	%rcx	scratch for the null-byte test
/	%r8	0x7f7f7f7f7f7f7f7f
/	%r9	0x8080808080808080
/
/ Null-byte test used by both quadword loops:
/	(((A & %r8) + %r8) | A) & %r9
/ equals %r9 exactly when none of the eight bytes of A is zero.
/
	ENTRY(strcat)	/* (char *s1, char *s2) */
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ mask: low 7 bits of each byte
	movq	%r8, %r9
	notq	%r9				/ mask: top bit of each byte
	movq	%rdi, %rax		/ remember s1 for the return value

	/ Phase 1: locate the terminating null of the destination.
	testq	$7, %rdi		/ destination on an 8-byte boundary?
	jnz	.Ldst_bytes		/ no: step bytewise until it is
	.align	4
.Ldst_quads:
	movq	(%rdi), %rdx		/ fetch eight destination bytes
	addq	$8, %rdi		/ advance now, undone below on a hit
	movq	%rdx, %rcx
	andq	%r8, %rcx		/ strip the top bit of every byte
	addq	%r8, %rcx		/ top bit set where low 7 bits != 0
	orq	%rdx, %rcx		/ fold the original top bits back in
	andq	%r9, %rcx		/ keep only the per-byte top bits
	cmpq	%r9, %rcx		/ all set means all bytes nonzero
	je	.Ldst_quads		/ no null here, try the next quadword
	subq	$8, %rdi		/ back up to the quadword with the null
.Ldst_bytes:
	cmpb	$0, (%rdi)		/ reached the end of the destination?
	je	.Lappend
	incq	%rdi
	testq	$7, %rdi		/ back on an 8-byte boundary?
	jnz	.Ldst_bytes		/ not yet, keep stepping bytewise
	jmp	.Ldst_quads		/ aligned: resume quadword scanning
	.align	4
.Lappend:
	/ Phase 2: %rdi addresses the null of the destination; copy the
	/ source there.  Quadword loads are issued only while %rsi is
	/ 8-byte aligned; the stores through %rdi may be unaligned.
	testq	$7, %rsi		/ source on an 8-byte boundary?
	jnz	.Lsrc_bytes		/ no: copy bytewise until it is
	.align	4
.Lsrc_quads:
	movq	(%rsi), %rdx		/ fetch eight source bytes
	addq	$8, %rsi		/ advance now, undone below on a hit
	movq	%rdx, %rcx
	andq	%r8, %rcx		/ same null-byte test as in phase 1
	addq	%r8, %rcx
	orq	%rdx, %rcx
	andq	%r9, %rcx
	cmpq	%r9, %rcx		/ does this quadword hold a null?
	jne	.Lsrc_tail		/ yes: finish one byte at a time
	movq	%rdx, (%rdi)		/ no: store all eight bytes at once
	addq	$8, %rdi
	jmp	.Lsrc_quads
.Lsrc_tail:
	subq	$8, %rsi		/ back up to the quadword with the null
	.align	4
.Lsrc_bytes:
	movb	(%rsi), %dl		/ fetch one source byte
	movb	%dl, (%rdi)		/ store it; the null is copied too
	testb	%dl, %dl		/ was that the terminator?
	je	.Ldone
	incq	%rsi
	incq	%rdi
	testq	$7, %rsi		/ source back on an 8-byte boundary?
	jnz	.Lsrc_bytes		/ not yet, keep copying bytewise
	jmp	.Lsrc_quads		/ aligned: resume quadword copying
	.align	4
.Ldone:
	ret				/ %rax still holds the original s1
	SET_SIZE(strcat)
127