xref: /titanic_50/usr/src/lib/libc/amd64/gen/strncat.s (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/
32/ strncat(s1, s2, n)
33/
34/ Concatenates s2 on the end of s1.  s1's space must be large enough.
35/ At most n characters are moved.
36/ Returns s1.
37/
38/ Fast assembly language version of the following C-program strncat
39/ which represents the `standard' for the C-library.
40/
41/	char *
42/	strncat(char *s1, const char *s2, size_t n)
43/	{
44/		char	*os1 = s1;
45/
46/		n++;
47/		while (*s1++)
48/			;
49/		--s1;
50/		while (*s1++ = *s2++)
51/			if (--n == 0) {
52/				s1[-1] = '\0';
53/				break;
54/			}
55/		return (os1);
56/	}
57/
58/ In this assembly language version, the following expression is used
59/ to check if a 32-bit word data contains a null byte or not:
60/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
61/ If the above expression generates a value other than 0x80808080,
62/ that means the 32-bit word data contains a null byte.
63/
64/ The above has been extended for 64-bit support.
65/
66
67#include "SYS.h"
68
/
/ char *strncat(char *s1, const char *s2, size_t n)
/
/ Appends at most n bytes of s2 to the end of s1 and null terminates
/ the result.
/
/ On entry:	%rdi = s1, %rsi = s2, %rdx = n
/ On return:	%rax = s1
/ Modifies:	%rcx, %rdx, %rsi, %rdi, %r8, %r9, %r11 and the flags
/
/ Register roles:
/	%rdi	cursor in s1; first used to locate the terminating null
/		byte of s1, afterwards it is the position being written
/	%rsi	read cursor in s2
/	%rdx	number of bytes that may still be taken from s2
/	%r8	0x7f7f7f7f7f7f7f7f
/	%r9	0x8080808080808080
/	%r11	quadword or byte most recently loaded
/	%rcx	scratch for the null-byte test
/
/ Null-byte test for a quadword A:
/	(((A & %r8) + %r8) | A) & %r9
/ equals %r9 exactly when none of the eight bytes of A is zero.
/
/ Quadword loads are only issued from addresses that are 8-byte aligned.
/ Stores of quadwords into s1 may be unaligned.
/
	ENTRY(strncat)		/* (char *, char *, size_t) */
	movq	%rdi, %rax		/ return value is the original s1
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f7f7f7f7f7f7f7f
	movq	%r8, %r9
	notq	%r9				/ %r9 = 0x8080808080808080
	testq	$7, %rdi		/ if s1 is not quadword aligned
	jnz	.L1			/ scan single bytes until it is
	.align	4
.L2:
	/ Look for the end of s1 one aligned quadword at a time.
	movq	(%rdi), %r11		/ %r11 = next quadword of s1
	movq	%r8, %rcx
	andq	%r11, %rcx		/ clear bit 7 of every byte
	addq	$8, %rdi		/ advance to the next quadword
	addq	%r8, %rcx		/ bit 7 set <=> low 7 bits non-zero
	orq	%r11, %rcx		/ bit 7 set <=> whole byte non-zero
	andq	%r9, %rcx		/ keep only bit 7 of every byte
	cmpq	%r9, %rcx		/ all eight set: no null byte here
	je	.L2			/ so keep scanning
	subq	$8, %rdi		/ undo the advance, null is in here
.L1:
	/ Look for the end of s1 one byte at a time.
	cmpb	$0, (%rdi)		/ end of s1 reached?
	je	.L3			/ yes, start appending
	incq	%rdi			/ next byte
	testq	$7, %rdi		/ still not quadword aligned?
	jnz	.L1			/ keep scanning bytes
	jmp	.L2			/ aligned: switch to quadword scan
	.align	4
.L3:
	/ %rdi is the position in s1 where the next byte of s2 goes.
	/ This label is reached once after the scan of s1, and again from
	/ .L4 after every non-null byte copied from an unaligned s2.

	testq	$7, %rsi		/ if %rsi is not quadword aligned
	jnz	.L4			/ copy a single byte
	cmpq	$8, %rdx		/ fewer than 8 bytes allowed?
	jb	.L7			/ finish one byte at a time
	.align	4
.L5:
	/ %rsi is quadword aligned and %rdx >= 8.
	movq	(%rsi), %r11		/ %r11 = next quadword of s2
	movq	%r8, %rcx
	andq	%r11, %rcx		/ clear bit 7 of every byte
	addq	$8, %rsi		/ advance to the next quadword
	addq	%r8, %rcx		/ bit 7 set <=> low 7 bits non-zero
	orq	%r11, %rcx		/ bit 7 set <=> whole byte non-zero
	andq	%r9, %rcx		/ keep only bit 7 of every byte
	cmpq	%r9, %rcx		/ some bit missing: null byte found
	jne	.L6			/ copy that quadword bytewise
	movq	%r11, (%rdi)		/ store all eight bytes into s1
	subq	$8, %rdx		/ eight fewer bytes allowed
	addq	$8, %rdi		/ advance the write position
	cmpq	$8, %rdx		/ room for another quadword?
	jae	.L5			/ yes
	jmp	.L7			/ no, finish one byte at a time
.L6:
	subq	$8, %rsi		/ undo the advance, null is in here
	.align	4
.L7:
	/ Fewer than 8 bytes allowed, or a null byte lies within the
	/ next 8 bytes of s2: copy single bytes until either runs out.
	testq	%rdx, %rdx		/ byte budget exhausted?
	jz	.L8			/ yes, terminate the result
	movb	(%rsi), %r11b		/ %r11b = next byte of s2
	decq	%rdx			/ one fewer byte allowed
	movb	%r11b, (%rdi)		/ store it into s1
	incq	%rsi			/ next byte
	incq	%rdi			/ next byte
	testb	%r11b, %r11b		/ was that the null byte of s2?
	jnz	.L7			/ no, keep copying
	ret				/ yes, result is already terminated
	.align	4

.L4:
	/ %rsi is not quadword aligned: copy one byte, then go back to
	/ .L3 so that the quadword loop takes over as soon as %rsi
	/ reaches an 8-byte boundary.
	testq	%rdx, %rdx		/ byte budget exhausted?
	jz	.L8			/ yes, terminate the result
	movb	(%rsi), %r11b		/ %r11b = next byte of s2
	decq	%rdx			/ one fewer byte allowed
	movb	%r11b, (%rdi)		/ store it into s1
	incq	%rdi			/ next byte
	incq	%rsi			/ next byte
	testb	%r11b, %r11b		/ was that the null byte of s2?
	jnz	.L3			/ no, re-check the alignment of %rsi
	ret				/ yes, result is already terminated
	.align	4
.L8:
	/ The byte budget ran out before the null byte of s2 was copied.
	movb	$0, (%rdi)		/ null termination
	ret
	SET_SIZE(strncat)
155