xref: /illumos-gate/usr/src/lib/libc/amd64/gen/strncat.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe/*
22*5d9d9091SRichard Lowe * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23*5d9d9091SRichard Lowe * Use is subject to license terms.
24*5d9d9091SRichard Lowe */
25*5d9d9091SRichard Lowe
26*5d9d9091SRichard Lowe	.file	"strncat.s"
27*5d9d9091SRichard Lowe
28*5d9d9091SRichard Lowe/
29*5d9d9091SRichard Lowe/ strncat(s1, s2, n)
30*5d9d9091SRichard Lowe/
31*5d9d9091SRichard Lowe/ Concatenates s2 on the end of s1.  s1's space must be large enough.
32*5d9d9091SRichard Lowe/ At most n characters are moved.
33*5d9d9091SRichard Lowe/ Returns s1.
34*5d9d9091SRichard Lowe/
35*5d9d9091SRichard Lowe/ Fast assembly language version of the following C-program strncat
36*5d9d9091SRichard Lowe/ which represents the `standard' for the C-library.
37*5d9d9091SRichard Lowe/
38*5d9d9091SRichard Lowe/	char *
39*5d9d9091SRichard Lowe/	strncat(char *s1, const char *s2, size_t n)
40*5d9d9091SRichard Lowe/	{
41*5d9d9091SRichard Lowe/		char	*os1 = s1;
42*5d9d9091SRichard Lowe/
43*5d9d9091SRichard Lowe/		n++;
44*5d9d9091SRichard Lowe/		while (*s1++)
45*5d9d9091SRichard Lowe/			;
46*5d9d9091SRichard Lowe/		--s1;
47*5d9d9091SRichard Lowe/		while (*s1++ = *s2++)
48*5d9d9091SRichard Lowe/			if (--n == 0) {
49*5d9d9091SRichard Lowe/				s1[-1] = '\0';
50*5d9d9091SRichard Lowe/				break;
51*5d9d9091SRichard Lowe/			}
52*5d9d9091SRichard Lowe/		return (os1);
53*5d9d9091SRichard Lowe/	}
54*5d9d9091SRichard Lowe/
55*5d9d9091SRichard Lowe/ In this assembly language version, the following expression is used
56*5d9d9091SRichard Lowe/ to check if a 32-bit word data contains a null byte or not:
57*5d9d9091SRichard Lowe/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
58*5d9d9091SRichard Lowe/ If the above expression generates a value other than 0x80808080,
59*5d9d9091SRichard Lowe/ that means the 32-bit word data contains a null byte.
60*5d9d9091SRichard Lowe/
61*5d9d9091SRichard Lowe/ The above has been extended for 64-bit support.
62*5d9d9091SRichard Lowe/
63*5d9d9091SRichard Lowe
64*5d9d9091SRichard Lowe#include "SYS.h"
65*5d9d9091SRichard Lowe
/*
 * char *strncat(char *s1, const char *s2, size_t n)
 *
 * Appends at most n bytes of s2 to the end of s1, always leaves s1
 * null terminated, and returns s1.
 *
 *	In:	%rdi = s1, %rsi = s2, %rdx = n
 *	Out:	%rax = s1
 *	Scratch: %rcx, %r8, %r9, %r11 and the flags; the stack is not used.
 *
 * Null-byte detection on a quadword A:
 *	(((A & 0x7f..7f) + 0x7f..7f) | A) & 0x80..80
 * Bit 7 of each result byte is set exactly when that byte of A is
 * non-zero, so any value other than 0x80..80 means A holds a null byte.
 *
 * Quadword loads are only issued from 8-byte aligned addresses.  Both
 * scans fall back to single bytes until the pointer being read is
 * aligned, and then switch to the quadword loop.
 */
	ENTRY(strncat)		/* (char *, char *, size_t) */
	movq	%rdi, %rax		/ return value is s1
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f in every byte
	movq	%r8, %r9
	notq	%r9				/ %r9 = 0x80 in every byte

	/ Phase 1: advance %rdi to the null byte that ends s1.
	testq	$7, %rdi		/ s1 not quadword aligned:
	jnz	.Ldst_byte		/ step bytewise until it is
	.align	4
.Ldst_quad:
	movq	(%rdi), %r11		/ %r11 = next aligned quadword of s1
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...
	addq	$8, %rdi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...
	cmpq	%r9, %rcx		/ every byte non-zero:
	je	.Ldst_quad		/ keep scanning
	subq	$8, %rdi		/ undo the increment, null is in here
.Ldst_byte:
	cmpb	$0, (%rdi)		/ reached the end of s1:
	je	.Lcopy			/ start appending
	incq	%rdi			/ next byte
	testq	$7, %rdi		/ still not quadword aligned:
	jnz	.Ldst_byte		/ keep stepping bytewise
	jmp	.Ldst_quad		/ aligned now, scan by quadwords
	.align	4
.Lcopy:
	/ Phase 2: %rdi points to the null byte in s1, %rdx = bytes allowed.

	testq	$7, %rsi		/ s2 not quadword aligned:
	jnz	.Lsrc_unaligned		/ move one byte, then re-check
	cmpq	$8, %rdx		/ fewer than 8 bytes allowed:
	jb	.Lcopy_byte		/ finish bytewise
	.align	4
.Lcopy_quad:
	movq	(%rsi), %r11		/ %r11 = next aligned quadword of s2
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...
	addq	$8, %rsi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...
	cmpq	%r9, %rcx		/ a null byte is in this quadword:
	jne	.Lsrc_rewind		/ copy it bytewise instead
	movq	%r11, (%rdi)		/ store all 8 bytes (may be unaligned)
	subq	$8, %rdx		/ 8 fewer bytes allowed
	addq	$8, %rdi		/ next quadword
	cmpq	$8, %rdx		/ at least 8 bytes still allowed:
	jae	.Lcopy_quad		/ keep copying quadwords
	jmp	.Lcopy_byte		/ otherwise finish bytewise
.Lsrc_rewind:
	subq	$8, %rsi		/ undo the increment
	.align	4
.Lcopy_byte:
	/ fewer than 8 bytes allowed, or a null byte is in the next quadword
	testq	%rdx, %rdx		/ count exhausted:
	jz	.Lterminate		/ write the null and return
	movb	(%rsi), %r11b		/ %r11b = next byte of s2
	decq	%rdx			/ one fewer byte allowed
	movb	%r11b, (%rdi)		/ append it to s1
	incq	%rsi			/ next byte
	incq	%rdi			/ next byte
	testb	%r11b, %r11b		/ that byte was the null of s2:
	jz	.Ldone			/ s1 is already terminated
	jmp	.Lcopy_byte
	.align	4

.Lsrc_unaligned:
	/ %rsi not quadword aligned: move a single byte
	testq	%rdx, %rdx		/ count exhausted:
	jz	.Lterminate		/ write the null and return
	movb	(%rsi), %r11b		/ %r11b = next byte of s2
	decq	%rdx			/ one fewer byte allowed
	movb	%r11b, (%rdi)		/ append it to s1
	incq	%rdi			/ next byte
	incq	%rsi			/ next byte
	testb	%r11b, %r11b		/ that byte was the null of s2:
	jz	.Ldone			/ s1 is already terminated
	jmp	.Lcopy			/ re-test alignment of %rsi
	.align	4
.Lterminate:
	movb	$0, (%rdi)		/ null termination
.Ldone:
	ret
	SET_SIZE(strncat)
152