xref: /illumos-gate/usr/src/lib/libc/sparc/gen/strlen.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*5d9d9091SRichard Lowe * Use is subject to license terms.
25*5d9d9091SRichard Lowe */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"strlen.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe/*
30*5d9d9091SRichard Lowe * strlen(s)
31*5d9d9091SRichard Lowe *
32*5d9d9091SRichard Lowe * Given string s, return length (not including the terminating null).
33*5d9d9091SRichard Lowe *
34*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program strlen
35*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
36*5d9d9091SRichard Lowe *
37*5d9d9091SRichard Lowe *	size_t
38*5d9d9091SRichard Lowe *	strlen(s)
39*5d9d9091SRichard Lowe *	register const char *s;
40*5d9d9091SRichard Lowe *	{
41*5d9d9091SRichard Lowe *		register const char *s0 = s + 1;
42*5d9d9091SRichard Lowe *
43*5d9d9091SRichard Lowe *		while (*s++ != '\0')
44*5d9d9091SRichard Lowe *			;
45*5d9d9091SRichard Lowe *		return (s - s0);
46*5d9d9091SRichard Lowe *	}
47*5d9d9091SRichard Lowe */
48*5d9d9091SRichard Lowe
49*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
50*5d9d9091SRichard Lowe
51*5d9d9091SRichard Lowe	! The object of strlen is to, as quickly as possible, find the
52*5d9d9091SRichard Lowe	! null byte.  To this end, we attempt to get our string aligned
53*5d9d9091SRichard Lowe	! and then blast across it using Alan Mycroft's algorithm for
54*5d9d9091SRichard Lowe	! finding null bytes. If we are not aligned, the string is
55*5d9d9091SRichard Lowe	! checked a byte at a time until it is.  Once this occurs,
56*5d9d9091SRichard Lowe	! we can proceed word-wise across it.  Once a word with a
57*5d9d9091SRichard Lowe	! zero byte has been found, we then check the word a byte
58*5d9d9091SRichard Lowe	! at a time until we've located the zero byte, and return
59*5d9d9091SRichard Lowe	! the proper length.
60*5d9d9091SRichard Lowe
61*5d9d9091SRichard Lowe	.align 32
	!
	! strlen: count the bytes that precede the terminating null.
	!
	! In:	%o0 = s, the start of the string
	! Out:	%o0 = cursor - s, computed in the delay slot of the return
	!
	! Register use:
	!	%o1	data most recently loaded (byte, half-word or word)
	!	%o2	cursor into the string
	!	%o3,%g1	scratch for the zero-byte test
	!	%o4	s & 3 at first, then 0x01010101, then a one-byte mask
	!	%o5	0x80808080
	!
	! The instruction after every branch is its delay slot.  For the
	! conditional branches marked ",a" the delay slot is executed only
	! when the branch is taken; for the others it is always executed.
	!
	! Word loads are only issued once the cursor is word aligned, so
	! they may read up to three bytes past the terminator but never
	! beyond the aligned word that contains it.
	!
62*5d9d9091SRichard Lowe	ENTRY(strlen)
63*5d9d9091SRichard Lowe	andcc		%o0, 3, %o4	! %o4 = s & 3; is src word aligned
64*5d9d9091SRichard Lowe	bz,pt		%icc, .nowalgnd
65*5d9d9091SRichard Lowe	mov		%o0, %o2	! (delay slot) cursor starts at s
66*5d9d9091SRichard Lowe
67*5d9d9091SRichard Lowe	cmp		%o4, 2		! is src half-word aligned
68*5d9d9091SRichard Lowe	be,a,pn		%icc, .s2algn
69*5d9d9091SRichard Lowe	lduh		[%o2], %o1	! (annulled delay slot) two bytes at s, only if branch taken
70*5d9d9091SRichard Lowe
71*5d9d9091SRichard Lowe	ldub		[%o2], %o1	! s & 3 is 1 or 3: fetch the single leading byte
72*5d9d9091SRichard Lowe	tst		%o1		! byte zero?
73*5d9d9091SRichard Lowe	bz,pn		%icc, .done	! yes: cursor == s, so the result is 0
74*5d9d9091SRichard Lowe	cmp		%o4, 3		! (delay slot) sets icc for the branch below: is s & 3 == 3
75*5d9d9091SRichard Lowe
76*5d9d9091SRichard Lowe	be,pn		%icc, .nowalgnd	! yes: s + 1 is word aligned
77*5d9d9091SRichard Lowe	inc		1, %o2		! (delay slot) step past the byte just checked
78*5d9d9091SRichard Lowe
79*5d9d9091SRichard Lowe	lduh		[%o2], %o1	! s & 3 was 1: s + 1 is half-word aligned
80*5d9d9091SRichard Lowe
81*5d9d9091SRichard Lowe.s2algn:
	! %o1 = half-word at the cursor.  The upper byte is treated as the
	! byte at [%o2] and the lower byte as the byte at [%o2 + 1].
82*5d9d9091SRichard Lowe	srl		%o1, 8, %o4	! %o4 = upper byte
83*5d9d9091SRichard Lowe	tst		%o4
84*5d9d9091SRichard Lowe	bz,pn		%icc, .done	! byte at [%o2] is the terminator
85*5d9d9091SRichard Lowe	andcc		%o1, 0xff, %g0	! (delay slot) sets icc for the branch below: lower byte zero?
86*5d9d9091SRichard Lowe
87*5d9d9091SRichard Lowe	bz,pn		%icc, .done	! byte at [%o2 + 1] is the terminator
88*5d9d9091SRichard Lowe	inc		1, %o2		! (delay slot) count the upper byte
89*5d9d9091SRichard Lowe
90*5d9d9091SRichard Lowe	inc		1, %o2		! count the lower byte; cursor is now word aligned
91*5d9d9091SRichard Lowe
92*5d9d9091SRichard Lowe.nowalgnd:
	! The cursor is word aligned from here on.  Build the two constants
	! and test the first word before entering the loop.
93*5d9d9091SRichard Lowe	ld		[%o2], %o1	! first aligned word
94*5d9d9091SRichard Lowe	sethi		%hi(0x01010101), %o4
95*5d9d9091SRichard Lowe	sethi		%hi(0x80808080), %o5
96*5d9d9091SRichard Lowe	or		%o4, %lo(0x01010101), %o4	! %o4 = 0x01010101
97*5d9d9091SRichard Lowe	or		%o5, %lo(0x80808080), %o5	! %o5 = 0x80808080
98*5d9d9091SRichard Lowe
99*5d9d9091SRichard Lowe	andn		%o5, %o1, %o3	! %o3 = ~word & 0x80808080
100*5d9d9091SRichard Lowe	sub		%o1, %o4, %g1	! %g1 = word - 0x01010101
101*5d9d9091SRichard Lowe	andcc		%o3, %g1, %g0	! nonzero iff some byte of the word is zero
102*5d9d9091SRichard Lowe	bnz,a,pn	%icc, .nullfound
103*5d9d9091SRichard Lowe	sethi		%hi(0xff000000), %o4	! (annulled delay slot) first-byte mask, only if branch taken
104*5d9d9091SRichard Lowe
105*5d9d9091SRichard Lowe	ld		[%o2+4], %o1	! preload the second word
106*5d9d9091SRichard Lowe	inc		4, %o2		! cursor = address of the word now in %o1
107*5d9d9091SRichard Lowe
108*5d9d9091SRichard Lowe.loop:						! this should be aligned to 32
	! Thirty-two instructions sit between the entry point and this
	! label, so with the 32-byte alignment requested above the loop
	! starts on a 32-byte boundary, provided the entry macro emits no
	! code of its own (not verifiable from this file).  Adding or
	! removing an instruction above this point changes that.
	!
	! Each pass tests the word in %o1; the cursor is advanced first, so
	! during the test it is four bytes past the address of that word.
109*5d9d9091SRichard Lowe	inc		4, %o2
110*5d9d9091SRichard Lowe	andn		%o5, %o1, %o3		! %o3 = ~word & 0x80808080
111*5d9d9091SRichard Lowe	sub		%o1, %o4, %g1		! %g1 = word - 0x01010101
112*5d9d9091SRichard Lowe	andcc		%o3, %g1, %g0
113*5d9d9091SRichard Lowe	bz,a,pt		%icc, .loop	! no zero byte in this word: keep going
114*5d9d9091SRichard Lowe	ld		[%o2], %o1	! (annulled delay slot) next word, only if looping
115*5d9d9091SRichard Lowe
116*5d9d9091SRichard Lowe	dec		4, %o2		! back up to the word that holds the zero byte
117*5d9d9091SRichard Lowe	sethi		%hi(0xff000000), %o4	! %o4 = mask for the first byte
118*5d9d9091SRichard Lowe.nullfound:
	! %o1 = a word known to contain a zero byte, %o2 = its address,
	! %o4 = 0xff000000.  Walk the mask down one byte at a time; the
	! cursor is bumped in the delay slots so that it ends up on the
	! zero byte itself.
119*5d9d9091SRichard Lowe	andcc		%o1, %o4, %g0
120*5d9d9091SRichard Lowe	bz,pn		%icc, .done		! first byte zero
121*5d9d9091SRichard Lowe	srl		%o4, 8, %o4	! (delay slot) mask = 0x00ff0000
122*5d9d9091SRichard Lowe
123*5d9d9091SRichard Lowe	andcc		%o1, %o4, %g0
124*5d9d9091SRichard Lowe	bz,pn		%icc, .done		! second byte zero
125*5d9d9091SRichard Lowe	inc		1, %o2		! (delay slot) count the first byte
126*5d9d9091SRichard Lowe
127*5d9d9091SRichard Lowe	srl		%o4, 8, %o4	! mask = 0x0000ff00
128*5d9d9091SRichard Lowe	andcc		%o1, %o4, %g0
129*5d9d9091SRichard Lowe	bz,pn		%icc, .done		! third byte zero
130*5d9d9091SRichard Lowe	inc		1, %o2		! (delay slot) count the second byte
131*5d9d9091SRichard Lowe
132*5d9d9091SRichard Lowe	inc		1, %o2			! fourth byte zero; count the third byte
133*5d9d9091SRichard Lowe.done:
	! The cursor points at the terminating null on every path here.
134*5d9d9091SRichard Lowe	retl
135*5d9d9091SRichard Lowe	sub		%o2, %o0, %o0	! (delay slot) return cursor - s
136*5d9d9091SRichard Lowe	SET_SIZE(strlen)
137*5d9d9091SRichard Lowe
138