xref: /freebsd/lib/libc/string/strlen.c (revision 4d846d260e2b9a3d4d0a701462568268cbfe7a5b)
158f0484fSRodney W. Grimes /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3d915a14eSPedro F. Giffuni  *
4dabae226SXin LI  * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
558f0484fSRodney W. Grimes  *
658f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
758f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
858f0484fSRodney W. Grimes  * are met:
958f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1058f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1158f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1258f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1358f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1458f0484fSRodney W. Grimes  *
154c6a6021SXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
184c6a6021SXin LI  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2558f0484fSRodney W. Grimes  * SUCH DAMAGE.
2658f0484fSRodney W. Grimes  */
2758f0484fSRodney W. Grimes 
2858f0484fSRodney W. Grimes #include <sys/cdefs.h>
29de5fe5d5SDavid E. O'Brien __FBSDID("$FreeBSD$");
30de5fe5d5SDavid E. O'Brien 
314c6a6021SXin LI #include <sys/limits.h>
324c6a6021SXin LI #include <sys/types.h>
3358f0484fSRodney W. Grimes #include <string.h>
3458f0484fSRodney W. Grimes 
354c6a6021SXin LI /*
364c6a6021SXin LI  * Portable strlen() for 32-bit and 64-bit systems.
374c6a6021SXin LI  *
384c6a6021SXin LI  * The expression:
394c6a6021SXin LI  *
404c6a6021SXin LI  *	((x - 0x01....01) & ~x & 0x80....80)
414c6a6021SXin LI  *
424c6a6021SXin LI  * would evaluate to a non-zero value iff any of the bytes in the
43dabae226SXin LI  * original word is zero.
444c6a6021SXin LI  *
45dabae226SXin LI  * The algorithm above is found on "Hacker's Delight" by
46dabae226SXin LI  * Henry S. Warren, Jr.
473acea07cSMateusz Guzik  *
483acea07cSMateusz Guzik  * Note: this leaves performance on the table and each architecture
493acea07cSMateusz Guzik  * would be best served with a tailor made routine instead, even if
503acea07cSMateusz Guzik  * using the same trick.
514c6a6021SXin LI  */
5258f0484fSRodney W. Grimes 
5333f0540bSMateusz Guzik /* Magic numbers for the algorithm */
544c6a6021SXin LI #if LONG_BIT == 32
554c6a6021SXin LI static const unsigned long mask01 = 0x01010101;
564c6a6021SXin LI static const unsigned long mask80 = 0x80808080;
574c6a6021SXin LI #elif LONG_BIT == 64
584c6a6021SXin LI static const unsigned long mask01 = 0x0101010101010101;
594c6a6021SXin LI static const unsigned long mask80 = 0x8080808080808080;
604c6a6021SXin LI #else
614c6a6021SXin LI #error Unsupported word size
624c6a6021SXin LI #endif
634c6a6021SXin LI 
644c6a6021SXin LI #define	LONGPTR_MASK (sizeof(long) - 1)
654c6a6021SXin LI 
6633f0540bSMateusz Guzik /*
6733f0540bSMateusz Guzik  * Helper macro to return string length if we caught the zero
6833f0540bSMateusz Guzik  * byte.
6933f0540bSMateusz Guzik  */
7033f0540bSMateusz Guzik #define testbyte(x)				\
7133f0540bSMateusz Guzik 	do {					\
7233f0540bSMateusz Guzik 		if (p[x] == '\0')		\
7333f0540bSMateusz Guzik 		    return (p - str + x);	\
7433f0540bSMateusz Guzik 	} while (0)
754c6a6021SXin LI 
764c6a6021SXin LI size_t
774c6a6021SXin LI strlen(const char *str)
784c6a6021SXin LI {
7933f0540bSMateusz Guzik 	const char *p;
804c6a6021SXin LI 	const unsigned long *lp;
81dabae226SXin LI 	long va, vb;
824c6a6021SXin LI 
8333f0540bSMateusz Guzik 	/*
8433f0540bSMateusz Guzik 	 * Before trying the hard (unaligned byte-by-byte access) way
8533f0540bSMateusz Guzik 	 * to figure out whether there is a nul character, try to see
8633f0540bSMateusz Guzik 	 * if there is a nul character is within this accessible word
8733f0540bSMateusz Guzik 	 * first.
8833f0540bSMateusz Guzik 	 *
8933f0540bSMateusz Guzik 	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
9033f0540bSMateusz Guzik 	 * they always fall in the same memory page, as long as page
9133f0540bSMateusz Guzik 	 * boundaries is integral multiple of word size.
9233f0540bSMateusz Guzik 	 */
9333f0540bSMateusz Guzik 	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
9433f0540bSMateusz Guzik 	va = (*lp - mask01);
9533f0540bSMateusz Guzik 	vb = ((~*lp) & mask80);
9687c88b26SXin LI 	lp++;
9733f0540bSMateusz Guzik 	if (va & vb)
9833f0540bSMateusz Guzik 		/* Check if we have \0 in the first part */
9933f0540bSMateusz Guzik 		for (p = str; p < (const char *)lp; p++)
10033f0540bSMateusz Guzik 			if (*p == '\0')
10133f0540bSMateusz Guzik 				return (p - str);
1024c6a6021SXin LI 
10333f0540bSMateusz Guzik 	/* Scan the rest of the string using word sized operation */
10487c88b26SXin LI 	for (; ; lp++) {
105dabae226SXin LI 		va = (*lp - mask01);
106dabae226SXin LI 		vb = ((~*lp) & mask80);
107dabae226SXin LI 		if (va & vb) {
10833f0540bSMateusz Guzik 			p = (const char *)(lp);
10933f0540bSMateusz Guzik 			testbyte(0);
11033f0540bSMateusz Guzik 			testbyte(1);
11133f0540bSMateusz Guzik 			testbyte(2);
11233f0540bSMateusz Guzik 			testbyte(3);
11333f0540bSMateusz Guzik #if (LONG_BIT >= 64)
11433f0540bSMateusz Guzik 			testbyte(4);
11533f0540bSMateusz Guzik 			testbyte(5);
11633f0540bSMateusz Guzik 			testbyte(6);
11733f0540bSMateusz Guzik 			testbyte(7);
11833f0540bSMateusz Guzik #endif
1194c6a6021SXin LI 		}
120dabae226SXin LI 	}
1214c6a6021SXin LI 
12233f0540bSMateusz Guzik 	/* NOTREACHED */
123481101b8SXin LI 	return (0);
12458f0484fSRodney W. Grimes }
125