xref: /freebsd/lib/libc/string/strlen.c (revision 559a218c9b257775fb249b67945fe4a05b7a6b9f)
158f0484fSRodney W. Grimes /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3d915a14eSPedro F. Giffuni  *
4dabae226SXin LI  * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
558f0484fSRodney W. Grimes  *
658f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
758f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
858f0484fSRodney W. Grimes  * are met:
958f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1058f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1158f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1258f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1358f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1458f0484fSRodney W. Grimes  *
154c6a6021SXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
184c6a6021SXin LI  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2558f0484fSRodney W. Grimes  * SUCH DAMAGE.
2658f0484fSRodney W. Grimes  */
2758f0484fSRodney W. Grimes 
284c6a6021SXin LI #include <sys/limits.h>
294c6a6021SXin LI #include <sys/types.h>
3058f0484fSRodney W. Grimes #include <string.h>
3158f0484fSRodney W. Grimes 
324c6a6021SXin LI /*
334c6a6021SXin LI  * Portable strlen() for 32-bit and 64-bit systems.
344c6a6021SXin LI  *
354c6a6021SXin LI  * The expression:
364c6a6021SXin LI  *
374c6a6021SXin LI  *	((x - 0x01....01) & ~x & 0x80....80)
384c6a6021SXin LI  *
394c6a6021SXin LI  * would evaluate to a non-zero value iff any of the bytes in the
40dabae226SXin LI  * original word is zero.
414c6a6021SXin LI  *
42dabae226SXin LI  * The algorithm above is found on "Hacker's Delight" by
43dabae226SXin LI  * Henry S. Warren, Jr.
443acea07cSMateusz Guzik  *
453acea07cSMateusz Guzik  * Note: this leaves performance on the table and each architecture
463acea07cSMateusz Guzik  * would be best served with a tailor made routine instead, even if
473acea07cSMateusz Guzik  * using the same trick.
484c6a6021SXin LI  */
4958f0484fSRodney W. Grimes 
5033f0540bSMateusz Guzik /* Magic numbers for the algorithm */
514c6a6021SXin LI #if LONG_BIT == 32
524c6a6021SXin LI static const unsigned long mask01 = 0x01010101;
534c6a6021SXin LI static const unsigned long mask80 = 0x80808080;
544c6a6021SXin LI #elif LONG_BIT == 64
554c6a6021SXin LI static const unsigned long mask01 = 0x0101010101010101;
564c6a6021SXin LI static const unsigned long mask80 = 0x8080808080808080;
574c6a6021SXin LI #else
584c6a6021SXin LI #error Unsupported word size
594c6a6021SXin LI #endif
604c6a6021SXin LI 
614c6a6021SXin LI #define	LONGPTR_MASK (sizeof(long) - 1)
624c6a6021SXin LI 
6333f0540bSMateusz Guzik /*
6433f0540bSMateusz Guzik  * Helper macro to return string length if we caught the zero
6533f0540bSMateusz Guzik  * byte.
6633f0540bSMateusz Guzik  */
6733f0540bSMateusz Guzik #define testbyte(x)				\
6833f0540bSMateusz Guzik 	do {					\
6933f0540bSMateusz Guzik 		if (p[x] == '\0')		\
7033f0540bSMateusz Guzik 		    return (p - str + x);	\
7133f0540bSMateusz Guzik 	} while (0)
724c6a6021SXin LI 
734c6a6021SXin LI size_t
strlen(const char * str)744c6a6021SXin LI strlen(const char *str)
754c6a6021SXin LI {
7633f0540bSMateusz Guzik 	const char *p;
774c6a6021SXin LI 	const unsigned long *lp;
78dabae226SXin LI 	long va, vb;
794c6a6021SXin LI 
8033f0540bSMateusz Guzik 	/*
8133f0540bSMateusz Guzik 	 * Before trying the hard (unaligned byte-by-byte access) way
8233f0540bSMateusz Guzik 	 * to figure out whether there is a nul character, try to see
8333f0540bSMateusz Guzik 	 * if there is a nul character is within this accessible word
8433f0540bSMateusz Guzik 	 * first.
8533f0540bSMateusz Guzik 	 *
8633f0540bSMateusz Guzik 	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
8733f0540bSMateusz Guzik 	 * they always fall in the same memory page, as long as page
8833f0540bSMateusz Guzik 	 * boundaries is integral multiple of word size.
8933f0540bSMateusz Guzik 	 */
9033f0540bSMateusz Guzik 	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
9133f0540bSMateusz Guzik 	va = (*lp - mask01);
9233f0540bSMateusz Guzik 	vb = ((~*lp) & mask80);
9387c88b26SXin LI 	lp++;
9433f0540bSMateusz Guzik 	if (va & vb)
9533f0540bSMateusz Guzik 		/* Check if we have \0 in the first part */
9633f0540bSMateusz Guzik 		for (p = str; p < (const char *)lp; p++)
9733f0540bSMateusz Guzik 			if (*p == '\0')
9833f0540bSMateusz Guzik 				return (p - str);
994c6a6021SXin LI 
10033f0540bSMateusz Guzik 	/* Scan the rest of the string using word sized operation */
10187c88b26SXin LI 	for (; ; lp++) {
102dabae226SXin LI 		va = (*lp - mask01);
103dabae226SXin LI 		vb = ((~*lp) & mask80);
104dabae226SXin LI 		if (va & vb) {
10533f0540bSMateusz Guzik 			p = (const char *)(lp);
10633f0540bSMateusz Guzik 			testbyte(0);
10733f0540bSMateusz Guzik 			testbyte(1);
10833f0540bSMateusz Guzik 			testbyte(2);
10933f0540bSMateusz Guzik 			testbyte(3);
11033f0540bSMateusz Guzik #if (LONG_BIT >= 64)
11133f0540bSMateusz Guzik 			testbyte(4);
11233f0540bSMateusz Guzik 			testbyte(5);
11333f0540bSMateusz Guzik 			testbyte(6);
11433f0540bSMateusz Guzik 			testbyte(7);
11533f0540bSMateusz Guzik #endif
1164c6a6021SXin LI 		}
117dabae226SXin LI 	}
1184c6a6021SXin LI 
11933f0540bSMateusz Guzik 	/* NOTREACHED */
120481101b8SXin LI 	return (0);
12158f0484fSRodney W. Grimes }
122