xref: /freebsd/lib/libc/string/strlen.c (revision 4c6a60218c9c3ae155159abd672526a5432dacff)
158f0484fSRodney W. Grimes /*-
24c6a6021SXin LI  * Copyright (c) 2009 Xin LI <delphij@FreeBSD.org>
34c6a6021SXin LI  * All rights reserved.
458f0484fSRodney W. Grimes  *
558f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
658f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
758f0484fSRodney W. Grimes  * are met:
858f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
958f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1058f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1158f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1258f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1358f0484fSRodney W. Grimes  *
144c6a6021SXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1558f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1658f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
174c6a6021SXin LI  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1858f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1958f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2058f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2158f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2258f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2358f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2458f0484fSRodney W. Grimes  * SUCH DAMAGE.
2558f0484fSRodney W. Grimes  */
2658f0484fSRodney W. Grimes 
2758f0484fSRodney W. Grimes #include <sys/cdefs.h>
28de5fe5d5SDavid E. O'Brien __FBSDID("$FreeBSD$");
29de5fe5d5SDavid E. O'Brien 
304c6a6021SXin LI #include <sys/limits.h>
314c6a6021SXin LI #include <sys/types.h>
3258f0484fSRodney W. Grimes #include <string.h>
3358f0484fSRodney W. Grimes 
344c6a6021SXin LI /*
354c6a6021SXin LI  * Portable strlen() for 32-bit and 64-bit systems.
364c6a6021SXin LI  *
374c6a6021SXin LI  * Rationale: it is generally much more efficient to do word length
384c6a6021SXin LI  * operations and avoid branches on modern computer systems, as
394c6a6021SXin LI  * compared to byte-length operations with a lot of branches.
404c6a6021SXin LI  *
414c6a6021SXin LI  * The expression:
424c6a6021SXin LI  *
434c6a6021SXin LI  *	((x - 0x01....01) & ~x & 0x80....80)
444c6a6021SXin LI  *
454c6a6021SXin LI  * would evaluate to a non-zero value iff any of the bytes in the
464c6a6021SXin LI  * original word is zero.  However, we can further reduce ~1/3 of
474c6a6021SXin LI  * time if we consider that strlen() usually operate on 7-bit ASCII
484c6a6021SXin LI  * by employing the following expression, which allows false positive
494c6a6021SXin LI  * when high bit of 1 and use the tail case to catch these case:
504c6a6021SXin LI  *
514c6a6021SXin LI  *	((x - 0x01....01) & 0x80....80)
524c6a6021SXin LI  *
534c6a6021SXin LI  * This is more than 5.2 times as compared to the raw implementation
544c6a6021SXin LI  * on Intel T7300 under EM64T mode for strings longer than word length.
554c6a6021SXin LI  */
5658f0484fSRodney W. Grimes 
574c6a6021SXin LI /* Magic numbers for the algorithm */
584c6a6021SXin LI #if LONG_BIT == 32
594c6a6021SXin LI static const unsigned long mask01 = 0x01010101;
604c6a6021SXin LI static const unsigned long mask80 = 0x80808080;
614c6a6021SXin LI #elif LONG_BIT == 64
624c6a6021SXin LI static const unsigned long mask01 = 0x0101010101010101;
634c6a6021SXin LI static const unsigned long mask80 = 0x8080808080808080;
644c6a6021SXin LI #else
654c6a6021SXin LI #error Unsupported word size
664c6a6021SXin LI #endif
674c6a6021SXin LI 
684c6a6021SXin LI #define	LONGPTR_MASK (sizeof(long) - 1)
694c6a6021SXin LI 
704c6a6021SXin LI /*
714c6a6021SXin LI  * Helper macro to return string length if we caught the zero
724c6a6021SXin LI  * byte.
734c6a6021SXin LI  */
744c6a6021SXin LI #define testbyte(x)				\
754c6a6021SXin LI 	do {					\
764c6a6021SXin LI 		if (p[x] == '\0')		\
774c6a6021SXin LI 		    return (p - str + x);	\
784c6a6021SXin LI 	} while (0)
794c6a6021SXin LI 
804c6a6021SXin LI size_t
814c6a6021SXin LI strlen(const char *str)
824c6a6021SXin LI {
834c6a6021SXin LI 	const char *p;
844c6a6021SXin LI 	const unsigned long *lp;
854c6a6021SXin LI 
864c6a6021SXin LI 	/* Skip the first few bytes until we have an aligned p */
874c6a6021SXin LI 	for (p = str; (uintptr_t)p & LONGPTR_MASK; p++)
884c6a6021SXin LI 	    if (*p == '\0')
894c6a6021SXin LI 		return (p - str);
904c6a6021SXin LI 
914c6a6021SXin LI 	/* Scan the rest of the string using word sized operation */
924c6a6021SXin LI 	for (lp = (const unsigned long *)p; ; lp++)
934c6a6021SXin LI 	    if ((*lp - mask01) & mask80) {
944c6a6021SXin LI 		p = (const char *)(lp);
954c6a6021SXin LI 		testbyte(0);
964c6a6021SXin LI 		testbyte(1);
974c6a6021SXin LI 		testbyte(2);
984c6a6021SXin LI 		testbyte(3);
994c6a6021SXin LI #if (LONG_BIT >= 64)
1004c6a6021SXin LI 		testbyte(4);
1014c6a6021SXin LI 		testbyte(5);
1024c6a6021SXin LI 		testbyte(6);
1034c6a6021SXin LI 		testbyte(7);
1044c6a6021SXin LI #endif
1054c6a6021SXin LI 	    }
1064c6a6021SXin LI 
1074c6a6021SXin LI 	/* NOTREACHED */
1084c6a6021SXin LI 	return 0;
10958f0484fSRodney W. Grimes }
11058f0484fSRodney W. Grimes 
111