xref: /freebsd/sys/libkern/strlen.c (revision 685dc743dc3b5645e34836464128e1c0558b404b)
1df8bae1dSRodney W. Grimes /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
46afdae41SXin LI  * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
5df8bae1dSRodney W. Grimes  *
6df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
7df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
8df8bae1dSRodney W. Grimes  * are met:
9df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
10df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
11df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
12df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
13df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
14df8bae1dSRodney W. Grimes  *
156afdae41SXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
186afdae41SXin LI  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
26df8bae1dSRodney W. Grimes  */
27df8bae1dSRodney W. Grimes 
28ab0de15bSDavid E. O'Brien #include <sys/cdefs.h>
29452bffb2SJohn Baldwin #include <sys/libkern.h>
306afdae41SXin LI #include <sys/limits.h>
316afdae41SXin LI 
326afdae41SXin LI /*
336afdae41SXin LI  * Portable strlen() for 32-bit and 64-bit systems.
346afdae41SXin LI  *
356afdae41SXin LI  * The expression:
366afdae41SXin LI  *
376afdae41SXin LI  *	((x - 0x01....01) & ~x & 0x80....80)
386afdae41SXin LI  *
396afdae41SXin LI  * would evaluate to a non-zero value iff any of the bytes in the
406afdae41SXin LI  * original word is zero.
416afdae41SXin LI  *
426afdae41SXin LI  * The algorithm above is found on "Hacker's Delight" by
436afdae41SXin LI  * Henry S. Warren, Jr.
443acea07cSMateusz Guzik  *
453acea07cSMateusz Guzik  * Note: this leaves performance on the table and each architecture
463acea07cSMateusz Guzik  * would be best served with a tailor made routine instead, even if
473acea07cSMateusz Guzik  * using the same trick.
486afdae41SXin LI  */
496afdae41SXin LI 
5033f0540bSMateusz Guzik /* Magic numbers for the algorithm */
516afdae41SXin LI #if LONG_BIT == 32
526afdae41SXin LI static const unsigned long mask01 = 0x01010101;
536afdae41SXin LI static const unsigned long mask80 = 0x80808080;
546afdae41SXin LI #elif LONG_BIT == 64
556afdae41SXin LI static const unsigned long mask01 = 0x0101010101010101;
566afdae41SXin LI static const unsigned long mask80 = 0x8080808080808080;
576afdae41SXin LI #else
586afdae41SXin LI #error Unsupported word size
596afdae41SXin LI #endif
606afdae41SXin LI 
616afdae41SXin LI #define	LONGPTR_MASK (sizeof(long) - 1)
626afdae41SXin LI 
6333f0540bSMateusz Guzik /*
6433f0540bSMateusz Guzik  * Helper macro to return string length if we caught the zero
6533f0540bSMateusz Guzik  * byte.
6633f0540bSMateusz Guzik  */
6733f0540bSMateusz Guzik #define testbyte(x)				\
6833f0540bSMateusz Guzik 	do {					\
6933f0540bSMateusz Guzik 		if (p[x] == '\0')		\
7033f0540bSMateusz Guzik 		    return (p - str + x);	\
7133f0540bSMateusz Guzik 	} while (0)
72df8bae1dSRodney W. Grimes 
size_t(strlen)73df8bae1dSRodney W. Grimes size_t
74849aef49SAndrew Turner (strlen)(const char *str)
75df8bae1dSRodney W. Grimes {
7633f0540bSMateusz Guzik 	const char *p;
776afdae41SXin LI 	const unsigned long *lp;
786afdae41SXin LI 	long va, vb;
79df8bae1dSRodney W. Grimes 
8033f0540bSMateusz Guzik 	/*
8133f0540bSMateusz Guzik 	 * Before trying the hard (unaligned byte-by-byte access) way
8233f0540bSMateusz Guzik 	 * to figure out whether there is a nul character, try to see
8333f0540bSMateusz Guzik 	 * if there is a nul character is within this accessible word
8433f0540bSMateusz Guzik 	 * first.
8533f0540bSMateusz Guzik 	 *
8633f0540bSMateusz Guzik 	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
8733f0540bSMateusz Guzik 	 * they always fall in the same memory page, as long as page
8833f0540bSMateusz Guzik 	 * boundaries is integral multiple of word size.
8933f0540bSMateusz Guzik 	 */
9033f0540bSMateusz Guzik 	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
9133f0540bSMateusz Guzik 	va = (*lp - mask01);
9233f0540bSMateusz Guzik 	vb = ((~*lp) & mask80);
936afdae41SXin LI 	lp++;
9433f0540bSMateusz Guzik 	if (va & vb)
9533f0540bSMateusz Guzik 		/* Check if we have \0 in the first part */
9633f0540bSMateusz Guzik 		for (p = str; p < (const char *)lp; p++)
9733f0540bSMateusz Guzik 			if (*p == '\0')
9833f0540bSMateusz Guzik 				return (p - str);
996afdae41SXin LI 
10033f0540bSMateusz Guzik 	/* Scan the rest of the string using word sized operation */
1016afdae41SXin LI 	for (; ; lp++) {
1026afdae41SXin LI 		va = (*lp - mask01);
1036afdae41SXin LI 		vb = ((~*lp) & mask80);
1046afdae41SXin LI 		if (va & vb) {
10533f0540bSMateusz Guzik 			p = (const char *)(lp);
10633f0540bSMateusz Guzik 			testbyte(0);
10733f0540bSMateusz Guzik 			testbyte(1);
10833f0540bSMateusz Guzik 			testbyte(2);
10933f0540bSMateusz Guzik 			testbyte(3);
11033f0540bSMateusz Guzik #if (LONG_BIT >= 64)
11133f0540bSMateusz Guzik 			testbyte(4);
11233f0540bSMateusz Guzik 			testbyte(5);
11333f0540bSMateusz Guzik 			testbyte(6);
11433f0540bSMateusz Guzik 			testbyte(7);
11533f0540bSMateusz Guzik #endif
1166afdae41SXin LI 		}
117df8bae1dSRodney W. Grimes 	}
118df8bae1dSRodney W. Grimes 
11933f0540bSMateusz Guzik 	/* NOTREACHED */
1206afdae41SXin LI 	return (0);
1216afdae41SXin LI }
122