1*4a5d661aSToomas Soome /*-
2*4a5d661aSToomas Soome * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
3*4a5d661aSToomas Soome * All rights reserved.
4*4a5d661aSToomas Soome *
5*4a5d661aSToomas Soome * Redistribution and use in source and binary forms, with or without
6*4a5d661aSToomas Soome * modification, are permitted provided that the following conditions
7*4a5d661aSToomas Soome * are met:
8*4a5d661aSToomas Soome * 1. Redistributions of source code must retain the above copyright
9*4a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer.
10*4a5d661aSToomas Soome * 2. Redistributions in binary form must reproduce the above copyright
11*4a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer in the
12*4a5d661aSToomas Soome * documentation and/or other materials provided with the distribution.
13*4a5d661aSToomas Soome *
14*4a5d661aSToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15*4a5d661aSToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*4a5d661aSToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*4a5d661aSToomas Soome * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18*4a5d661aSToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*4a5d661aSToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*4a5d661aSToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*4a5d661aSToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*4a5d661aSToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*4a5d661aSToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*4a5d661aSToomas Soome * SUCH DAMAGE.
25*4a5d661aSToomas Soome */
26*4a5d661aSToomas Soome
27*4a5d661aSToomas Soome #include <sys/cdefs.h>
28*4a5d661aSToomas Soome __FBSDID("$FreeBSD$");
29*4a5d661aSToomas Soome
30*4a5d661aSToomas Soome #include <sys/limits.h>
31*4a5d661aSToomas Soome #include <sys/types.h>
32*4a5d661aSToomas Soome #include <string.h>
33*4a5d661aSToomas Soome
34*4a5d661aSToomas Soome /*
35*4a5d661aSToomas Soome * Portable strlen() for 32-bit and 64-bit systems.
36*4a5d661aSToomas Soome *
37*4a5d661aSToomas Soome * Rationale: it is generally much more efficient to do word length
38*4a5d661aSToomas Soome * operations and avoid branches on modern computer systems, as
39*4a5d661aSToomas Soome * compared to byte-length operations with a lot of branches.
40*4a5d661aSToomas Soome *
41*4a5d661aSToomas Soome * The expression:
42*4a5d661aSToomas Soome *
43*4a5d661aSToomas Soome * ((x - 0x01....01) & ~x & 0x80....80)
44*4a5d661aSToomas Soome *
45*4a5d661aSToomas Soome * would evaluate to a non-zero value iff any of the bytes in the
46*4a5d661aSToomas Soome * original word is zero.
47*4a5d661aSToomas Soome *
48*4a5d661aSToomas Soome * On multi-issue processors, we can divide the above expression into:
49*4a5d661aSToomas Soome * a) (x - 0x01....01)
50*4a5d661aSToomas Soome * b) (~x & 0x80....80)
51*4a5d661aSToomas Soome * c) a & b
52*4a5d661aSToomas Soome *
53*4a5d661aSToomas Soome * Where, a) and b) can be partially computed in parallel.
54*4a5d661aSToomas Soome *
55*4a5d661aSToomas Soome * The algorithm above is found on "Hacker's Delight" by
56*4a5d661aSToomas Soome * Henry S. Warren, Jr.
57*4a5d661aSToomas Soome */
58*4a5d661aSToomas Soome
59*4a5d661aSToomas Soome /* Magic numbers for the algorithm */
60*4a5d661aSToomas Soome #if LONG_BIT == 32
61*4a5d661aSToomas Soome static const unsigned long mask01 = 0x01010101;
62*4a5d661aSToomas Soome static const unsigned long mask80 = 0x80808080;
63*4a5d661aSToomas Soome #elif LONG_BIT == 64
64*4a5d661aSToomas Soome static const unsigned long mask01 = 0x0101010101010101;
65*4a5d661aSToomas Soome static const unsigned long mask80 = 0x8080808080808080;
66*4a5d661aSToomas Soome #else
67*4a5d661aSToomas Soome #error Unsupported word size
68*4a5d661aSToomas Soome #endif
69*4a5d661aSToomas Soome
70*4a5d661aSToomas Soome #define LONGPTR_MASK (sizeof(long) - 1)
71*4a5d661aSToomas Soome
72*4a5d661aSToomas Soome /*
73*4a5d661aSToomas Soome * Helper macro to return string length if we caught the zero
74*4a5d661aSToomas Soome * byte.
75*4a5d661aSToomas Soome */
76*4a5d661aSToomas Soome #define testbyte(x) \
77*4a5d661aSToomas Soome do { \
78*4a5d661aSToomas Soome if (p[x] == '\0') \
79*4a5d661aSToomas Soome return (p - str + x); \
80*4a5d661aSToomas Soome } while (0)
81*4a5d661aSToomas Soome
82*4a5d661aSToomas Soome size_t
strlen(const char * str)83*4a5d661aSToomas Soome strlen(const char *str)
84*4a5d661aSToomas Soome {
85*4a5d661aSToomas Soome const char *p;
86*4a5d661aSToomas Soome const unsigned long *lp;
87*4a5d661aSToomas Soome long va, vb;
88*4a5d661aSToomas Soome
89*4a5d661aSToomas Soome /*
90*4a5d661aSToomas Soome * Before trying the hard (unaligned byte-by-byte access) way
91*4a5d661aSToomas Soome * to figure out whether there is a nul character, try to see
92*4a5d661aSToomas Soome * if there is a nul character is within this accessible word
93*4a5d661aSToomas Soome * first.
94*4a5d661aSToomas Soome *
95*4a5d661aSToomas Soome * p and (p & ~LONGPTR_MASK) must be equally accessible since
96*4a5d661aSToomas Soome * they always fall in the same memory page, as long as page
97*4a5d661aSToomas Soome * boundaries is integral multiple of word size.
98*4a5d661aSToomas Soome */
99*4a5d661aSToomas Soome lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
100*4a5d661aSToomas Soome va = (*lp - mask01);
101*4a5d661aSToomas Soome vb = ((~*lp) & mask80);
102*4a5d661aSToomas Soome lp++;
103*4a5d661aSToomas Soome if (va & vb)
104*4a5d661aSToomas Soome /* Check if we have \0 in the first part */
105*4a5d661aSToomas Soome for (p = str; p < (const char *)lp; p++)
106*4a5d661aSToomas Soome if (*p == '\0')
107*4a5d661aSToomas Soome return (p - str);
108*4a5d661aSToomas Soome
109*4a5d661aSToomas Soome /* Scan the rest of the string using word sized operation */
110*4a5d661aSToomas Soome for (; ; lp++) {
111*4a5d661aSToomas Soome va = (*lp - mask01);
112*4a5d661aSToomas Soome vb = ((~*lp) & mask80);
113*4a5d661aSToomas Soome if (va & vb) {
114*4a5d661aSToomas Soome p = (const char *)(lp);
115*4a5d661aSToomas Soome testbyte(0);
116*4a5d661aSToomas Soome testbyte(1);
117*4a5d661aSToomas Soome testbyte(2);
118*4a5d661aSToomas Soome testbyte(3);
119*4a5d661aSToomas Soome #if (LONG_BIT >= 64)
120*4a5d661aSToomas Soome testbyte(4);
121*4a5d661aSToomas Soome testbyte(5);
122*4a5d661aSToomas Soome testbyte(6);
123*4a5d661aSToomas Soome testbyte(7);
124*4a5d661aSToomas Soome #endif
125*4a5d661aSToomas Soome }
126*4a5d661aSToomas Soome }
127*4a5d661aSToomas Soome
128*4a5d661aSToomas Soome /* NOTREACHED */
129*4a5d661aSToomas Soome return (0);
130*4a5d661aSToomas Soome }
131