1*4a5d661aSToomas Soome /*- 2*4a5d661aSToomas Soome * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org> 3*4a5d661aSToomas Soome * All rights reserved. 4*4a5d661aSToomas Soome * 5*4a5d661aSToomas Soome * Redistribution and use in source and binary forms, with or without 6*4a5d661aSToomas Soome * modification, are permitted provided that the following conditions 7*4a5d661aSToomas Soome * are met: 8*4a5d661aSToomas Soome * 1. Redistributions of source code must retain the above copyright 9*4a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer. 10*4a5d661aSToomas Soome * 2. Redistributions in binary form must reproduce the above copyright 11*4a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer in the 12*4a5d661aSToomas Soome * documentation and/or other materials provided with the distribution. 13*4a5d661aSToomas Soome * 14*4a5d661aSToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15*4a5d661aSToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16*4a5d661aSToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17*4a5d661aSToomas Soome * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18*4a5d661aSToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19*4a5d661aSToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20*4a5d661aSToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21*4a5d661aSToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22*4a5d661aSToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23*4a5d661aSToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24*4a5d661aSToomas Soome * SUCH DAMAGE. 25*4a5d661aSToomas Soome */ 26*4a5d661aSToomas Soome 27*4a5d661aSToomas Soome #include <sys/cdefs.h> 28*4a5d661aSToomas Soome __FBSDID("$FreeBSD$"); 29*4a5d661aSToomas Soome 30*4a5d661aSToomas Soome #include <sys/limits.h> 31*4a5d661aSToomas Soome #include <sys/types.h> 32*4a5d661aSToomas Soome #include <string.h> 33*4a5d661aSToomas Soome 34*4a5d661aSToomas Soome /* 35*4a5d661aSToomas Soome * Portable strlen() for 32-bit and 64-bit systems. 36*4a5d661aSToomas Soome * 37*4a5d661aSToomas Soome * Rationale: it is generally much more efficient to do word length 38*4a5d661aSToomas Soome * operations and avoid branches on modern computer systems, as 39*4a5d661aSToomas Soome * compared to byte-length operations with a lot of branches. 40*4a5d661aSToomas Soome * 41*4a5d661aSToomas Soome * The expression: 42*4a5d661aSToomas Soome * 43*4a5d661aSToomas Soome * ((x - 0x01....01) & ~x & 0x80....80) 44*4a5d661aSToomas Soome * 45*4a5d661aSToomas Soome * would evaluate to a non-zero value iff any of the bytes in the 46*4a5d661aSToomas Soome * original word is zero. 47*4a5d661aSToomas Soome * 48*4a5d661aSToomas Soome * On multi-issue processors, we can divide the above expression into: 49*4a5d661aSToomas Soome * a) (x - 0x01....01) 50*4a5d661aSToomas Soome * b) (~x & 0x80....80) 51*4a5d661aSToomas Soome * c) a & b 52*4a5d661aSToomas Soome * 53*4a5d661aSToomas Soome * Where, a) and b) can be partially computed in parallel. 54*4a5d661aSToomas Soome * 55*4a5d661aSToomas Soome * The algorithm above is found on "Hacker's Delight" by 56*4a5d661aSToomas Soome * Henry S. Warren, Jr. 57*4a5d661aSToomas Soome */ 58*4a5d661aSToomas Soome 59*4a5d661aSToomas Soome /* Magic numbers for the algorithm */ 60*4a5d661aSToomas Soome #if LONG_BIT == 32 61*4a5d661aSToomas Soome static const unsigned long mask01 = 0x01010101; 62*4a5d661aSToomas Soome static const unsigned long mask80 = 0x80808080; 63*4a5d661aSToomas Soome #elif LONG_BIT == 64 64*4a5d661aSToomas Soome static const unsigned long mask01 = 0x0101010101010101; 65*4a5d661aSToomas Soome static const unsigned long mask80 = 0x8080808080808080; 66*4a5d661aSToomas Soome #else 67*4a5d661aSToomas Soome #error Unsupported word size 68*4a5d661aSToomas Soome #endif 69*4a5d661aSToomas Soome 70*4a5d661aSToomas Soome #define LONGPTR_MASK (sizeof(long) - 1) 71*4a5d661aSToomas Soome 72*4a5d661aSToomas Soome /* 73*4a5d661aSToomas Soome * Helper macro to return string length if we caught the zero 74*4a5d661aSToomas Soome * byte. 75*4a5d661aSToomas Soome */ 76*4a5d661aSToomas Soome #define testbyte(x) \ 77*4a5d661aSToomas Soome do { \ 78*4a5d661aSToomas Soome if (p[x] == '\0') \ 79*4a5d661aSToomas Soome return (p - str + x); \ 80*4a5d661aSToomas Soome } while (0) 81*4a5d661aSToomas Soome 82*4a5d661aSToomas Soome size_t 83*4a5d661aSToomas Soome strlen(const char *str) 84*4a5d661aSToomas Soome { 85*4a5d661aSToomas Soome const char *p; 86*4a5d661aSToomas Soome const unsigned long *lp; 87*4a5d661aSToomas Soome long va, vb; 88*4a5d661aSToomas Soome 89*4a5d661aSToomas Soome /* 90*4a5d661aSToomas Soome * Before trying the hard (unaligned byte-by-byte access) way 91*4a5d661aSToomas Soome * to figure out whether there is a nul character, try to see 92*4a5d661aSToomas Soome * if there is a nul character is within this accessible word 93*4a5d661aSToomas Soome * first. 94*4a5d661aSToomas Soome * 95*4a5d661aSToomas Soome * p and (p & ~LONGPTR_MASK) must be equally accessible since 96*4a5d661aSToomas Soome * they always fall in the same memory page, as long as page 97*4a5d661aSToomas Soome * boundaries is integral multiple of word size. 98*4a5d661aSToomas Soome */ 99*4a5d661aSToomas Soome lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK); 100*4a5d661aSToomas Soome va = (*lp - mask01); 101*4a5d661aSToomas Soome vb = ((~*lp) & mask80); 102*4a5d661aSToomas Soome lp++; 103*4a5d661aSToomas Soome if (va & vb) 104*4a5d661aSToomas Soome /* Check if we have \0 in the first part */ 105*4a5d661aSToomas Soome for (p = str; p < (const char *)lp; p++) 106*4a5d661aSToomas Soome if (*p == '\0') 107*4a5d661aSToomas Soome return (p - str); 108*4a5d661aSToomas Soome 109*4a5d661aSToomas Soome /* Scan the rest of the string using word sized operation */ 110*4a5d661aSToomas Soome for (; ; lp++) { 111*4a5d661aSToomas Soome va = (*lp - mask01); 112*4a5d661aSToomas Soome vb = ((~*lp) & mask80); 113*4a5d661aSToomas Soome if (va & vb) { 114*4a5d661aSToomas Soome p = (const char *)(lp); 115*4a5d661aSToomas Soome testbyte(0); 116*4a5d661aSToomas Soome testbyte(1); 117*4a5d661aSToomas Soome testbyte(2); 118*4a5d661aSToomas Soome testbyte(3); 119*4a5d661aSToomas Soome #if (LONG_BIT >= 64) 120*4a5d661aSToomas Soome testbyte(4); 121*4a5d661aSToomas Soome testbyte(5); 122*4a5d661aSToomas Soome testbyte(6); 123*4a5d661aSToomas Soome testbyte(7); 124*4a5d661aSToomas Soome #endif 125*4a5d661aSToomas Soome } 126*4a5d661aSToomas Soome } 127*4a5d661aSToomas Soome 128*4a5d661aSToomas Soome /* NOTREACHED */ 129*4a5d661aSToomas Soome return (0); 130*4a5d661aSToomas Soome } 131