158f0484fSRodney W. Grimes /*- 24c6a6021SXin LI * Copyright (c) 2009 Xin LI <delphij@FreeBSD.org> 34c6a6021SXin LI * All rights reserved. 458f0484fSRodney W. Grimes * 558f0484fSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 658f0484fSRodney W. Grimes * modification, are permitted provided that the following conditions 758f0484fSRodney W. Grimes * are met: 858f0484fSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 958f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 1058f0484fSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 1158f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 1258f0484fSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1358f0484fSRodney W. Grimes * 144c6a6021SXin LI * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1558f0484fSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1658f0484fSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 174c6a6021SXin LI * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1858f0484fSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1958f0484fSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2058f0484fSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2158f0484fSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2258f0484fSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2358f0484fSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2458f0484fSRodney W. Grimes * SUCH DAMAGE. 2558f0484fSRodney W. Grimes */ 2658f0484fSRodney W. Grimes 2758f0484fSRodney W. Grimes #include <sys/cdefs.h> 28de5fe5d5SDavid E. O'Brien __FBSDID("$FreeBSD$"); 29de5fe5d5SDavid E. O'Brien 304c6a6021SXin LI #include <sys/limits.h> 314c6a6021SXin LI #include <sys/types.h> 3258f0484fSRodney W. Grimes #include <string.h> 3358f0484fSRodney W. Grimes 344c6a6021SXin LI /* 354c6a6021SXin LI * Portable strlen() for 32-bit and 64-bit systems. 364c6a6021SXin LI * 374c6a6021SXin LI * Rationale: it is generally much more efficient to do word length 384c6a6021SXin LI * operations and avoid branches on modern computer systems, as 394c6a6021SXin LI * compared to byte-length operations with a lot of branches. 404c6a6021SXin LI * 414c6a6021SXin LI * The expression: 424c6a6021SXin LI * 434c6a6021SXin LI * ((x - 0x01....01) & ~x & 0x80....80) 444c6a6021SXin LI * 454c6a6021SXin LI * would evaluate to a non-zero value iff any of the bytes in the 464c6a6021SXin LI * original word is zero. However, we can further reduce ~1/3 of 474c6a6021SXin LI * time if we consider that strlen() usually operate on 7-bit ASCII 484c6a6021SXin LI * by employing the following expression, which allows false positive 494c6a6021SXin LI * when high bit of 1 and use the tail case to catch these case: 504c6a6021SXin LI * 514c6a6021SXin LI * ((x - 0x01....01) & 0x80....80) 524c6a6021SXin LI * 534c6a6021SXin LI * This is more than 5.2 times as compared to the raw implementation 544c6a6021SXin LI * on Intel T7300 under EM64T mode for strings longer than word length. 554c6a6021SXin LI */ 5658f0484fSRodney W. Grimes 574c6a6021SXin LI /* Magic numbers for the algorithm */ 584c6a6021SXin LI #if LONG_BIT == 32 594c6a6021SXin LI static const unsigned long mask01 = 0x01010101; 604c6a6021SXin LI static const unsigned long mask80 = 0x80808080; 614c6a6021SXin LI #elif LONG_BIT == 64 624c6a6021SXin LI static const unsigned long mask01 = 0x0101010101010101; 634c6a6021SXin LI static const unsigned long mask80 = 0x8080808080808080; 644c6a6021SXin LI #else 654c6a6021SXin LI #error Unsupported word size 664c6a6021SXin LI #endif 674c6a6021SXin LI 684c6a6021SXin LI #define LONGPTR_MASK (sizeof(long) - 1) 694c6a6021SXin LI 704c6a6021SXin LI /* 714c6a6021SXin LI * Helper macro to return string length if we caught the zero 724c6a6021SXin LI * byte. 734c6a6021SXin LI */ 744c6a6021SXin LI #define testbyte(x) \ 754c6a6021SXin LI do { \ 764c6a6021SXin LI if (p[x] == '\0') \ 774c6a6021SXin LI return (p - str + x); \ 784c6a6021SXin LI } while (0) 794c6a6021SXin LI 804c6a6021SXin LI size_t 814c6a6021SXin LI strlen(const char *str) 824c6a6021SXin LI { 834c6a6021SXin LI const char *p; 844c6a6021SXin LI const unsigned long *lp; 854c6a6021SXin LI 864c6a6021SXin LI /* Skip the first few bytes until we have an aligned p */ 874c6a6021SXin LI for (p = str; (uintptr_t)p & LONGPTR_MASK; p++) 884c6a6021SXin LI if (*p == '\0') 894c6a6021SXin LI return (p - str); 904c6a6021SXin LI 914c6a6021SXin LI /* Scan the rest of the string using word sized operation */ 924c6a6021SXin LI for (lp = (const unsigned long *)p; ; lp++) 934c6a6021SXin LI if ((*lp - mask01) & mask80) { 944c6a6021SXin LI p = (const char *)(lp); 954c6a6021SXin LI testbyte(0); 964c6a6021SXin LI testbyte(1); 974c6a6021SXin LI testbyte(2); 984c6a6021SXin LI testbyte(3); 994c6a6021SXin LI #if (LONG_BIT >= 64) 1004c6a6021SXin LI testbyte(4); 1014c6a6021SXin LI testbyte(5); 1024c6a6021SXin LI testbyte(6); 1034c6a6021SXin LI testbyte(7); 1044c6a6021SXin LI #endif 1054c6a6021SXin LI } 1064c6a6021SXin LI 1074c6a6021SXin LI /* NOTREACHED */ 1084c6a6021SXin LI return 0; 10958f0484fSRodney W. Grimes } 11058f0484fSRodney W. Grimes 111