1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/limits.h> 32 #include <sys/types.h> 33 #include <string.h> 34 35 /* 36 * Portable strlen() for 32-bit and 64-bit systems. 37 * 38 * Rationale: it is generally much more efficient to do word length 39 * operations and avoid branches on modern computer systems, as 40 * compared to byte-length operations with a lot of branches. 41 * 42 * The expression: 43 * 44 * ((x - 0x01....01) & ~x & 0x80....80) 45 * 46 * would evaluate to a non-zero value iff any of the bytes in the 47 * original word is zero. 48 * 49 * On multi-issue processors, we can divide the above expression into: 50 * a) (x - 0x01....01) 51 * b) (~x & 0x80....80) 52 * c) a & b 53 * 54 * Where, a) and b) can be partially computed in parallel. 55 * 56 * The algorithm above is found on "Hacker's Delight" by 57 * Henry S. Warren, Jr. 58 */ 59 60 /* Magic numbers for the algorithm */ 61 #if LONG_BIT == 32 62 static const unsigned long mask01 = 0x01010101; 63 static const unsigned long mask80 = 0x80808080; 64 #elif LONG_BIT == 64 65 static const unsigned long mask01 = 0x0101010101010101; 66 static const unsigned long mask80 = 0x8080808080808080; 67 #else 68 #error Unsupported word size 69 #endif 70 71 #define LONGPTR_MASK (sizeof(long) - 1) 72 73 /* 74 * Helper macro to return string length if we caught the zero 75 * byte. 76 */ 77 #define testbyte(x) \ 78 do { \ 79 if (p[x] == '\0') \ 80 return (p - str + x); \ 81 } while (0) 82 83 size_t 84 strlen(const char *str) 85 { 86 const char *p; 87 const unsigned long *lp; 88 long va, vb; 89 90 /* 91 * Before trying the hard (unaligned byte-by-byte access) way 92 * to figure out whether there is a nul character, try to see 93 * if there is a nul character is within this accessible word 94 * first. 95 * 96 * p and (p & ~LONGPTR_MASK) must be equally accessible since 97 * they always fall in the same memory page, as long as page 98 * boundaries is integral multiple of word size. 99 */ 100 lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK); 101 va = (*lp - mask01); 102 vb = ((~*lp) & mask80); 103 lp++; 104 if (va & vb) 105 /* Check if we have \0 in the first part */ 106 for (p = str; p < (const char *)lp; p++) 107 if (*p == '\0') 108 return (p - str); 109 110 /* Scan the rest of the string using word sized operation */ 111 for (; ; lp++) { 112 va = (*lp - mask01); 113 vb = ((~*lp) & mask80); 114 if (va & vb) { 115 p = (const char *)(lp); 116 testbyte(0); 117 testbyte(1); 118 testbyte(2); 119 testbyte(3); 120 #if (LONG_BIT >= 64) 121 testbyte(4); 122 testbyte(5); 123 testbyte(6); 124 testbyte(7); 125 #endif 126 } 127 } 128 129 /* NOTREACHED */ 130 return (0); 131 } 132