1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/limits.h> 33 #include <sys/types.h> 34 #include <string.h> 35 36 /* 37 * Portable strlen() for 32-bit and 64-bit systems. 38 * 39 * Rationale: it is generally much more efficient to do word length 40 * operations and avoid branches on modern computer systems, as 41 * compared to byte-length operations with a lot of branches. 42 * 43 * The expression: 44 * 45 * ((x - 0x01....01) & ~x & 0x80....80) 46 * 47 * would evaluate to a non-zero value iff any of the bytes in the 48 * original word is zero. 49 * 50 * On multi-issue processors, we can divide the above expression into: 51 * a) (x - 0x01....01) 52 * b) (~x & 0x80....80) 53 * c) a & b 54 * 55 * Where, a) and b) can be partially computed in parallel. 56 * 57 * The algorithm above is found on "Hacker's Delight" by 58 * Henry S. Warren, Jr. 59 */ 60 61 /* Magic numbers for the algorithm */ 62 #if LONG_BIT == 32 63 static const unsigned long mask01 = 0x01010101; 64 static const unsigned long mask80 = 0x80808080; 65 #elif LONG_BIT == 64 66 static const unsigned long mask01 = 0x0101010101010101; 67 static const unsigned long mask80 = 0x8080808080808080; 68 #else 69 #error Unsupported word size 70 #endif 71 72 #define LONGPTR_MASK (sizeof(long) - 1) 73 74 /* 75 * Helper macro to return string length if we caught the zero 76 * byte. 77 */ 78 #define testbyte(x) \ 79 do { \ 80 if (p[x] == '\0') \ 81 return (p - str + x); \ 82 } while (0) 83 84 size_t 85 strlen(const char *str) 86 { 87 const char *p; 88 const unsigned long *lp; 89 long va, vb; 90 91 /* 92 * Before trying the hard (unaligned byte-by-byte access) way 93 * to figure out whether there is a nul character, try to see 94 * if there is a nul character is within this accessible word 95 * first. 96 * 97 * p and (p & ~LONGPTR_MASK) must be equally accessible since 98 * they always fall in the same memory page, as long as page 99 * boundaries is integral multiple of word size. 100 */ 101 lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK); 102 va = (*lp - mask01); 103 vb = ((~*lp) & mask80); 104 lp++; 105 if (va & vb) 106 /* Check if we have \0 in the first part */ 107 for (p = str; p < (const char *)lp; p++) 108 if (*p == '\0') 109 return (p - str); 110 111 /* Scan the rest of the string using word sized operation */ 112 for (; ; lp++) { 113 va = (*lp - mask01); 114 vb = ((~*lp) & mask80); 115 if (va & vb) { 116 p = (const char *)(lp); 117 testbyte(0); 118 testbyte(1); 119 testbyte(2); 120 testbyte(3); 121 #if (LONG_BIT >= 64) 122 testbyte(4); 123 testbyte(5); 124 testbyte(6); 125 testbyte(7); 126 #endif 127 } 128 } 129 130 /* NOTREACHED */ 131 return (0); 132 } 133