1 /* 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org> 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * $FreeBSD$ 36 */ 37 38 39 #ifndef _LOCATE_STATISTIC_ 40 #define _LOCATE_STATISTIC_ 41 42 void 43 statistic (FILE *fp, char *path_fcodes) 44 { 45 long lines, chars, size, size_nbg, big, zwerg, umlaut; 46 u_char *p, *s; 47 int c; 48 int count, longest_path; 49 int error = 0; 50 u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX]; 51 52 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 53 p[c] = check_bigram_char(getc(fp)); 54 s[c] = check_bigram_char(getc(fp)); 55 } 56 57 lines = chars = big = zwerg = umlaut = longest_path = 0; 58 size = NBG + NBG; 59 60 for (c = getc(fp), count = 0; c != EOF; size++) { 61 if (c == SWITCH) { 62 count += getwf(fp) - OFFSET; 63 size += sizeof(int); 64 zwerg++; 65 } else 66 count += c - OFFSET; 67 68 if (count < 0 || count >= LOCATE_PATH_MAX) { 69 /* stop on error and display the statstics anyway */ 70 warnx("corrupted database: %s %d", path_fcodes, count); 71 error = 1; 72 break; 73 } 74 75 for (p = path + count; (c = getc(fp)) > SWITCH; size++) 76 if (c < PARITY) { 77 if (c == UMLAUT) { 78 c = getc(fp); 79 size++; 80 umlaut++; 81 } 82 p++; 83 } else { 84 /* bigram char */ 85 big++; 86 p += 2; 87 } 88 89 p++; 90 lines++; 91 chars += (p - path); 92 if ((p - path) > longest_path) 93 longest_path = p - path; 94 } 95 96 /* size without bigram db */ 97 size_nbg = size - (2 * NBG); 98 99 (void)printf("\nDatabase: %s\n", path_fcodes); 100 (void)printf("Compression: Front: %2.2f%%, ", chars > 0 ? (size_nbg + big) / (chars / (float)100) : 0); 101 (void)printf("Bigram: %2.2f%%, ", big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0); 102 /* incl. bigram db overhead */ 103 (void)printf("Total: %2.2f%%\n", chars > 0 ? size / (chars / (float)100) : 0); 104 (void)printf("Filenames: %ld, ", lines); 105 (void)printf("Characters: %ld, ", chars); 106 (void)printf("Database size: %ld\n", size); 107 (void)printf("Bigram characters: %ld, ", big); 108 (void)printf("Integers: %ld, ", zwerg); 109 (void)printf("8-Bit characters: %ld\n", umlaut); 110 printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0); 111 112 /* non zero exit on corrupt database */ 113 if (error) 114 exit(error); 115 } 116 #endif /* _LOCATE_STATISTIC_ */ 117 118 extern char separator; 119 120 void 121 #ifdef FF_MMAP 122 123 124 #ifdef FF_ICASE 125 fastfind_mmap_icase 126 #else 127 fastfind_mmap 128 #endif /* FF_ICASE */ 129 (char *pathpart, caddr_t paddr, off_t len, char *database) 130 131 132 #else /* MMAP */ 133 134 135 #ifdef FF_ICASE 136 fastfind_icase 137 #else 138 fastfind 139 #endif /* FF_ICASE */ 140 141 (FILE *fp, char *pathpart, char *database) 142 143 144 #endif /* MMAP */ 145 146 { 147 u_char *p, *s, *patend, *q, *foundchar; 148 int c, cc; 149 int count, found, globflag; 150 u_char *cutoff; 151 u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX + 2]; 152 153 #ifdef FF_ICASE 154 /* use a lookup table for case insensitive search */ 155 u_char table[UCHAR_MAX + 1]; 156 157 tolower_word(pathpart); 158 #endif /* FF_ICASE*/ 159 160 /* init bigram table */ 161 #ifdef FF_MMAP 162 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 163 p[c] = check_bigram_char(*paddr++); 164 s[c] = check_bigram_char(*paddr++); 165 } 166 #else 167 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 168 p[c] = check_bigram_char(getc(fp)); 169 s[c] = check_bigram_char(getc(fp)); 170 } 171 #endif /* FF_MMAP */ 172 173 /* find optimal (last) char for searching */ 174 for (p = pathpart; *p != '\0'; p++) 175 if (strchr(LOCATE_REG, *p) != NULL) 176 break; 177 178 if (*p == '\0') 179 globflag = 0; 180 else 181 globflag = 1; 182 183 p = pathpart; 184 patend = patprep(p); 185 cc = *patend; 186 187 #ifdef FF_ICASE 188 /* set patend char to true */ 189 for (c = 0; c < UCHAR_MAX + 1; c++) 190 table[c] = 0; 191 192 table[TOLOWER(*patend)] = 1; 193 table[toupper(*patend)] = 1; 194 #endif /* FF_ICASE */ 195 196 197 /* main loop */ 198 found = count = 0; 199 foundchar = 0; 200 201 #ifdef FF_MMAP 202 c = (u_char)*paddr++; 203 len--; 204 205 for (; len > 0; ) { 206 #else 207 c = getc(fp); 208 for (; c != EOF; ) { 209 #endif /* FF_MMAP */ 210 211 /* go forward or backward */ 212 if (c == SWITCH) { /* big step, an integer */ 213 #ifdef FF_MMAP 214 if (len < sizeof(int)) 215 errx(1, "corrupted database: %s", database); 216 217 count += getwm(paddr) - OFFSET; 218 len -= INTSIZE; 219 paddr += INTSIZE; 220 #else 221 count += getwf(fp) - OFFSET; 222 #endif /* FF_MMAP */ 223 } else { /* slow step, =< 14 chars */ 224 count += c - OFFSET; 225 } 226 227 if (count < 0 || count >= LOCATE_PATH_MAX) 228 errx(1, "corrupted database: %s %d", database, count); 229 230 /* overlay old path */ 231 p = path + count; 232 foundchar = p - 1; 233 234 #ifdef FF_MMAP 235 for (; len > 0;) { 236 c = (u_char)*paddr++; 237 len--; 238 #else 239 for (;;) { 240 c = getc(fp); 241 #endif /* FF_MMAP */ 242 /* 243 * == UMLAUT: 8 bit char followed 244 * <= SWITCH: offset 245 * >= PARITY: bigram 246 * rest: single ascii char 247 * 248 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram 249 */ 250 if (c < PARITY) { 251 if (c <= UMLAUT) { 252 if (c == UMLAUT) { 253 #ifdef FF_MMAP 254 c = (u_char)*paddr++; 255 len--; 256 #else 257 c = getc(fp); 258 #endif /* FF_MMAP */ 259 260 } else 261 break; /* SWITCH */ 262 } 263 #ifdef FF_ICASE 264 if (table[c]) 265 #else 266 if (c == cc) 267 #endif /* FF_ICASE */ 268 foundchar = p; 269 *p++ = c; 270 } 271 else { 272 /* bigrams are parity-marked */ 273 TO7BIT(c); 274 275 #ifndef FF_ICASE 276 if (bigram1[c] == cc || 277 bigram2[c] == cc) 278 #else 279 280 if (table[bigram1[c]] || 281 table[bigram2[c]]) 282 #endif /* FF_ICASE */ 283 foundchar = p + 1; 284 285 *p++ = bigram1[c]; 286 *p++ = bigram2[c]; 287 } 288 289 if (p - path >= LOCATE_PATH_MAX) 290 errx(1, "corrupted database: %s %td", database, p - path); 291 292 } 293 294 if (found) { /* previous line matched */ 295 cutoff = path; 296 *p-- = '\0'; 297 foundchar = p; 298 } else if (foundchar >= path + count) { /* a char matched */ 299 *p-- = '\0'; 300 cutoff = path + count; 301 } else /* nothing to do */ 302 continue; 303 304 found = 0; 305 for (s = foundchar; s >= cutoff; s--) { 306 if (*s == cc 307 #ifdef FF_ICASE 308 || TOLOWER(*s) == cc 309 #endif /* FF_ICASE */ 310 ) { /* fast first char check */ 311 for (p = patend - 1, q = s - 1; *p != '\0'; 312 p--, q--) 313 if (*q != *p 314 #ifdef FF_ICASE 315 && TOLOWER(*q) != *p 316 #endif /* FF_ICASE */ 317 ) 318 break; 319 if (*p == '\0') { /* fast match success */ 320 found = 1; 321 if (!globflag || 322 #ifndef FF_ICASE 323 !fnmatch(pathpart, path, 0)) 324 #else 325 !fnmatch(pathpart, path, 326 FNM_CASEFOLD)) 327 #endif /* !FF_ICASE */ 328 { 329 if (f_silent) 330 counter++; 331 else if (f_limit) { 332 counter++; 333 if (f_limit >= counter) 334 (void)printf("%s%c",path,separator); 335 else 336 errx(0, "[show only %ld lines]", counter - 1); 337 } else 338 (void)printf("%s%c",path,separator); 339 } 340 break; 341 } 342 } 343 } 344 } 345 } 346