locate.code.c (1a1ee31f101dbc1cfab71e6c07e6d0bc42b0cca5) | locate.code.c (139764e8e96310fdceb03eda2b869b0afdec0762) |
---|---|
1/* | 1/* |
2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. |
|
2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions --- 18 unchanged lines hidden (view full) --- 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * | 3 * Copyright (c) 1989, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * James A. Woods. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions --- 18 unchanged lines hidden (view full) --- 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * |
36 * $Id: locate.code.c,v 1.4 1996/08/22 18:46:13 wosch Exp $ | 37 * $Id: locate.code.c,v 1.5 1996/08/31 14:51:18 wosch Exp $ |
37 */ 38 39#ifndef lint 40static char copyright[] = 41"@(#) Copyright (c) 1989, 1993\n\ 42 The Regents of the University of California. All rights reserved.\n"; 43#endif /* not lint */ 44 --- 22 unchanged lines hidden (view full) --- 67 * /usr/src/cmd/aardvark.c 8 /cmd/aardvark.c 68 * /usr/src/cmd/armadillo.c 14 armadillo.c 69 * /usr/tmp/zoo 5 tmp/zoo 70 * 71 * The codes are: 72 * 73 * 0-28 likeliest differential counts + offset to make nonnegative 74 * 30 switch code for out-of-range count to follow in next word | 38 */ 39 40#ifndef lint 41static char copyright[] = 42"@(#) Copyright (c) 1989, 1993\n\ 43 The Regents of the University of California. All rights reserved.\n"; 44#endif /* not lint */ 45 --- 22 unchanged lines hidden (view full) --- 68 * /usr/src/cmd/aardvark.c 8 /cmd/aardvark.c 69 * /usr/src/cmd/armadillo.c 14 armadillo.c 70 * /usr/tmp/zoo 5 tmp/zoo 71 * 72 * The codes are: 73 * 74 * 0-28 likeliest differential counts + offset to make nonnegative 75 * 30 switch code for out-of-range count to follow in next word |
76 * 31 an 8-bit char follows |
|
75 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 76 * 32-127 single character (printable) ascii residue (ie, literal) 77 * | 77 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 78 * 32-127 single character (printable) ascii residue (ie, literal) 79 * |
78 * SEE ALSO: updatedb.csh, bigram.c | 80 * The locate database stores any character except newline ('\n') 81 * and NUL ('\0'). The 8-bit character support doesn't waste extra 82 * space unless you have characters in file names less than 32 83 * or greater than 127. 84 * |
79 * | 85 * |
86 * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c 87 * |
|
80 * AUTHOR: James A. Woods, Informatics General Corp., 81 * NASA Ames Research Center, 10/82 | 88 * AUTHOR: James A. Woods, Informatics General Corp., 89 * NASA Ames Research Center, 10/82 |
90 * 8-bit file name characters: 91 * Wolfram Schneider, Berlin September 1996 |
|
82 */ 83 84#include <sys/param.h> 85#include <err.h> 86#include <errno.h> 87#include <stdlib.h> 88#include <string.h> 89#include <stdio.h> 90#include "locate.h" 91 92#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ 93 94u_char buf1[MAXPATHLEN] = " "; 95u_char buf2[MAXPATHLEN]; | 92 */ 93 94#include <sys/param.h> 95#include <err.h> 96#include <errno.h> 97#include <stdlib.h> 98#include <string.h> 99#include <stdio.h> 100#include "locate.h" 101 102#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ 103 104u_char buf1[MAXPATHLEN] = " "; 105u_char buf2[MAXPATHLEN]; |
96char bigrams[BGBUFSIZE + 1] = { 0 }; | 106u_char bigrams[BGBUFSIZE + 1] = { 0 }; |
97 98#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */ 99 100#ifdef LOOKUP | 107 108#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */ 109 110#ifdef LOOKUP |
101#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)]) 102typedef u_char bg_t; 103bg_t big[UCHAR_MAX][UCHAR_MAX]; | 111#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)]) 112typedef short bg_t; 113bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1]; |
104#else 105#define BGINDEX(x) bgindex(x) 106typedef int bg_t; 107int bgindex __P((char *)); 108#endif /* LOOKUP */ 109 110 111void usage __P((void)); --- 28 unchanged lines hidden (view full) --- 140 (void)fgets(bigrams, BGBUFSIZE + 1, fp); 141 142 if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE) 143 err(1, "stdout"); 144 (void)fclose(fp); 145 146#ifdef LOOKUP 147 /* init lookup table */ | 114#else 115#define BGINDEX(x) bgindex(x) 116typedef int bg_t; 117int bgindex __P((char *)); 118#endif /* LOOKUP */ 119 120 121void usage __P((void)); --- 28 unchanged lines hidden (view full) --- 150 (void)fgets(bigrams, BGBUFSIZE + 1, fp); 151 152 if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE) 153 err(1, "stdout"); 154 (void)fclose(fp); 155 156#ifdef LOOKUP 157 /* init lookup table */ |
148 for (i = 0; i < UCHAR_MAX; i++) 149 for (j = 0; j < UCHAR_MAX; j++) | 158 for (i = 0; i < UCHAR_MAX + 1; i++) 159 for (j = 0; j < UCHAR_MAX + 1; j++) |
150 big[i][j] = (bg_t)-1; 151 152 for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) | 160 big[i][j] = (bg_t)-1; 161 162 for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) |
153 big[(int)*cp][(int)*(cp + 1)] = (bg_t)i; | 163 big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i; 164 |
154#endif /* LOOKUP */ 155 156 oldpath = buf1; 157 path = buf2; 158 oldcount = 0; 159 160 while (fgets(path, sizeof(buf2), stdin) != NULL) { 161 | 165#endif /* LOOKUP */ 166 167 oldpath = buf1; 168 path = buf2; 169 oldcount = 0; 170 171 while (fgets(path, sizeof(buf2), stdin) != NULL) { 172 |
162 /* skip empty lines */ | 173 /* skip empty lines */ |
163 if (*path == '\n') 164 continue; 165 | 174 if (*path == '\n') 175 continue; 176 |
166 /* Squelch characters that would botch the decoding. */ | 177 /* remove newline */ |
167 for (cp = path; *cp != '\0'; cp++) { 168 /* chop newline */ 169 if (*cp == '\n') 170 *cp = '\0'; | 178 for (cp = path; *cp != '\0'; cp++) { 179 /* chop newline */ 180 if (*cp == '\n') 181 *cp = '\0'; |
171 /* range */ 172 else if (*cp < ASCII_MIN || *cp > ASCII_MAX) 173 *cp = '?'; | |
174 } 175 176 /* Skip longest common prefix. */ | 182 } 183 184 /* Skip longest common prefix. */ |
177 for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++); | 185 for (cp = path; *cp == *oldpath; cp++, oldpath++) 186 if (*cp == '\0') 187 break; |
178 179 count = cp - path; 180 diffcount = count - oldcount + OFFSET; 181 oldcount = count; 182 if (diffcount < 0 || diffcount > 2 * OFFSET) { 183 if (putchar(SWITCH) == EOF || 184 putw(diffcount, stdout) == EOF) 185 err(1, "stdout"); 186 } else 187 if (putchar(diffcount) == EOF) 188 err(1, "stdout"); 189 190 while (*cp != '\0') { | 188 189 count = cp - path; 190 diffcount = count - oldcount + OFFSET; 191 oldcount = count; 192 if (diffcount < 0 || diffcount > 2 * OFFSET) { 193 if (putchar(SWITCH) == EOF || 194 putw(diffcount, stdout) == EOF) 195 err(1, "stdout"); 196 } else 197 if (putchar(diffcount) == EOF) 198 err(1, "stdout"); 199 200 while (*cp != '\0') { |
191 if (*(cp + 1) == '\0') { 192 if (putchar(*cp) == EOF) 193 err(1, "stdout"); 194 break; 195 } 196 if ((code = BGINDEX(cp)) == (bg_t)-1) { 197 if (putchar(*cp++) == EOF || 198 putchar(*cp++) == EOF) 199 err(1, "stdout"); 200 } else { 201 /* Found, so mark byte with parity bit. */ | 201 /* print *two* characters */ 202 203 if ((code = BGINDEX(cp)) != (bg_t)-1) { 204 /* 205 * print *one* as bigram 206 * Found, so mark byte with 207 * parity bit. 208 */ |
202 if (putchar((code / 2) | PARITY) == EOF) 203 err(1, "stdout"); 204 cp += 2; 205 } | 209 if (putchar((code / 2) | PARITY) == EOF) 210 err(1, "stdout"); 211 cp += 2; 212 } |
213 214 else { 215 for (i = 0; i < 2; i++) { 216 if (*cp == '\0') 217 break; 218 219 /* print umlauts in file names */ 220 if (*cp < ASCII_MIN || 221 *cp > ASCII_MAX) { 222 if (putchar(UMLAUT) == EOF || 223 putchar(*cp++) == EOF) 224 err(1, "stdout"); 225 } 226 227 else { 228 /* normal character */ 229 if(putchar(*cp++) == EOF) 230 err(1, "stdout"); 231 } 232 } 233 234 } |
|
206 } | 235 } |
236 |
|
207 if (path == buf1) { /* swap pointers */ 208 path = buf2; 209 oldpath = buf1; 210 } else { 211 path = buf1; 212 oldpath = buf2; 213 } 214 } --- 29 unchanged lines hidden --- | 237 if (path == buf1) { /* swap pointers */ 238 path = buf2; 239 oldpath = buf1; 240 } else { 241 path = buf1; 242 oldpath = buf2; 243 } 244 } --- 29 unchanged lines hidden --- |