locate.code.c (1a1ee31f101dbc1cfab71e6c07e6d0bc42b0cca5) locate.code.c (139764e8e96310fdceb03eda2b869b0afdec0762)
1/*
1/*
2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions

--- 18 unchanged lines hidden (view full) ---

28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
3 * Copyright (c) 1989, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * James A. Woods.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions

--- 18 unchanged lines hidden (view full) ---

29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
36 * $Id: locate.code.c,v 1.4 1996/08/22 18:46:13 wosch Exp $
37 * $Id: locate.code.c,v 1.5 1996/08/31 14:51:18 wosch Exp $
37 */
38
39#ifndef lint
40static char copyright[] =
41"@(#) Copyright (c) 1989, 1993\n\
42 The Regents of the University of California. All rights reserved.\n";
43#endif /* not lint */
44

--- 22 unchanged lines hidden (view full) ---

67 * /usr/src/cmd/aardvark.c 8 /cmd/aardvark.c
68 * /usr/src/cmd/armadillo.c 14 armadillo.c
69 * /usr/tmp/zoo 5 tmp/zoo
70 *
71 * The codes are:
72 *
73 * 0-28 likeliest differential counts + offset to make nonnegative
74 * 30 switch code for out-of-range count to follow in next word
38 */
39
40#ifndef lint
41static char copyright[] =
42"@(#) Copyright (c) 1989, 1993\n\
43 The Regents of the University of California. All rights reserved.\n";
44#endif /* not lint */
45

--- 22 unchanged lines hidden (view full) ---

68 * /usr/src/cmd/aardvark.c 8 /cmd/aardvark.c
69 * /usr/src/cmd/armadillo.c 14 armadillo.c
70 * /usr/tmp/zoo 5 tmp/zoo
71 *
72 * The codes are:
73 *
74 * 0-28 likeliest differential counts + offset to make nonnegative
75 * 30 switch code for out-of-range count to follow in next word
76 * 31 an 8-bit character follows
75 * 128-255 bigram codes (128 most common, as determined by 'updatedb')
76 * 32-127 single character (printable) ascii residue (ie, literal)
77 *
77 * 128-255 bigram codes (128 most common, as determined by 'updatedb')
78 * 32-127 single character (printable) ascii residue (ie, literal)
79 *
78 * SEE ALSO: updatedb.csh, bigram.c
80 * The locate database stores any character except newline ('\n')
81 * and NUL ('\0'). The 8-bit character support doesn't waste extra
82 * space unless file names contain characters less than 32
83 * or greater than 127.
84 *
79 *
85 *
86 * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c
87 *
80 * AUTHOR: James A. Woods, Informatics General Corp.,
81 * NASA Ames Research Center, 10/82
88 * AUTHOR: James A. Woods, Informatics General Corp.,
89 * NASA Ames Research Center, 10/82
90 * 8-bit file name characters:
91 * Wolfram Schneider, Berlin September 1996
82 */
83
84#include <sys/param.h>
85#include <err.h>
86#include <errno.h>
87#include <stdlib.h>
88#include <string.h>
89#include <stdio.h>
90#include "locate.h"
91
92#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */
93
94u_char buf1[MAXPATHLEN] = " ";
95u_char buf2[MAXPATHLEN];
92 */
93
94#include <sys/param.h>
95#include <err.h>
96#include <errno.h>
97#include <stdlib.h>
98#include <string.h>
99#include <stdio.h>
100#include "locate.h"
101
102#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */
103
104u_char buf1[MAXPATHLEN] = " ";
105u_char buf2[MAXPATHLEN];
96char bigrams[BGBUFSIZE + 1] = { 0 };
106u_char bigrams[BGBUFSIZE + 1] = { 0 };
97
98#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */
99
100#ifdef LOOKUP
107
108#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */
109
110#ifdef LOOKUP
101#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)])
102typedef u_char bg_t;
103bg_t big[UCHAR_MAX][UCHAR_MAX];
111#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
112typedef short bg_t;
113bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
104#else
105#define BGINDEX(x) bgindex(x)
106typedef int bg_t;
107int bgindex __P((char *));
108#endif /* LOOKUP */
109
110
111void usage __P((void));

--- 28 unchanged lines hidden (view full) ---

140 (void)fgets(bigrams, BGBUFSIZE + 1, fp);
141
142 if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
143 err(1, "stdout");
144 (void)fclose(fp);
145
146#ifdef LOOKUP
147 /* init lookup table */
114#else
115#define BGINDEX(x) bgindex(x)
116typedef int bg_t;
117int bgindex __P((char *));
118#endif /* LOOKUP */
119
120
121void usage __P((void));

--- 28 unchanged lines hidden (view full) ---

150 (void)fgets(bigrams, BGBUFSIZE + 1, fp);
151
152 if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
153 err(1, "stdout");
154 (void)fclose(fp);
155
156#ifdef LOOKUP
157 /* init lookup table */
148 for (i = 0; i < UCHAR_MAX; i++)
149 for (j = 0; j < UCHAR_MAX; j++)
158 for (i = 0; i < UCHAR_MAX + 1; i++)
159 for (j = 0; j < UCHAR_MAX + 1; j++)
150 big[i][j] = (bg_t)-1;
151
152 for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
160 big[i][j] = (bg_t)-1;
161
162 for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
153 big[(int)*cp][(int)*(cp + 1)] = (bg_t)i;
163 big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;
164
154#endif /* LOOKUP */
155
156 oldpath = buf1;
157 path = buf2;
158 oldcount = 0;
159
160 while (fgets(path, sizeof(buf2), stdin) != NULL) {
161
165#endif /* LOOKUP */
166
167 oldpath = buf1;
168 path = buf2;
169 oldcount = 0;
170
171 while (fgets(path, sizeof(buf2), stdin) != NULL) {
172
162 /* skip empty lines */
173 /* skip empty lines */
163 if (*path == '\n')
164 continue;
165
174 if (*path == '\n')
175 continue;
176
166 /* Squelch characters that would botch the decoding. */
177 /* remove newline */
167 for (cp = path; *cp != '\0'; cp++) {
168 /* chop newline */
169 if (*cp == '\n')
170 *cp = '\0';
178 for (cp = path; *cp != '\0'; cp++) {
179 /* chop newline */
180 if (*cp == '\n')
181 *cp = '\0';
171 /* range */
172 else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
173 *cp = '?';
174 }
175
176 /* Skip longest common prefix. */
182 }
183
184 /* Skip longest common prefix. */
177 for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
185 for (cp = path; *cp == *oldpath; cp++, oldpath++)
186 if (*cp == '\0')
187 break;
178
179 count = cp - path;
180 diffcount = count - oldcount + OFFSET;
181 oldcount = count;
182 if (diffcount < 0 || diffcount > 2 * OFFSET) {
183 if (putchar(SWITCH) == EOF ||
184 putw(diffcount, stdout) == EOF)
185 err(1, "stdout");
186 } else
187 if (putchar(diffcount) == EOF)
188 err(1, "stdout");
189
190 while (*cp != '\0') {
188
189 count = cp - path;
190 diffcount = count - oldcount + OFFSET;
191 oldcount = count;
192 if (diffcount < 0 || diffcount > 2 * OFFSET) {
193 if (putchar(SWITCH) == EOF ||
194 putw(diffcount, stdout) == EOF)
195 err(1, "stdout");
196 } else
197 if (putchar(diffcount) == EOF)
198 err(1, "stdout");
199
200 while (*cp != '\0') {
191 if (*(cp + 1) == '\0') {
192 if (putchar(*cp) == EOF)
193 err(1, "stdout");
194 break;
195 }
196 if ((code = BGINDEX(cp)) == (bg_t)-1) {
197 if (putchar(*cp++) == EOF ||
198 putchar(*cp++) == EOF)
199 err(1, "stdout");
200 } else {
201 /* Found, so mark byte with parity bit. */
201 /* print *two* characters */
202
203 if ((code = BGINDEX(cp)) != (bg_t)-1) {
204 /*
205 * print *one* as bigram
206 * Found, so mark byte with
207 * parity bit.
208 */
202 if (putchar((code / 2) | PARITY) == EOF)
203 err(1, "stdout");
204 cp += 2;
205 }
209 if (putchar((code / 2) | PARITY) == EOF)
210 err(1, "stdout");
211 cp += 2;
212 }
213
214 else {
215 for (i = 0; i < 2; i++) {
216 if (*cp == '\0')
217 break;
218
219 /* print umlauts in file names */
220 if (*cp < ASCII_MIN ||
221 *cp > ASCII_MAX) {
222 if (putchar(UMLAUT) == EOF ||
223 putchar(*cp++) == EOF)
224 err(1, "stdout");
225 }
226
227 else {
228 /* normal character */
229 if(putchar(*cp++) == EOF)
230 err(1, "stdout");
231 }
232 }
233
234 }
206 }
235 }
236
207 if (path == buf1) { /* swap pointers */
208 path = buf2;
209 oldpath = buf1;
210 } else {
211 path = buf1;
212 oldpath = buf2;
213 }
214 }

--- 29 unchanged lines hidden ---
237 if (path == buf1) { /* swap pointers */
238 path = buf2;
239 oldpath = buf1;
240 } else {
241 path = buf1;
242 oldpath = buf2;
243 }
244 }

--- 29 unchanged lines hidden ---