xref: /freebsd/usr.bin/xstr/xstr.c (revision 9336e0699bda8a301cd2bfa37106b6ec5e32012e)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 
36 __FBSDID("$FreeBSD$");
37 
38 #ifndef lint
39 static const char copyright[] =
40 "@(#) Copyright (c) 1980, 1993\n\
41 	The Regents of the University of California.  All rights reserved.\n";
42 #endif
43 
44 #ifndef lint
45 static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
46 #endif
47 
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <err.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <signal.h>
55 #include <string.h>
56 #include <unistd.h>
57 
58 #include "pathnames.h"
59 
60 /*
61  * xstr - extract and hash strings in a C program
62  *
63  * Bill Joy UCB
64  * November, 1978
65  */
66 
/*
 * Explicitly discard the value of an expression.  The argument is
 * parenthesized so that any expression (e.g. a conditional) can be passed.
 */
#define	ignore(a)	((void)(a))

/* Count of xgetc() calls made so far, i.e. characters consumed so far. */
off_t	tellpt;

/* Offset that hashit() assigns to the next string it has to add. */
off_t	mesgpt;
/* Default strings file name, relative to the current directory. */
char	cstrings[] =	"strings";
/* Strings file currently in use; main() may point it at a temporary file. */
char	*strings =	cstrings;

int	cflg;		/* incremented for each -c option */
int	vflg;		/* incremented for each -v; enables found() output */
int	readstd;	/* incremented for "-"; main() then reads stdin */

char lastchr(char *cp);

int fgetNUL(char *obuf, int rmdr, FILE *file);
int istail(char *str, char *of);
int octdigit(char c);
int xgetc(FILE *file);

off_t hashit(char *str, int new);
off_t yankstr(char **cpp);

static void usage(void);

void flushsh(void);
void found(int new, off_t off, char *str);
void inithash(void);
void onintr(int dummy);
void process(const char *name);
void prstr(char *cp);
void xsdotc(void);
98 
99 int
100 main(int argc, char *argv[])
101 {
102 	int c;
103 
104 	while ((c = getopt(argc, argv, "-cv")) != -1)
105 		switch (c) {
106 		case '-':
107 			readstd++;
108 			break;
109 		case 'c':
110 			cflg++;
111 			break;
112 		case 'v':
113 			vflg++;
114 			break;
115 		default:
116 			usage();
117 		}
118 	argc -= optind;
119 	argv += optind;
120 
121 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
122 		signal(SIGINT, onintr);
123 	if (cflg || (argc == 0 && !readstd))
124 		inithash();
125 	else
126 		strings = mktemp(strdup(_PATH_TMP));
127 	while (readstd || argc > 0) {
128 		if (freopen("x.c", "w", stdout) == NULL)
129 			err(1, "x.c");
130 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
131 			err(2, "%s", argv[0]);
132 		process("x.c");
133 		if (readstd == 0)
134 			argc--, argv++;
135 		else
136 			readstd = 0;
137 	};
138 	flushsh();
139 	if (cflg == 0)
140 		xsdotc();
141 	if (strings[0] == '/')
142 		ignore(unlink(strings));
143 	exit(0);
144 }
145 
/*
 * Print the command synopsis on stderr and terminate with exit status 1.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: xstr [-cv] [-] [file ...]\n";

	fputs(synopsis, stderr);
	exit(1);
}
152 
/*
 * Current input line.  Shared with yankstr(), which refills it when a string
 * literal is continued onto the next line with a backslash-newline.
 */
char linebuf[BUFSIZ];

/*
 * Copy the C source on stdin to stdout, replacing every string literal found
 * outside a comment with "(&xstr[OFFSET])", where OFFSET is the value
 * returned by yankstr() for that literal.
 *
 * An "extern char xstr[];" declaration is emitted first.  Lines starting with
 * '#' are passed through untouched, except that "# <digit>..." line markers
 * are rewritten as "#line <digit>...".
 *
 * name is used only to label a read error on stdin.  A write error on stdout
 * is reported and handled through onintr(), which does not return.
 */
void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;		/* nonzero while inside a comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/*
			 * The cast keeps isdigit() defined for bytes with the
			 * high bit set when plain char is signed.
			 */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++);) switch (c) {

		case '"':
			if (incomm)
				goto def;
			/* yankstr() advances cp past the literal. */
			if ((ret = (int) yankstr(&cp)) == -1)
				goto out;
			printf("(&xstr[%d])", ret);
			break;

		case '\'':
			if (incomm)
				goto def;
			/*
			 * Copy the quote and the character after it verbatim
			 * so that a '"' character constant is not taken for
			 * the start of a string.
			 */
			putchar(c);
			if (*cp)
				putchar(*cp++);
			break;

		case '/':
			if (incomm || *cp != '*')
				goto def;
			incomm = 1;
			cp++;
			printf("/*");
			continue;

		case '*':
			if (incomm && *cp == '/') {
				incomm = 0;
				cp++;
				printf("*/");
				continue;
			}
			goto def;

def:
		default:
			putchar(c);
			break;
		}
	}
out:
	if (ferror(stdout))
		warn("x.c"), onintr(0);
}
222 
223 off_t
224 yankstr(char **cpp)
225 {
226 	char *cp = *cpp;
227 	int c, ch;
228 	char dbuf[BUFSIZ];
229 	char *dp = dbuf;
230 	char *tp;
231 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
232 
233 	while ((c = *cp++)) {
234 		if (dp == dbuf + sizeof(dbuf) - 3)
235 			errx(1, "message too long");
236 		switch (c) {
237 
238 		case '"':
239 			cp++;
240 			goto out;
241 
242 		case '\\':
243 			c = *cp++;
244 			if (c == 0)
245 				break;
246 			if (c == '\n') {
247 				if (fgets(linebuf, sizeof linebuf, stdin)
248 				    == NULL) {
249 					if (ferror(stdin))
250 						err(3, "x.c");
251 					return(-1);
252 				}
253 				cp = linebuf;
254 				continue;
255 			}
256 			for (tp = tmp; (ch = *tp++); tp++)
257 				if (c == ch) {
258 					c = *tp;
259 					goto gotc;
260 				}
261 			if (!octdigit(c)) {
262 				*dp++ = '\\';
263 				break;
264 			}
265 			c -= '0';
266 			if (!octdigit(*cp))
267 				break;
268 			c <<= 3, c += *cp++ - '0';
269 			if (!octdigit(*cp))
270 				break;
271 			c <<= 3, c += *cp++ - '0';
272 			break;
273 		}
274 gotc:
275 		*dp++ = c;
276 	}
277 out:
278 	*cpp = --cp;
279 	*dp = 0;
280 	return (hashit(dbuf, 1));
281 }
282 
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range test is used instead of isdigit(), whose behaviour is
 * undefined when a negative plain char is passed to it.
 */
int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
288 
289 void
290 inithash(void)
291 {
292 	char buf[BUFSIZ];
293 	FILE *mesgread = fopen(strings, "r");
294 
295 	if (mesgread == NULL)
296 		return;
297 	for (;;) {
298 		mesgpt = tellpt;
299 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
300 			break;
301 		ignore(hashit(buf, 0));
302 	}
303 	ignore(fclose(mesgread));
304 }
305 
/*
 * Read one NUL-terminated record from file into obuf.
 *
 * At most rmdr - 1 characters are stored, followed by a terminating NUL.
 * Reading stops at a NUL byte, at EOF/error, or when the space is used up
 * (in which case no further character is consumed).
 *
 * Returns 0 if the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;

	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
317 
318 int
319 xgetc(FILE *file)
320 {
321 
322 	tellpt++;
323 	return (getc(file));
324 }
325 
/* Number of hash chains; hashit() indexes them with (last char & 0177). */
#define	BUCKETS	128

/*
 * One known string.  Entries are chained per bucket through hnext.
 */
struct hash {
	off_t		 hpt;	/* offset of the string in the strings file */
	char		*hstr;	/* private copy of the string */
	struct hash	*hnext;	/* next entry in the same bucket */
	short		 hnew;	/* nonzero: must be written by flushsh() */
};

/* Chain heads; only the hnext member of a head is used. */
struct hash bucket[BUCKETS];
334 
335 off_t
336 hashit(char *str, int new)
337 {
338 	int i;
339 	struct hash *hp, *hp0;
340 
341 	hp = hp0 = &bucket[lastchr(str) & 0177];
342 	while (hp->hnext) {
343 		hp = hp->hnext;
344 		i = istail(str, hp->hstr);
345 		if (i >= 0)
346 			return (hp->hpt + i);
347 	}
348 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
349 		errx(8, "calloc");
350 	hp->hpt = mesgpt;
351 	if (!(hp->hstr = strdup(str)))
352 		err(1, NULL);
353 	mesgpt += strlen(hp->hstr) + 1;
354 	hp->hnext = hp0->hnext;
355 	hp->hnew = new;
356 	hp0->hnext = hp;
357 	return (hp->hpt);
358 }
359 
360 void
361 flushsh(void)
362 {
363 	int i;
364 	struct hash *hp;
365 	FILE *mesgwrit;
366 	int old = 0, new = 0;
367 
368 	for (i = 0; i < BUCKETS; i++)
369 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
370 			if (hp->hnew)
371 				new++;
372 			else
373 				old++;
374 	if (new == 0 && old != 0)
375 		return;
376 	mesgwrit = fopen(strings, old ? "r+" : "w");
377 	if (mesgwrit == NULL)
378 		err(4, "%s", strings);
379 	for (i = 0; i < BUCKETS; i++)
380 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
381 			found(hp->hnew, hp->hpt, hp->hstr);
382 			if (hp->hnew) {
383 				fseek(mesgwrit, hp->hpt, 0);
384 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
385 				if (ferror(mesgwrit))
386 					err(4, "%s", strings);
387 			}
388 		}
389 	if (fclose(mesgwrit) == EOF)
390 		err(4, "%s", strings);
391 }
392 
393 void
394 found(int new, off_t off, char *str)
395 {
396 	if (vflg == 0)
397 		return;
398 	if (!new)
399 		fprintf(stderr, "found at %d:", (int) off);
400 	else
401 		fprintf(stderr, "new at %d:", (int) off);
402 	prstr(str);
403 	fprintf(stderr, "\n");
404 }
405 
/*
 * Print the string cp on stderr in a printable form:
 *   - bytes below ' ' as '^' followed by (byte + '`'), e.g. 001 -> "^a";
 *   - 0177 as "^?";
 *   - bytes 0200 and above as a backslash and three octal digits;
 *   - everything else unchanged.
 */
void
prstr(char *cp)
{
	int c;

	/* Mask with 0377 so that bytes are non-negative when char is signed. */
	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", (unsigned int)c);
		else
			fprintf(stderr, "%c", c);
	}
}
421 
422 void
423 xsdotc(void)
424 {
425 	FILE *strf = fopen(strings, "r");
426 	FILE *xdotcf;
427 
428 	if (strf == NULL)
429 		err(5, "%s", strings);
430 	xdotcf = fopen("xs.c", "w");
431 	if (xdotcf == NULL)
432 		err(6, "xs.c");
433 	fprintf(xdotcf, "char\txstr[] = {\n");
434 	for (;;) {
435 		int i, c;
436 
437 		for (i = 0; i < 8; i++) {
438 			c = getc(strf);
439 			if (ferror(strf)) {
440 				warn("%s", strings);
441 				onintr(0);
442 			}
443 			if (feof(strf)) {
444 				fprintf(xdotcf, "\n");
445 				goto out;
446 			}
447 			fprintf(xdotcf, "0x%02x,", c);
448 		}
449 		fprintf(xdotcf, "\n");
450 	}
451 out:
452 	fprintf(xdotcf, "};\n");
453 	ignore(fclose(xdotcf));
454 	ignore(fclose(strf));
455 }
456 
/*
 * Return the last character of the string cp, or '\0' if it is empty.
 */
char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
465 
/*
 * If str is identical to the tail of `of`, return the index in `of` at which
 * that tail begins (0 when the two strings are equal); otherwise return -1.
 */
int
istail(char *str, char *of)
{
	size_t tail_len = strlen(str);
	size_t whole_len = strlen(of);
	size_t start;

	if (tail_len > whole_len)
		return (-1);
	start = whole_len - tail_len;
	if (strcmp(of + start, str) != 0)
		return (-1);
	return ((int)start);
}
475 
476 void
477 onintr(int dummy __unused)
478 {
479 
480 	ignore(signal(SIGINT, SIG_IGN));
481 	if (strings[0] == '/')
482 		ignore(unlink(strings));
483 	ignore(unlink("x.c"));
484 	ignore(unlink("xs.c"));
485 	exit(7);
486 }
487