xref: /freebsd/usr.bin/xstr/xstr.c (revision 271c3a9060f2ee55607ebe146523f888e1db2654)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 
36 __FBSDID("$FreeBSD$");
37 
38 #ifndef lint
39 static const char copyright[] =
40 "@(#) Copyright (c) 1980, 1993\n\
41 	The Regents of the University of California.  All rights reserved.\n";
42 #endif
43 
44 #ifndef lint
45 static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
46 #endif
47 
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <err.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <signal.h>
55 #include <string.h>
56 #include <unistd.h>
57 
58 #include "pathnames.h"
59 
60 /*
61  * xstr - extract and hash strings in a C program
62  *
63  * Bill Joy UCB
64  * November, 1978
65  */
66 
/*
 * Evaluate the expression `a' and explicitly discard its value.  The
 * argument is parenthesized so that compound expressions such as
 * ignore(x + y) or ignore(x = y) are cast as a whole.
 */
#define	ignore(a)	((void)(a))
68 
/*
 * Offsets into the strings file: tellpt is advanced by xgetc() for every
 * character it fetches, mesgpt is the offset hashit() assigns to the next
 * string it adds.
 */
off_t	tellpt, mesgpt;

/*
 * Name of the strings file.  It starts out as "strings"; main() may
 * repoint it at a freshly created temporary file.
 */
char	cstrings[] = "strings";
char	*strings = cstrings;

/* Option counters set by main(): -c, -v and "-" (read standard input). */
int	cflg, vflg, readstd;
78 
/* Small string and character helpers. */
char	lastchr(char *cp);
int	istail(char *str, char *of);
int	octdigit(char c);

/* Readers used on the strings file. */
int	fgetNUL(char *obuf, int rmdr, FILE *file);
int	xgetc(FILE *file);

/* String table maintenance and reporting. */
off_t	hashit(char *str, int new);
off_t	yankstr(char **cpp);
void	inithash(void);
void	flushsh(void);
void	found(int new, off_t off, char *str);
void	prstr(char *cp);

/* Drivers and housekeeping. */
static void usage(void);
void	onintr(int dummy);
void	process(const char *name);
void	xsdotc(void);
98 
99 int
100 main(int argc, char *argv[])
101 {
102 	int c;
103 	int fdesc;
104 
105 	while ((c = getopt(argc, argv, "-cv")) != -1)
106 		switch (c) {
107 		case '-':
108 			readstd++;
109 			break;
110 		case 'c':
111 			cflg++;
112 			break;
113 		case 'v':
114 			vflg++;
115 			break;
116 		default:
117 			usage();
118 		}
119 	argc -= optind;
120 	argv += optind;
121 
122 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
123 		signal(SIGINT, onintr);
124 	if (cflg || (argc == 0 && !readstd))
125 		inithash();
126 	else {
127 		strings = strdup(_PATH_TMP);
128 		if (strings == NULL)
129 			err(1, "strdup() failed");
130 		fdesc = mkstemp(strings);
131 		if (fdesc == -1)
132 			err(1, "Unable to create temporary file");
133 		close(fdesc);
134 	}
135 
136 	while (readstd || argc > 0) {
137 		if (freopen("x.c", "w", stdout) == NULL)
138 			err(1, "x.c");
139 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
140 			err(2, "%s", argv[0]);
141 		process("x.c");
142 		if (readstd == 0)
143 			argc--, argv++;
144 		else
145 			readstd = 0;
146 	};
147 	flushsh();
148 	if (cflg == 0)
149 		xsdotc();
150 	if (strings[0] == '/')
151 		ignore(unlink(strings));
152 	exit(0);
153 }
154 
/* Print the command synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
161 
/* Current input line; filled by process() and refilled by yankstr(). */
char linebuf[BUFSIZ];

/*
 * Copy standard input to standard output, replacing every string literal
 * found outside a comment by a reference of the form (&xstr[N]), where N
 * is the offset returned by yankstr().
 *
 * Lines starting with '#' are passed through untouched, except that a
 * line of the form "# <digit>..." is rewritten as "#line <digit>...".
 * Character constants are copied verbatim so that a quote inside one is
 * not mistaken for the start of a string.
 *
 * name is used only to label a read error on standard input.  A write
 * error on standard output is reported and handed to onintr().
 */
void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;		/* non-zero while inside a comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/* isdigit() needs an unsigned char value; plain char may be negative. */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		/* "continue" below advances this character loop, not the line loop. */
		for (cp = linebuf; (c = *cp++);) switch (c) {

		case '"':
			if (incomm)
				goto def;
			/* yankstr() moves cp past the literal; -1 means input ran out. */
			if ((ret = (int) yankstr(&cp)) == -1)
				goto out;
			printf("(&xstr[%d])", ret);
			break;

		case '\'':
			if (incomm)
				goto def;
			putchar(c);
			if (*cp) {
				int first = *cp++;

				putchar(first);
				/*
				 * For an escaped constant such as '\"' also
				 * copy the escaped character, otherwise the
				 * double quote would start a bogus string.
				 */
				if (first == '\\' && *cp)
					putchar(*cp++);
			}
			break;

		case '/':
			if (incomm || *cp != '*')
				goto def;
			incomm = 1;
			cp++;
			printf("/*");
			continue;

		case '*':
			if (incomm && *cp == '/') {
				incomm = 0;
				cp++;
				printf("*/");
				continue;
			}
			goto def;

def:
		default:
			putchar(c);
			break;
		}
	}
out:
	if (ferror(stdout)) {
		warn("x.c");
		onintr(0);
	}
}
231 
232 off_t
233 yankstr(char **cpp)
234 {
235 	char *cp = *cpp;
236 	int c, ch;
237 	char dbuf[BUFSIZ];
238 	char *dp = dbuf;
239 	char *tp;
240 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
241 
242 	while ((c = *cp++)) {
243 		if (dp == dbuf + sizeof(dbuf) - 3)
244 			errx(1, "message too long");
245 		switch (c) {
246 
247 		case '"':
248 			cp++;
249 			goto out;
250 
251 		case '\\':
252 			c = *cp++;
253 			if (c == 0)
254 				break;
255 			if (c == '\n') {
256 				if (fgets(linebuf, sizeof linebuf, stdin)
257 				    == NULL) {
258 					if (ferror(stdin))
259 						err(3, "x.c");
260 					return(-1);
261 				}
262 				cp = linebuf;
263 				continue;
264 			}
265 			for (tp = tmp; (ch = *tp++); tp++)
266 				if (c == ch) {
267 					c = *tp;
268 					goto gotc;
269 				}
270 			if (!octdigit(c)) {
271 				*dp++ = '\\';
272 				break;
273 			}
274 			c -= '0';
275 			if (!octdigit(*cp))
276 				break;
277 			c <<= 3, c += *cp++ - '0';
278 			if (!octdigit(*cp))
279 				break;
280 			c <<= 3, c += *cp++ - '0';
281 			break;
282 		}
283 gotc:
284 		*dp++ = c;
285 	}
286 out:
287 	*cpp = --cp;
288 	*dp = 0;
289 	return (hashit(dbuf, 1));
290 }
291 
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A direct range test is used instead of isdigit(): c is a plain char
 * and may be negative, which is not a valid argument for the <ctype.h>
 * functions.
 */
int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
297 
298 void
299 inithash(void)
300 {
301 	char buf[BUFSIZ];
302 	FILE *mesgread = fopen(strings, "r");
303 
304 	if (mesgread == NULL)
305 		return;
306 	for (;;) {
307 		mesgpt = tellpt;
308 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
309 			break;
310 		ignore(hashit(buf, 0));
311 	}
312 	ignore(fclose(mesgread));
313 }
314 
/*
 * Read characters from file through xgetc() into obuf until a NUL byte,
 * end of file, or until rmdr - 1 characters have been stored; the result
 * is always NUL-terminated.  Returns 0 if the stream is at end of file
 * or in error afterwards, 1 otherwise.
 */
int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *dst = obuf;
	int ch;

	for (;;) {
		/* Check the space first so that no character is read and lost. */
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*dst++ = ch;
	}
	*dst = 0;
	return (!(feof(file) || ferror(file)));
}
326 
327 int
328 xgetc(FILE *file)
329 {
330 
331 	tellpt++;
332 	return (getc(file));
333 }
334 
/* Number of hash chains; hashit() indexes them with (last char & 0177). */
#define	BUCKETS	128

/*
 * One known string.  Entries are chained per bucket through hnext.
 */
struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* non-zero: flushsh() must write it out */
};

/* Chain heads; only the hnext member of these elements is used. */
struct hash bucket[BUCKETS];
343 
344 off_t
345 hashit(char *str, int new)
346 {
347 	int i;
348 	struct hash *hp, *hp0;
349 
350 	hp = hp0 = &bucket[lastchr(str) & 0177];
351 	while (hp->hnext) {
352 		hp = hp->hnext;
353 		i = istail(str, hp->hstr);
354 		if (i >= 0)
355 			return (hp->hpt + i);
356 	}
357 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
358 		errx(8, "calloc");
359 	hp->hpt = mesgpt;
360 	if (!(hp->hstr = strdup(str)))
361 		err(1, NULL);
362 	mesgpt += strlen(hp->hstr) + 1;
363 	hp->hnext = hp0->hnext;
364 	hp->hnew = new;
365 	hp0->hnext = hp;
366 	return (hp->hpt);
367 }
368 
369 void
370 flushsh(void)
371 {
372 	int i;
373 	struct hash *hp;
374 	FILE *mesgwrit;
375 	int old = 0, new = 0;
376 
377 	for (i = 0; i < BUCKETS; i++)
378 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
379 			if (hp->hnew)
380 				new++;
381 			else
382 				old++;
383 	if (new == 0 && old != 0)
384 		return;
385 	mesgwrit = fopen(strings, old ? "r+" : "w");
386 	if (mesgwrit == NULL)
387 		err(4, "%s", strings);
388 	for (i = 0; i < BUCKETS; i++)
389 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
390 			found(hp->hnew, hp->hpt, hp->hstr);
391 			if (hp->hnew) {
392 				fseek(mesgwrit, hp->hpt, 0);
393 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
394 				if (ferror(mesgwrit))
395 					err(4, "%s", strings);
396 			}
397 		}
398 	if (fclose(mesgwrit) == EOF)
399 		err(4, "%s", strings);
400 }
401 
402 void
403 found(int new, off_t off, char *str)
404 {
405 	if (vflg == 0)
406 		return;
407 	if (!new)
408 		fprintf(stderr, "found at %d:", (int) off);
409 	else
410 		fprintf(stderr, "new at %d:", (int) off);
411 	prstr(str);
412 	fprintf(stderr, "\n");
413 }
414 
/*
 * Print the string cp on standard error in a visible form:
 *   - characters below space as '^' followed by the character plus '`'
 *     (so \001 is shown as "^a"),
 *   - DEL (0177) as "^?",
 *   - bytes with the high bit set (0200 and up) as a three-digit octal
 *     escape,
 *   - everything else unchanged.
 */
void
prstr(char *cp)
{
	int c;

	/* Mask to 8 bits so that a signed char never yields a negative value. */
	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", (unsigned int)c);
		else
			fprintf(stderr, "%c", c);
	}
}
430 
431 void
432 xsdotc(void)
433 {
434 	FILE *strf = fopen(strings, "r");
435 	FILE *xdotcf;
436 
437 	if (strf == NULL)
438 		err(5, "%s", strings);
439 	xdotcf = fopen("xs.c", "w");
440 	if (xdotcf == NULL)
441 		err(6, "xs.c");
442 	fprintf(xdotcf, "char\txstr[] = {\n");
443 	for (;;) {
444 		int i, c;
445 
446 		for (i = 0; i < 8; i++) {
447 			c = getc(strf);
448 			if (ferror(strf)) {
449 				warn("%s", strings);
450 				onintr(0);
451 			}
452 			if (feof(strf)) {
453 				fprintf(xdotcf, "\n");
454 				goto out;
455 			}
456 			fprintf(xdotcf, "0x%02x,", c);
457 		}
458 		fprintf(xdotcf, "\n");
459 	}
460 out:
461 	fprintf(xdotcf, "};\n");
462 	ignore(fclose(xdotcf));
463 	ignore(fclose(strf));
464 }
465 
/*
 * Return the last character of the string cp, or NUL when the string is
 * empty.
 */
char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
474 
/*
 * If str is a suffix of `of', return the index in `of' at which that
 * suffix starts; otherwise return -1.
 */
int
istail(char *str, char *of)
{
	int delta = strlen(of) - strlen(str);

	if (delta >= 0 && strcmp(of + delta, str) == 0)
		return (delta);
	return (-1);
}
484 
485 void
486 onintr(int dummy __unused)
487 {
488 
489 	ignore(signal(SIGINT, SIG_IGN));
490 	if (strings[0] == '/')
491 		ignore(unlink(strings));
492 	ignore(unlink("x.c"));
493 	ignore(unlink("xs.c"));
494 	exit(7);
495 }
496