xref: /freebsd/usr.bin/xstr/xstr.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
/*
 * Copyright notice kept as a static string in the program text.
 * It is left out when the file is run through lint.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/*
 * Revision identification strings.  The historical SCCS id is retained
 * for reference only; the "#if 0" keeps it out of the build.
 */
#ifndef lint
#if 0
static char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
#endif
static const char rcsid[] =
  "$FreeBSD$";
#endif /* not lint */
47 
48 #include <sys/types.h>
49 #include <ctype.h>
50 #include <err.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <signal.h>
54 #include <string.h>
55 #include <unistd.h>
56 #include "pathnames.h"
57 
58 /*
59  * xstr - extract and hash strings in a C program
60  *
61  * Bill Joy UCB
62  * November, 1978
63  */
64 
/* Explicitly discard the value of an expression (e.g. an unused return). */
#define	ignore(a)	((void) a)

off_t	tellpt;		/* incremented by xgetc() for every read it performs */
off_t	hashit();
void	onintr();
/* NOTE(review): no definition of savestr() is visible in this file. */
char	*savestr();
off_t	yankstr();

off_t	mesgpt;		/* offset hashit() assigns to the next new string */
char	*strings =	"strings";	/* name of the strings file in use */

int	cflg;		/* counts -c options seen by main() */
int	vflg;		/* counts -v options; enables the report in found() */
int	readstd;	/* set by "-": process standard input as one source */

/* Prototypes for the functions defined later in this file. */
static void usage __P((void));
int istail __P((char *, char *));
char lastchr __P((char *));
void xsdotc __P((void));
void prstr __P((char *));
void found __P((int, off_t, char *));
void flushsh __P((void));
int xgetc __P((FILE *));
int fgetNUL __P((char *, int, FILE *));
void inithash __P((void));
int octdigit __P((char));
void process __P((char *));
92 
93 int
94 main(argc, argv)
95 	int argc;
96 	char *argv[];
97 {
98 	int c;
99 
100 	while ((c = getopt(argc, argv, "-cv")) != -1)
101 		switch (c) {
102 		case '-':
103 			readstd++;
104 			break;
105 		case 'c':
106 			cflg++;
107 			break;
108 		case 'v':
109 			vflg++;
110 			break;
111 		default:
112 			usage();
113 		}
114 	argc -= optind;
115 	argv += optind;
116 
117 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
118 		signal(SIGINT, onintr);
119 	if (cflg || (argc == 0 && !readstd))
120 		inithash();
121 	else
122 		strings = mktemp(strdup(_PATH_TMP));
123 	while (readstd || argc > 0) {
124 		if (freopen("x.c", "w", stdout) == NULL)
125 			err(1, "x.c");
126 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
127 			err(2, "%s", argv[0]);
128 		process("x.c");
129 		if (readstd == 0)
130 			argc--, argv++;
131 		else
132 			readstd = 0;
133 	};
134 	flushsh();
135 	if (cflg == 0)
136 		xsdotc();
137 	if (strings[0] == '/')
138 		ignore(unlink(strings));
139 	exit(0);
140 }
141 
/*
 * Write the command synopsis to standard error and terminate with
 * exit status 1.
 */
static void
usage()
{
	fputs("usage: xstr [-v] [-c] [-] [name ...]\n", stderr);
	exit(1);
}
148 
/*
 * Current input line.  process() fills it with fgets(); yankstr() refills
 * it when a string literal continues past a backslash-newline.
 */
char linebuf[BUFSIZ];
150 
151 void
152 process(name)
153 	char *name;
154 {
155 	char *cp;
156 	register int c;
157 	register int incomm = 0;
158 	int ret;
159 
160 	printf("extern char\txstr[];\n");
161 	for (;;) {
162 		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
163 			if (ferror(stdin))
164 				err(3, "%s", name);
165 			break;
166 		}
167 		if (linebuf[0] == '#') {
168 			if (linebuf[1] == ' ' && isdigit(linebuf[2]))
169 				printf("#line%s", &linebuf[1]);
170 			else
171 				printf("%s", linebuf);
172 			continue;
173 		}
174 		for (cp = linebuf; (c = *cp++);) switch (c) {
175 
176 		case '"':
177 			if (incomm)
178 				goto def;
179 			if ((ret = (int) yankstr(&cp)) == -1)
180 				goto out;
181 			printf("(&xstr[%d])", ret);
182 			break;
183 
184 		case '\'':
185 			if (incomm)
186 				goto def;
187 			putchar(c);
188 			if (*cp)
189 				putchar(*cp++);
190 			break;
191 
192 		case '/':
193 			if (incomm || *cp != '*')
194 				goto def;
195 			incomm = 1;
196 			cp++;
197 			printf("/*");
198 			continue;
199 
200 		case '*':
201 			if (incomm && *cp == '/') {
202 				incomm = 0;
203 				cp++;
204 				printf("*/");
205 				continue;
206 			}
207 			goto def;
208 
209 def:
210 		default:
211 			putchar(c);
212 			break;
213 		}
214 	}
215 out:
216 	if (ferror(stdout))
217 		warn("x.c"), onintr();
218 }
219 
220 off_t
221 yankstr(cpp)
222 	register char **cpp;
223 {
224 	register char *cp = *cpp;
225 	register int c, ch;
226 	char dbuf[BUFSIZ];
227 	register char *dp = dbuf;
228 	register char *tp;
229 
230 	while ((c = *cp++)) {
231 		switch (c) {
232 
233 		case '"':
234 			cp++;
235 			goto out;
236 
237 		case '\\':
238 			c = *cp++;
239 			if (c == 0)
240 				break;
241 			if (c == '\n') {
242 				if (fgets(linebuf, sizeof linebuf, stdin)
243 				    == NULL) {
244 					if (ferror(stdin))
245 						err(3, "x.c");
246 					return(-1);
247 				}
248 				cp = linebuf;
249 				continue;
250 			}
251 			for (tp = "b\bt\tr\rn\nf\f\\\\\"\""; (ch = *tp++); tp++)
252 				if (c == ch) {
253 					c = *tp;
254 					goto gotc;
255 				}
256 			if (!octdigit(c)) {
257 				*dp++ = '\\';
258 				break;
259 			}
260 			c -= '0';
261 			if (!octdigit(*cp))
262 				break;
263 			c <<= 3, c += *cp++ - '0';
264 			if (!octdigit(*cp))
265 				break;
266 			c <<= 3, c += *cp++ - '0';
267 			break;
268 		}
269 gotc:
270 		*dp++ = c;
271 	}
272 out:
273 	*cpp = --cp;
274 	*dp = 0;
275 	return (hashit(dbuf, 1));
276 }
277 
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * The range test is used instead of isdigit() so that a negative char
 * value is handled without undefined behavior.
 */
int
octdigit(c)
	char c;
{
	return (c >= '0' && c <= '7');
}
284 
285 void
286 inithash()
287 {
288 	char buf[BUFSIZ];
289 	register FILE *mesgread = fopen(strings, "r");
290 
291 	if (mesgread == NULL)
292 		return;
293 	for (;;) {
294 		mesgpt = tellpt;
295 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
296 			break;
297 		ignore(hashit(buf, 0));
298 	}
299 	ignore(fclose(mesgread));
300 }
301 
/*
 * Read one NUL-terminated string from file into obuf.
 *
 * At most rmdr - 1 characters are stored, followed by a terminating
 * NUL.  Reading stops at a NUL byte, at end of file, or when the buffer
 * is full.  Characters are fetched through xgetc().
 *
 * Returns 0 if the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
int
fgetNUL(obuf, rmdr, file)
	char *obuf;
	register int rmdr;
	FILE *file;
{
	register int c;		/* must be int so EOF can be told apart */
	register char *buf = obuf;

	while (--rmdr > 0 && (c = xgetc(file)) != 0 && c != EOF)
		*buf++ = c;
	*buf = 0;
	return ((feof(file) || ferror(file)) ? 0 : 1);
}
316 
317 int
318 xgetc(file)
319 	FILE *file;
320 {
321 
322 	tellpt++;
323 	return (getc(file));
324 }
325 
/* Number of hash chains; hashit() indexes them with (last char & 0177). */
#define	BUCKETS	128

/*
 * One known string.  The bucket[] elements act as chain heads: only
 * their hnext field is used, and it points at the first real entry.
 */
struct	hash {
	off_t	hpt;		/* offset assigned to hstr (from mesgpt) */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: flushsh() must write this entry */
} bucket[BUCKETS];
334 
335 off_t
336 hashit(str, new)
337 	char *str;
338 	int new;
339 {
340 	int i;
341 	register struct hash *hp, *hp0;
342 
343 	hp = hp0 = &bucket[lastchr(str) & 0177];
344 	while (hp->hnext) {
345 		hp = hp->hnext;
346 		i = istail(str, hp->hstr);
347 		if (i >= 0)
348 			return (hp->hpt + i);
349 	}
350 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
351 		errx(8, "calloc");
352 	hp->hpt = mesgpt;
353 	if (!(hp->hstr = strdup(str)))
354 		err(1, NULL);
355 	mesgpt += strlen(hp->hstr) + 1;
356 	hp->hnext = hp0->hnext;
357 	hp->hnew = new;
358 	hp0->hnext = hp;
359 	return (hp->hpt);
360 }
361 
362 void
363 flushsh()
364 {
365 	register int i;
366 	register struct hash *hp;
367 	register FILE *mesgwrit;
368 	register int old = 0, new = 0;
369 
370 	for (i = 0; i < BUCKETS; i++)
371 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
372 			if (hp->hnew)
373 				new++;
374 			else
375 				old++;
376 	if (new == 0 && old != 0)
377 		return;
378 	mesgwrit = fopen(strings, old ? "r+" : "w");
379 	if (mesgwrit == NULL)
380 		perror(strings), exit(4);
381 	for (i = 0; i < BUCKETS; i++)
382 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
383 			found(hp->hnew, hp->hpt, hp->hstr);
384 			if (hp->hnew) {
385 				fseek(mesgwrit, hp->hpt, 0);
386 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
387 				if (ferror(mesgwrit))
388 					err(4, "%s", strings);
389 			}
390 		}
391 	if (fclose(mesgwrit) == EOF)
392 		err(4, "%s", strings);
393 }
394 
395 void
396 found(new, off, str)
397 	int new;
398 	off_t off;
399 	char *str;
400 {
401 	if (vflg == 0)
402 		return;
403 	if (!new)
404 		fprintf(stderr, "found at %d:", (int) off);
405 	else
406 		fprintf(stderr, "new at %d:", (int) off);
407 	prstr(str);
408 	fprintf(stderr, "\n");
409 }
410 
/*
 * Print the string cp on standard error in a printable form.
 *
 * Characters below space are written as '^' followed by the character
 * plus '`', DEL (0177) as "^?", and every byte with the high bit set as
 * a backslash and three octal digits.  Everything else is written
 * unchanged.
 */
void
prstr(cp)
	register char *cp;
{
	register int c;

	/* Mask to 8 bits so a negative char becomes 0200..0377. */
	while ((c = (*cp++ & 0377)))
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", (unsigned int) c);
		else
			fprintf(stderr, "%c", c);
}
427 
428 void
429 xsdotc()
430 {
431 	register FILE *strf = fopen(strings, "r");
432 	register FILE *xdotcf;
433 
434 	if (strf == NULL)
435 		err(5, "%s", strings);
436 	xdotcf = fopen("xs.c", "w");
437 	if (xdotcf == NULL)
438 		err(6, "xs.c");
439 	fprintf(xdotcf, "char\txstr[] = {\n");
440 	for (;;) {
441 		register int i, c;
442 
443 		for (i = 0; i < 8; i++) {
444 			c = getc(strf);
445 			if (ferror(strf)) {
446 				warn("%s", strings);
447 				onintr();
448 			}
449 			if (feof(strf)) {
450 				fprintf(xdotcf, "\n");
451 				goto out;
452 			}
453 			fprintf(xdotcf, "0x%02x,", c);
454 		}
455 		fprintf(xdotcf, "\n");
456 	}
457 out:
458 	fprintf(xdotcf, "};\n");
459 	ignore(fclose(xdotcf));
460 	ignore(fclose(strf));
461 }
462 
/*
 * Return the final character of the string cp, or the NUL character
 * when the string is empty.
 */
char
lastchr(cp)
	register char *cp;
{
	size_t len = strlen(cp);

	return (len == 0 ? '\0' : cp[len - 1]);
}
472 
/*
 * Test whether str is a tail (suffix) of the string "of".
 *
 * Returns the index in "of" at which str begins when it is, and -1
 * when it is not.  The lengths are compared before subtracting so the
 * unsigned difference can never wrap.
 */
int
istail(str, of)
	register char *str, *of;
{
	size_t slen = strlen(str);
	size_t olen = strlen(of);

	if (slen > olen || strcmp(of + (olen - slen), str) != 0)
		return (-1);
	return ((int) (olen - slen));
}
483 
484 void
485 onintr()
486 {
487 
488 	ignore(signal(SIGINT, SIG_IGN));
489 	if (strings[0] == '/')
490 		ignore(unlink(strings));
491 	ignore(unlink("x.c"));
492 	ignore(unlink("xs.c"));
493 	exit(7);
494 }
495