xref: /freebsd/usr.bin/xstr/xstr.c (revision b3e7694832e81d7a904a10f525f8797b753bf0d3)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 __FBSDID("$FreeBSD$");
35 
36 #ifndef lint
37 static const char copyright[] =
38 "@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif
41 
42 #ifndef lint
43 static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
44 #endif
45 
46 #include <sys/types.h>
47 
48 #include <ctype.h>
49 #include <err.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <signal.h>
53 #include <string.h>
54 #include <unistd.h>
55 
56 #include "pathnames.h"
57 
58 /*
59  * xstr - extract and hash strings in a C program
60  *
61  * Bill Joy UCB
62  * November, 1978
63  */
64 
/*
 * Evaluate an expression and explicitly discard its value.  The argument
 * is parenthesized so the cast covers the whole expression instead of
 * binding only to its first operand.
 */
#define	ignore(a)	((void)(a))
66 
/* Number of characters requested through xgetc() so far. */
static off_t	tellpt = 0;

/* Offset that hashit() gives to the next string it has to add. */
static off_t	mesgpt = 0;

/*
 * Path of the strings file.  It starts out as "strings"; main() may point
 * it at a temporary file instead.
 */
static char	cstrings[] = "strings";
static char	*strings = cstrings;

/* Option counters, bumped by main() for -c, -v and "-" respectively. */
static int	cflg = 0;
static int	vflg = 0;
static int	readstd = 0;
76 
77 static char lastchr(char *);
78 
79 static int fgetNUL(char *, int, FILE *);
80 static int istail(char *, char *);
81 static int octdigit(char);
82 static int xgetc(FILE *);
83 
84 static off_t hashit(char *, int);
85 static off_t yankstr(char **);
86 
87 static void usage(void) __dead2;
88 
89 static void flushsh(void);
90 static void found(int, off_t, char *);
91 static void inithash(void);
92 static void onintr(int);
93 static void process(const char *);
94 static void prstr(char *);
95 static void xsdotc(void);
96 
97 int
98 main(int argc, char *argv[])
99 {
100 	int c;
101 	int fdesc;
102 
103 	while ((c = getopt(argc, argv, "-cv")) != -1)
104 		switch (c) {
105 		case '-':
106 			readstd++;
107 			break;
108 		case 'c':
109 			cflg++;
110 			break;
111 		case 'v':
112 			vflg++;
113 			break;
114 		default:
115 			usage();
116 		}
117 	argc -= optind;
118 	argv += optind;
119 
120 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
121 		signal(SIGINT, onintr);
122 	if (cflg || (argc == 0 && !readstd))
123 		inithash();
124 	else {
125 		strings = strdup(_PATH_TMP);
126 		if (strings == NULL)
127 			err(1, "strdup() failed");
128 		fdesc = mkstemp(strings);
129 		if (fdesc == -1)
130 			err(1, "Unable to create temporary file");
131 		close(fdesc);
132 	}
133 
134 	while (readstd || argc > 0) {
135 		if (freopen("x.c", "w", stdout) == NULL)
136 			err(1, "x.c");
137 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
138 			err(2, "%s", argv[0]);
139 		process("x.c");
140 		if (readstd == 0)
141 			argc--, argv++;
142 		else
143 			readstd = 0;
144 	}
145 	flushsh();
146 	if (cflg == 0)
147 		xsdotc();
148 	if (strings[0] == '/')
149 		ignore(unlink(strings));
150 	exit(0);
151 }
152 
/* Print the command synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
159 
/*
 * Current input line.  process() fills it one line at a time; yankstr()
 * refills it when a string literal continues on the following line.
 */
static char linebuf[BUFSIZ];

/*
 * Copy standard input to standard output, replacing each string literal
 * that appears outside a block comment with a reference into xstr[].
 *
 * name is used only to label a read error on standard input.
 */
static void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;		/* nonzero while inside a block comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/*
			 * "# <digit>..." is rewritten as "#line <digit>...";
			 * every other '#' line is copied untouched.  The cast
			 * keeps isdigit() defined for bytes above 0177.
			 */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++) != '\0';) {
			switch (c) {

			case '"':
				if (incomm)
					goto def;
				if ((ret = (int) yankstr(&cp)) == -1)
					goto out;
				printf("(&xstr[%d])", ret);
				break;

			case '\'':
				if (incomm)
					goto def;
				putchar(c);
				/*
				 * Copy the character constant verbatim up to
				 * and including its closing quote, keeping an
				 * escaped character attached to its backslash
				 * so that a quote inside the constant is not
				 * mistaken for the start of a string literal.
				 */
				while (*cp != '\0') {
					c = *cp++;
					putchar(c);
					if (c == '\\') {
						if (*cp != '\0')
							putchar(*cp++);
					} else if (c == '\'')
						break;
				}
				break;

			case '/':
				if (incomm || *cp != '*')
					goto def;
				incomm = 1;
				cp++;
				printf("/*");
				continue;

			case '*':
				if (incomm && *cp == '/') {
					incomm = 0;
					cp++;
					printf("*/");
					continue;
				}
				goto def;

def:
			default:
				putchar(c);
				break;
			}
		}
	}
out:
	if (ferror(stdout)) {
		warn("x.c");
		onintr(0);
	}
}
229 
230 static off_t
231 yankstr(char **cpp)
232 {
233 	char *cp = *cpp;
234 	int c, ch;
235 	char dbuf[BUFSIZ];
236 	char *dp = dbuf;
237 	char *tp;
238 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
239 
240 	while ((c = *cp++)) {
241 		if (dp == dbuf + sizeof(dbuf) - 3)
242 			errx(1, "message too long");
243 		switch (c) {
244 
245 		case '"':
246 			cp++;
247 			goto out;
248 
249 		case '\\':
250 			c = *cp++;
251 			if (c == 0)
252 				break;
253 			if (c == '\n') {
254 				if (fgets(linebuf, sizeof linebuf, stdin)
255 				    == NULL) {
256 					if (ferror(stdin))
257 						err(3, "x.c");
258 					return(-1);
259 				}
260 				cp = linebuf;
261 				continue;
262 			}
263 			for (tp = tmp; (ch = *tp++); tp++)
264 				if (c == ch) {
265 					c = *tp;
266 					goto gotc;
267 				}
268 			if (!octdigit(c)) {
269 				*dp++ = '\\';
270 				break;
271 			}
272 			c -= '0';
273 			if (!octdigit(*cp))
274 				break;
275 			c <<= 3, c += *cp++ - '0';
276 			if (!octdigit(*cp))
277 				break;
278 			c <<= 3, c += *cp++ - '0';
279 			break;
280 		}
281 gotc:
282 		*dp++ = c;
283 	}
284 out:
285 	*cpp = --cp;
286 	*dp = 0;
287 	return (hashit(dbuf, 1));
288 }
289 
/*
 * Return 1 if c is one of the octal digits '0'..'7', otherwise 0.
 *
 * A plain range test is used instead of isdigit() because c is a plain
 * char and may be negative, which isdigit() does not accept.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
295 
296 static void
297 inithash(void)
298 {
299 	char buf[BUFSIZ];
300 	FILE *mesgread = fopen(strings, "r");
301 
302 	if (mesgread == NULL)
303 		return;
304 	for (;;) {
305 		mesgpt = tellpt;
306 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
307 			break;
308 		ignore(hashit(buf, 0));
309 	}
310 	ignore(fclose(mesgread));
311 }
312 
/*
 * Read characters from file into obuf until a NUL, end of input, or
 * rmdr - 1 characters have been stored, then NUL-terminate obuf.
 *
 * Returns 0 once the stream is at end-of-file or in error, 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		/* Keep one byte in reserve for the terminator. */
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;
	return (!feof(file) && !ferror(file));
}
324 
325 static int
326 xgetc(FILE *file)
327 {
328 
329 	tellpt++;
330 	return (getc(file));
331 }
332 
/* hashit() indexes bucket[] with (last character & 0177), i.e. 0..127. */
#define	BUCKETS	128

/* One string known to the program, linked into its bucket's chain. */
struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry in the same bucket */
	short	hnew;		/* nonzero: flushsh() must write it out */
};

/* Chain heads; only the hnext member of these elements is used. */
static struct hash bucket[BUCKETS];
341 
342 static off_t
343 hashit(char *str, int new)
344 {
345 	int i;
346 	struct hash *hp, *hp0;
347 
348 	hp = hp0 = &bucket[lastchr(str) & 0177];
349 	while (hp->hnext) {
350 		hp = hp->hnext;
351 		i = istail(str, hp->hstr);
352 		if (i >= 0)
353 			return (hp->hpt + i);
354 	}
355 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
356 		errx(8, "calloc");
357 	hp->hpt = mesgpt;
358 	if (!(hp->hstr = strdup(str)))
359 		err(1, NULL);
360 	mesgpt += strlen(hp->hstr) + 1;
361 	hp->hnext = hp0->hnext;
362 	hp->hnew = new;
363 	hp0->hnext = hp;
364 	return (hp->hpt);
365 }
366 
367 static void
368 flushsh(void)
369 {
370 	int i;
371 	struct hash *hp;
372 	FILE *mesgwrit;
373 	int old = 0, new = 0;
374 
375 	for (i = 0; i < BUCKETS; i++)
376 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
377 			if (hp->hnew)
378 				new++;
379 			else
380 				old++;
381 	if (new == 0 && old != 0)
382 		return;
383 	mesgwrit = fopen(strings, old ? "r+" : "w");
384 	if (mesgwrit == NULL)
385 		err(4, "%s", strings);
386 	for (i = 0; i < BUCKETS; i++)
387 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
388 			found(hp->hnew, hp->hpt, hp->hstr);
389 			if (hp->hnew) {
390 				fseek(mesgwrit, hp->hpt, 0);
391 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
392 				if (ferror(mesgwrit))
393 					err(4, "%s", strings);
394 			}
395 		}
396 	if (fclose(mesgwrit) == EOF)
397 		err(4, "%s", strings);
398 }
399 
400 static void
401 found(int new, off_t off, char *str)
402 {
403 	if (vflg == 0)
404 		return;
405 	if (!new)
406 		fprintf(stderr, "found at %d:", (int) off);
407 	else
408 		fprintf(stderr, "new at %d:", (int) off);
409 	prstr(str);
410 	fprintf(stderr, "\n");
411 }
412 
/*
 * Print the string cp on standard error in a visible form:
 *   - bytes below ' ' as '^' followed by the byte plus '`',
 *   - 0177 as "^?",
 *   - bytes 0200 and above as a backslash and three octal digits,
 *   - everything else unchanged.
 */
static void
prstr(char *cp)
{
	int c;

	/* Mask to 0..0377 so that a negative char cannot slip through. */
	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* 0200 itself is escaped too */
			fprintf(stderr, "\\%03o", (unsigned int) c);
		else
			fprintf(stderr, "%c", c);
	}
}
428 
429 static void
430 xsdotc(void)
431 {
432 	FILE *strf = fopen(strings, "r");
433 	FILE *xdotcf;
434 
435 	if (strf == NULL)
436 		err(5, "%s", strings);
437 	xdotcf = fopen("xs.c", "w");
438 	if (xdotcf == NULL)
439 		err(6, "xs.c");
440 	fprintf(xdotcf, "char\txstr[] = {\n");
441 	for (;;) {
442 		int i, c;
443 
444 		for (i = 0; i < 8; i++) {
445 			c = getc(strf);
446 			if (ferror(strf)) {
447 				warn("%s", strings);
448 				onintr(0);
449 			}
450 			if (feof(strf)) {
451 				fprintf(xdotcf, "\n");
452 				goto out;
453 			}
454 			fprintf(xdotcf, "0x%02x,", c);
455 		}
456 		fprintf(xdotcf, "\n");
457 	}
458 out:
459 	fprintf(xdotcf, "};\n");
460 	ignore(fclose(xdotcf));
461 	ignore(fclose(strf));
462 }
463 
/* Return the last character of cp, or NUL when cp is the empty string. */
static char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	return (len > 0 ? cp[len - 1] : '\0');
}
472 
/*
 * If str is a tail (suffix) of "of", return the index in "of" at which
 * it starts; otherwise return -1.
 *
 * The lengths are compared as size_t before subtracting, so the "longer
 * than" case does not depend on how a wrapped unsigned difference
 * converts to int.
 */
static int
istail(char *str, char *of)
{
	size_t slen = strlen(str);
	size_t olen = strlen(of);

	if (slen > olen || strcmp(of + (olen - slen), str) != 0)
		return (-1);
	return ((int)(olen - slen));
}
482 
483 static void
484 onintr(int dummy __unused)
485 {
486 
487 	ignore(signal(SIGINT, SIG_IGN));
488 	if (strings[0] == '/')
489 		ignore(unlink(strings));
490 	ignore(unlink("x.c"));
491 	ignore(unlink("xs.c"));
492 	exit(7);
493 }
494