xref: /freebsd/usr.bin/xstr/xstr.c (revision bdcbfde31e8e9b343f113a1956384bdf30d1ed62)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 
/* Copyright notice kept as a string constant; compiled out when `lint` is defined. */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif
38 
39 
40 #include <sys/types.h>
41 
42 #include <ctype.h>
43 #include <err.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <signal.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "pathnames.h"
51 
52 /*
53  * xstr - extract and hash strings in a C program
54  *
55  * Bill Joy UCB
56  * November, 1978
57  */
58 
/*
 * Explicitly discard the value of expression `a` (typically a return value
 * that is deliberately not checked).  The argument is parenthesized so that
 * the cast applies to the whole expression, whatever operators it contains.
 */
#define	ignore(a)	((void)(a))
60 
/* Incremented by xgetc() once per character requested from a stream. */
static off_t	tellpt;

/* Offset that hashit() assigns to the next string it enters. */
static off_t	mesgpt;
/* Default name of the strings file. */
static char	cstrings[] =	"strings";
/* Name of the strings file in use; main() may replace it with a temporary file. */
static char	*strings =	cstrings;

static int	cflg;		/* counted from -c */
static int	vflg;		/* counted from -v; enables the reports made by found() */
static int	readstd;	/* counted from "-"; main() then processes stdin once */
70 
/*
 * Forward declarations for the file-local functions defined below,
 * grouped by return type.
 */
static char lastchr(char *);

static int fgetNUL(char *, int, FILE *);
static int istail(char *, char *);
static int octdigit(char);
static int xgetc(FILE *);

static off_t hashit(char *, int);
static off_t yankstr(char **);

/* usage() prints a message and exits; it never returns. */
static void usage(void) __dead2;

static void flushsh(void);
static void found(int, off_t, char *);
static void inithash(void);
static void onintr(int);
static void process(const char *);
static void prstr(char *);
static void xsdotc(void);
90 
91 int
92 main(int argc, char *argv[])
93 {
94 	int c;
95 	int fdesc;
96 
97 	while ((c = getopt(argc, argv, "-cv")) != -1)
98 		switch (c) {
99 		case '-':
100 			readstd++;
101 			break;
102 		case 'c':
103 			cflg++;
104 			break;
105 		case 'v':
106 			vflg++;
107 			break;
108 		default:
109 			usage();
110 		}
111 	argc -= optind;
112 	argv += optind;
113 
114 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
115 		signal(SIGINT, onintr);
116 	if (cflg || (argc == 0 && !readstd))
117 		inithash();
118 	else {
119 		strings = strdup(_PATH_TMP);
120 		if (strings == NULL)
121 			err(1, "strdup() failed");
122 		fdesc = mkstemp(strings);
123 		if (fdesc == -1)
124 			err(1, "Unable to create temporary file");
125 		close(fdesc);
126 	}
127 
128 	while (readstd || argc > 0) {
129 		if (freopen("x.c", "w", stdout) == NULL)
130 			err(1, "x.c");
131 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
132 			err(2, "%s", argv[0]);
133 		process("x.c");
134 		if (readstd == 0)
135 			argc--, argv++;
136 		else
137 			readstd = 0;
138 	}
139 	flushsh();
140 	if (cflg == 0)
141 		xsdotc();
142 	if (strings[0] == '/')
143 		ignore(unlink(strings));
144 	exit(0);
145 }
146 
/*
 * Print the command synopsis on standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: xstr [-cv] [-] [file ...]\n";

	fprintf(stderr, "%s", synopsis);
	exit(1);
}
153 
/*
 * Current input line.  Filled by process() and refilled by yankstr() when
 * a string literal continues onto the next line.
 */
static char linebuf[BUFSIZ];
155 
156 static void
157 process(const char *name)
158 {
159 	char *cp;
160 	int c;
161 	int incomm = 0;
162 	int ret;
163 
164 	printf("extern char\txstr[];\n");
165 	for (;;) {
166 		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
167 			if (ferror(stdin))
168 				err(3, "%s", name);
169 			break;
170 		}
171 		if (linebuf[0] == '#') {
172 			if (linebuf[1] == ' ' && isdigit(linebuf[2]))
173 				printf("#line%s", &linebuf[1]);
174 			else
175 				printf("%s", linebuf);
176 			continue;
177 		}
178 		for (cp = linebuf; (c = *cp++);) switch (c) {
179 
180 		case '"':
181 			if (incomm)
182 				goto def;
183 			if ((ret = (int) yankstr(&cp)) == -1)
184 				goto out;
185 			printf("(&xstr[%d])", ret);
186 			break;
187 
188 		case '\'':
189 			if (incomm)
190 				goto def;
191 			putchar(c);
192 			if (*cp)
193 				putchar(*cp++);
194 			break;
195 
196 		case '/':
197 			if (incomm || *cp != '*')
198 				goto def;
199 			incomm = 1;
200 			cp++;
201 			printf("/*");
202 			continue;
203 
204 		case '*':
205 			if (incomm && *cp == '/') {
206 				incomm = 0;
207 				cp++;
208 				printf("*/");
209 				continue;
210 			}
211 			goto def;
212 
213 def:
214 		default:
215 			putchar(c);
216 			break;
217 		}
218 	}
219 out:
220 	if (ferror(stdout))
221 		warn("x.c"), onintr(0);
222 }
223 
224 static off_t
225 yankstr(char **cpp)
226 {
227 	char *cp = *cpp;
228 	int c, ch;
229 	char dbuf[BUFSIZ];
230 	char *dp = dbuf;
231 	char *tp;
232 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
233 
234 	while ((c = *cp++)) {
235 		if (dp == dbuf + sizeof(dbuf) - 3)
236 			errx(1, "message too long");
237 		switch (c) {
238 
239 		case '"':
240 			cp++;
241 			goto out;
242 
243 		case '\\':
244 			c = *cp++;
245 			if (c == 0)
246 				break;
247 			if (c == '\n') {
248 				if (fgets(linebuf, sizeof linebuf, stdin)
249 				    == NULL) {
250 					if (ferror(stdin))
251 						err(3, "x.c");
252 					return(-1);
253 				}
254 				cp = linebuf;
255 				continue;
256 			}
257 			for (tp = tmp; (ch = *tp++); tp++)
258 				if (c == ch) {
259 					c = *tp;
260 					goto gotc;
261 				}
262 			if (!octdigit(c)) {
263 				*dp++ = '\\';
264 				break;
265 			}
266 			c -= '0';
267 			if (!octdigit(*cp))
268 				break;
269 			c <<= 3, c += *cp++ - '0';
270 			if (!octdigit(*cp))
271 				break;
272 			c <<= 3, c += *cp++ - '0';
273 			break;
274 		}
275 gotc:
276 		*dp++ = c;
277 	}
278 out:
279 	*cpp = --cp;
280 	*dp = 0;
281 	return (hashit(dbuf, 1));
282 }
283 
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range comparison is used instead of isdigit() so that the
 * result is well defined for every char value, including negative ones.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
289 
290 static void
291 inithash(void)
292 {
293 	char buf[BUFSIZ];
294 	FILE *mesgread = fopen(strings, "r");
295 
296 	if (mesgread == NULL)
297 		return;
298 	for (;;) {
299 		mesgpt = tellpt;
300 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
301 			break;
302 		ignore(hashit(buf, 0));
303 	}
304 	ignore(fclose(mesgread));
305 }
306 
/*
 * Read characters from file through xgetc() into obuf until a NUL or EOF
 * is seen or only one byte of the rmdr-byte buffer is left, then terminate
 * obuf with a NUL.
 *
 * Returns 0 if the stream is at end-of-file or in error afterwards,
 * 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		/* Test the space first so no character is read and dropped. */
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;

	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
318 
319 static int
320 xgetc(FILE *file)
321 {
322 
323 	tellpt++;
324 	return (getc(file));
325 }
326 
/* Number of hash chains; hashit() selects one with (last character & 0177). */
#define	BUCKETS	128

/*
 * One string known to the program.  The elements of bucket[] serve as
 * chain heads: hashit() and flushsh() only use their hnext member.
 */
static struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* copy of the string made with strdup() */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: flushsh() must write the string out */
} bucket[BUCKETS];
335 
336 static off_t
337 hashit(char *str, int new)
338 {
339 	int i;
340 	struct hash *hp, *hp0;
341 
342 	hp = hp0 = &bucket[lastchr(str) & 0177];
343 	while (hp->hnext) {
344 		hp = hp->hnext;
345 		i = istail(str, hp->hstr);
346 		if (i >= 0)
347 			return (hp->hpt + i);
348 	}
349 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
350 		errx(8, "calloc");
351 	hp->hpt = mesgpt;
352 	if (!(hp->hstr = strdup(str)))
353 		err(1, NULL);
354 	mesgpt += strlen(hp->hstr) + 1;
355 	hp->hnext = hp0->hnext;
356 	hp->hnew = new;
357 	hp0->hnext = hp;
358 	return (hp->hpt);
359 }
360 
361 static void
362 flushsh(void)
363 {
364 	int i;
365 	struct hash *hp;
366 	FILE *mesgwrit;
367 	int old = 0, new = 0;
368 
369 	for (i = 0; i < BUCKETS; i++)
370 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
371 			if (hp->hnew)
372 				new++;
373 			else
374 				old++;
375 	if (new == 0 && old != 0)
376 		return;
377 	mesgwrit = fopen(strings, old ? "r+" : "w");
378 	if (mesgwrit == NULL)
379 		err(4, "%s", strings);
380 	for (i = 0; i < BUCKETS; i++)
381 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
382 			found(hp->hnew, hp->hpt, hp->hstr);
383 			if (hp->hnew) {
384 				fseek(mesgwrit, hp->hpt, 0);
385 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
386 				if (ferror(mesgwrit))
387 					err(4, "%s", strings);
388 			}
389 		}
390 	if (fclose(mesgwrit) == EOF)
391 		err(4, "%s", strings);
392 }
393 
394 static void
395 found(int new, off_t off, char *str)
396 {
397 	if (vflg == 0)
398 		return;
399 	if (!new)
400 		fprintf(stderr, "found at %d:", (int) off);
401 	else
402 		fprintf(stderr, "new at %d:", (int) off);
403 	prstr(str);
404 	fprintf(stderr, "\n");
405 }
406 
/*
 * Print the string cp on standard error in a visible form: characters
 * below space as '^' followed by the character offset by '`', DEL as
 * "^?", bytes with the high bit set as a three-digit octal escape, and
 * everything else unchanged.
 */
static void
prstr(char *cp)
{
	int c;

	/* Mask to 0..255 so that bytes with the high bit set stay positive. */
	while ((c = (*cp++ & 0377)))
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
}
422 
423 static void
424 xsdotc(void)
425 {
426 	FILE *strf = fopen(strings, "r");
427 	FILE *xdotcf;
428 
429 	if (strf == NULL)
430 		err(5, "%s", strings);
431 	xdotcf = fopen("xs.c", "w");
432 	if (xdotcf == NULL)
433 		err(6, "xs.c");
434 	fprintf(xdotcf, "char\txstr[] = {\n");
435 	for (;;) {
436 		int i, c;
437 
438 		for (i = 0; i < 8; i++) {
439 			c = getc(strf);
440 			if (ferror(strf)) {
441 				warn("%s", strings);
442 				onintr(0);
443 			}
444 			if (feof(strf)) {
445 				fprintf(xdotcf, "\n");
446 				goto out;
447 			}
448 			fprintf(xdotcf, "0x%02x,", c);
449 		}
450 		fprintf(xdotcf, "\n");
451 	}
452 out:
453 	fprintf(xdotcf, "};\n");
454 	ignore(fclose(xdotcf));
455 	ignore(fclose(strf));
456 }
457 
/*
 * Return the last character of the string cp, or NUL if the string is
 * empty.
 */
static char
lastchr(char *cp)
{
	char *p;

	/* Stop on the character that is directly followed by the NUL. */
	for (p = cp; p[0] != '\0' && p[1] != '\0'; p++)
		continue;
	return (*p);
}
466 
/*
 * Test whether str is a tail (suffix) of the string `of`.
 *
 * Returns the offset within `of` at which str begins, or -1 if str is
 * longer than `of` or is not its suffix.  An empty str is a tail of any
 * string and yields strlen(of).
 *
 * The lengths are compared as size_t before subtracting, so the result
 * never depends on how an out-of-range unsigned difference converts to int.
 */
static int
istail(char *str, char *of)
{
	size_t oflen = strlen(of);
	size_t slen = strlen(str);

	if (slen > oflen)
		return (-1);
	if (strcmp(of + (oflen - slen), str) != 0)
		return (-1);
	return ((int)(oflen - slen));
}
476 
477 static void
478 onintr(int dummy __unused)
479 {
480 
481 	ignore(signal(SIGINT, SIG_IGN));
482 	if (strings[0] == '/')
483 		ignore(unlink(strings));
484 	ignore(unlink("x.c"));
485 	ignore(unlink("xs.c"));
486 	exit(7);
487 }
488