xref: /freebsd/usr.bin/xstr/xstr.c (revision 61ba55bcf70f2340f9c943c9571113b3fd8eda69)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 
#ifndef lint
/* Identification strings; both are compiled out when lint is defined. */
static const char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";
static const char sccsid[] = "@(#)xstr.c\t8.1 (Berkeley) 6/9/93";
#endif
42 
43 #include <sys/types.h>
44 
45 #include <ctype.h>
46 #include <err.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <signal.h>
50 #include <string.h>
51 #include <unistd.h>
52 
53 #include "pathnames.h"
54 
55 /*
56  * xstr - extract and hash strings in a C program
57  *
58  * Bill Joy UCB
59  * November, 1978
60  */
61 
62 #define	ignore(a)	((void) a)
63 
64 static off_t	tellpt;
65 
66 static off_t	mesgpt;
67 static char	cstrings[] =	"strings";
68 static char	*strings =	cstrings;
69 
70 static int	cflg;
71 static int	vflg;
72 static int	readstd;
73 
74 static char lastchr(char *);
75 
76 static int fgetNUL(char *, int, FILE *);
77 static int istail(char *, char *);
78 static int octdigit(char);
79 static int xgetc(FILE *);
80 
81 static off_t hashit(char *, int);
82 static off_t yankstr(char **);
83 
84 static void usage(void) __dead2;
85 
86 static void flushsh(void);
87 static void found(int, off_t, char *);
88 static void inithash(void);
89 static void onintr(int);
90 static void process(const char *);
91 static void prstr(char *);
92 static void xsdotc(void);
93 
94 int
95 main(int argc, char *argv[])
96 {
97 	int c;
98 	int fdesc;
99 
100 	while ((c = getopt(argc, argv, "-cv")) != -1)
101 		switch (c) {
102 		case '-':
103 			readstd++;
104 			break;
105 		case 'c':
106 			cflg++;
107 			break;
108 		case 'v':
109 			vflg++;
110 			break;
111 		default:
112 			usage();
113 		}
114 	argc -= optind;
115 	argv += optind;
116 
117 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
118 		signal(SIGINT, onintr);
119 	if (cflg || (argc == 0 && !readstd))
120 		inithash();
121 	else {
122 		strings = strdup(_PATH_TMP);
123 		if (strings == NULL)
124 			err(1, "strdup() failed");
125 		fdesc = mkstemp(strings);
126 		if (fdesc == -1)
127 			err(1, "Unable to create temporary file");
128 		close(fdesc);
129 	}
130 
131 	while (readstd || argc > 0) {
132 		if (freopen("x.c", "w", stdout) == NULL)
133 			err(1, "x.c");
134 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
135 			err(2, "%s", argv[0]);
136 		process("x.c");
137 		if (readstd == 0)
138 			argc--, argv++;
139 		else
140 			readstd = 0;
141 	}
142 	flushsh();
143 	if (cflg == 0)
144 		xsdotc();
145 	if (strings[0] == '/')
146 		ignore(unlink(strings));
147 	exit(0);
148 }
149 
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
156 
/*
 * Current input line.  yankstr() refills it when a string literal is
 * continued on the following line.
 */
static char linebuf[BUFSIZ];

/*
 * Filter the C source on stdin to stdout.  Every string literal found
 * outside a comment is handed to yankstr() and replaced by a reference
 * of the form (&xstr[offset]).  Lines that start with '#' are copied
 * through, except that "# <digit>..." line markers are rewritten as
 * #line directives.  Comments and character constants are copied as is.
 *
 * name only labels a read error on stdin (exit status 3).  A write error
 * on stdout is reported as "x.c" and ends the program through onintr().
 */
static void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;		/* nonzero while inside a comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/* <ctype.h> functions require an unsigned char value. */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++) != '\0';) {
			switch (c) {

			case '"':
				if (incomm)
					goto def;
				/* yankstr() advances cp past the literal. */
				if ((ret = (int) yankstr(&cp)) == -1)
					goto out;
				printf("(&xstr[%d])", ret);
				break;

			case '\'':
				if (incomm)
					goto def;
				putchar(c);
				/*
				 * Copy the character after the quote so that
				 * '"' does not start a string.  For an escape
				 * such as '\"' the backslash is copied as
				 * well, otherwise the escaped quote would be
				 * scanned as the start of a string literal.
				 */
				if (*cp == '\\' && cp[1] != '\0')
					putchar(*cp++);
				if (*cp)
					putchar(*cp++);
				break;

			case '/':
				if (incomm || *cp != '*')
					goto def;
				incomm = 1;
				cp++;
				printf("/*");
				continue;

			case '*':
				if (incomm && *cp == '/') {
					incomm = 0;
					cp++;
					printf("*/");
					continue;
				}
				goto def;

def:
			default:
				putchar(c);
				break;
			}
		}
	}
out:
	/* Flush first so that errors on still-buffered output are seen. */
	if (fflush(stdout) == EOF || ferror(stdout)) {
		warn("x.c");
		onintr(0);
	}
}
226 
227 static off_t
228 yankstr(char **cpp)
229 {
230 	char *cp = *cpp;
231 	int c, ch;
232 	char dbuf[BUFSIZ];
233 	char *dp = dbuf;
234 	char *tp;
235 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
236 
237 	while ((c = *cp++)) {
238 		if (dp == dbuf + sizeof(dbuf) - 3)
239 			errx(1, "message too long");
240 		switch (c) {
241 
242 		case '"':
243 			cp++;
244 			goto out;
245 
246 		case '\\':
247 			c = *cp++;
248 			if (c == 0)
249 				break;
250 			if (c == '\n') {
251 				if (fgets(linebuf, sizeof linebuf, stdin)
252 				    == NULL) {
253 					if (ferror(stdin))
254 						err(3, "x.c");
255 					return(-1);
256 				}
257 				cp = linebuf;
258 				continue;
259 			}
260 			for (tp = tmp; (ch = *tp++); tp++)
261 				if (c == ch) {
262 					c = *tp;
263 					goto gotc;
264 				}
265 			if (!octdigit(c)) {
266 				*dp++ = '\\';
267 				break;
268 			}
269 			c -= '0';
270 			if (!octdigit(*cp))
271 				break;
272 			c <<= 3, c += *cp++ - '0';
273 			if (!octdigit(*cp))
274 				break;
275 			c <<= 3, c += *cp++ - '0';
276 			break;
277 		}
278 gotc:
279 		*dp++ = c;
280 	}
281 out:
282 	*cpp = --cp;
283 	*dp = 0;
284 	return (hashit(dbuf, 1));
285 }
286 
/*
 * Return 1 when c is an octal digit ('0' through '7'), 0 otherwise.
 * A plain range test is used because passing a negative char value to a
 * <ctype.h> function is undefined.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
292 
293 static void
294 inithash(void)
295 {
296 	char buf[BUFSIZ];
297 	FILE *mesgread = fopen(strings, "r");
298 
299 	if (mesgread == NULL)
300 		return;
301 	for (;;) {
302 		mesgpt = tellpt;
303 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
304 			break;
305 		ignore(hashit(buf, 0));
306 	}
307 	ignore(fclose(mesgread));
308 }
309 
/*
 * Read one NUL-terminated string from file into obuf, storing at most
 * rmdr - 1 characters plus a terminating NUL.  Reading stops at a NUL
 * byte, at EOF, or when the buffer is full.
 *
 * Returns 0 once the stream is at end of file or in error, 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;
	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
321 
322 static int
323 xgetc(FILE *file)
324 {
325 
326 	tellpt++;
327 	return (getc(file));
328 }
329 
/* Number of hash chains; matches the 0177 mask applied in hashit(). */
#define	BUCKETS	128

/* One known string and its position in the strings file. */
struct hash {
	off_t	hpt;		/* offset assigned to the string */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: must be written by flushsh() */
};

/*
 * Chain heads.  Only the hnext member of a head is used; the entries
 * themselves are allocated in hashit().
 */
static struct hash bucket[BUCKETS];
338 
339 static off_t
340 hashit(char *str, int new)
341 {
342 	int i;
343 	struct hash *hp, *hp0;
344 
345 	hp = hp0 = &bucket[lastchr(str) & 0177];
346 	while (hp->hnext) {
347 		hp = hp->hnext;
348 		i = istail(str, hp->hstr);
349 		if (i >= 0)
350 			return (hp->hpt + i);
351 	}
352 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
353 		errx(8, "calloc");
354 	hp->hpt = mesgpt;
355 	if (!(hp->hstr = strdup(str)))
356 		err(1, NULL);
357 	mesgpt += strlen(hp->hstr) + 1;
358 	hp->hnext = hp0->hnext;
359 	hp->hnew = new;
360 	hp0->hnext = hp;
361 	return (hp->hpt);
362 }
363 
364 static void
365 flushsh(void)
366 {
367 	int i;
368 	struct hash *hp;
369 	FILE *mesgwrit;
370 	int old = 0, new = 0;
371 
372 	for (i = 0; i < BUCKETS; i++)
373 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
374 			if (hp->hnew)
375 				new++;
376 			else
377 				old++;
378 	if (new == 0 && old != 0)
379 		return;
380 	mesgwrit = fopen(strings, old ? "r+" : "w");
381 	if (mesgwrit == NULL)
382 		err(4, "%s", strings);
383 	for (i = 0; i < BUCKETS; i++)
384 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
385 			found(hp->hnew, hp->hpt, hp->hstr);
386 			if (hp->hnew) {
387 				fseek(mesgwrit, hp->hpt, 0);
388 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
389 				if (ferror(mesgwrit))
390 					err(4, "%s", strings);
391 			}
392 		}
393 	if (fclose(mesgwrit) == EOF)
394 		err(4, "%s", strings);
395 }
396 
397 static void
398 found(int new, off_t off, char *str)
399 {
400 	if (vflg == 0)
401 		return;
402 	if (!new)
403 		fprintf(stderr, "found at %d:", (int) off);
404 	else
405 		fprintf(stderr, "new at %d:", (int) off);
406 	prstr(str);
407 	fprintf(stderr, "\n");
408 }
409 
/*
 * Print the string cp on stderr in a visible form:
 *   bytes below space   as '^' followed by the byte plus '`'
 *   DEL (0177)          as "^?"
 *   bytes 0200 and up   as a three-digit octal escape
 *   everything else     unchanged
 */
static void
prstr(char *cp)
{
	int c;

	/* Mask to 8 bits so that a negative char cannot look like a control. */
	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
	}
}
425 
426 static void
427 xsdotc(void)
428 {
429 	FILE *strf = fopen(strings, "r");
430 	FILE *xdotcf;
431 
432 	if (strf == NULL)
433 		err(5, "%s", strings);
434 	xdotcf = fopen("xs.c", "w");
435 	if (xdotcf == NULL)
436 		err(6, "xs.c");
437 	fprintf(xdotcf, "char\txstr[] = {\n");
438 	for (;;) {
439 		int i, c;
440 
441 		for (i = 0; i < 8; i++) {
442 			c = getc(strf);
443 			if (ferror(strf)) {
444 				warn("%s", strings);
445 				onintr(0);
446 			}
447 			if (feof(strf)) {
448 				fprintf(xdotcf, "\n");
449 				goto out;
450 			}
451 			fprintf(xdotcf, "0x%02x,", c);
452 		}
453 		fprintf(xdotcf, "\n");
454 	}
455 out:
456 	fprintf(xdotcf, "};\n");
457 	ignore(fclose(xdotcf));
458 	ignore(fclose(strf));
459 }
460 
/* Return the last character of cp, or NUL when cp is the empty string. */
static char
lastchr(char *cp)
{
	if (*cp == '\0')
		return ('\0');
	return (cp[strlen(cp) - 1]);
}
469 
/*
 * Test whether str is a tail (suffix) of the string of.
 *
 * Returns the index in of at which str begins, or -1 when str is not a
 * suffix.  The lengths are compared as size_t before subtracting, so the
 * difference is never negative and is narrowed to int only once it is
 * known to be a valid index.
 */
static int
istail(char *str, char *of)
{
	size_t oflen = strlen(of);
	size_t slen = strlen(str);
	size_t start;

	if (slen > oflen)
		return (-1);
	start = oflen - slen;
	if (strcmp(of + start, str) != 0)
		return (-1);
	return ((int)start);
}
479 
480 static void
481 onintr(int dummy __unused)
482 {
483 
484 	ignore(signal(SIGINT, SIG_IGN));
485 	if (strings[0] == '/')
486 		ignore(unlink(strings));
487 	ignore(unlink("x.c"));
488 	ignore(unlink("xs.c"));
489 	exit(7);
490 }
491