xref: /freebsd/usr.bin/xstr/xstr.c (revision 0b8224d1cc9dc6c9778ba04a75b2c8d47e5d7481)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 
33 #include <sys/types.h>
34 
35 #include <ctype.h>
36 #include <err.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <signal.h>
40 #include <string.h>
41 #include <unistd.h>
42 
43 #include "pathnames.h"
44 
45 /*
46  * xstr - extract and hash strings in a C program
47  *
48  * Bill Joy UCB
49  * November, 1978
50  */
51 
/*
 * Explicitly discard the value of an expression, e.g. a return value that
 * is deliberately not checked.  The argument is parenthesized so that any
 * expression, not just a single primary, is cast as a whole.
 */
#define	ignore(a)	((void)(a))
53 
54 static off_t	tellpt;
55 
56 static off_t	mesgpt;
57 static char	cstrings[] =	"strings";
58 static char	*strings =	cstrings;
59 
60 static int	cflg;
61 static int	vflg;
62 static int	readstd;
63 
64 static char lastchr(char *);
65 
66 static int fgetNUL(char *, int, FILE *);
67 static int istail(char *, char *);
68 static int octdigit(char);
69 static int xgetc(FILE *);
70 
71 static off_t hashit(char *, int);
72 static off_t yankstr(char **);
73 
74 static void usage(void) __dead2;
75 
76 static void flushsh(void);
77 static void found(int, off_t, char *);
78 static void inithash(void);
79 static void onintr(int);
80 static void process(const char *);
81 static void prstr(char *);
82 static void xsdotc(void);
83 
84 int
main(int argc,char * argv[])85 main(int argc, char *argv[])
86 {
87 	int c;
88 	int fdesc;
89 
90 	while ((c = getopt(argc, argv, "-cv")) != -1)
91 		switch (c) {
92 		case '-':
93 			readstd++;
94 			break;
95 		case 'c':
96 			cflg++;
97 			break;
98 		case 'v':
99 			vflg++;
100 			break;
101 		default:
102 			usage();
103 		}
104 	argc -= optind;
105 	argv += optind;
106 
107 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
108 		signal(SIGINT, onintr);
109 	if (cflg || (argc == 0 && !readstd))
110 		inithash();
111 	else {
112 		strings = strdup(_PATH_TMP);
113 		if (strings == NULL)
114 			err(1, "strdup() failed");
115 		fdesc = mkstemp(strings);
116 		if (fdesc == -1)
117 			err(1, "Unable to create temporary file");
118 		close(fdesc);
119 	}
120 
121 	while (readstd || argc > 0) {
122 		if (freopen("x.c", "w", stdout) == NULL)
123 			err(1, "x.c");
124 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
125 			err(2, "%s", argv[0]);
126 		process("x.c");
127 		if (readstd == 0)
128 			argc--, argv++;
129 		else
130 			readstd = 0;
131 	}
132 	flushsh();
133 	if (cflg == 0)
134 		xsdotc();
135 	if (strings[0] == '/')
136 		ignore(unlink(strings));
137 	exit(0);
138 }
139 
/* Print the command synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
146 
/* Current input line; shared with yankstr(), which may refill it. */
static char linebuf[BUFSIZ];

/*
 * Copy standard input to standard output, replacing every string literal
 * found outside a comment by a reference "(&xstr[offset])" into the
 * string table.
 *
 *  - Lines starting with '#' are copied through untouched, except that
 *    "# <digit>..." is rewritten as "#line <digit>...".
 *  - The character following a single quote is copied verbatim so that a
 *    quote inside a character constant is not taken for the start of a
 *    string; for a backslash escape the escaped character is copied too,
 *    so that '\"' does not start a string either.
 *  - Comment state is tracked across lines in `incomm'.
 *
 * `name' is used only in the message printed when reading stdin fails.
 * A write error on stdout is reported and handled through onintr().
 */
static void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/* <ctype.h> functions need an unsigned char value. */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++) != '\0';) {
			switch (c) {

			case '"':
				if (incomm)
					goto def;
				/* yankstr() returns -1 on EOF inside a string. */
				if ((ret = (int) yankstr(&cp)) == -1)
					goto out;
				printf("(&xstr[%d])", ret);
				break;

			case '\'':
				if (incomm)
					goto def;
				putchar(c);
				/* Pass an escape such as \" through as a unit. */
				if (*cp == '\\')
					putchar(*cp++);
				if (*cp)
					putchar(*cp++);
				break;

			case '/':
				if (incomm || *cp != '*')
					goto def;
				incomm = 1;
				cp++;
				printf("/*");
				continue;

			case '*':
				if (incomm && *cp == '/') {
					incomm = 0;
					cp++;
					printf("*/");
					continue;
				}
				goto def;

def:
			default:
				putchar(c);
				break;
			}
		}
	}
out:
	if (ferror(stdout))
		warn("x.c"), onintr(0);
}
216 
217 static off_t
yankstr(char ** cpp)218 yankstr(char **cpp)
219 {
220 	char *cp = *cpp;
221 	int c, ch;
222 	char dbuf[BUFSIZ];
223 	char *dp = dbuf;
224 	char *tp;
225 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
226 
227 	while ((c = *cp++)) {
228 		if (dp == dbuf + sizeof(dbuf) - 3)
229 			errx(1, "message too long");
230 		switch (c) {
231 
232 		case '"':
233 			cp++;
234 			goto out;
235 
236 		case '\\':
237 			c = *cp++;
238 			if (c == 0)
239 				break;
240 			if (c == '\n') {
241 				if (fgets(linebuf, sizeof linebuf, stdin)
242 				    == NULL) {
243 					if (ferror(stdin))
244 						err(3, "x.c");
245 					return(-1);
246 				}
247 				cp = linebuf;
248 				continue;
249 			}
250 			for (tp = tmp; (ch = *tp++); tp++)
251 				if (c == ch) {
252 					c = *tp;
253 					goto gotc;
254 				}
255 			if (!octdigit(c)) {
256 				*dp++ = '\\';
257 				break;
258 			}
259 			c -= '0';
260 			if (!octdigit(*cp))
261 				break;
262 			c <<= 3, c += *cp++ - '0';
263 			if (!octdigit(*cp))
264 				break;
265 			c <<= 3, c += *cp++ - '0';
266 			break;
267 		}
268 gotc:
269 		*dp++ = c;
270 	}
271 out:
272 	*cpp = --cp;
273 	*dp = 0;
274 	return (hashit(dbuf, 1));
275 }
276 
/*
 * Return 1 if `c' is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range comparison is used instead of isdigit() so that negative
 * char values (bytes with the high bit set) are handled safely.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
282 
283 static void
inithash(void)284 inithash(void)
285 {
286 	char buf[BUFSIZ];
287 	FILE *mesgread = fopen(strings, "r");
288 
289 	if (mesgread == NULL)
290 		return;
291 	for (;;) {
292 		mesgpt = tellpt;
293 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
294 			break;
295 		ignore(hashit(buf, 0));
296 	}
297 	ignore(fclose(mesgread));
298 }
299 
/*
 * Read characters from `file' through xgetc() into `obuf' until a NUL
 * byte, end of file, or until fewer than two of the `rmdr' bytes of
 * space remain.  The result is always NUL-terminated.
 *
 * Returns 0 if the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;
	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
311 
312 static int
xgetc(FILE * file)313 xgetc(FILE *file)
314 {
315 
316 	tellpt++;
317 	return (getc(file));
318 }
319 
/* Number of hash chains; matches the 0177 mask applied in hashit(). */
#define	BUCKETS	128

/* One string known to the string table. */
struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: still has to be written out */
};

/* Chain heads; only the hnext member of a head is used. */
static struct hash bucket[BUCKETS];
328 
329 static off_t
hashit(char * str,int new)330 hashit(char *str, int new)
331 {
332 	int i;
333 	struct hash *hp, *hp0;
334 
335 	hp = hp0 = &bucket[lastchr(str) & 0177];
336 	while (hp->hnext) {
337 		hp = hp->hnext;
338 		i = istail(str, hp->hstr);
339 		if (i >= 0)
340 			return (hp->hpt + i);
341 	}
342 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
343 		errx(8, "calloc");
344 	hp->hpt = mesgpt;
345 	if (!(hp->hstr = strdup(str)))
346 		err(1, NULL);
347 	mesgpt += strlen(hp->hstr) + 1;
348 	hp->hnext = hp0->hnext;
349 	hp->hnew = new;
350 	hp0->hnext = hp;
351 	return (hp->hpt);
352 }
353 
354 static void
flushsh(void)355 flushsh(void)
356 {
357 	int i;
358 	struct hash *hp;
359 	FILE *mesgwrit;
360 	int old = 0, new = 0;
361 
362 	for (i = 0; i < BUCKETS; i++)
363 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
364 			if (hp->hnew)
365 				new++;
366 			else
367 				old++;
368 	if (new == 0 && old != 0)
369 		return;
370 	mesgwrit = fopen(strings, old ? "r+" : "w");
371 	if (mesgwrit == NULL)
372 		err(4, "%s", strings);
373 	for (i = 0; i < BUCKETS; i++)
374 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
375 			found(hp->hnew, hp->hpt, hp->hstr);
376 			if (hp->hnew) {
377 				fseek(mesgwrit, hp->hpt, 0);
378 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
379 				if (ferror(mesgwrit))
380 					err(4, "%s", strings);
381 			}
382 		}
383 	if (fclose(mesgwrit) == EOF)
384 		err(4, "%s", strings);
385 }
386 
387 static void
found(int new,off_t off,char * str)388 found(int new, off_t off, char *str)
389 {
390 	if (vflg == 0)
391 		return;
392 	if (!new)
393 		fprintf(stderr, "found at %d:", (int) off);
394 	else
395 		fprintf(stderr, "new at %d:", (int) off);
396 	prstr(str);
397 	fprintf(stderr, "\n");
398 }
399 
/*
 * Print the string `cp' on standard error in a printable form:
 *   - bytes below space are shown as '^' followed by the byte plus '`'
 *     (so byte 1 prints as "^a"),
 *   - byte 0177 is shown as "^?",
 *   - bytes 0200 and above are shown as a backslash and three octal
 *     digits,
 *   - everything else is printed as is.
 */
static void
prstr(char *cp)
{
	int c;

	while ((c = (*cp++ & 0377))) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
	}
}
415 
416 static void
xsdotc(void)417 xsdotc(void)
418 {
419 	FILE *strf = fopen(strings, "r");
420 	FILE *xdotcf;
421 
422 	if (strf == NULL)
423 		err(5, "%s", strings);
424 	xdotcf = fopen("xs.c", "w");
425 	if (xdotcf == NULL)
426 		err(6, "xs.c");
427 	fprintf(xdotcf, "char\txstr[] = {\n");
428 	for (;;) {
429 		int i, c;
430 
431 		for (i = 0; i < 8; i++) {
432 			c = getc(strf);
433 			if (ferror(strf)) {
434 				warn("%s", strings);
435 				onintr(0);
436 			}
437 			if (feof(strf)) {
438 				fprintf(xdotcf, "\n");
439 				goto out;
440 			}
441 			fprintf(xdotcf, "0x%02x,", c);
442 		}
443 		fprintf(xdotcf, "\n");
444 	}
445 out:
446 	fprintf(xdotcf, "};\n");
447 	ignore(fclose(xdotcf));
448 	ignore(fclose(strf));
449 }
450 
/*
 * Return the last character of the string `cp', or NUL if the string is
 * empty.
 */
static char
lastchr(char *cp)
{
	if (*cp == '\0')
		return (*cp);
	while (cp[1] != '\0')
		cp++;
	return (*cp);
}
459 
/*
 * Test whether `str' is a tail (suffix) of `of'.
 *
 * Returns the index in `of' at which `str' starts, or -1 if `str' is
 * longer than `of' or does not match its end.  An empty `str' matches at
 * strlen(of).
 */
static int
istail(char *str, char *of)
{
	size_t oflen = strlen(of);
	size_t slen = strlen(str);

	/* Compare lengths first so the difference can never wrap around. */
	if (slen > oflen)
		return (-1);
	if (strcmp(of + (oflen - slen), str) != 0)
		return (-1);
	return ((int)(oflen - slen));
}
469 
470 static void
onintr(int dummy __unused)471 onintr(int dummy __unused)
472 {
473 
474 	ignore(signal(SIGINT, SIG_IGN));
475 	if (strings[0] == '/')
476 		ignore(unlink(strings));
477 	ignore(unlink("x.c"));
478 	ignore(unlink("xs.c"));
479 	exit(7);
480 }
481