/* xref: /freebsd/bin/sh/mksyntax.c (revision fbbd9655e5107c68e4e0146ff22b73d7350475bc) */
/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Kenneth Almquist.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

3309a80d48SDavid E. O'Brien #if 0
344b88c807SRodney W. Grimes #ifndef lint
35ab0a2172SSteve Price static char const copyright[] =
364b88c807SRodney W. Grimes "@(#) Copyright (c) 1991, 1993\n\
374b88c807SRodney W. Grimes 	The Regents of the University of California.  All rights reserved.\n";
384b88c807SRodney W. Grimes #endif /* not lint */
394b88c807SRodney W. Grimes 
404b88c807SRodney W. Grimes #ifndef lint
413d7b5b93SPhilippe Charnier static char sccsid[] = "@(#)mksyntax.c	8.2 (Berkeley) 5/4/95";
424b88c807SRodney W. Grimes #endif /* not lint */
4309a80d48SDavid E. O'Brien #endif
442749b141SDavid E. O'Brien #include <sys/cdefs.h>
452749b141SDavid E. O'Brien __FBSDID("$FreeBSD$");
464b88c807SRodney W. Grimes 
/*
 * This program creates syntax.h and syntax.c.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"


/*
 * One named class: the identifier emitted as a #define in syntax.h and
 * the description emitted next to it as a comment.
 */
struct synclass {
	const char *name;	/* macro name written to syntax.h */
	const char *comment;	/* text written in the trailing comment */
};

/*
 * Syntax classes.  main() uses each entry's array index as the value of
 * its #define, so the order of the entries is significant.  The list is
 * terminated by an entry whose name is NULL.
 */
static const struct synclass synclass[] = {
	{ "CWORD",	"character is nothing special" },
	{ "CNL",	"newline character" },
	{ "CBACK",	"a backslash character" },
	{ "CSBACK",	"a backslash character in single quotes" },
	{ "CSQUOTE",	"single quote" },
	{ "CDQUOTE",	"double quote" },
	{ "CENDQUOTE",	"a terminating quote" },
	{ "CBQUOTE",	"backwards single quote" },
	{ "CVAR",	"a dollar sign" },
	{ "CENDVAR",	"a '}' character" },
	{ "CLP",	"a left paren in arithmetic" },
	{ "CRP",	"a right paren in arithmetic" },
	{ "CEOF",	"end of file" },
	{ "CCTL",	"like CWORD, except it must be escaped" },
	{ "CSPCL",	"these terminate a word" },
	{ "CIGN",	"character should be ignored" },
	{ NULL,		NULL }
};


/*
 * Syntax classes for is_ functions.  Warning:  if you add new classes
 * you may have to change the definition of the is_in_name macro.
 *
 * main() emits entry i with the value (1 << i), so each class is a
 * distinct bit and the order of the entries is significant.  The list is
 * terminated by an entry whose name is NULL.
 */
static const struct synclass is_entry[] = {
	{ "ISDIGIT",	"a digit" },
	{ "ISUPPER",	"an upper case letter" },
	{ "ISLOWER",	"a lower case letter" },
	{ "ISUNDER",	"an underscore" },
	{ "ISSPECL",	"the name of a special parameter" },
	{ NULL,		NULL }
};

/*
 * Banner comment written at the top of both generated files.
 */
static const char writer[] =
	"/*\n"
	" * This file was generated by the mksyntax program.\n"
	" */\n"
	"\n";


static FILE *cfile;	/* output stream for the generated syntax.c */
static FILE *hfile;	/* output stream for the generated syntax.h */

/* Helpers defined after main(). */
static void add_default(void);
static void finish(void);
static void init(const char *name);
static void add(const char *p, const char *type);
static void output_type_macros(void);

113aa9caaf6SPeter Wemm int
1145134c3f7SWarner Losh main(int argc __unused, char **argv __unused)
115aa9caaf6SPeter Wemm {
1164b88c807SRodney W. Grimes 	int i;
1174b88c807SRodney W. Grimes 	char buf[80];
1184b88c807SRodney W. Grimes 	int pos;
1194b88c807SRodney W. Grimes 
1204b88c807SRodney W. Grimes 	/* Create output files */
1214b88c807SRodney W. Grimes 	if ((cfile = fopen("syntax.c", "w")) == NULL) {
1224b88c807SRodney W. Grimes 		perror("syntax.c");
1234b88c807SRodney W. Grimes 		exit(2);
1244b88c807SRodney W. Grimes 	}
1254b88c807SRodney W. Grimes 	if ((hfile = fopen("syntax.h", "w")) == NULL) {
1264b88c807SRodney W. Grimes 		perror("syntax.h");
1274b88c807SRodney W. Grimes 		exit(2);
1284b88c807SRodney W. Grimes 	}
1294b88c807SRodney W. Grimes 	fputs(writer, hfile);
1304b88c807SRodney W. Grimes 	fputs(writer, cfile);
1314b88c807SRodney W. Grimes 
1324b88c807SRodney W. Grimes 	fputs("#include <sys/cdefs.h>\n", hfile);
1331767d529SJilles Tjoelker 	fputs("#include <limits.h>\n\n", hfile);
1344b88c807SRodney W. Grimes 
1354b88c807SRodney W. Grimes 	/* Generate the #define statements in the header file */
1364b88c807SRodney W. Grimes 	fputs("/* Syntax classes */\n", hfile);
1374b88c807SRodney W. Grimes 	for (i = 0 ; synclass[i].name ; i++) {
1384b88c807SRodney W. Grimes 		sprintf(buf, "#define %s %d", synclass[i].name, i);
1394b88c807SRodney W. Grimes 		fputs(buf, hfile);
140aa9caaf6SPeter Wemm 		for (pos = strlen(buf) ; pos < 32 ; pos = (pos + 8) & ~07)
1414b88c807SRodney W. Grimes 			putc('\t', hfile);
1424b88c807SRodney W. Grimes 		fprintf(hfile, "/* %s */\n", synclass[i].comment);
1434b88c807SRodney W. Grimes 	}
1444b88c807SRodney W. Grimes 	putc('\n', hfile);
1454b88c807SRodney W. Grimes 	fputs("/* Syntax classes for is_ functions */\n", hfile);
1464b88c807SRodney W. Grimes 	for (i = 0 ; is_entry[i].name ; i++) {
1474b88c807SRodney W. Grimes 		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
1484b88c807SRodney W. Grimes 		fputs(buf, hfile);
149aa9caaf6SPeter Wemm 		for (pos = strlen(buf) ; pos < 32 ; pos = (pos + 8) & ~07)
1504b88c807SRodney W. Grimes 			putc('\t', hfile);
1514b88c807SRodney W. Grimes 		fprintf(hfile, "/* %s */\n", is_entry[i].comment);
1524b88c807SRodney W. Grimes 	}
1534b88c807SRodney W. Grimes 	putc('\n', hfile);
1541767d529SJilles Tjoelker 	fputs("#define SYNBASE (1 - CHAR_MIN)\n", hfile);
1551767d529SJilles Tjoelker 	fputs("#define PEOF -SYNBASE\n\n", hfile);
1564b88c807SRodney W. Grimes 	putc('\n', hfile);
1574b88c807SRodney W. Grimes 	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
1584b88c807SRodney W. Grimes 	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
1594b88c807SRodney W. Grimes 	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
1604b88c807SRodney W. Grimes 	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
1614b88c807SRodney W. Grimes 	putc('\n', hfile);
1624b88c807SRodney W. Grimes 	output_type_macros();		/* is_digit, etc. */
1634b88c807SRodney W. Grimes 	putc('\n', hfile);
1644b88c807SRodney W. Grimes 
1654b88c807SRodney W. Grimes 	/* Generate the syntax tables. */
1661767d529SJilles Tjoelker 	fputs("#include \"parser.h\"\n", cfile);
1674b88c807SRodney W. Grimes 	fputs("#include \"shell.h\"\n", cfile);
1684b88c807SRodney W. Grimes 	fputs("#include \"syntax.h\"\n\n", cfile);
1691767d529SJilles Tjoelker 
1704b88c807SRodney W. Grimes 	fputs("/* syntax table used when not in quotes */\n", cfile);
1711767d529SJilles Tjoelker 	init("basesyntax");
1721767d529SJilles Tjoelker 	add_default();
1734b88c807SRodney W. Grimes 	add("\n", "CNL");
1744b88c807SRodney W. Grimes 	add("\\", "CBACK");
1754b88c807SRodney W. Grimes 	add("'", "CSQUOTE");
1764b88c807SRodney W. Grimes 	add("\"", "CDQUOTE");
1774b88c807SRodney W. Grimes 	add("`", "CBQUOTE");
1784b88c807SRodney W. Grimes 	add("$", "CVAR");
1794b88c807SRodney W. Grimes 	add("}", "CENDVAR");
1804b88c807SRodney W. Grimes 	add("<>();&| \t", "CSPCL");
1811767d529SJilles Tjoelker 	finish();
1821767d529SJilles Tjoelker 
1834b88c807SRodney W. Grimes 	fputs("\n/* syntax table used when in double quotes */\n", cfile);
1841767d529SJilles Tjoelker 	init("dqsyntax");
1851767d529SJilles Tjoelker 	add_default();
1864b88c807SRodney W. Grimes 	add("\n", "CNL");
1874b88c807SRodney W. Grimes 	add("\\", "CBACK");
1884b88c807SRodney W. Grimes 	add("\"", "CENDQUOTE");
1894b88c807SRodney W. Grimes 	add("`", "CBQUOTE");
1904b88c807SRodney W. Grimes 	add("$", "CVAR");
1914b88c807SRodney W. Grimes 	add("}", "CENDVAR");
1923a1b9c9eSJilles Tjoelker 	/* ':/' for tilde expansion, '-^]' for [a\-x] pattern ranges */
1933a1b9c9eSJilles Tjoelker 	add("!*?[]=~:/-^", "CCTL");
1941767d529SJilles Tjoelker 	finish();
1951767d529SJilles Tjoelker 
1964b88c807SRodney W. Grimes 	fputs("\n/* syntax table used when in single quotes */\n", cfile);
1971767d529SJilles Tjoelker 	init("sqsyntax");
1981767d529SJilles Tjoelker 	add_default();
1994b88c807SRodney W. Grimes 	add("\n", "CNL");
200a62ab027SJilles Tjoelker 	add("\\", "CSBACK");
2014b88c807SRodney W. Grimes 	add("'", "CENDQUOTE");
2023a1b9c9eSJilles Tjoelker 	/* ':/' for tilde expansion, '-^]' for [a\-x] pattern ranges */
2033a1b9c9eSJilles Tjoelker 	add("!*?[]=~:/-^", "CCTL");
2041767d529SJilles Tjoelker 	finish();
2051767d529SJilles Tjoelker 
2064b88c807SRodney W. Grimes 	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
2071767d529SJilles Tjoelker 	init("arisyntax");
2081767d529SJilles Tjoelker 	add_default();
2094b88c807SRodney W. Grimes 	add("\n", "CNL");
2104b88c807SRodney W. Grimes 	add("\\", "CBACK");
2114b88c807SRodney W. Grimes 	add("`", "CBQUOTE");
212d94c8673SJilles Tjoelker 	add("\"", "CIGN");
2134b88c807SRodney W. Grimes 	add("$", "CVAR");
2144b88c807SRodney W. Grimes 	add("}", "CENDVAR");
2154b88c807SRodney W. Grimes 	add("(", "CLP");
2164b88c807SRodney W. Grimes 	add(")", "CRP");
2171767d529SJilles Tjoelker 	finish();
2181767d529SJilles Tjoelker 
2194b88c807SRodney W. Grimes 	fputs("\n/* character classification table */\n", cfile);
2201767d529SJilles Tjoelker 	init("is_type");
2214b88c807SRodney W. Grimes 	add("0123456789", "ISDIGIT");
22240969e73SJilles Tjoelker 	add("abcdefghijklmnopqrstuvwxyz", "ISLOWER");
22340969e73SJilles Tjoelker 	add("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ISUPPER");
2244b88c807SRodney W. Grimes 	add("_", "ISUNDER");
2254b88c807SRodney W. Grimes 	add("#?$!-*@", "ISSPECL");
2261767d529SJilles Tjoelker 	finish();
2271767d529SJilles Tjoelker 
2284b88c807SRodney W. Grimes 	exit(0);
2294b88c807SRodney W. Grimes }
2304b88c807SRodney W. Grimes 
2314b88c807SRodney W. Grimes 
2324b88c807SRodney W. Grimes /*
2331767d529SJilles Tjoelker  * Output the header and declaration of a syntax table.
2344b88c807SRodney W. Grimes  */
2354b88c807SRodney W. Grimes 
236aa9caaf6SPeter Wemm static void
2371767d529SJilles Tjoelker init(const char *name)
2384b88c807SRodney W. Grimes {
2391767d529SJilles Tjoelker 	fprintf(hfile, "extern const char %s[];\n", name);
2401767d529SJilles Tjoelker 	fprintf(cfile, "const char %s[SYNBASE + CHAR_MAX + 1] = {\n", name);
2411767d529SJilles Tjoelker }
2424b88c807SRodney W. Grimes 
2431767d529SJilles Tjoelker 
2441767d529SJilles Tjoelker static void
2451767d529SJilles Tjoelker add_one(const char *key, const char *type)
2461767d529SJilles Tjoelker {
2471767d529SJilles Tjoelker 	fprintf(cfile, "\t[SYNBASE + %s] = %s,\n", key, type);
2484b88c807SRodney W. Grimes }
2494b88c807SRodney W. Grimes 
2504b88c807SRodney W. Grimes 
/*
 * Add default values to the syntax table.
 *
 * PEOF is classified as CEOF and each listed CTL* value as CCTL.  The
 * keys are written symbolically, so the generated syntax.c must have
 * these names in scope (presumably from the parser.h it includes --
 * confirm against that header).
 */

static void
add_default(void)
{
	add_one("PEOF",                "CEOF");
	add_one("CTLESC",              "CCTL");
	add_one("CTLVAR",              "CCTL");
	add_one("CTLENDVAR",           "CCTL");
	add_one("CTLBACKQ",            "CCTL");
	add_one("CTLBACKQ + CTLQUOTE", "CCTL");
	add_one("CTLARI",              "CCTL");
	add_one("CTLENDARI",           "CCTL");
	add_one("CTLQUOTEMARK",        "CCTL");
	add_one("CTLQUOTEEND",         "CCTL");
}


2711767d529SJilles Tjoelker /*
2721767d529SJilles Tjoelker  * Output the footer of a syntax table.
2731767d529SJilles Tjoelker  */
2741767d529SJilles Tjoelker 
2751767d529SJilles Tjoelker static void
2761767d529SJilles Tjoelker finish(void)
2771767d529SJilles Tjoelker {
2781767d529SJilles Tjoelker 	fputs("};\n", cfile);
2794b88c807SRodney W. Grimes }
2804b88c807SRodney W. Grimes 
2814b88c807SRodney W. Grimes 
2824b88c807SRodney W. Grimes /*
2834b88c807SRodney W. Grimes  * Add entries to the syntax table.
2844b88c807SRodney W. Grimes  */
2854b88c807SRodney W. Grimes 
286aa9caaf6SPeter Wemm static void
287384aedabSJilles Tjoelker add(const char *p, const char *type)
2884b88c807SRodney W. Grimes {
2891767d529SJilles Tjoelker 	for (; *p; ++p) {
2901767d529SJilles Tjoelker 		char c = *p;
2911767d529SJilles Tjoelker 		switch (c) {
2921767d529SJilles Tjoelker 		case '\t': c = 't';  break;
2931767d529SJilles Tjoelker 		case '\n': c = 'n';  break;
2941767d529SJilles Tjoelker 		case '\'': c = '\''; break;
2951767d529SJilles Tjoelker 		case '\\': c = '\\'; break;
2961767d529SJilles Tjoelker 
2971767d529SJilles Tjoelker 		default:
2981767d529SJilles Tjoelker 			fprintf(cfile, "\t[SYNBASE + '%c'] = %s,\n", c, type);
2991767d529SJilles Tjoelker 			continue;
3004b88c807SRodney W. Grimes 		}
3011767d529SJilles Tjoelker 		fprintf(cfile, "\t[SYNBASE + '\\%c'] = %s,\n", c, type);
3024b88c807SRodney W. Grimes 	}
3034b88c807SRodney W. Grimes }
3044b88c807SRodney W. Grimes 
3054b88c807SRodney W. Grimes 
/*
 * Character classification macros (e.g. is_digit), written verbatim to
 * syntax.h.  Digits are contiguous, so is_digit and digit_val test the
 * range directly; the remaining predicates look the character up in the
 * is_type table.  The macro argument is parenthesized under the (int)
 * cast so that an expression argument is converted as a whole.  The list
 * is terminated by NULL.
 */

static const char *macro[] = {
	"#define is_digit(c)\t((unsigned int)((c) - '0') <= 9)",
	"#define is_eof(c)\t((c) == PEOF)",
	"#define is_alpha(c)\t((is_type+SYNBASE)[(int)(c)] & (ISUPPER|ISLOWER))",
	"#define is_name(c)\t((is_type+SYNBASE)[(int)(c)] & (ISUPPER|ISLOWER|ISUNDER))",
	"#define is_in_name(c)\t((is_type+SYNBASE)[(int)(c)] & (ISUPPER|ISLOWER|ISUNDER|ISDIGIT))",
	"#define is_special(c)\t((is_type+SYNBASE)[(int)(c)] & (ISSPECL|ISDIGIT))",
	"#define digit_val(c)\t((c) - '0')",
	NULL
};

322aa9caaf6SPeter Wemm static void
3235134c3f7SWarner Losh output_type_macros(void)
324aa9caaf6SPeter Wemm {
325384aedabSJilles Tjoelker 	const char **pp;
3264b88c807SRodney W. Grimes 
3274b88c807SRodney W. Grimes 	for (pp = macro ; *pp ; pp++)
3284b88c807SRodney W. Grimes 		fprintf(hfile, "%s\n", *pp);
3294b88c807SRodney W. Grimes }
330