xref: /freebsd/contrib/one-true-awk/main.c (revision 725a9f47324d42037db93c27ceb40d4956872f3e)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
/* Release identifier; main() prints it as "awk <version>" for -version, --version and -d. */
const char *version = "version 20240122";
26 
27 #define DEBUG
28 #include <stdio.h>
29 #include <ctype.h>
30 #include <locale.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <signal.h>
34 #include "awk.h"
35 
36 extern	char	**environ;
37 extern	int	nfields;
38 
39 int	dbg	= 0;
40 Awkfloat	srand_seed = 1;
41 char	*cmdname;	/* gets argv[0] for error messages */
42 extern	FILE	*yyin;	/* lex input file */
43 char	*lexprog;	/* points to program argument if it exists */
44 extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
45 enum compile_states	compile_time = ERROR_PRINTING;
46 
47 static char	**pfile;	/* program filenames from -f's */
48 static size_t	maxpfile;	/* max program filename */
49 static size_t	npfile;		/* number of filenames */
50 static size_t	curpfile;	/* current filename */
51 
52 bool	CSV = false;	/* true for csv input */
53 
54 bool	safe = false;	/* true => "safe" mode */
55 
56 size_t	awk_mb_cur_max = 1;
57 
/*
 * SIGFPE handler: reports the exception through FATAL().
 *
 * When SA_SIGINFO is defined the handler takes the three-argument
 * sigaction form and appends a description selected by si->si_code;
 * otherwise it takes only the signal number and prints the bare message.
 * The n and uc arguments are not used.
 */
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
	, siginfo_t *si, void *uc
#endif
)
{
#ifdef SA_SIGINFO
	/* Indexed by si_code; undesignated slots are NULL, slot 0 is the fallback. */
	static const char *emsg[] = {
		[0] = "Unknown error",
		[FPE_INTDIV] = "Integer divide by zero",
		[FPE_INTOVF] = "Integer overflow",
		[FPE_FLTDIV] = "Floating point divide by zero",
		[FPE_FLTOVF] = "Floating point overflow",
		[FPE_FLTUND] = "Floating point underflow",
		[FPE_FLTRES] = "Floating point inexact result",
		[FPE_FLTINV] = "Invalid Floating point operation",
		[FPE_FLTSUB] = "Subscript out of range",
	};
	const size_t nmsg = sizeof(emsg) / sizeof(emsg[0]);
	const char *why = emsg[0];

	/* The size_t cast pushes negative codes past the end of the table. */
	if ((size_t)si->si_code < nmsg && emsg[si->si_code] != NULL)
		why = emsg[si->si_code];
	FATAL("floating point exception: %s", why);
#else
	FATAL("floating point exception");
#endif
}
84 
85 /* Can this work with recursive calls?  I don't think so.
86 void segvcatch(int n)
87 {
88 	FATAL("segfault.  Do you have an unbounded recursive call?", n);
89 }
90 */
91 
/*
 * Translate a -F argument into the field separator to use.
 * Wart: the exact string "t" stands for a tab; any other string is
 * returned unchanged (the same pointer that was passed in).
 */
static const char *
setfs(char *p)
{
	return strcmp(p, "t") == 0 ? "\t" : p;
}
100 
/*
 * Fetch the argument of the option currently at (*argv)[1].
 *
 * If text follows the two-character option ("-fsomething"), a pointer to
 * that text is returned and *argc / *argv are left alone.  Otherwise the
 * argument is the next word ("-f something"): *argc and *argv are shifted
 * by one and that word is returned.  When no such word remains, FATAL()
 * is called with msg.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	if (*attached != '\0')		/* arg is -fsomething */
		return attached;

	/* arg is -f something */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
113 
114 int main(int argc, char *argv[])
115 {
116 	const char *fs = NULL;
117 	char *fn, *vn;
118 
119 	setlocale(LC_CTYPE, "");
120 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
121 	awk_mb_cur_max = MB_CUR_MAX;
122 	cmdname = argv[0];
123 	if (argc == 1) {
124 		fprintf(stderr,
125 		  "usage: %s [-F fs | --csv] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
126 		  cmdname);
127 		exit(1);
128 	}
129 #ifdef SA_SIGINFO
130 	{
131 		struct sigaction sa;
132 		sa.sa_sigaction = fpecatch;
133 		sa.sa_flags = SA_SIGINFO;
134 		sigemptyset(&sa.sa_mask);
135 		(void)sigaction(SIGFPE, &sa, NULL);
136 	}
137 #else
138 	(void)signal(SIGFPE, fpecatch);
139 #endif
140 	/*signal(SIGSEGV, segvcatch); experiment */
141 
142 	/* Set and keep track of the random seed */
143 	srand_seed = 1;
144 	srandom((unsigned long) srand_seed);
145 
146 	yyin = NULL;
147 	symtab = makesymtab(NSYMTAB/NSYMTAB);
148 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
149 		if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0) {
150 			printf("awk %s\n", version);
151 			return 0;
152 		}
153 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
154 			argc--;
155 			argv++;
156 			break;
157 		}
158 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
159 			CSV = true;
160 			argc--;
161 			argv++;
162 			continue;
163 		}
164 		switch (argv[1][1]) {
165 		case 's':
166 			if (strcmp(argv[1], "-safe") == 0)
167 				safe = true;
168 			break;
169 		case 'f':	/* next argument is program filename */
170 			fn = getarg(&argc, &argv, "no program filename");
171 			if (npfile >= maxpfile) {
172 				maxpfile += 20;
173 				pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile));
174 				if (pfile == NULL)
175 					FATAL("error allocating space for -f options");
176  			}
177 			pfile[npfile++] = fn;
178  			break;
179 		case 'F':	/* set field separator */
180 			fs = setfs(getarg(&argc, &argv, "no field separator"));
181 			break;
182 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
183 			vn = getarg(&argc, &argv, "no variable name");
184 			if (isclvar(vn))
185 				setclvar(vn);
186 			else
187 				FATAL("invalid -v option argument: %s", vn);
188 			break;
189 		case 'd':
190 			dbg = atoi(&argv[1][2]);
191 			if (dbg == 0)
192 				dbg = 1;
193 			printf("awk %s\n", version);
194 			break;
195 		default:
196 			WARNING("unknown option %s ignored", argv[1]);
197 			break;
198 		}
199 		argc--;
200 		argv++;
201 	}
202 
203 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
204 		WARNING("danger: don't set FS when --csv is in effect");
205 
206 	/* argv[1] is now the first argument */
207 	if (npfile == 0) {	/* no -f; first argument is program */
208 		if (argc <= 1) {
209 			if (dbg)
210 				exit(0);
211 			FATAL("no program given");
212 		}
213 		DPRINTF("program = |%s|\n", argv[1]);
214 		lexprog = argv[1];
215 		argc--;
216 		argv++;
217 	}
218 	recinit(recsize);
219 	syminit();
220 	compile_time = COMPILING;
221 	argv[0] = cmdname;	/* put prog name at front of arglist */
222 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
223 	arginit(argc, argv);
224 	if (!safe)
225 		envinit(environ);
226 	yyparse();
227 #if 0
228 	// Doing this would comply with POSIX, but is not compatible with
229 	// other awks and with what most users expect. So comment it out.
230 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
231 #endif
232 	if (fs)
233 		*FS = qstring(fs, '\0');
234 	DPRINTF("errorflag=%d\n", errorflag);
235 	if (errorflag == 0) {
236 		compile_time = RUNNING;
237 		run(winner);
238 	} else
239 		bracecheck();
240 	return(errorflag);
241 }
242 
243 int pgetc(void)		/* get 1 character from awk program */
244 {
245 	int c;
246 
247 	for (;;) {
248 		if (yyin == NULL) {
249 			if (curpfile >= npfile)
250 				return EOF;
251 			if (strcmp(pfile[curpfile], "-") == 0)
252 				yyin = stdin;
253 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
254 				FATAL("can't open file %s", pfile[curpfile]);
255 			lineno = 1;
256 		}
257 		if ((c = getc(yyin)) != EOF)
258 			return c;
259 		if (yyin != stdin)
260 			fclose(yyin);
261 		yyin = NULL;
262 		curpfile++;
263 	}
264 }
265 
266 char *cursource(void)	/* current source file name */
267 {
268 	if (npfile > 0)
269 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
270 	else
271 		return NULL;
272 }
273