xref: /freebsd/contrib/one-true-awk/main.c (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 const char	*version = "version 20240623";
26 
27 #define DEBUG
28 #include <stdio.h>
29 #include <ctype.h>
30 #include <locale.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <signal.h>
34 #include "awk.h"
35 
36 extern	char	**environ;	/* process environment; main() hands it to envinit() unless safe mode is on */
37 extern	int	nfields;
38 
39 int	dbg	= 0;	/* debug level; set by the -d option in main() */
40 Awkfloat	srand_seed = 1;	/* current random seed; main() resets it to 1 and seeds srandom() with it */
41 char	*cmdname;	/* gets argv[0] for error messages */
42 extern	FILE	*yyin;	/* lex input file */
43 char	*lexprog;	/* points to program argument if it exists */
44 extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
45 enum compile_states	compile_time = ERROR_PRINTING;	/* main() sets COMPILING before yyparse() and RUNNING before run() */
46 
47 static char	**pfile;	/* program filenames from -f's */
48 static size_t	maxpfile;	/* number of slots currently allocated in pfile */
49 static size_t	npfile;		/* number of filenames */
50 static size_t	curpfile;	/* current filename */
51 
52 bool	CSV = false;	/* true for csv input */
53 
54 bool	safe = false;	/* true => "safe" mode */
55 
56 size_t	awk_mb_cur_max = 1;	/* main() overwrites this with MB_CUR_MAX after setlocale(LC_CTYPE, "") */
57 
/*
 * SIGFPE handler.  Reports the exception through FATAL() and does not
 * return.  When SA_SIGINFO is available the handler takes the three
 * argument form and the message includes a description derived from
 * si->si_code; otherwise only the generic message is printed.
 */
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
	, siginfo_t *si, void *uc
#endif
)
{
#ifdef SA_SIGINFO
	/* si_code value -> human readable description */
	static const struct {
		int		code;
		const char	*text;
	} reasons[] = {
		{ FPE_INTDIV, "Integer divide by zero" },
		{ FPE_INTOVF, "Integer overflow" },
		{ FPE_FLTDIV, "Floating point divide by zero" },
		{ FPE_FLTOVF, "Floating point overflow" },
		{ FPE_FLTUND, "Floating point underflow" },
		{ FPE_FLTRES, "Floating point inexact result" },
		{ FPE_FLTINV, "Invalid Floating point operation" },
		{ FPE_FLTSUB, "Subscript out of range" },
	};
	const char *mesg = "Unknown error";	/* used for 0 and any unlisted code */
	size_t i;

	for (i = 0; i < sizeof(reasons) / sizeof(reasons[0]); i++) {
		if (reasons[i].code == si->si_code) {
			mesg = reasons[i].text;
			break;
		}
	}
	FATAL("floating point exception: %s", mesg);
#else
	FATAL("floating point exception");
#endif
}
104 
105 /* Can this work with recursive calls?  I don't think so.
106 void segvcatch(int n)
107 {
108 	FATAL("segfault.  Do you have an unbounded recursive call?", n);
109 }
110 */
111 
/*
 * Translate a field-separator argument into the separator to use.
 * The one-letter string "t" is a historical shorthand for a tab;
 * every other argument is handed back unchanged.
 */
static const char *
setfs(char *p)
{
	/* wart: t=>\t */
	return (strcmp(p, "t") == 0) ? "\t" : p;
}
120 
/*
 * Fetch the argument of the option word at (*argv)[1].
 *
 * If text is attached to the option letter (-fsomething) a pointer to
 * that text is returned and argc/argv are left alone.  Otherwise the
 * option word is consumed (argc decremented, argv advanced) and the
 * following word is returned; FATAL(msg) is called when no such word
 * remains.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	if (*attached != '\0')
		return attached;	/* arg is -fsomething */

	/* arg is -f something: step over the option word itself */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
133 
/*
 * Program entry point.
 *
 * Sets the locale, installs the SIGFPE handler, seeds the random number
 * generator, and consumes leading option words (-version/--version, --,
 * --csv, -safe, -f, -F, -v, -d).  If no -f was given, the first remaining
 * word is taken as the program text.  It then initialises the record
 * buffer, symbols, ARGV and (unless safe mode is on) the environment,
 * parses the program with yyparse(), and calls run(winner) when
 * errorflag is 0, or bracecheck() otherwise.
 *
 * Returns errorflag, except for the early exits: usage error (exit 1),
 * -version (return 0) and -d without a program (exit 0).
 */
134 int main(int argc, char *argv[])
135 {
136 	const char *fs = NULL;	/* value of -F, if any; applied to FS after parsing */
137 	char *fn, *vn;	/* argument of the current -f / -v option */
138 
139 	setlocale(LC_CTYPE, "");
140 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
141 	awk_mb_cur_max = MB_CUR_MAX;
142 	cmdname = argv[0];
143 	if (argc == 1) {	/* no arguments at all: print usage and fail */
144 		fprintf(stderr,
145 		  "usage: %s [-F fs | --csv] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
146 		  cmdname);
147 		exit(1);
148 	}
149 #ifdef SA_SIGINFO
150 	{
151 		struct sigaction sa;
152 		sa.sa_sigaction = fpecatch;
153 		sa.sa_flags = SA_SIGINFO;	/* ask for the three-argument handler form */
154 		sigemptyset(&sa.sa_mask);
155 		(void)sigaction(SIGFPE, &sa, NULL);
156 	}
157 #else
158 	(void)signal(SIGFPE, fpecatch);
159 #endif
160 	/*signal(SIGSEGV, segvcatch); experiment */
161 
162 	/* Set and keep track of the random seed */
163 	srand_seed = 1;
164 	srandom((unsigned long) srand_seed);
165 
166 	yyin = NULL;
167 	symtab = makesymtab(NSYMTAB/NSYMTAB);	/* argument evaluates to 1 for non-zero NSYMTAB; NOTE(review): presumably the table grows on demand - confirm in makesymtab() */
168 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {	/* option words only; a lone "-" ends the loop */
169 		if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0) {
170 			printf("awk %s\n", version);
171 			return 0;
172 		}
173 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
174 			argc--;
175 			argv++;
176 			break;
177 		}
178 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
179 			CSV = true;
180 			argc--;
181 			argv++;
182 			continue;
183 		}
184 		switch (argv[1][1]) {
185 		case 's':	/* only the exact word -safe has an effect; any other -s... word is dropped without a warning */
186 			if (strcmp(argv[1], "-safe") == 0)
187 				safe = true;
188 			break;
189 		case 'f':	/* next argument is program filename */
190 			fn = getarg(&argc, &argv, "no program filename");
191 			if (npfile >= maxpfile) {	/* pfile is full: grow it by 20 slots */
192 				maxpfile += 20;
193 				pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile));
194 				if (pfile == NULL)
195 					FATAL("error allocating space for -f options");
196  			}
197 			pfile[npfile++] = fn;
198  			break;
199 		case 'F':	/* set field separator */
200 			fs = setfs(getarg(&argc, &argv, "no field separator"));
201 			break;
202 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
203 			vn = getarg(&argc, &argv, "no variable name");
204 			if (isclvar(vn))
205 				setclvar(vn);
206 			else
207 				FATAL("invalid -v option argument: %s", vn);
208 			break;
209 		case 'd':	/* debug level comes from the text attached to -d; a result of 0 becomes 1 */
210 			dbg = atoi(&argv[1][2]);
211 			if (dbg == 0)
212 				dbg = 1;
213 			printf("awk %s\n", version);
214 			break;
215 		default:
216 			WARNING("unknown option %s ignored", argv[1]);
217 			break;
218 		}
219 		argc--;	/* drop the word just handled (getarg() has already dropped the option word when the argument was separate) */
220 		argv++;
221 	}
222 
223 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))	/* syminit() has not run yet; NOTE(review): an FS entry here presumably comes from -v FS=... - confirm */
224 		WARNING("danger: don't set FS when --csv is in effect");
225 
226 	/* argv[1] is now the first argument */
227 	if (npfile == 0) {	/* no -f; first argument is program */
228 		if (argc <= 1) {
229 			if (dbg)	/* -d with no program: exit quietly with status 0 */
230 				exit(0);
231 			FATAL("no program given");
232 		}
233 		DPRINTF("program = |%s|\n", argv[1]);
234 		lexprog = argv[1];
235 		argc--;
236 		argv++;
237 	}
238 	recinit(recsize);
239 	syminit();
240 	compile_time = COMPILING;
241 	argv[0] = cmdname;	/* put prog name at front of arglist */
242 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
243 	arginit(argc, argv);
244 	if (!safe)	/* safe mode skips envinit() */
245 		envinit(environ);
246 	yyparse();
247 #if 0
248 	// Doing this would comply with POSIX, but is not compatible with
249 	// other awks and with what most users expect. So comment it out.
250 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
251 #endif
252 	if (fs)	/* the -F value is stored into FS only after yyparse() has run */
253 		*FS = qstring(fs, '\0');
254 	DPRINTF("errorflag=%d\n", errorflag);
255 	if (errorflag == 0) {
256 		compile_time = RUNNING;
257 		run(winner);
258 	} else
259 		bracecheck();
260 	return(errorflag);	/* exit status is errorflag */
261 }
262 
263 int pgetc(void)		/* get 1 character from awk program */
264 {
265 	int c;
266 
267 	for (;;) {
268 		if (yyin == NULL) {
269 			if (curpfile >= npfile)
270 				return EOF;
271 			if (strcmp(pfile[curpfile], "-") == 0)
272 				yyin = stdin;
273 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
274 				FATAL("can't open file %s", pfile[curpfile]);
275 			lineno = 1;
276 		}
277 		if ((c = getc(yyin)) != EOF)
278 			return c;
279 		if (yyin != stdin)
280 			fclose(yyin);
281 		yyin = NULL;
282 		curpfile++;
283 	}
284 }
285 
286 char *cursource(void)	/* current source file name */
287 {
288 	if (npfile > 0)
289 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
290 	else
291 		return NULL;
292 }
293