xref: /illumos-gate/usr/src/cmd/tic/tic_scan.c (revision 71269a2275bf5a143dad6461eee2710a344e7261)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*	Copyright (c) 1988 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 /*
43  *			COPYRIGHT NOTICE
44  *
45  *	This software is copyright(C) 1982 by Pavel Curtis
46  *
47  *	Permission is granted to reproduce and distribute
48  *	this file by any means so long as no fee is charged
49  *	above a nominal handling fee and so long as this
50  *	notice is always included in the copies.
51  *
52  *	Other rights are reserved except as explicitly granted
53  *	by written permission of the author.
54  *		Pavel Curtis
55  *		Computer Science Dept.
56  *		405 Upson Hall
57  *		Cornell University
58  *		Ithaca, NY 14853
59  *
60  *		Ph- (607) 256-4934
61  *
62  *		Pavel.Cornell@Udel-Relay(ARPAnet)
63  *		decvax!cornell!pavel(UUCPnet)
64  */
65 
66 /*
67  *	comp_scan.c --- Lexical scanner for terminfo compiler.
68  *
69  *   $Log:	RCS/comp_scan.v $
70  * Revision 2.1  82/10/25  14:45:55  pavel
71  * Added Copyright Notice
72  *
73  * Revision 2.0  82/10/24  15:17:12  pavel
74  * Beta-one Test Release
75  *
76  * Revision 1.3  82/08/23  22:30:03  pavel
77  * The REAL Alpha-one Release Version
78  *
79  * Revision 1.2  82/08/19  19:10:06  pavel
80  * Alpha Test Release One
81  *
82  * Revision 1.1  82/08/12  18:37:46  pavel
83  * Initial revision
84  *
85  *
86  */
87 
88 
89 #include <stdio.h>
90 #include <ctype.h>
91 #include "compiler.h"
92 
/*
 * True when ch is a blank or a tab.  The argument is parenthesized so that
 * an expression argument (e.g. a conditional) is compared as a whole; note
 * that it is still evaluated twice, so it must not have side effects.
 */
#define	iswhite(ch)	((ch) == ' ' || (ch) == '\t')


/*
 * Set on every successful next_char() call: TRUE when the character just
 * returned came from index 0 of the current line and that line is not a
 * bare newline, FALSE otherwise.
 */
static int	first_column;

static void backspace(void);
void reset_input(void);
void panic_mode(int);
101 
102 
103 
104 /*
105  *	int
106  *	get_token()
107  *
108  *	Scans the input for the next token, storing the specifics in the
109  *	global structure 'curr_token' and returning one of the following:
110  *
111  *		NAMES		A line beginning in column 1.  'name'
112  *				will be set to point to everything up to
113  *				but not including the first comma on the line.
114  *		BOOLEAN		An entry consisting of a name followed by
115  *				a comma.  'name' will be set to point to the
116  *				name of the capability.
117  *		NUMBER		An entry of the form
118  *					name#digits,
119  *				'name' will be set to point to the capability
120  *				name and 'valnumber' to the number given.
121  *		STRING		An entry of the form
122  *					name=characters,
123  *				'name' is set to the capability name and
124  *				'valstring' to the string of characters, with
125  *				input translations done.
126  *		CANCEL		An entry of the form
127  *					name@,
128  *				'name' is set to the capability name and
129  *				'valnumber' to -1.
130  *		EOF		The end of the file has been reached.
131  *
132  */
133 
134 int
135 get_token()
136 {
137 	long		number;
138 	int		type;
139 	register int	ch;
140 	static char	buffer[1024];
141 	register char	*ptr;
142 	int		dot_flag = FALSE;
143 
144 	while ((ch = next_char()) == '\n' || (isascii(ch) && iswhite(ch)));
145 
146 	if (ch == EOF)
147 	    type = EOF;
148 	else {
149 	    if (ch == '.') {
150 		dot_flag = TRUE;
151 
152 		while ((ch = next_char()) == ' ' || ch == '\t');
153 	    }
154 
155 	    if (! isascii(ch) || ! isalnum(ch)) {
156 		warning("Illegal character - '%c'", ch);
157 		panic_mode(',');
158 	    }
159 
160 	    ptr = buffer;
161 	    if (ch != '\n') *(ptr++) = ch;
162 
163 	    if (first_column) {
164 		while ((ch = next_char()) != ',' && ch != '\n' && ch != EOF)
165 		    *(ptr++) = ch;
166 
167 		if (ch == EOF)
168 		    err_abort("Premature EOF");
169 		else if (ch == '\n') {
170 		    warning("Newline in middle of terminal name");
171 		    panic_mode(',');
172 		}
173 
174 		*ptr = '\0';
175 		curr_token.tk_name = buffer;
176 		type = NAMES;
177 	    } else {
178 		ch = next_char();
179 		while (isascii(ch) && isalnum(ch)) {
180 		    *(ptr++) = ch;
181 		    ch = next_char();
182 		}
183 
184 		*ptr++ = '\0';
185 		switch (ch) {
186 		    case ',':
187 			curr_token.tk_name = buffer;
188 			type = BOOLEAN;
189 			break;
190 
191 		    case '@':
192 			if (next_char() != ',')
193 			    warning("Missing comma");
194 			curr_token.tk_name = buffer;
195 			type = CANCEL;
196 			break;
197 
198 		    case '#':
199 			number = 0;
200 			if ((ch = next_char()) == ',')
201 				warning("Missing numeric value");
202 			backspace();
203 			if ((ch = next_char()) == '0') {
204 			    if ((ch = next_char()) == 'x' || ch == 'X') {
205 				while (isascii(ch = next_char()) &&
206 				    isxdigit(ch)) {
207 				    number *= 16;
208 				    if (isdigit(ch))
209 					number += ch - '0';
210 				    else if (ch >= 'a' && ch <= 'f')
211 					number += 10 + ch - 'a';
212 				    else
213 					number += 10 + ch - 'A';
214 				}
215 			    } else {
216 				backspace();
217 				while ((ch = next_char()) >= '0' &&
218 				    ch <= '7')
219 				    number = number * 8 + ch - '0';
220 				}
221 			    } else {
222 				    backspace();
223 				    while (isascii(ch = next_char()) &&
224 					isdigit(ch))
225 					number = number * 10 + ch - '0';
226 			    }
227 			if (ch != ',')
228 			    warning("Missing comma");
229 			curr_token.tk_name = buffer;
230 			curr_token.tk_valnumber = number;
231 			type = NUMBER;
232 			break;
233 
234 		    case '=':
235 			ch = trans_string(ptr);
236 			if (ch != NULL && ch != ',')
237 			    warning("Missing comma");
238 			if (ch == NULL)
239 				warning("NULL string value");
240 			curr_token.tk_name = buffer;
241 			curr_token.tk_valstring = ptr;
242 			type = STRING;
243 			break;
244 
245 		    default:
246 			warning("Illegal character - '%c'", ch);
247 		}
248 	    } /* end else (first_column == FALSE) */
249 	} /* end else (ch != EOF) */
250 
251 	if (dot_flag == TRUE)
252 	    DEBUG(8, "Commented out ", "");
253 
254 	if (debug_level >= 8) {
255 	    fprintf(stderr, "Token: ");
256 	    switch (type) {
257 		case BOOLEAN:
258 			fprintf(stderr, "Boolean;  name='%s'\n",
259 			    curr_token.tk_name);
260 			break;
261 
262 		case NUMBER:
263 			fprintf(stderr, "Number; name = '%s', value = %d\n",
264 			    curr_token.tk_name, curr_token.tk_valnumber);
265 			break;
266 
267 		case STRING:
268 			fprintf(stderr, "String; name = '%s', value = '%s'\n",
269 			    curr_token.tk_name, curr_token.tk_valstring);
270 			break;
271 
272 		case CANCEL:
273 			fprintf(stderr, "Cancel; name = '%s'\n",
274 			    curr_token.tk_name);
275 		    break;
276 
277 		case NAMES:
278 			fprintf(stderr, "Names; value = '%s'\n",
279 			    curr_token.tk_name);
280 			break;
281 
282 		case EOF:
283 			fprintf(stderr, "End of file\n");
284 			break;
285 
286 		default:
287 			warning("Bad token type");
288 	    }
289 	}
290 
291 	if (dot_flag == TRUE)	/* if commented out, use the next one */
292 	    type = get_token();
293 
294 	return (type);
295 }
296 
297 
298 
299 /*
300  *	int
301  *	next_char()
302  *
303  *	Returns the next character in the input stream.  Comments and leading
304  *	white space are stripped.  The global state variable 'firstcolumn' is
305  *	set TRUE if the character returned is from the first column of the
306  * 	inputline.  The global variable curr_line is incremented for each new.
307  *	line. The global variable curr_file_pos is set to the file offset
308  *	of the beginning of each line.
309  *
310  */
311 
312 int	curr_column = -1;
313 char	line[1024];
314 
315 int
316 next_char()
317 {
318 	char	*rtn_value;
319 	long	ftell();
320 	char	*p;
321 
322 	if (curr_column < 0 || curr_column > 1023 ||
323 	    line[curr_column] == '\0') {
324 	    do {
325 			curr_file_pos = ftell(stdin);
326 
327 			if ((rtn_value = fgets(line, 1024, stdin)) == NULL)
328 				return (EOF);
329 			curr_line++;
330 			p = &line[0];
331 			while (*p && iswhite(*p)) {
332 				p++;
333 			}
334 	    } while (*p == '#');
335 
336 	    curr_column = 0;
337 	    while (isascii(line[curr_column]) && iswhite(line[curr_column]))
338 		curr_column++;
339 	}
340 
341 	if (curr_column == 0 && line[0] != '\n')
342 	    first_column = TRUE;
343 	else
344 	    first_column = FALSE;
345 
346 	return (line[curr_column++]);
347 }
348 
349 
350 static void
351 backspace(void)
352 {
353 	curr_column--;
354 
355 	if (curr_column < 0)
356 		syserr_abort("Backspaced off beginning of line");
357 }
358 
359 
360 
361 /*
362  *	reset_input()
363  *
364  *	Resets the input-reading routines.  Used after a seek has been done.
365  *
366  */
367 
368 void
369 reset_input(void)
370 {
371 	curr_column = -1;
372 }
373 
374 
375 
376 /*
377  *	int
378  *	trans_string(ptr)
379  *
380  *	Reads characters using next_char() until encountering a comma, a new
381  *	entry, or end-of-file.  The returned value is the character which
382  *	caused reading to stop.  The following translations are done on the
383  *	input:
384  *
385  *		^X  goes to  ctrl-X (i.e. X & 037)
386  *		{\E,\n,\r,\b,\t,\f}  go to
387  *			{ESCAPE,newline,carriage-return,backspace,tab,formfeed}
388  *		{\^,\\}  go to  {carat,backslash}
389  *		\ddd (for ddd = up to three octal digits)  goes to
390  *							the character ddd
391  *
392  *		\e == \E
393  *		\0 == \200
394  *
395  */
396 
397 int
398 trans_string(char *ptr)
399 {
400 	register int	count = 0;
401 	int		number;
402 	register int	i;
403 	register int	ch;
404 
405 	while ((ch = next_char()) != ',' && ch != EOF && !first_column) {
406 	    if (ch == '^') {
407 		ch = next_char();
408 		if (ch == EOF)
409 		    err_abort("Premature EOF");
410 
411 		if (!isascii(ch) || ! isprint(ch)) {
412 		    warning("Illegal ^ character - '%c'", ch);
413 		}
414 
415 		if (ch == '@')
416 		    *(ptr++) = 0200;
417 		else
418 		    *(ptr++) = ch & 037;
419 	    } else if (ch == '\\') {
420 		ch = next_char();
421 		if (ch == EOF)
422 		    err_abort("Premature EOF");
423 
424 		if (ch >= '0' && ch <= '7') {
425 		    number = ch - '0';
426 		    for (i = 0; i < 2; i++) {
427 			ch = next_char();
428 			if (ch == EOF)
429 			    err_abort("Premature EOF");
430 
431 			if (ch < '0' || ch > '7') {
432 			    backspace();
433 			    break;
434 			}
435 
436 			number = number * 8 + ch - '0';
437 		    }
438 
439 		    if (number == 0)
440 			number = 0200;
441 		    *(ptr++) = (char)number;
442 		} else {
443 		    switch (ch) {
444 			case 'E':
445 			case 'e':	*(ptr++) = '\033';	break;
446 
447 			case 'l':
448 			case 'n':	*(ptr++) = '\n';	break;
449 
450 			case 'r':	*(ptr++) = '\r';	break;
451 
452 			case 'b':	*(ptr++) = '\010';	break;
453 
454 			case 's':	*(ptr++) = ' ';		break;
455 
456 			case 'f':	*(ptr++) = '\014';	break;
457 
458 			case 't':	*(ptr++) = '\t';	break;
459 
460 			case '\\':	*(ptr++) = '\\';	break;
461 
462 			case '^':	*(ptr++) = '^';		break;
463 
464 			case ',':	*(ptr++) = ',';		break;
465 
466 			case ':':	*(ptr++) = ':';		break;
467 
468 			default:
469 			    warning("Illegal character in \\ sequence - '%c'",
470 				ch);
471 			    *(ptr++) = ch;
472 		    } /* endswitch (ch) */
473 		} /* endelse (ch < '0' ||  ch > '7') */
474 	    } /* end else if (ch == '\\') */
475 	    else {
476 		if (ch != '\n') *(ptr++) = ch;
477 	    }
478 
479 	    count ++;
480 
481 	    if (count > 1000)
482 		warning("Very long string found.  Missing comma?");
483 	} /* end while */
484 
485 	if (ch == EOF)
486 	    warning("Premature EOF - missing comma?");
487 	/* start of new description */
488 	else if (first_column) {
489 	    backspace();
490 	    warning("Missing comma?");
491 	    /* pretend we did get a comma */
492 	    ch = ',';
493 	}
494 
495 	*ptr = '\0';
496 
497 	if (count == 0)
498 		return (NULL);
499 	return (ch);
500 }
501 
/*
 * Panic mode error recovery: consume input with next_char() up to and
 * including the first occurrence of "ch".  Gives up quietly if end-of-file
 * is reached first.
 */
void
panic_mode(int ch)
{
	int c;

	do {
		c = next_char();
	} while (c != ch && c != EOF);
}
518