xref: /illumos-gate/usr/src/cmd/tic/tic_scan.c (revision 96c8483a3fb53529bbf410957b0ad69cfb5d9229)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*	Copyright (c) 1988 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 /*
41  *			COPYRIGHT NOTICE
42  *
43  *	This software is copyright(C) 1982 by Pavel Curtis
44  *
45  *	Permission is granted to reproduce and distribute
46  *	this file by any means so long as no fee is charged
47  *	above a nominal handling fee and so long as this
48  *	notice is always included in the copies.
49  *
50  *	Other rights are reserved except as explicitly granted
51  *	by written permission of the author.
52  *		Pavel Curtis
53  *		Computer Science Dept.
54  *		405 Upson Hall
55  *		Cornell University
56  *		Ithaca, NY 14853
57  *
58  *		Ph- (607) 256-4934
59  *
60  *		Pavel.Cornell@Udel-Relay(ARPAnet)
61  *		decvax!cornell!pavel(UUCPnet)
62  */
63 
64 /*
65  *	comp_scan.c --- Lexical scanner for terminfo compiler.
66  *
67  *   $Log:	RCS/comp_scan.v $
68  * Revision 2.1  82/10/25  14:45:55  pavel
69  * Added Copyright Notice
70  *
71  * Revision 2.0  82/10/24  15:17:12  pavel
72  * Beta-one Test Release
73  *
74  * Revision 1.3  82/08/23  22:30:03  pavel
75  * The REAL Alpha-one Release Version
76  *
77  * Revision 1.2  82/08/19  19:10:06  pavel
78  * Alpha Test Release One
79  *
80  * Revision 1.1  82/08/12  18:37:46  pavel
81  * Initial revision
82  *
83  *
84  */
85 
86 
87 #include <stdio.h>
88 #include <ctype.h>
89 #include "compiler.h"
90 
/* True when ch is a blank or a tab; the argument is evaluated twice. */
#define	iswhite(ch)	((ch) == ' ' || (ch) == '\t')
92 
93 
94 static int	first_column;		/* See 'next_char()' below */
95 
96 static void backspace(void);
97 void reset_input(void);
98 void panic_mode(int);
99 
100 
101 
102 /*
103  *	int
104  *	get_token()
105  *
106  *	Scans the input for the next token, storing the specifics in the
107  *	global structure 'curr_token' and returning one of the following:
108  *
109  *		NAMES		A line beginning in column 1.  'name'
110  *				will be set to point to everything up to
111  *				but not including the first comma on the line.
112  *		BOOLEAN		An entry consisting of a name followed by
113  *				a comma.  'name' will be set to point to the
114  *				name of the capability.
115  *		NUMBER		An entry of the form
116  *					name#digits,
117  *				'name' will be set to point to the capability
118  *				name and 'valnumber' to the number given.
119  *		STRING		An entry of the form
120  *					name=characters,
121  *				'name' is set to the capability name and
122  *				'valstring' to the string of characters, with
123  *				input translations done.
124  *		CANCEL		An entry of the form
125  *					name@,
126  *				'name' is set to the capability name and
127  *				'valnumber' to -1.
128  *		EOF		The end of the file has been reached.
129  *
130  */
131 
132 int
133 get_token()
134 {
135 	long		number;
136 	int		type = UNDEF;
137 	register int	ch;
138 	static char	buffer[1024];
139 	register char	*ptr;
140 	int		dot_flag = FALSE;
141 
142 	while ((ch = next_char()) == '\n' || (isascii(ch) && iswhite(ch)));
143 
144 	if (ch == EOF)
145 	    type = EOF;
146 	else {
147 	    if (ch == '.') {
148 		dot_flag = TRUE;
149 
150 		while ((ch = next_char()) == ' ' || ch == '\t');
151 	    }
152 
153 	    if (! isascii(ch) || ! isalnum(ch)) {
154 		warning("Illegal character - '%c'", ch);
155 		panic_mode(',');
156 	    }
157 
158 	    ptr = buffer;
159 	    if (ch != '\n') *(ptr++) = ch;
160 
161 	    if (first_column) {
162 		while ((ch = next_char()) != ',' && ch != '\n' && ch != EOF)
163 		    *(ptr++) = ch;
164 
165 		if (ch == EOF)
166 		    err_abort("Premature EOF");
167 		else if (ch == '\n') {
168 		    warning("Newline in middle of terminal name");
169 		    panic_mode(',');
170 		}
171 
172 		*ptr = '\0';
173 		curr_token.tk_name = buffer;
174 		type = NAMES;
175 	    } else {
176 		ch = next_char();
177 		while (isascii(ch) && isalnum(ch)) {
178 		    *(ptr++) = ch;
179 		    ch = next_char();
180 		}
181 
182 		*ptr++ = '\0';
183 		switch (ch) {
184 		    case ',':
185 			curr_token.tk_name = buffer;
186 			type = BOOLEAN;
187 			break;
188 
189 		    case '@':
190 			if (next_char() != ',')
191 			    warning("Missing comma");
192 			curr_token.tk_name = buffer;
193 			type = CANCEL;
194 			break;
195 
196 		    case '#':
197 			number = 0;
198 			if ((ch = next_char()) == ',')
199 				warning("Missing numeric value");
200 			backspace();
201 			if ((ch = next_char()) == '0') {
202 			    if ((ch = next_char()) == 'x' || ch == 'X') {
203 				while (isascii(ch = next_char()) &&
204 				    isxdigit(ch)) {
205 				    number *= 16;
206 				    if (isdigit(ch))
207 					number += ch - '0';
208 				    else if (ch >= 'a' && ch <= 'f')
209 					number += 10 + ch - 'a';
210 				    else
211 					number += 10 + ch - 'A';
212 				}
213 			    } else {
214 				backspace();
215 				while ((ch = next_char()) >= '0' &&
216 				    ch <= '7')
217 				    number = number * 8 + ch - '0';
218 				}
219 			    } else {
220 				    backspace();
221 				    while (isascii(ch = next_char()) &&
222 					isdigit(ch))
223 					number = number * 10 + ch - '0';
224 			    }
225 			if (ch != ',')
226 			    warning("Missing comma");
227 			curr_token.tk_name = buffer;
228 			curr_token.tk_valnumber = number;
229 			type = NUMBER;
230 			break;
231 
232 		    case '=':
233 			ch = trans_string(ptr);
234 			if (ch != NULL && ch != ',')
235 			    warning("Missing comma");
236 			if (ch == NULL)
237 				warning("NULL string value");
238 			curr_token.tk_name = buffer;
239 			curr_token.tk_valstring = ptr;
240 			type = STRING;
241 			break;
242 
243 		    default:
244 			warning("Illegal character - '%c'", ch);
245 		}
246 	    } /* end else (first_column == FALSE) */
247 	} /* end else (ch != EOF) */
248 
249 	if (dot_flag == TRUE)
250 	    DEBUG(8, "Commented out ", "");
251 
252 	if (debug_level >= 8) {
253 	    fprintf(stderr, "Token: ");
254 	    switch (type) {
255 		case BOOLEAN:
256 			fprintf(stderr, "Boolean;  name='%s'\n",
257 			    curr_token.tk_name);
258 			break;
259 
260 		case NUMBER:
261 			fprintf(stderr, "Number; name = '%s', value = %d\n",
262 			    curr_token.tk_name, curr_token.tk_valnumber);
263 			break;
264 
265 		case STRING:
266 			fprintf(stderr, "String; name = '%s', value = '%s'\n",
267 			    curr_token.tk_name, curr_token.tk_valstring);
268 			break;
269 
270 		case CANCEL:
271 			fprintf(stderr, "Cancel; name = '%s'\n",
272 			    curr_token.tk_name);
273 		    break;
274 
275 		case NAMES:
276 			fprintf(stderr, "Names; value = '%s'\n",
277 			    curr_token.tk_name);
278 			break;
279 
280 		case EOF:
281 			fprintf(stderr, "End of file\n");
282 			break;
283 
284 		default:
285 			warning("Bad token type");
286 	    }
287 	}
288 
289 	if (dot_flag == TRUE)	/* if commented out, use the next one */
290 	    type = get_token();
291 
292 	return (type);
293 }
294 
295 
296 
297 /*
298  *	int
299  *	next_char()
300  *
301  *	Returns the next character in the input stream.  Comments and leading
302  *	white space are stripped.  The global state variable 'firstcolumn' is
303  *	set TRUE if the character returned is from the first column of the
304  * 	inputline.  The global variable curr_line is incremented for each new.
305  *	line. The global variable curr_file_pos is set to the file offset
306  *	of the beginning of each line.
307  *
308  */
309 
310 int	curr_column = -1;
311 char	line[1024];
312 
313 int
314 next_char()
315 {
316 	char	*rtn_value;
317 	long	ftell();
318 	char	*p;
319 
320 	if (curr_column < 0 || curr_column > 1023 ||
321 	    line[curr_column] == '\0') {
322 	    do {
323 			curr_file_pos = ftell(stdin);
324 
325 			if ((rtn_value = fgets(line, 1024, stdin)) == NULL)
326 				return (EOF);
327 			curr_line++;
328 			p = &line[0];
329 			while (*p && iswhite(*p)) {
330 				p++;
331 			}
332 	    } while (*p == '#');
333 
334 	    curr_column = 0;
335 	    while (isascii(line[curr_column]) && iswhite(line[curr_column]))
336 		curr_column++;
337 	}
338 
339 	if (curr_column == 0 && line[0] != '\n')
340 	    first_column = TRUE;
341 	else
342 	    first_column = FALSE;
343 
344 	return (line[curr_column++]);
345 }
346 
347 
348 static void
349 backspace(void)
350 {
351 	curr_column--;
352 
353 	if (curr_column < 0)
354 		syserr_abort("Backspaced off beginning of line");
355 }
356 
357 
358 
359 /*
360  *	reset_input()
361  *
362  *	Resets the input-reading routines.  Used after a seek has been done.
363  *
364  */
365 
366 void
367 reset_input(void)
368 {
369 	curr_column = -1;
370 }
371 
372 
373 
374 /*
375  *	int
376  *	trans_string(ptr)
377  *
378  *	Reads characters using next_char() until encountering a comma, a new
379  *	entry, or end-of-file.  The returned value is the character which
380  *	caused reading to stop.  The following translations are done on the
381  *	input:
382  *
383  *		^X  goes to  ctrl-X (i.e. X & 037)
384  *		{\E,\n,\r,\b,\t,\f}  go to
385  *			{ESCAPE,newline,carriage-return,backspace,tab,formfeed}
386  *		{\^,\\}  go to  {carat,backslash}
387  *		\ddd (for ddd = up to three octal digits)  goes to
388  *							the character ddd
389  *
390  *		\e == \E
391  *		\0 == \200
392  *
393  */
394 
395 int
396 trans_string(char *ptr)
397 {
398 	register int	count = 0;
399 	int		number;
400 	register int	i;
401 	register int	ch;
402 
403 	while ((ch = next_char()) != ',' && ch != EOF && !first_column) {
404 	    if (ch == '^') {
405 		ch = next_char();
406 		if (ch == EOF)
407 		    err_abort("Premature EOF");
408 
409 		if (!isascii(ch) || ! isprint(ch)) {
410 		    warning("Illegal ^ character - '%c'", ch);
411 		}
412 
413 		if (ch == '@')
414 		    *(ptr++) = 0200;
415 		else
416 		    *(ptr++) = ch & 037;
417 	    } else if (ch == '\\') {
418 		ch = next_char();
419 		if (ch == EOF)
420 		    err_abort("Premature EOF");
421 
422 		if (ch >= '0' && ch <= '7') {
423 		    number = ch - '0';
424 		    for (i = 0; i < 2; i++) {
425 			ch = next_char();
426 			if (ch == EOF)
427 			    err_abort("Premature EOF");
428 
429 			if (ch < '0' || ch > '7') {
430 			    backspace();
431 			    break;
432 			}
433 
434 			number = number * 8 + ch - '0';
435 		    }
436 
437 		    if (number == 0)
438 			number = 0200;
439 		    *(ptr++) = (char)number;
440 		} else {
441 		    switch (ch) {
442 			case 'E':
443 			case 'e':	*(ptr++) = '\033';	break;
444 
445 			case 'l':
446 			case 'n':	*(ptr++) = '\n';	break;
447 
448 			case 'r':	*(ptr++) = '\r';	break;
449 
450 			case 'b':	*(ptr++) = '\010';	break;
451 
452 			case 's':	*(ptr++) = ' ';		break;
453 
454 			case 'f':	*(ptr++) = '\014';	break;
455 
456 			case 't':	*(ptr++) = '\t';	break;
457 
458 			case '\\':	*(ptr++) = '\\';	break;
459 
460 			case '^':	*(ptr++) = '^';		break;
461 
462 			case ',':	*(ptr++) = ',';		break;
463 
464 			case ':':	*(ptr++) = ':';		break;
465 
466 			default:
467 			    warning("Illegal character in \\ sequence - '%c'",
468 				ch);
469 			    *(ptr++) = ch;
470 		    } /* endswitch (ch) */
471 		} /* endelse (ch < '0' ||  ch > '7') */
472 	    } /* end else if (ch == '\\') */
473 	    else {
474 		if (ch != '\n') *(ptr++) = ch;
475 	    }
476 
477 	    count ++;
478 
479 	    if (count > 1000)
480 		warning("Very long string found.  Missing comma?");
481 	} /* end while */
482 
483 	if (ch == EOF)
484 	    warning("Premature EOF - missing comma?");
485 	/* start of new description */
486 	else if (first_column) {
487 	    backspace();
488 	    warning("Missing comma?");
489 	    /* pretend we did get a comma */
490 	    ch = ',';
491 	}
492 
493 	*ptr = '\0';
494 
495 	if (count == 0)
496 		return (NULL);
497 	return (ch);
498 }
499 
500 /*
501  * Panic mode error recovery - skip everything until a "ch" is found.
502  */
503 void
504 panic_mode(int ch)
505 {
506 	int c;
507 
508 	for (;;) {
509 		c = next_char();
510 		if (c == ch)
511 			return;
512 		if (c == EOF)
513 			return;
514 	}
515 }
516