xref: /titanic_41/usr/src/cmd/sgs/lex/common/parser.y (revision 1ae0874509b6811fdde1dfd46f0d93fd09867a3f)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 %}
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.
26  * All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /*	Copyright (c) 1988 AT&T	*/
31 /*	  All Rights Reserved  	*/
32 
33 
34 %{
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 void yyerror(char *);
38 
39 %}
40 /* parser.y */
41 
42 /* XCU4: add XSCON: %x exclusive start token */
43 /* XCU4: add ARRAY: %a yytext is char array */
44 /* XCU4: add POINTER: %p yytext is a pointer to char */
/*
 * Token codes handed to the grammar by yylex().  Single-character
 * operators ('/', '|', '*', ...) are returned as their own character
 * values and therefore need no %token entry.
 */
45 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS XSCON ARRAY POINTER
46 
/*
 * Precedence table, lowest first: each declaration below binds more
 * tightly than the ones listed before it.
 */
47 %nonassoc ARRAY POINTER
48 %left XSCON SCON NEWE
49 %left '/'
50 /*
51  * XCU4: lower the precedence of $ and ^ to less than the or operator
52  * per Spec. 1170
53  */
54 %left '$' '^'
55 %left '|'
56 %left CHAR CCL NCCL '(' '.' STR NULLS
57 %left ITER
/*
 * CAT is never returned by yylex(); it only names the precedence that
 * the juxtaposition rule borrows with "%prec CAT".
 */
58 %left CAT
59 %left '*' '+' '?'
60 
61 %{
62 #include "ldefs.c"
63 
/*
 * Semantic value carried by every token and nonterminal.
 *   i   integer values: parse-tree node numbers produced by
 *       mn0()/mn1()/mn2(), single character codes (CHAR) and
 *       iteration counts (ITER).
 *   cp  wide-character strings: STR text, packed character classes
 *       (CCL/NCCL) and start-condition lists (SCON/XSCON).
 */
64 #define YYSTYPE union _yystype_
65 union _yystype_
66 {
67 	int	i;
68 	CHR	*cp;
69 };
/*
 * peekon is set by yylex() when the character following a {definition}
 * reference is '^', consulted when that '^' is scanned, and cleared
 * again before each token is returned.
 */
70 int	peekon = 0; /* need this to check if "^" came in a definition section */
71 
72 %}
73 %%
74 %{
/* Scratch variables shared by the rule actions below. */
75 int i;
76 int j,k;
77 int g;
78 CHR *p;
/*
 * Wide-character spellings of "%T", "%t" and "%}"; yylex() compares
 * input lines against them with scomp().
 */
79 static wchar_t  L_PctUpT[]= {'%', 'T', 0};
80 static wchar_t  L_PctLoT[]= {'%', 't', 0};
81 static wchar_t  L_PctCbr[]= {'%', '}', 0};
82 %}
/*
 * acc: first rule of the grammar - a complete lex specification.
 * In DEBUG builds sect2dump() is called once the input has been
 * reduced, provided `debug' is set.
 */
83 acc	:	lexinput
84 	={
85 # ifdef DEBUG
86 		if(debug) sect2dump();
87 # endif
88 	}
89 	;
/*
 * lexinput: the accepted shapes of a specification.
 *   defns delim prods end	definitions, "%%", rules, optional "%%"
 *   defns delim end		no rules; phead2() is still called once,
 *				guarded by funcflag
 *   error			nothing parsable: report "Illegal definition"
 */
90 lexinput:	defns delim prods end
91 	|	defns delim end
92 	={
93 		if(!funcflag)phead2();
94 		funcflag = TRUE;
95 	}
96 	| error
97 	={
98 # ifdef DEBUG
99 		if(debug) {
100 			sect1dump();
101 			sect2dump();
102 			}
103 # endif
		/*
		 * fatal is cleared around the call and restored afterwards;
		 * presumably this lets error() return instead of exiting -
		 * TODO confirm against error()'s definition.
		 */
104 		fatal = 0;
105 		n_error++;
106 		error("Illegal definition");
107 		fatal = 1;
108 		}
109 	;
/* end: an optional closing "%%" delimiter (the second alternative is empty). */
110 end:		delim | ;
/*
 * defns: zero or more "name replacement" pairs from the definitions
 * section.  yylex() delivers each pair as two STR tokens.  Both strings
 * are appended to the definition buffer at dp; def[dptr] points at the
 * name and subs[dptr] at the replacement.  The entry after the last one
 * is kept null because lookup() relies on a terminating null.
 */
111 defns:	defns STR STR
112 	={
		/*
		 * Check that both strings, each with its terminating null,
		 * fit in what is left of the buffer *before* copying them,
		 * so an over-long definition cannot write past
		 * dchar+DEFCHAR on its way to the error.  The set of
		 * accepted inputs is the same as with the former check,
		 * which ran only after the copies had been made.
		 */
		if(slength($2.cp) + slength($3.cp) + 2 >= (dchar + DEFCHAR) - dp)
			error("Definitions too long");
		scopy($2.cp,dp);
113 		def[dptr] = dp;
114 		dp += slength($2.cp) + 1;
115 		scopy($3.cp,dp);
116 		subs[dptr++] = dp;
117 		if(dptr >= DEFSIZE)
118 			error("Too many definitions");
119 		dp += slength($3.cp) + 1;
122 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
123 	}
124 	|
125 	;
/*
 * delim: a "%%" separator.  Each one advances the global section
 * counter `sect'.  In DEBUG builds the definitions are dumped with
 * sect1dump() when the definitions section is being left.
 */
126 delim:	DELIM
127 	={
128 # ifdef DEBUG
129 		if(sect == DEFSECTION && debug) sect1dump();
130 # endif
131 		sect++;
132 		}
133 	;
/*
 * prods: one or more rules.  Successive rules are chained together
 * under RNEWE nodes; the value is the node number of the chain so far.
 */
134 prods:	prods pr
135 	={	$$.i = mn2(RNEWE,$1.i,$2.i);
136 		}
137 	|	pr
138 	={	$$.i = $1.i;}
139 	;
/*
 * pr: a single rule - a regular expression terminated by NEWE.
 * The expression is concatenated with a final node that carries the
 * rule number (casecount): S1FINAL when divflg is set, i.e. the
 * expression used '/' or '$', otherwise FINAL.  divflg is then reset
 * for the next rule and casecount is advanced.
 *
 * The error alternative discards input up to the next NEWE and reports
 * "Illegal rule".  yyline is decremented around the call so that the
 * message is issued with the previous line number.
 */
140 pr:	r NEWE
141 	={
142 		if(divflg == TRUE)
143 			i = mn1(S1FINAL,casecount);
144 		else i = mn1(FINAL,casecount);
145 		$$.i = mn2(RCAT,$1.i,i);
146 		divflg = FALSE;
147 		if((++casecount)>NACTIONS)
148 			error("Too many (>%d) pattern-action rules.", NACTIONS);
149 		}
150 	| error NEWE
151 	={
152 # ifdef DEBUG
153 		if(debug) sect2dump();
154 # endif
		/*
		 * fatal is cleared around the call and restored afterwards;
		 * presumably this lets error() return - TODO confirm.
		 */
155 		fatal = 0;
156 		yyline--;
157 		n_error++;
158 		error("Illegal rule");
159 		fatal = 1;
160 		yyline++;
161 		}
/*
 * r: a regular expression.  Every alternative that builds a pattern
 * yields, in $$.i, the number of the parse-tree node created for it by
 * mn0() (leaf), mn1() (one child) or mn2() (two children).  Repetition
 * counts are expanded here into explicit concatenations of copies made
 * with dupl().
 */
162 r:	CHAR
163 	={	$$.i = mn0($1.i); }
164 	| STR
165 	={
		/* a string becomes a left-leaning chain of RSTR nodes */
166 		p = (CHR *)$1.cp;
167 		i = mn0((unsigned)(*p++));
168 		while(*p)
169 			i = mn2(RSTR,i,(unsigned)(*p++));
170 		$$.i = i;
171 		}
172 	| '.'
173 	={
174 		$$.i = mn0(DOT);
175 		}
176 	| CCL
177 	={	$$.i = mn1(RCCL,$1.i); }
178 	| NCCL
179 	={	$$.i = mn1(RNCCL,$1.i); }
180 	| r '*'
181 	={	$$.i = mn1(STAR,$1.i); }
182 	| r '+'
183 	={	$$.i = mn1(PLUS,$1.i); }
184 	| r '?'
185 	={	$$.i = mn1(QUEST,$1.i); }
186 	| r '|' r
187 	={	$$.i = mn2(BAR,$1.i,$3.i); }
188 	| r r %prec CAT
189 	={	$$.i = mn2(RCAT,$1.i,$2.i); }
190 	| r '/' r
		/*
		 * Trailing context.  Only one '/' (or '$') is allowed per
		 * rule; divflg remembers that one has been seen.
		 */
191 	={	if(!divflg){
192 			j = mn1(S2FINAL,-casecount);
193 			i = mn2(RCAT,$1.i,j);
194 			$$.i = mn2(DIV,i,$3.i);
195 			}
196 		else {
197 			$$.i = mn2(RCAT,$1.i,$3.i);
198 			error("illegal extra slash");
199 			}
200 		divflg = TRUE;
201 		}
202 	| r ITER ',' ITER '}'
		/*
		 * r{m,n}: from m to n copies.  The bounds are swapped if
		 * they were given in the wrong order.
		 */
203 	={	if($2.i > $4.i){
204 			i = $2.i;
205 			$2.i = $4.i;
206 			$4.i = i;
207 			}
208 		if($4.i <= 0)
209 			error("iteration range must be positive");
210 		else {
			/* j: the mandatory prefix (one copy when m is 0 or 1) */
211 			j = $1.i;
212 			for(k = 2; k<=$2.i;k++)
213 				j = mn2(RCAT,j,dupl($1.i));
			/* add one alternative of exactly i copies for each i in m+1..n */
214 			for(i = $2.i+1; i<=$4.i; i++){
215 				g = dupl($1.i);
216 				for(k=2;k<=i;k++)
217 					g = mn2(RCAT,g,dupl($1.i));
218 				j = mn2(BAR,j,g);
219 				}
			/*
			 * With a lower bound of 0 the empty string must be
			 * accepted as well; the alternatives built above all
			 * contain at least one copy.  This matches what
			 * r{0} and r{0,} already do.
			 */
			if($2.i == 0){
				g = mn0(RNULLS);
				j = mn2(BAR,g,j);
				}
220 			$$.i = j;
221 			}
222 	}
223 	| r ITER '}'
		/* r{n}: exactly n copies; r{0} is the null string */
224 	={
225 		if($2.i < 0)error("can't have negative iteration");
226 		else if($2.i == 0) $$.i = mn0(RNULLS);
227 		else {
228 			j = $1.i;
229 			for(k=2;k<=$2.i;k++)
230 				j = mn2(RCAT,j,dupl($1.i));
231 			$$.i = j;
232 			}
233 		}
234 	| r ITER ',' '}'
235 	={
236 				/* from n to infinity */
237 		if($2.i < 0)error("can't have negative iteration");
238 		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
239 		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
240 		else {		/* >= 2 iterations minimum */
			/* n-1 plain copies followed by one copy under PLUS */
241 			j = $1.i;
242 			for(k=2;k<$2.i;k++)
243 				j = mn2(RCAT,j,dupl($1.i));
244 			k = mn1(PLUS,dupl($1.i));
245 			$$.i = mn2(RCAT,j,k);
246 			}
247 		}
248 	| SCON r
		/* the start-condition list pointer travels in the node's right slot */
249 	={	$$.i = mn2(RSCON,$2.i,(uintptr_t)$1.cp); }
250 
251 	/* XCU4: add XSCON */
252 	| XSCON r
253 	={	$$.i = mn2(RXSCON,$2.i,(uintptr_t)$1.cp); }
254 	| '^' r
255 	={	$$.i = mn1(CARAT,$2.i); }
256 	| r '$'
		/* r$ is treated as r followed by trailing context "\n" */
257 	={	i = mn0('\n');
258 		if(!divflg){
259 			j = mn1(S2FINAL,-casecount);
260 			k = mn2(RCAT,$1.i,j);
261 			$$.i = mn2(DIV,k,i);
262 			}
263 		else $$.i = mn2(RCAT,$1.i,i);
264 		divflg = TRUE;
265 		}
266 	| '(' r ')'
267 	={	$$.i = $2.i; }
268 	|	NULLS
269 	={	$$.i = mn0(RNULLS); }
270 
271 	/* XCU4: add ARRAY and POINTER */
272 	| ARRAY
273 	={ isArray = 1; };
274 	|     POINTER
275 	={ isArray = 0; };
276 	;
277 
278 %%
/*
 * yylex - hand-written scanner that feeds the grammar above with the
 * tokens of a lex specification.
 *
 * What it does depends on the global `sect':
 *
 *   DEFSECTION   Input is read a line at a time with getl().  Lines
 *                starting with '%' are directives and are handled
 *                here; indented lines and comments are copied to fout;
 *                anything else is a definition, returned as two STR
 *                tokens (the name, then the replacement text).  A "%%"
 *                line allocates the parse-tree arrays and returns
 *                DELIM.
 *   RULESECTION  Input is read a character at a time with gch() and
 *                turned into pattern tokens.  White space ends the
 *                pattern: the action is copied to fout by cpyact() and
 *                NEWE is returned.
 *   otherwise    (also reached when the loops above run out of input)
 *                ptail() is called, the remaining input lines are
 *                copied to fout and 0 is returned.
 *
 * The value belonging to a token is left in yylval.  Every return goes
 * through freturn().
 */
279 int
280 yylex(void)
281 {
282 	CHR *p;
283 	int  i;
284 	CHR *xp;
285 	int lex_startcond_lookupval;
286 	CHR  *t, c;
287 	int n, j = 0, k, x;
288 	CHR ch;
289 	static int sectbegin;
290 	static CHR token[TOKENSIZE];
291 	static int iter;
292 	int ccs; /* Current CodeSet. */
293 	CHR *ccp;
294 	int exclusive_flag;	/* XCU4: exclusive start flag */
295 
296 # ifdef DEBUG
297 	yylval.i = 0;
298 # endif
299 
300 	if(sect == DEFSECTION) {		/* definitions section */
301 		while(!eof) {
302 			if(prev == '\n'){    /* next char is at beginning of line */
303 				(void)getl(p=buf);
304 				switch(*p){
305 				case '%':
306 					switch(c= *(p+1)){
307 					case '%':
						/*
						 * p[0] and p[1] are both '%' here, so the
						 * line is exactly "%%" iff p[2] is the
						 * terminator.  (This used to compare the
						 * wide line against a narrow "%%" literal
						 * cast to CHR *, which never matched.)
						 */
308 						if(p[2] != 0) {
309 							p++;
310 							while(*(++p))
311 								if(!space(*p)) {
312 									warning("invalid string following %%%% will be ignored");
313 									break;
314 								}
315 						}
316 						lgate();
317 						if(!ratfor)(void) fprintf(fout,"# ");
318 						(void) fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']);
319 						if(!ratfor)(void) fprintf(fout,"int yylex(){\nint nstr; extern int yyprevious;\n");
320 						sectbegin = TRUE;
						/*
						 * Probe: try to get everything the parse
						 * tree will need in one piece, and give it
						 * straight back.  The pointer is kept in a
						 * pointer variable; it used to be squeezed
						 * through an int and a CHR, which truncates
						 * it where pointers are wider than int.
						 */
321 						i = treesize*(sizeof(*name)+sizeof(*left)+
322 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
323 						p = (CHR *)myalloc(i,1);
324 						if(p == 0)
325 							error("Too little core for parse tree");
327 						free(p);
328 						name = (int *)myalloc(treesize,sizeof(*name));
329 						left = (int *)myalloc(treesize,sizeof(*left));
330 						right = (int *)myalloc(treesize,sizeof(*right));
331 						nullstr = myalloc(treesize,sizeof(*nullstr));
332 						parent = (int *)myalloc(treesize,sizeof(*parent));
333 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
334 							error("Too little core for parse tree");
335 						return(freturn(DELIM));
336 					case 'p': case 'P':
337 					        /* %p or %pointer */
338 						if ((*(p+2) == 'o') ||
339 						    (*(p+2) == 'O')) {
340 						    if(lgatflg)
341 							error("Too late for %%pointer");
342 						    while(*p && !iswspace(*p))
343 							p++;
344 						    isArray = 0;
345 						    continue;
346 						}
347 						/* has overridden number of positions */
348 						p += 2;
349 						maxpos = siconv(p);
350 						if (maxpos<=0)error("illegal position number");
351 # ifdef DEBUG
352 						if (debug) (void) printf("positions (%%p) now %d\n",maxpos);
353 # endif
354 						if(report == 2)report = 1;
355 						continue;
356 					case 'n': case 'N':	/* has overridden number of states */
357 						p += 2;
358 						nstates = siconv(p);
359 						if(nstates<=0)error("illegal state number");
360 # ifdef DEBUG
361 						if(debug)(void) printf( " no. states (%%n) now %d\n",nstates);
362 # endif
363 						if(report == 2)report = 1;
364 						continue;
365 					case 'e': case 'E':		/* has overridden number of tree nodes */
366 						p += 2;
367 						treesize = siconv(p);
368 						if(treesize<=0)error("illegal number of parse tree nodes");
369 # ifdef DEBUG
370 						if (debug) (void) printf("treesize (%%e) now %d\n",treesize);
371 # endif
372 						if(report == 2)report = 1;
373 						continue;
374 					case 'o': case 'O':
375 						p += 2;
376 						outsize = siconv(p);
377 						if(outsize<=0)error("illegal size of output array");
378 						if (report ==2) report=1;
379 						continue;
380 					case 'a': case 'A':
381 					        /* %a or %array */
382 						if ((*(p+2) == 'r') ||
383 						    (*(p+2) == 'R')) {
384 						    if(lgatflg)
385 							error("Too late for %%array");
386 						    while(*p && !iswspace(*p))
387 							p++;
388 						    isArray = 1;
389 						    continue;
390 						}
391 						/* has overridden number of transitions */
392 						p += 2;
393 						ntrans = siconv(p);
394 						if(ntrans<=0)error("illegal translation number");
395 # ifdef DEBUG
396 						if (debug)(void) printf("N. trans (%%a) now %d\n",ntrans);
397 # endif
398 						if(report == 2)report = 1;
399 						continue;
400 					case 'k': case 'K': /* overridden packed char classes */
401 						p += 2;
402 						free(pchar);
403 						pchlen = siconv(p);
404 						if(pchlen<=0)error("illegal number of packed character class");
405 # ifdef DEBUG
406 						if (debug) (void) printf( "Size classes (%%k) now %d\n",pchlen);
407 # endif
408 						pchar=pcptr=(CHR *)myalloc(pchlen, sizeof(*pchar));
409 						if (report==2) report=1;
410 						continue;
411 					case 't': case 'T': 	/* character set specifier */
						/*
						 * error() takes a printf-style format, so
						 * the literal "%t" has to be written "%%t".
						 */
412 						if(handleeuc)
413 							error("\
414 Character table (%%t) is supported only in ASCII compatibility mode.\n");
415 						ZCH = watoi(p+2);
416 						if (ZCH < NCH) ZCH = NCH;
417 						if (ZCH > 2*NCH) error("ch table needs redeclaration");
418 						chset = TRUE;
419 						for(i = 0; i<ZCH; i++)
420 							ctable[i] = 0;
						/* one "code characters..." line per entry, up to the closing %T / %t */
421 						while(getl(p) && scomp(p,L_PctUpT) != 0 && scomp(p,L_PctLoT) != 0){
							/*
							 * NOTE(review): n == ZCH passes this
							 * test but is later used as an index
							 * into chused[2*NCH]; confirm whether
							 * the bound should be n >= ZCH.
							 */
422 							if((n = siconv(p)) <= 0 || n > ZCH){
423 								error("Character value %d out of range",n);
424 								continue;
425 								}
426 							while(digit(*p)) p++;
427 							if(!iswspace(*p)) error("bad translation format");
428 							while(iswspace(*p)) p++;
429 							t = p;
430 							while(*t){
431 								c = ctrans(&t);
432 								if(ctable[(unsigned)c]){
433 									if (iswprint(c))
434 										warning("Character '%wc' used twice",c);
435 
436 									else
437 										error("Character %o used twice",c);
438 									}
439 								else ctable[(unsigned)c] = n;
440 								t++;
441 								}
442 							p = buf;
443 							}
444 						{
						/* give a code to every character the table left out */
445 						char chused[2*NCH]; int kr;
446 						for(i=0; i<ZCH; i++)
447 							chused[i]=0;
448 						for(i=0; i<NCH; i++)
449 							chused[ctable[i]]=1;
450 						for(kr=i=1; i<NCH; i++)
451 							if (ctable[i]==0)
452 								{
								/*
								 * NOTE(review): this stops at the
								 * first code already marked used,
								 * not at a free one - confirm that
								 * is what is intended.
								 */
453 								while (chused[kr] == 0)
454 									kr++;
455 								ctable[i]=kr;
456 								chused[kr]=1;
457 								}
458 						}
459 						lgate();
460 						continue;
461 					case 'r': case 'R':
462 						c = 'r';
463 						/* FALLTHRU */
464 					case 'c': case 'C':
465 						if(lgatflg)
466 							error("Too late for language specifier");
467 						ratfor = (c == 'r');
468 						continue;
469 					case '{':
						/* copy everything up to "%}" to the output */
470 						lgate();
471 						while(getl(p) && scomp(p, L_PctCbr) != 0)
472 							if(p[0]=='/' && p[1]=='*')
473 								cpycom(p);
474 							else
475 								(void) fprintf(fout,"%ws\n",p);
476 						if(p[0] == '%') continue;
477 						if (*p) error("EOF before %%%%");
478 						else error("EOF before %%}");
479 						break;
480 
481 					case 'x': case 'X':		/* XCU4: exclusive start conditions */
482 						exclusive_flag = 1;
483 						goto start;
484 
485 					case 's': case 'S':		/* start conditions */
486 						exclusive_flag = 0;
487 start:
488 						lgate();
489 
						/* skip the directive word itself */
490 						while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) p++;
491 						n = TRUE;
						/* one pass per name; names are separated by blanks or commas */
492 						while(n){
493 							while(*p && (iswspace(*p) || ((*p) == (wchar_t)','))) p++;
494 							t = p;
495 							while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) {
496 							    if(!isascii(*p))
497 								error("Non-ASCII characters in start condition.");
498 							    p++;
499 							}
500 							if(!*p) n = FALSE;
501 							*p++ = 0;
502 							if (*t == 0) continue;
503 							i = sptr*2;
504 							if(!ratfor)(void) fprintf(fout,"# ");
505 							fprintf(fout,"define %ws %d\n",t,i);
506 							scopy(t,sp);
507 							sname[sptr] = sp;
508 							/* XCU4: save exclusive flag with start name */
509 							exclusive[sptr++] = exclusive_flag;
510 							sname[sptr] = 0;	/* required by lookup */
511 							if(sptr >= STARTSIZE)
512 								error("Too many start conditions");
513 							sp += slength(sp) + 1;
514 							if(sp >= schar+STARTCHAR)
515 								error("Start conditions too long");
516 							}
517 						continue;
518 					default:
						/* p is a wide string: print it with %ws, not %s */
519 						error("Invalid request %ws",p);
520 						continue;
521 						}	/* end of switch after seeing '%' */
522 					break;
523 				case ' ': case '\t':		/* must be code */
524 					lgate();
525 					if( p[1]=='/' && p[2]=='*' ) cpycom(p);
526 					else (void) fprintf(fout, "%ws\n",p);
527 					continue;
528 				case '/':	/* look for comments */
529 					lgate();
530 					if((*(p+1))=='*') cpycom(p);
531 					/* FALLTHRU */
532 				default:		/* definition */
					/*
					 * Cut the line after the name and return the
					 * name now.  bptr remembers where the rest
					 * starts, and prev (no longer '\n') makes the
					 * next call take the else branch below to
					 * return the replacement text.
					 */
533 					while(*p && !iswspace(*p)) p++;
534 					if(*p == 0)
535 						continue;
536 					prev = *p;
537 					*p = 0;
538 					bptr = p+1;
539 					yylval.cp = (CHR *)buf;
540 					if(digit(buf[0]))
541 						warning("Substitution strings may not begin with digits");
542 					return(freturn(STR));
543 				}
544 			} else { /* still sect 1, but prev != '\n' */
545 				p = bptr;
546 				while(*p && iswspace(*p)) p++;
547 				if(*p == 0)
548 					warning("No translation given - null string assumed");
549 				scopy(p,token);
550 				yylval.cp = (CHR *)token;
551 				prev = '\n';
552 				return(freturn(STR));
553 				}
554 			}
555 		error("unexpected EOF before %%%%");
556 		/* end of section one processing */
557 	} else if(sect == RULESECTION){		/* rules and actions */
558 		lgate();
559 		while(!eof){
560 			static int first_test=TRUE, first_value;
561 			static int reverse=FALSE;
562 			switch(c=gch()){
563 			case '\0':
564 				if(n_error)error_tail();
565 				return(freturn(0));
566 			case '\n':
567 				if(prev == '\n') continue;
568 				x = NEWE;
569 				break;
570 			case ' ':
571 			case '\t':
572 				if(prev == '\n') copy_line = TRUE;
				/* code before the first rule is copied through as is */
573 				if(sectbegin == TRUE){
574 					(void)cpyact();
575 					copy_line = FALSE;
576 					while((c=gch()) && c != '\n');
577 					continue;
578 					}
579 				if(!funcflag)phead2();
580 				funcflag = TRUE;
				/* white space ends the pattern: emit the case label, then the action */
581 				if(ratfor)(void) fprintf(fout,"%d\n",30000+casecount);
582 				else (void) fprintf(fout,"case %d:\n",casecount);
583 				if(cpyact()){
584 					if(ratfor)(void) fprintf(fout,"goto 30997\n");
585 					else (void) fprintf(fout,"break;\n");
586 					}
				/* check what is left on the line after the action */
587 				while((c=gch()) && c != '\n') {
588 					if (c=='/') {
589 						if((c=gch())=='*') {
590 							c=gch();
							/*
							 * NOTE(review): elsewhere in this
							 * function end of input shows up as
							 * gch() returning 0; confirm gch()
							 * can return EOF here, otherwise an
							 * unterminated comment never leaves
							 * this loop.
							 */
591 							while(c !=EOF) {
592 								while (c=='*')
593 									if ((c=gch()) == '/') goto w_loop;
594 								c = gch();
595 							}
596 							error("EOF inside comment");
597 						} else
598 							warning("undefined string");
599 					} else if (c=='}')
600 						error("illegal extra \"}\"");
601 				w_loop: ;
602 				}
603 				/* while ((c=gch())== ' ' || c == '\t') ; */
604 				/* if (!space(c)) error("undefined action string"); */
605 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
606 					fatal = 0;
607 					n_error++;
608 					error("executable statements should occur right after %%%%");
609 					fatal = 1;
610 					continue;
611 					}
612 				x = NEWE;
613 				break;
614 			case '%':
615 				if(prev != '\n') goto character;
616 				if(peek == '{'){	/* included code */
617 					(void)getl(buf);
618 					while(!eof&& getl(buf) && scomp(L_PctCbr,buf)!=0)
619 						if(buf[0]=='/' && buf[1]=='*')
620 							cpycom(buf);
621 						else
622 							(void) fprintf(fout,"%ws\n",buf);
623 					continue;
624 					}
625 				if(peek == '%'){
626 					c = gch();
627 					c = gch();
628 					x = DELIM;
629 					break;
630 					}
631 				goto character;
632 			case '|':
				/* "|" as the whole action: this rule shares the next rule's action */
633 				if(peek == ' ' || peek == '\t' || peek == '\n'){
634 					if(ratfor)(void) fprintf(fout,"%d\n",30000+casecount++);
635 					else (void) fprintf(fout,"case %d:\n",casecount++);
636 					continue;
637 					}
638 				x = '|';
639 				break;
640 			case '$':
				/* '$' is an operator only at the end of a pattern or alternative */
641 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
642 					x = c;
643 					break;
644 					}
645 				goto character;
646 			case '^':
647                                 if(peekon && (prev == '}')){
648                                         x = c;
649                                         break;
650                                 }
651 				if(prev != '\n' && scon != TRUE) goto character;
652 				/* valid only at line begin */
653 				x = c;
654 				break;
655 			case '?':
656 			case '+':
657 			case '*':
658 				if(prev == '\n' ) {
659 					fatal = 0;
660 					n_error++;
661 					error("illegal operator -- %c",c);
662 					fatal = 1;
663 				}
664 				/* FALLTHRU */
665 			case '.':
666 			case '(':
667 			case ')':
668 			case ',':
669 			case '/':
670 				x = c;
671 				break;
672 			case '}':
673 				iter = FALSE;
674 				x = c;
675 				break;
676 			case '{':	/* either iteration or definition */
677 				if(digit(c=gch())){	/* iteration */
678 					iter = TRUE;
679 					if(prev=='{') first_test = TRUE;
680 				ieval:
					/*
					 * Collect the digits of one bound.  The
					 * second bound of {m,n} re-enters here
					 * from the `character' code below.
					 */
681 					i = 0;
682 					while(digit(c)){
683 						token[i++] = c;
						/* keep room for the terminator */
						if(i >= TOKENSIZE)
							error("iteration count too long");
684 						c = gch();
685 						}
686 					token[i] = 0;
687 					yylval.i = siconv(token);
688 					if(first_test) {
689 						first_test = FALSE;
690 						first_value = yylval.i;
691 					} else
692 						if(first_value>yylval.i)warning("the values between braces are reversed");
					/* push back the character that ended the number */
693 					ch = c;
694 					munput('c',&ch);
695 					x = ITER;
696 					break;
697 					}
698 				else {		/* definition */
699 					i = 0;
700 					while(c && c!='}'){
701 						token[i++] = c;
702 						if(i >= TOKENSIZE)
703 							error("definition too long");
704 						c = gch();
705 						}
706 					token[i] = 0;
707 					i = lookup(token,def);
708 					if(i < 0)
709 						error("definition %ws not found",token);
710 					else
						/* push the replacement text back to be rescanned */
711 						munput('s',(CHR *)(subs[i]));
712             				if (peek == '^')
713                                                 peekon = 1;
714 					continue;
715 					}
716 			case '<':		/* start condition ? */
717 				if(prev != '\n')  /* not at line begin, not start */
718 					goto character;
719 				t = slptr;
				/* one pass per name in <a,b,...>; each is stored as index+1 */
720 				do {
721 					i = 0;
722 					if(!isascii(c = gch()))
723 					    error("Non-ASCII characters in start condition.");
724 					while(c != ',' && c && c != '>'){
725 						token[i++] = c;
726 						if(i >= TOKENSIZE)
727 							error("string name too long");
728 						if(!isascii(c = gch()))
729 						    error("Non-ASCII characters in start condition.");
730 						}
731 					token[i] = 0;
732 					if(i == 0)
733 						goto character;
734 					i = lookup(token,sname);
735 					lex_startcond_lookupval = i;
736 					if(i < 0) {
737 						fatal = 0;
738 						n_error++;
739 						error("undefined start condition %ws",token);
740 						fatal = 1;
741 						continue;
742 						}
743 					*slptr++ = i+1;
744 					} while(c && c != '>');
745 				*slptr++ = 0;
746 				/* check if previous value re-usable */
747 				for (xp=slist; xp<t; )
748 					{
749 					if (scomp(xp, t)==0)
750 						break;
751 					while (*xp++);
752 					}
753 				if (xp<t)
754 					{
755 					/* re-use previous pointer to string */
756 					slptr=t;
757 					t=xp;
758 					}
759 				if(slptr > slist+STARTSIZE)	/* note not packed */
760 					error("Too many start conditions used");
761 				yylval.cp = (CHR *)t;
762 
763 				/* XCU4: add XSCON */
764 
				/*
				 * The last name looked up decides the kind.  If
				 * that lookup failed the value is negative (the
				 * error has been reported already) and must not
				 * be used as an index.
				 */
765 				if (lex_startcond_lookupval >= 0 && exclusive[lex_startcond_lookupval])
766 					x = XSCON;
767 				else
768 					x = SCON;
769 				break;
770 			case '"':
771 				i = 0;
772 				while((c=gch()) && c != '"' && c != '\n'){
773 					if(c == '\\') c = usescape(c=gch());
774 					remch(c);
775 					token[i++] = c;
776 					if(i >= TOKENSIZE){
777 						warning("String too long");
778 						i = TOKENSIZE-1;
779 						break;
780 						}
781 					}
782 				if(c == '\n') {
783 					yyline--;
784 					warning("Non-terminated string");
785 					yyline++;
786 					}
787 				token[i] = 0;
				/* "" is NULLS, one character is CHAR, anything longer is STR */
788 				if(i == 0)x = NULLS;
789 				else if(i == 1){
790 					yylval.i = (unsigned)token[0];
791 					x = CHAR;
792 					}
793 				else {
794 					yylval.cp = (CHR *)token;
795 					x = STR;
796 					}
797 				break;
798 			case '[':
799 				reverse = FALSE;
800 				x = CCL;
801 				if((c = gch()) == '^'){
802 					x = NCCL;
803 					reverse = TRUE;
804 					c = gch();
805 					}
806 				i = 0;
				/*
				 * Build the class in token[]: single members are
				 * stored as themselves, a range as the triple
				 * RANGE, low, high.  j holds the last single
				 * member seen, i.e. the low end of a range.
				 */
807 				while(c != ']' && c){
808 					static int light=TRUE, ESCAPE=FALSE;
809 					if(c == '-' && prev == '^' && reverse){
810 						symbol[(unsigned)c] = 1;
811 						c = gch();
812 						continue;
813 					}
814 					if(c == '\\') {
815 						c = usescape(c=gch());
816 						ESCAPE = TRUE;
817 					}
818 					if(c=='-' && !ESCAPE && prev!='[' && peek!=']'){
819 					/* range specified */
820 						if (light) {
821 							c = gch();
822 							if(c == '\\')
823 								c=usescape(c=gch());
824 							remch(c);
825 							k = c;
826 							ccs=wcsetno(k);
827 							if(wcsetno(j)!=ccs)
828 							    error("\
829 Character range specified between different codesets.");
830 							if((unsigned)j > (unsigned)k) {
831 								n = j;
832 								j = k;
833 								k = n;
834 								}
835 							if(!handleeuc)
836 							if(!(('A'<=j && k<='Z') ||
837 						     	     ('a'<=j && k<='z') ||
838 						     	     ('0'<=j && k<='9')))
839 								warning("Non-portable Character Class");
840 							token[i++] = RANGE;
841 							token[i++] = j;
842 							token[i++] = k;
843 							light = FALSE;
844 						} else {
845 							error("unmatched hyphen");
846 							if(symbol[(unsigned)c])warning("\"%c\" redefined inside brackets",c);
847 							else symbol[(unsigned)c] = 1;
848 						}
849 						ESCAPE = FALSE;
850 					} else {
851 						j = c;
852 						remch(c);
853 						token[i++] = c; /* Remember whatever.*/
854 						light = TRUE;
855 						ESCAPE = FALSE;
856 					}
857 					c = gch();
858 				}
859 				/* try to pack ccl's */
860 
861 				token[i] = 0;
862 				ccp = ccl;
863 				while (ccp < ccptr && scomp(token, ccp) != 0) ccp++;
864 				if (ccp < ccptr) {  /* found in ccl */
865 				    yylval.cp = ccp;
866 				} else {            /* not in ccl, add it */
867 				    scopy(token,ccptr);
868 				    yylval.cp = ccptr;
869 				    ccptr += slength(token) + 1;
870 				    if(ccptr >= ccl+CCLSIZE)
871 				      error("Too many large character classes");
872 				}
873 				break;
874 			case '\\':
875 				c = usescape(c=gch());
				/* FALLTHRU */
876 			default:
877 			character:
878 				if(iter){	/* second part of an iteration */
879 					iter = FALSE;
880 					if('0' <= c && c <= '9')
881 						goto ieval;
882 					}
883 				remch(c);
				/* a run of alphabetic characters is gathered into one STR */
884 				if(alpha(peek)){
885 					i = 0;
886 					yylval.cp = (CHR *)token;
887 					token[i++] = c;
888 					while(alpha(peek)) {
889 						remch(token[i++] = gch());
890 						if(i >= TOKENSIZE) {
891 							warning("string too long");
892 							i = TOKENSIZE - 1;
893 							break;
894 							}
895 						}
					/*
					 * A following ? * + applies to the last
					 * character only, so hand that one back.
					 */
896 					if(peek == '?' || peek == '*' || peek == '+')
897 						munput('c',&token[--i]);
898 					token[i] = 0;
899 					if(i == 1){
900 						yylval.i = (unsigned)(token[0]);
901 						x = CHAR;
902 						}
903 					else x = STR;
904 					}
905 				else {
906 					yylval.i = (unsigned)c;
907 					x = CHAR;
908 					}
909 				}
			/* common exit for every case that left the switch with a token in x */
910 			scon = FALSE;
911 			peekon = 0;
912 			if((x == SCON) || (x == XSCON))
913 				scon = TRUE;
914 			sectbegin = FALSE;
915 			return(freturn(x));
916 			/* NOTREACHED */
917 			}
918 		}
919 	/* section three */
920 	lgate();
921 	ptail();
922 # ifdef DEBUG
923 	if(debug)
924 		(void) fprintf(fout,"\n/*this comes from section three - debug */\n");
925 # endif
926 
	/* copy the user's trailing code through, preceded by a # line directive */
927 	if(getl(buf) && !eof) {
928   		if (sargv[optind] == NULL)
929 			(void) fprintf(fout, "\n# line %d\n", yyline-1);
930 		else
931 			(void) fprintf(fout,
932 				"\n# line %d \"%s\"\n", yyline-1, sargv[optind]);
933 		(void) fprintf(fout,"%ws\n",buf);
934 		while(getl(buf) && !eof)
935 			(void) fprintf(fout,"%ws\n",buf);
936         }
937 
938 	return(freturn(0));
939 	}
940 /* end of yylex */
941 # ifdef DEBUG
/*
 * freturn - DEBUG-build wrapper through which yylex() returns.
 *
 * When yydebug is set, prints the token being returned (as a character
 * if it is below NCH, otherwise as a number) followed by yylval: as a
 * string for STR/CCL/NCCL, as a character for CHAR, and as an integer
 * for everything else.
 *
 * Returns its argument unchanged.
 *
 * Defined with a prototype and an explicit return type, like yylex();
 * the old-style definition relied on implicit int.
 */
942 int
943 freturn(int i)
{
944 	if(yydebug) {
945 		(void) printf("now return ");
946 		if((unsigned)i < NCH) allprint(i);
947 		else (void) printf("%d",i);
948 		(void) printf("   yylval = ");
949 		switch(i){
950 			case STR: case CCL: case NCCL:
951 				strpt(yylval.cp);
952 				break;
953 			case CHAR:
954 				allprint(yylval.i);
955 				break;
956 			default:
957 				(void) printf("%d",yylval.i);
958 				break;
959 			}
960 		(void) putchar('\n');
961 		}
962 	return(i);
963 	}
964 # endif
965