xref: /illumos-gate/usr/src/cmd/sgs/lex/common/parser.y (revision 1a90c98d7539778aeb0a1d20f735b66aaba17fca)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 %}
23 
24 /*
25  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 /*	Copyright (c) 1988 AT&T	*/
30 /*	  All Rights Reserved	*/
31 
32 %{
33 #include "ldefs.h"
34 
/*
 * Semantic value carried by every grammar symbol.
 *   i  - an integer: a value returned by mn0()/mn1()/mn2(), a character
 *        code (CHAR), or an iteration count (ITER).
 *   cp - a pointer to CHR text: a name/translation string (STR), a packed
 *        character class (CCL/NCCL) or a start-condition list (SCON/XSCON).
 */
35 #define YYSTYPE union _yystype_
36 union _yystype_
37 {
38 	int	i;
39 	CHR	*cp;
40 };
41 int	peekon = 0; /* need this to check if "^" came in a definition section */
/*
 * Scratch variables used by the grammar actions below.  Note that yylex()
 * declares its own locals named p, i, j and k which shadow these.
 */
42 int i;
43 int j,k;
44 int g;
45 CHR *p;
/* Wide-character strings "%T", "%t" and "%}" that yylex() compares against with scomp(). */
46 static wchar_t  L_PctUpT[]= {'%', 'T', 0};
47 static wchar_t  L_PctLoT[]= {'%', 't', 0};
48 static wchar_t  L_PctCbr[]= {'%', '}', 0};
49 
50 int yyerror(const char *);
51 %}
52 
53 /* parser.y */
54 
55 /* XCU4: add XSCON: %x exclusive start token */
56 /* XCU4: add ARRAY: %a yytext is char array */
57 /* XCU4: add POINTER: %p yytext is a pointer to char */
/*
 * Tokens produced by yylex():
 *   CHAR   single character (value in .i)
 *   STR    string of two or more characters, or a definition name/translation
 *   CCL    character class [...];  NCCL  negated class [^...]
 *   DELIM  the "%%" section separator
 *   SCON   <start,...> prefix;  XSCON  same, when the last name looked up
 *          is an exclusive start condition
 *   ITER   a number read after '{' (or after ',' inside an iteration)
 *   NEWE   end of a rule (newline or end of its action)
 *   NULLS  the empty string ""
 * Literal operator characters are returned as themselves.
 */
58 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS XSCON ARRAY POINTER
59 
/* Precedence declarations: in yacc, later lines bind more tightly. */
60 %nonassoc ARRAY POINTER
61 %left XSCON SCON NEWE
62 %left '/'
63 /*
64  * XCU4: lower the precedence of $ and ^ to less than the or operator
65  * per Spec. 1170
66  */
67 %left '$' '^'
68 %left '|'
69 %left CHAR CCL NCCL '(' '.' STR NULLS
70 %left ITER
/* CAT is never returned by yylex(); it only names the precedence used by "r r %prec CAT". */
71 %left CAT
72 %left '*' '+' '?'
73 %%
74 
/* acc: start symbol.  In DEBUG builds, dumps section two once the whole input has been reduced. */
75 acc	:	lexinput
76 	{
77 # ifdef DEBUG
78 		if(debug) sect2dump();
79 # endif
80 	}
81 	;
/*
 * lexinput: the whole specification.
 *   defns delim prods end  - definitions, "%%", rules, optional second "%%"
 *   defns delim end        - no rules at all; phead2() is still called once
 *                            (guarded by funcflag)
 *   error                  - anything else: counted in n_error and reported
 *                            as "Illegal definition".  fatal is cleared
 *                            around the error() call, presumably so that
 *                            error() reports without terminating.
 */
82 lexinput:	defns delim prods end
83 	|	defns delim end
84 	{
85 		if(!funcflag)phead2();
86 		funcflag = TRUE;
87 	}
88 	| error
89 	{
90 # ifdef DEBUG
91 		if(debug) {
92 			sect1dump();
93 			sect2dump();
94 			}
95 # endif
96 		fatal = 0;
97 		n_error++;
98 		error("Illegal definition");
99 		fatal = 1;
100 		}
101 	;
/* end: an optional trailing "%%" delimiter (may be empty). */
102 end:		delim | ;
/*
 * defns: zero or more "name translation" pairs from the definitions section.
 * Both strings are copied back to back into the buffer at dp; def[] records
 * the start of the name and subs[] the start of the translation, at the same
 * index.  The table and buffer limits are checked after the entry has been
 * written, and both arrays are kept terminated by a 0 entry.
 */
103 defns:	defns STR STR
104 	{	scopy($2.cp,dp);
105 		def[dptr] = dp;
106 		dp += slength($2.cp) + 1;
107 		scopy($3.cp,dp);
108 		subs[dptr++] = dp;
109 		if(dptr >= DEFSIZE)
110 			error("Too many definitions");
111 		dp += slength($3.cp) + 1;
112 		if(dp >= dchar+DEFCHAR)
113 			error("Definitions too long");
114 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
115 	}
116 	|
117 	;
/*
 * delim: a "%%" separator.  Advances sect to the next section; in DEBUG
 * builds section one is dumped when leaving the definitions section.
 */
118 delim:	DELIM
119 	{
120 # ifdef DEBUG
121 		if(sect == DEFSECTION && debug) sect1dump();
122 # endif
123 		sect++;
124 		}
125 	;
/* prods: one or more rules; successive rules are joined with RNEWE nodes. */
126 prods:	prods pr
127 	{	$$.i = mn2(RNEWE,$1.i,$2.i);
128 		}
129 	|	pr
130 	{	$$.i = $1.i;}
131 	;
/*
 * pr: a single rule -- a regular expression terminated by NEWE.
 * The expression is concatenated with a final node tagged with the current
 * casecount (S1FINAL when divflg was set while parsing the expression,
 * FINAL otherwise); divflg is then reset and casecount advanced, with an
 * error once it exceeds NACTIONS.
 * A rule that fails to parse is counted in n_error and reported as
 * "Illegal rule"; yyline is decremented around the report so the message
 * refers to the previous line.
 * There is no ';' after this rule; the next rule name ends it.
 */
132 pr:	r NEWE
133 	{
134 		if(divflg == TRUE)
135 			i = mn1(S1FINAL,casecount);
136 		else i = mn1(FINAL,casecount);
137 		$$.i = mn2(RCAT,$1.i,i);
138 		divflg = FALSE;
139 		if((++casecount)>NACTIONS)
140 			error("Too many (>%d) pattern-action rules.", NACTIONS);
141 		}
142 	| error NEWE
143 	{
144 # ifdef DEBUG
145 		if(debug) sect2dump();
146 # endif
147 		fatal = 0;
148 		yyline--;
149 		n_error++;
150 		error("Illegal rule");
151 		fatal = 1;
152 		yyline++;
153 		}
/*
 * r: a regular expression.  Every action stores in $$.i the value returned
 * by mn0()/mn1()/mn2() for the node it builds.  The file-scope scratch
 * variables i, j, k, g and p are used freely by the actions.
 */
	/* single character: leaf node */
154 r:	CHAR
155 	{	$$.i = mn0($1.i); }
	/* string: a leaf for the first character, then one RSTR node per further character */
156 	| STR
157 	{
158 		p = (CHR *)$1.cp;
159 		i = mn0((unsigned)(*p++));
160 		while(*p)
161 			i = mn2(RSTR,i,(unsigned)(*p++));
162 		$$.i = i;
163 		}
164 	| '.'
165 	{
166 		$$.i = mn0(DOT);
167 		}
	/* character class / negated character class; $1 is what yylex() left in yylval */
168 	| CCL
169 	{	$$.i = mn1(RCCL,$1.i); }
170 	| NCCL
171 	{	$$.i = mn1(RNCCL,$1.i); }
172 	| r '*'
173 	{	$$.i = mn1(STAR,$1.i); }
174 	| r '+'
175 	{	$$.i = mn1(PLUS,$1.i); }
176 	| r '?'
177 	{	$$.i = mn1(QUEST,$1.i); }
178 	| r '|' r
179 	{	$$.i = mn2(BAR,$1.i,$3.i); }
	/* juxtaposition is concatenation, at the precedence of the pseudo-token CAT */
180 	| r r %prec CAT
181 	{	$$.i = mn2(RCAT,$1.i,$2.i); }
	/*
	 * trailing context r/r: only the first '/' in a rule builds a DIV node
	 * (with an S2FINAL marker carrying -casecount); a further '/' is an
	 * error and is treated as plain concatenation.  divflg records that a
	 * slash (or '$') has been seen in this rule.
	 */
182 	| r '/' r
183 	{	if(!divflg){
184 			j = mn1(S2FINAL,-casecount);
185 			i = mn2(RCAT,$1.i,j);
186 			$$.i = mn2(DIV,i,$3.i);
187 			}
188 		else {
189 			$$.i = mn2(RCAT,$1.i,$3.i);
190 			error("illegal extra slash");
191 			}
192 		divflg = TRUE;
193 		}
	/*
	 * r{m,n}: yylex() consumes the '{' itself and returns the numbers as
	 * ITER tokens.  The bounds are swapped if given in reverse order.
	 * j starts as $1 followed by m-1 duplicates; then for every count i in
	 * m+1..n a chain of i duplicates is built and OR-ed onto j.
	 * $$ is not assigned when the upper bound is not positive.
	 * NOTE(review): with m == 0 the alternation still starts from one copy
	 * of r, so zero repetitions do not appear to be represented -- confirm.
	 */
194 	| r ITER ',' ITER '}'
195 	{	if($2.i > $4.i){
196 			i = $2.i;
197 			$2.i = $4.i;
198 			$4.i = i;
199 			}
200 		if($4.i <= 0)
201 			error("iteration range must be positive");
202 		else {
203 			j = $1.i;
204 			for(k = 2; k<=$2.i;k++)
205 				j = mn2(RCAT,j,dupl($1.i));
206 			for(i = $2.i+1; i<=$4.i; i++){
207 				g = dupl($1.i);
208 				for(k=2;k<=i;k++)
209 					g = mn2(RCAT,g,dupl($1.i));
210 				j = mn2(BAR,j,g);
211 				}
212 			$$.i = j;
213 			}
214 	}
	/* r{n}: exactly n copies concatenated; n == 0 yields an RNULLS leaf */
215 	| r ITER '}'
216 	{
217 		if($2.i < 0)error("can't have negative iteration");
218 		else if($2.i == 0) $$.i = mn0(RNULLS);
219 		else {
220 			j = $1.i;
221 			for(k=2;k<=$2.i;k++)
222 				j = mn2(RCAT,j,dupl($1.i));
223 			$$.i = j;
224 			}
225 		}
	/* r{n,}: n-1 copies concatenated followed by PLUS of one more copy; 0 and 1 map to STAR and PLUS */
226 	| r ITER ',' '}'
227 	{
228 				/* from n to infinity */
229 		if($2.i < 0)error("can't have negative iteration");
230 		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
231 		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
232 		else {		/* >= 2 iterations minimum */
233 			j = $1.i;
234 			for(k=2;k<$2.i;k++)
235 				j = mn2(RCAT,j,dupl($1.i));
236 			k = mn1(PLUS,dupl($1.i));
237 			$$.i = mn2(RCAT,j,k);
238 			}
239 		}
	/* <start,...>r: the start-condition list pointer is passed to mn2() as an integer */
240 	| SCON r
241 	{	$$.i = mn2(RSCON,$2.i,(uintptr_t)$1.cp); }
242 
243 	/* XCU4: add XSCON */
244 	| XSCON r
245 	{	$$.i = mn2(RXSCON,$2.i,(uintptr_t)$1.cp); }
246 	| '^' r
247 	{	$$.i = mn1(CARAT,$2.i); }
	/*
	 * r$: handled like trailing context with a newline leaf -- a DIV node
	 * if no slash has been seen in this rule, plain concatenation
	 * otherwise.  Sets divflg.
	 */
248 	| r '$'
249 	{	i = mn0('\n');
250 		if(!divflg){
251 			j = mn1(S2FINAL,-casecount);
252 			k = mn2(RCAT,$1.i,j);
253 			$$.i = mn2(DIV,k,i);
254 			}
255 		else $$.i = mn2(RCAT,$1.i,i);
256 		divflg = TRUE;
257 		}
258 	| '(' r ')'
259 	{	$$.i = $2.i; }
260 	|	NULLS
261 	{	$$.i = mn0(RNULLS); }
262 
263 	/* XCU4: add ARRAY and POINTER */
	/*
	 * These two alternatives only record the yytext representation in
	 * isArray and do not assign $$.
	 * NOTE(review): the ';' after each action already terminates the
	 * rule, so the following '|' depends on the yacc in use accepting
	 * it -- confirm.
	 */
264 	| ARRAY
265 	{ isArray = 1; };
266 	|     POINTER
267 	{ isArray = 0; };
268 	;
269 
270 %%
/*
 * yylex - lexical analyzer for the lex specification itself.
 *
 * The behaviour depends on the global sect:
 *
 *   DEFSECTION   Input is read a line at a time with getl().  '%' directives
 *                (table sizes, %array/%pointer, %t character table, %{ ... %}
 *                code, %s/%x start conditions, language selection) are
 *                processed here and never reach the parser.  A definition
 *                line yields two STR tokens on successive calls: the name
 *                (yylval.cp = buf) and then its translation
 *                (yylval.cp = token).  "%%" allocates the parse-tree arrays
 *                and returns DELIM.
 *
 *   RULESECTION  Input is read a character at a time with gch().  Operators
 *                are returned as themselves; other input becomes CHAR, STR,
 *                CCL/NCCL, ITER, SCON/XSCON, NULLS, NEWE or DELIM with
 *                yylval set accordingly.  Actions are copied to fout by
 *                cpyact().  A '\0' from gch() returns 0.
 *
 *   otherwise    (section three) ptail() is called, the remaining input
 *                lines are copied to fout, and 0 is returned.
 *
 * Every return value is passed through freturn().
 *
 * Static state kept across calls: sectbegin (TRUE from the "%%" until the
 * first token of the rules section is returned), token (text buffer handed
 * out through yylval.cp) and iter (TRUE while inside "{n,m}").
 *
 * The locals p, i, j and k shadow the file-scope variables of the same
 * names used by the grammar actions.
 */
271 int
272 yylex(void)
273 {
274 	CHR *p;
275 	int  i;
276 	CHR *xp;
277 	int lex_startcond_lookupval;
278 	CHR  *t, c;
279 	int n, j = 0, k, x;
280 	CHR ch;
281 	static int sectbegin;
282 	static CHR token[TOKENSIZE];
283 	static int iter;
284 	int ccs; /* Current CodeSet. */
285 	CHR *ccp;
286 	int exclusive_flag;	/* XCU4: exclusive start flag */
287 
288 # ifdef DEBUG
289 	yylval.i = 0;
290 # endif
291 
292 	if(sect == DEFSECTION) {		/* definitions section */
293 		while(!eof) {
294 			if(prev == '\n'){    /* next char is at beginning of line */
295 				(void)getl(p=buf);
296 				switch(*p){
297 				case '%':
298 					switch(c= *(p+1)){
299 					case '%':
300 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
301 						if(scomp(p, (CHR *)"%%")) {
302 							p++;
303 							while(*(++p))
304 								if(!space(*p)) {
305 									warning("invalid string following %%%% be ignored");
306 									break;
307 								}
308 						}
309 						lgate();
310 						if(!ratfor)(void) fprintf(fout,"# ");
311 						(void) fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']);
312 						if(!ratfor)(void) fprintf(fout,"int yylex(){\nint nstr; extern int yyprevious;\n");
313 						sectbegin = TRUE;
314 						i = treesize*(sizeof(*name)+sizeof(*left)+
315 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
						/*
						 * Probe that one allocation of the combined size
						 * succeeds.  The result is kept in a pointer; it
						 * used to be squeezed through an int into the CHR
						 * variable c, which truncates it where pointers are
						 * wider than int and then frees a bogus address.
						 */
						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
316 						p = (CHR *)myalloc(i,1);
317 						if(p == 0)
318 							error("Too little core for parse tree");
319 						/* the probe block itself is not used; release it */
320 						free(p);
321 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
322 						name = (int *)myalloc(treesize,sizeof(*name));
323 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
324 						left = (int *)myalloc(treesize,sizeof(*left));
325 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
326 						right = (int *)myalloc(treesize,sizeof(*right));
327 						nullstr = myalloc(treesize,sizeof(*nullstr));
328 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
329 						parent = (int *)myalloc(treesize,sizeof(*parent));
330 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
331 							error("Too little core for parse tree");
332 						return(freturn(DELIM));
333 					case 'p': case 'P':
334 					        /* %p or %pointer */
335 						if ((*(p+2) == 'o') ||
336 						    (*(p+2) == 'O')) {
337 						    if(lgatflg)
338 							error("Too late for %%pointer");
339 						    while(*p && !iswspace(*p))
340 							p++;
341 						    isArray = 0;
342 						    continue;
343 						}
344 						/* has overridden number of positions */
345 						p += 2;
346 						maxpos = siconv(p);
347 						if (maxpos<=0)error("illegal position number");
348 # ifdef DEBUG
349 						if (debug) (void) printf("positions (%%p) now %d\n",maxpos);
350 # endif
351 						if(report == 2)report = 1;
352 						continue;
353 					case 'n': case 'N':	/* has overridden number of states */
354 						p += 2;
355 						nstates = siconv(p);
356 						if(nstates<=0)error("illegal state number");
357 # ifdef DEBUG
358 						if(debug)(void) printf( " no. states (%%n) now %d\n",nstates);
359 # endif
360 						if(report == 2)report = 1;
361 						continue;
362 					case 'e': case 'E':		/* has overridden number of tree nodes */
363 						p += 2;
364 						treesize = siconv(p);
365 						if(treesize<=0)error("illegal number of parse tree nodes");
366 # ifdef DEBUG
367 						if (debug) (void) printf("treesize (%%e) now %d\n",treesize);
368 # endif
369 						if(report == 2)report = 1;
370 						continue;
371 					case 'o': case 'O':
372 						p += 2;
373 						outsize = siconv(p);
374 						if(outsize<=0)error("illegal size of output array");
375 						if (report ==2) report=1;
376 						continue;
377 					case 'a': case 'A':
378 					        /* %a or %array */
379 						if ((*(p+2) == 'r') ||
380 						    (*(p+2) == 'R')) {
381 						    if(lgatflg)
382 							error("Too late for %%array");
383 						    while(*p && !iswspace(*p))
384 							p++;
385 						    isArray = 1;
386 						    continue;
387 						}
388 						/* has overridden number of transitions */
389 						p += 2;
390 						ntrans = siconv(p);
391 						if(ntrans<=0)error("illegal translation number");
392 # ifdef DEBUG
393 						if (debug)(void) printf("N. trans (%%a) now %d\n",ntrans);
394 # endif
395 						if(report == 2)report = 1;
396 						continue;
397 					case 'k': case 'K': /* overridden packed char classes */
398 						p += 2;
399 						free(pchar);
400 						pchlen = siconv(p);
401 						if(pchlen<=0)error("illegal number of packed character class");
402 # ifdef DEBUG
403 						if (debug) (void) printf( "Size classes (%%k) now %d\n",pchlen);
404 # endif
405 						/*LINTED: E_BAD_PTR_CAST_ALIGN*/
406 						pchar=pcptr=(CHR *)myalloc(pchlen, sizeof(*pchar));
407 						if (report==2) report=1;
408 						continue;
409 					case 't': case 'T':	/* character set specifier */
						/* error() takes a printf-style format, so the literal '%' is doubled */
410 						if(handleeuc)
411 							error("\
412 Character table (%%t) is supported only in ASCII compatibility mode.\n");
413 						ZCH = watoi(p+2);
414 						if (ZCH < NCH) ZCH = NCH;
415 						if (ZCH > 2*NCH) error("ch table needs redeclaration");
416 						chset = TRUE;
417 						for(i = 0; i<ZCH; i++)
418 							ctable[i] = 0;
419 						while(getl(p) && scomp(p,L_PctUpT) != 0 && scomp(p,L_PctLoT) != 0){
420 							if((n = siconv(p)) <= 0 || n > ZCH){
421 								error("Character value %d out of range",n);
422 								continue;
423 								}
424 							while(digit(*p)) p++;
425 							if(!iswspace(*p)) error("bad translation format");
426 							while(iswspace(*p)) p++;
427 							t = p;
428 							while(*t){
429 								c = ctrans(&t);
430 								if(ctable[(unsigned)c]){
431 									if (iswprint(c))
432 										warning("Character '%wc' used twice",c);
433 
434 									else
435 										error("Character %o used twice",c);
436 									}
437 								else ctable[(unsigned)c] = n;
438 								t++;
439 								}
440 							p = buf;
441 							}
						/* give every still-unmapped character the lowest value not yet used */
442 						{
443 						char chused[2*NCH]; int kr;
444 						for(i=0; i<ZCH; i++)
445 							chused[i]=0;
446 						for(i=0; i<NCH; i++)
447 							chused[ctable[i]]=1;
448 						for(kr=i=1; i<NCH; i++)
449 							if (ctable[i]==0)
450 								{
451 								while (chused[kr] == 0)
452 									kr++;
453 								ctable[i]=kr;
454 								chused[kr]=1;
455 								}
456 						}
457 						lgate();
458 						continue;
459 					case 'r': case 'R':
460 						c = 'r';
461 						/* FALLTHRU */
462 					case 'c': case 'C':
463 						if(lgatflg)
464 							error("Too late for language specifier");
465 						ratfor = (c == 'r');
466 						continue;
467 					case '{':
						/* copy everything up to the matching "%}" line to the output */
468 						lgate();
469 						while(getl(p) && scomp(p, L_PctCbr) != 0)
470 							if(p[0]=='/' && p[1]=='*')
471 								cpycom(p);
472 							else
473 								(void) fprintf(fout, "%ws\n", p);
474 						if(p[0] == '%') continue;
475 						if (*p) error("EOF before %%%%");
476 						else error("EOF before %%}");
477 						break;
478 
479 					case 'x': case 'X':		/* XCU4: exclusive start conditions */
480 						exclusive_flag = 1;
481 						goto start;
482 
483 					case 's': case 'S':		/* start conditions */
484 						exclusive_flag = 0;
485 start:
486 						lgate();
487 
						/* skip the directive word, then split the rest on blanks and commas */
488 						while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) p++;
489 						n = TRUE;
490 						while(n){
491 							while(*p && (iswspace(*p) || ((*p) == (wchar_t)','))) p++;
492 							t = p;
493 							while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) {
494 							    if(!isascii(*p))
495 								error("Non-ASCII characters in start condition.");
496 							    p++;
497 							}
498 							if(!*p) n = FALSE;
499 							*p++ = 0;
500 							if (*t == 0) continue;
501 							i = sptr*2;
502 							if(!ratfor)(void) fprintf(fout,"# ");
503 							(void) fprintf(fout, "define %ws %d\n", t, i);
504 							scopy(t,sp);
505 							sname[sptr] = sp;
506 							/* XCU4: save exclusive flag with start name */
507 							exclusive[sptr++] = exclusive_flag;
508 							sname[sptr] = 0;	/* required by lookup */
509 							if(sptr >= STARTSIZE)
510 								error("Too many start conditions");
511 							sp += slength(sp) + 1;
512 							if(sp >= schar+STARTCHAR)
513 								error("Start conditions too long");
514 							}
515 						continue;
516 					default:
						/* p is a CHR (wide) string, so it is printed with %ws as elsewhere in this file */
517 						error("Invalid request %ws",p);
518 						continue;
519 						}	/* end of switch after seeing '%' */
520 					break;
521 				case ' ': case '\t':		/* must be code */
522 					lgate();
523 					if( p[1]=='/' && p[2]=='*' ) cpycom(p);
524 					else (void) fprintf(fout, "%ws\n", p);
525 					continue;
526 				case '/':	/* look for comments */
527 					lgate();
528 					if((*(p+1))=='*') cpycom(p);
529 					/* FALLTHRU */
530 				default:		/* definition */
					/*
					 * Terminate the name in place and return it; prev is
					 * now the separator, so the next call takes the else
					 * branch below and returns the translation from bptr.
					 */
531 					while(*p && !iswspace(*p)) p++;
532 					if(*p == 0)
533 						continue;
534 					prev = *p;
535 					*p = 0;
536 					bptr = p+1;
537 					yylval.cp = (CHR *)buf;
538 					if(digit(buf[0]))
539 						warning("Substitution strings may not begin with digits");
540 					return(freturn(STR));
541 				}
542 			} else { /* still sect 1, but prev != '\n' */
543 				p = bptr;
544 				while(*p && iswspace(*p)) p++;
545 				if(*p == 0)
546 					warning("No translation given - null string assumed");
547 				scopy(p,token);
548 				yylval.cp = (CHR *)token;
549 				prev = '\n';
550 				return(freturn(STR));
551 				}
552 			}
553 		error("unexpected EOF before %%%%");
554 		/* end of section one processing */
555 	} else if(sect == RULESECTION){		/* rules and actions */
556 		lgate();
557 		while(!eof){
558 			static int first_test=TRUE, first_value;
559 			static int reverse=FALSE;
560 			switch(c=gch()){
561 			case '\0':
562 				if(n_error)error_tail();
563 				return(freturn(0));
564 			case '\n':
565 				if(prev == '\n') continue;
566 				x = NEWE;
567 				break;
568 			case ' ':
569 			case '\t':
570 				if(prev == '\n') copy_line = TRUE;
				/* before the first rule, indented text is copied through by cpyact() */
571 				if(sectbegin == TRUE){
572 					(void)cpyact();
573 					copy_line = FALSE;
574 					/*LINTED: E_EQUALITY_NOT_ASSIGNMENT*/
575 					while((c=gch()) && c != '\n');
576 					continue;
577 					}
578 				if(!funcflag)phead2();
579 				funcflag = TRUE;
580 				if(ratfor)(void) fprintf(fout,"%d\n",30000+casecount);
581 				else (void) fprintf(fout,"case %d:\n",casecount);
582 				if(cpyact()){
583 					if(ratfor)(void) fprintf(fout,"goto 30997\n");
584 					else (void) fprintf(fout,"break;\n");
585 					}
				/* scan the rest of the line after the action, skipping comments */
586 				/*LINTED: E_EQUALITY_NOT_ASSIGNMENT*/
587 				while((c=gch()) && c != '\n') {
588 					if (c=='/') {
589 						if((c=gch())=='*') {
590 							c=gch();
591 							while(c !=EOF) {
592 								while (c=='*')
593 									if ((c=gch()) == '/') goto w_loop;
594 								c = gch();
595 							}
596 							error("EOF inside comment");
597 						} else
598 							warning("undefined string");
599 					} else if (c=='}')
600 						error("illegal extra \"}\"");
601 				w_loop: ;
602 				}
603 				/* while ((c=gch())== ' ' || c == '\t') ; */
604 				/* if (!space(c)) error("undefined action string"); */
605 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
606 					fatal = 0;
607 					n_error++;
608 					error("executable statements should occur right after %%%%");
609 					fatal = 1;
610 					continue;
611 					}
612 				x = NEWE;
613 				break;
614 			case '%':
615 				if(prev != '\n') goto character;
616 				if(peek == '{'){	/* included code */
617 					(void)getl(buf);
618 					while(!eof&& getl(buf) && scomp(L_PctCbr,buf)!=0)
619 						if(buf[0]=='/' && buf[1]=='*')
620 							cpycom(buf);
621 						else
622 							(void) fprintf(fout, "%ws\n", buf);
623 					continue;
624 					}
625 				if(peek == '%'){
626 					c = gch();
627 					c = gch();
628 					x = DELIM;
629 					break;
630 					}
631 				goto character;
632 			case '|':
				/* '|' followed by white space is the "same action as the next rule" marker */
633 				if(peek == ' ' || peek == '\t' || peek == '\n'){
634 					if(ratfor)(void) fprintf(fout,"%d\n",30000+casecount++);
635 					else (void) fprintf(fout,"case %d:\n",casecount++);
636 					continue;
637 					}
638 				x = '|';
639 				break;
640 			case '$':
641 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
642 					x = c;
643 					break;
644 					}
645 				goto character;
646 			case '^':
647                                 if(peekon && (prev == '}')){
648                                         x = c;
649                                         break;
650                                 }
651 				if(prev != '\n' && scon != TRUE) goto character;
652 				/* valid only at line begin */
653 				x = c;
654 				break;
655 			case '?':
656 			case '+':
657 			case '*':
658 				if(prev == '\n' ) {
659 					fatal = 0;
660 					n_error++;
661 					error("illegal operator -- %c",c);
662 					fatal = 1;
663 				}
664 				/* FALLTHRU */
665 			case '.':
666 			case '(':
667 			case ')':
668 			case ',':
669 			case '/':
670 				x = c;
671 				break;
672 			case '}':
673 				iter = FALSE;
674 				x = c;
675 				break;
676 			case '{':	/* either iteration or definition */
677 				if(digit(c=gch())){	/* iteration */
678 					iter = TRUE;
679 					if(prev=='{') first_test = TRUE;
680 				ieval:
					/* collect the digits into token, convert, and push back the terminator */
681 					i = 0;
682 					while(digit(c)){
683 						token[i++] = c;
684 						c = gch();
685 						}
686 					token[i] = 0;
687 					yylval.i = siconv(token);
688 					if(first_test) {
689 						first_test = FALSE;
690 						first_value = yylval.i;
691 					} else
692 						if(first_value>yylval.i)warning("the values between braces are reversed");
693 					ch = c;
694 					munput('c',&ch);
695 					x = ITER;
696 					break;
697 					}
698 				else {		/* definition */
					/* look the name up and push its translation back onto the input */
699 					i = 0;
700 					while(c && c!='}'){
701 						token[i++] = c;
702 						if(i >= TOKENSIZE)
703 							error("definition too long");
704 						c = gch();
705 						}
706 					token[i] = 0;
707 					i = lookup(token,def);
708 					if(i < 0)
709 						error("definition %ws not found",token);
710 					else
711 						munput('s',(CHR *)(subs[i]));
712 					if (peek == '^')
713                                                 peekon = 1;
714 					continue;
715 					}
716 			case '<':		/* start condition ? */
717 				if(prev != '\n')  /* not at line begin, not start */
718 					goto character;
719 				t = slptr;
720 				do {
721 					i = 0;
722 					if(!isascii(c = gch()))
723 					    error("Non-ASCII characters in start condition.");
724 					while(c != ',' && c && c != '>'){
725 						token[i++] = c;
726 						if(i >= TOKENSIZE)
727 							error("string name too long");
728 						if(!isascii(c = gch()))
729 						    error("Non-ASCII characters in start condition.");
730 						}
731 					token[i] = 0;
732 					if(i == 0)
733 						goto character;
734 					i = lookup(token,sname);
735 					lex_startcond_lookupval = i;
736 					if(i < 0) {
737 						fatal = 0;
738 						n_error++;
739 						error("undefined start condition %ws",token);
740 						fatal = 1;
741 						continue;
742 						}
743 					*slptr++ = i+1;
744 					} while(c && c != '>');
745 				*slptr++ = 0;
746 				/* check if previous value re-usable */
747 				for (xp=slist; xp<t; )
748 					{
749 					if (scomp(xp, t)==0)
750 						break;
751 					while (*xp++);
752 					}
753 				if (xp<t)
754 					{
755 					/* re-use previous pointer to string */
756 					slptr=t;
757 					t=xp;
758 					}
759 				if(slptr > slist+STARTSIZE)	/* note not packed */
760 					error("Too many start conditions used");
761 				yylval.cp = (CHR *)t;
762 
763 				/* XCU4: add XSCON */
764 
				/*
				 * The lookup value is negative when the last name in
				 * the list was undefined (already reported above);
				 * do not index exclusive[] with it.
				 */
765 				if (lex_startcond_lookupval >= 0 && exclusive[lex_startcond_lookupval])
766 					x = XSCON;
767 				else
768 					x = SCON;
769 				break;
770 			case '"':
771 				i = 0;
772 				/*LINTED: E_EQUALITY_NOT_ASSIGNMENT*/
773 				while((c=gch()) && c != '"' && c != '\n'){
774 					if(c == '\\') c = usescape(c=gch());
775 					remch(c);
776 					token[i++] = c;
777 					if(i >= TOKENSIZE){
778 						warning("String too long");
779 						i = TOKENSIZE-1;
780 						break;
781 						}
782 					}
783 				if(c == '\n') {
784 					yyline--;
785 					warning("Non-terminated string");
786 					yyline++;
787 					}
788 				token[i] = 0;
				/* "" is NULLS, a one-character string is CHAR, anything longer is STR */
789 				if(i == 0)x = NULLS;
790 				else if(i == 1){
791 					yylval.i = (unsigned)token[0];
792 					x = CHAR;
793 					}
794 				else {
795 					yylval.cp = (CHR *)token;
796 					x = STR;
797 					}
798 				break;
799 			case '[':
800 				reverse = FALSE;
801 				x = CCL;
802 				if((c = gch()) == '^'){
803 					x = NCCL;
804 					reverse = TRUE;
805 					c = gch();
806 					}
807 				i = 0;
808 				while(c != ']' && c){
809 					static int light=TRUE, ESCAPE=FALSE;
810 					if(c == '-' && prev == '^' && reverse){
811 						symbol[(unsigned)c] = 1;
812 						c = gch();
813 						continue;
814 					}
815 					if(c == '\\') {
816 						c = usescape(c=gch());
817 						ESCAPE = TRUE;
818 					}
819 					if(c=='-' && !ESCAPE && prev!='[' && peek!=']'){
820 					/* range specified */
821 						if (light) {
822 							c = gch();
823 							if(c == '\\')
824 								c=usescape(c=gch());
825 							remch(c);
826 							k = c;
827 							ccs=wcsetno(k);
828 							if(wcsetno(j)!=ccs)
829 							    error("\
830 Character range specified between different codesets.");
831 							if((unsigned)j > (unsigned)k) {
832 								n = j;
833 								j = k;
834 								k = n;
835 								}
836 							if(!handleeuc)
837 							if(!(('A'<=j && k<='Z') ||
838 							    ('a'<=j && k<='z') ||
839 							    ('0'<=j && k<='9')))
840 								warning("Non-portable Character Class");
							/* a range is stored as the marker RANGE followed by its two end points */
841 							token[i++] = RANGE;
842 							token[i++] = j;
843 							token[i++] = k;
844 							light = FALSE;
845 						} else {
846 							error("unmatched hyphen");
847 							if(symbol[(unsigned)c])warning("\"%c\" redefined inside brackets",c);
848 							else symbol[(unsigned)c] = 1;
849 						}
850 						ESCAPE = FALSE;
851 					} else {
852 						j = c;
853 						remch(c);
854 						token[i++] = c; /* Remember whatever.*/
855 						light = TRUE;
856 						ESCAPE = FALSE;
857 					}
858 					c = gch();
859 				}
860 				/* try to pack ccl's */
861 
862 				token[i] = 0;
863 				ccp = ccl;
864 				while (ccp < ccptr && scomp(token, ccp) != 0) ccp++;
865 				if (ccp < ccptr) {  /* found in ccl */
866 				    yylval.cp = ccp;
867 				} else {            /* not in ccl, add it */
868 				    scopy(token,ccptr);
869 				    yylval.cp = ccptr;
870 				    ccptr += slength(token) + 1;
871 				    if(ccptr >= ccl+CCLSIZE)
872 				      error("Too many large character classes");
873 				}
874 				break;
875 			case '\\':
876 				c = usescape(c=gch());
877 				/* FALLTHROUGH */
878 			default:
879 			character:
880 				if(iter){	/* second part of an iteration */
881 					iter = FALSE;
882 					if('0' <= c && c <= '9')
883 						goto ieval;
884 					}
885 				remch(c);
				/* gather a run of alphabetic characters into one STR token */
886 				if(alpha(peek)){
887 					i = 0;
888 					yylval.cp = (CHR *)token;
889 					token[i++] = c;
890 					while(alpha(peek)) {
891 						remch(token[i++] = gch());
892 						if(i >= TOKENSIZE) {
893 							warning("string too long");
894 							i = TOKENSIZE - 1;
895 							break;
896 							}
897 						}
					/* a following ?, * or + applies to the last character only, so push it back */
898 					if(peek == '?' || peek == '*' || peek == '+')
899 						munput('c',&token[--i]);
900 					token[i] = 0;
901 					if(i == 1){
902 						yylval.i = (unsigned)(token[0]);
903 						x = CHAR;
904 						}
905 					else x = STR;
906 					}
907 				else {
908 					yylval.i = (unsigned)c;
909 					x = CHAR;
910 					}
911 				}
			/* common exit for every case that left the switch with a token in x */
912 			scon = FALSE;
913 			peekon = 0;
914 			if((x == SCON) || (x == XSCON))
915 				scon = TRUE;
916 			sectbegin = FALSE;
917 			return(freturn(x));
918 			/* NOTREACHED */
919 			}
920 		}
921 	/* section three */
922 	lgate();
923 	ptail();
924 # ifdef DEBUG
925 	if(debug)
926 		(void) fprintf(fout,"\n/*this comes from section three - debug */\n");
927 # endif
928 
	/* copy the user subroutines through, preceded by a "# line" directive */
929 	if(getl(buf) && !eof) {
930 		if (sargv[optind] == NULL)
931 			(void) fprintf(fout, "\n# line %d\n", yyline-1);
932 		else
933 			(void) fprintf(fout,
934 				"\n# line %d \"%s\"\n", yyline-1, sargv[optind]);
935 		(void) fprintf(fout, "%ws\n", buf);
936 		while(getl(buf) && !eof)
937 			(void) fprintf(fout, "%ws\n", buf);
938         }
939 
940 	return(freturn(0));
941 	}
942 /* end of yylex */
943 # ifdef DEBUG
freturn(i)944 freturn(i)
945   int i; {
946 	if(yydebug) {
947 		(void) printf("now return ");
948 		if((unsigned)i < NCH) allprint(i);
949 		else (void) printf("%d",i);
950 		(void) printf("   yylval = ");
951 		switch(i){
952 			case STR: case CCL: case NCCL:
953 				strpt(yylval.cp);
954 				break;
955 			case CHAR:
956 				allprint(yylval.i);
957 				break;
958 			default:
959 				(void) printf("%d",yylval.i);
960 				break;
961 			}
962 		(void) putchar('\n');
963 		}
964 	return(i);
965 	}
966 # endif
967