xref: /illumos-gate/usr/src/cmd/sgs/lex/common/sub3.c (revision 672986541be54a7a471bb088e60780c37e371d7e)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*67298654Sdamico  * Common Development and Distribution License (the "License").
6*67298654Sdamico  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*67298654Sdamico  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
/*
 * sub3.c ... ALE enhancement.
 * Since a typical Asian language has a huge character set, it is not
 * ideal to index an array by the character code itself, which would
 * require as many as 2**16 entries per array.
 * To get around this problem, we identify each set of characters that
 * causes the same transition in all states and call it a character group.
 * All characters in the same character group share one number, unique to
 * that group, called the character group id.  A function yycgid(c) maps
 * the character c (in process code) to the id.  This mapping is determined
 * by analyzing all regular expressions in the lex program.
 *
 */
417c478bd9Sstevel@tonic-gate #include	<stdlib.h>
427c478bd9Sstevel@tonic-gate #include	<widec.h>
437c478bd9Sstevel@tonic-gate #include	<search.h>
44*67298654Sdamico #include	"ldefs.h"
457c478bd9Sstevel@tonic-gate 
/*
 * "lchar" stands for linearized character.  It is a variant of
 * process code.  AT&T's 16-bit process code has a drawback: for
 * three process codes C, D and E where C <= D <= E,
 * codeset(C)==codeset(E) does not imply codeset(D)==codeset(C).
 * In other words, the four codesets alternate as the magnitude
 * of the character code increases.
 * The lchar representation holds this property:
 *   If three lchars C', D' and E' have the relationship C' < D' < E' and
 *   codeset(C') == codeset(E') then D' is guaranteed to belong to
 *   the same codeset as C' and E'.
 * lchar is implemented as a 32-bit entity, and the function linearize()
 * that maps a wchar_t to an lchar is defined below.  There is no
 * reverse function for it, though.
 * The 32-bit process code by AT&T, used only for the Taiwanese version at
 * the time of writing, has no such problem and we use it as it is.
 */
637c478bd9Sstevel@tonic-gate 
/*
 * yycgidtbl[] holds the lchar values that delimit character groups.
 * It starts with the five fixed entries below; remch() appends further
 * entries with lsearch(), sortcgidtbl() sorts the table with qsort(),
 * and yycgid() binary-searches it.
 */
lchar	yycgidtbl[MAXNCG] = {
	0, 		/* For ease of computation of the id. */
	'\n', 		/* Newline is always special because '.' excludes it. */
	0x000000ff, 	/* The upper limit of codeset 0. */
	0x20ffffff,	/* The upper limit of codeset 2. */
	0x40ffffff	/* The upper limit of codeset 3. */
/*	0x60ffffff	   The upper limit of codeset 1 (deliberately omitted). */
	/* Above assumes the number of significant bits of wchar_t is <= 24. */
};
int	ncgidtbl = 5; /* Number of valid elements in yycgidtbl. */
int	ncg; /* Should be set to ncgidtbl*2; this is the largest value */
		/* yycgid() returns, plus 1. */

/* Marks one character group id in symbol[]; defined at the end of the file. */
static void setsymbol(int i);
787c478bd9Sstevel@tonic-gate 
797c478bd9Sstevel@tonic-gate /*
807c478bd9Sstevel@tonic-gate  * For given 16-bit wchar_t (See NOTE), lchar is computed as illustrated below:
817c478bd9Sstevel@tonic-gate  *
827c478bd9Sstevel@tonic-gate  * 	wc: axxxxxxbyyyyyyy
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  * returns: 0ab0000000000000axxxxxxxbyyyyyyy
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  * linearize() doesn't do any if compiled with 32-bit wchar_t, use of
877c478bd9Sstevel@tonic-gate  * which is flagged with LONG_WCHAR_T macro.
887c478bd9Sstevel@tonic-gate  * NOTE:
897c478bd9Sstevel@tonic-gate  * The implementation is highly depends on the process code representation.
907c478bd9Sstevel@tonic-gate  * This function should be modified when 32-bit process code is used.
917c478bd9Sstevel@tonic-gate  * There is no need to keep 'a' and 'b' bits in the lower half of lchar.
927c478bd9Sstevel@tonic-gate  * You can actually omit these and squeeze the xxxxxx part one bit right.
937c478bd9Sstevel@tonic-gate  * We don't do that here just in sake of speed.
947c478bd9Sstevel@tonic-gate  */
957c478bd9Sstevel@tonic-gate lchar
967c478bd9Sstevel@tonic-gate linearize(wchar_t wc)
977c478bd9Sstevel@tonic-gate {
987c478bd9Sstevel@tonic-gate #ifdef LONG_WCHAR_T
997c478bd9Sstevel@tonic-gate 	return ((lchar)wc); /* Don't do anything. */
1007c478bd9Sstevel@tonic-gate #else
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate 	lchar	prefix;
1037c478bd9Sstevel@tonic-gate 	switch (wc&0x8080) {
1047c478bd9Sstevel@tonic-gate 	case 0x0000: prefix = 0x00000000; break;
1057c478bd9Sstevel@tonic-gate 	case 0x0080: prefix = 0x20000000; break;
1067c478bd9Sstevel@tonic-gate 	case 0x8000: prefix = 0x40000000; break;
1077c478bd9Sstevel@tonic-gate 	case 0x8080: prefix = 0x60000000; break;
1087c478bd9Sstevel@tonic-gate 	}
1097c478bd9Sstevel@tonic-gate 	return (prefix|wc);
1107c478bd9Sstevel@tonic-gate #endif
1117c478bd9Sstevel@tonic-gate }
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate /* compare liniear characters pointed to by pc1 and pc2 */
1147c478bd9Sstevel@tonic-gate int
1157c478bd9Sstevel@tonic-gate cmplc(const void *arg1, const void *arg2)
1167c478bd9Sstevel@tonic-gate {
1177c478bd9Sstevel@tonic-gate 	lchar *pc1 = (lchar *)arg1;
1187c478bd9Sstevel@tonic-gate 	lchar *pc2 = (lchar *)arg2;
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate 	if (*pc1 > *pc2)
1217c478bd9Sstevel@tonic-gate 		return (1);
1227c478bd9Sstevel@tonic-gate 	else if (*pc1 == *pc2)
1237c478bd9Sstevel@tonic-gate 		return (0);
1247c478bd9Sstevel@tonic-gate 	else
1257c478bd9Sstevel@tonic-gate 		return (-1);
1267c478bd9Sstevel@tonic-gate }
1277c478bd9Sstevel@tonic-gate 
1287c478bd9Sstevel@tonic-gate void
1297c478bd9Sstevel@tonic-gate remch(wchar_t c)
1307c478bd9Sstevel@tonic-gate {
1317c478bd9Sstevel@tonic-gate 	lchar	lc = linearize(c);
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate 	/*
1347c478bd9Sstevel@tonic-gate 	 * User-friendliness consideration:
1357c478bd9Sstevel@tonic-gate 	 * Make sure no EUC chars are used in reg. exp.
1367c478bd9Sstevel@tonic-gate 	 */
1377c478bd9Sstevel@tonic-gate 	if (!handleeuc) {
1387c478bd9Sstevel@tonic-gate 		if (!isascii(c))
1397c478bd9Sstevel@tonic-gate 			if (iswprint(c))
1407c478bd9Sstevel@tonic-gate 				warning(
1417c478bd9Sstevel@tonic-gate "Non-ASCII character '%wc' in pattern; use -w or -e lex option.", c);
1427c478bd9Sstevel@tonic-gate 			else warning(
1437c478bd9Sstevel@tonic-gate "Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c);
1447c478bd9Sstevel@tonic-gate 		/* In any case, we don't need to construct ncgidtbl[]. */
1457c478bd9Sstevel@tonic-gate 		return;
1467c478bd9Sstevel@tonic-gate 	}
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate 	lsearch(&lc, yycgidtbl,
1497c478bd9Sstevel@tonic-gate 	    (size_t *)&ncgidtbl, sizeof (lchar), cmplc);
1507c478bd9Sstevel@tonic-gate }
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate void
1537c478bd9Sstevel@tonic-gate sortcgidtbl(void)
1547c478bd9Sstevel@tonic-gate {
1557c478bd9Sstevel@tonic-gate 	if (!handleeuc)
1567c478bd9Sstevel@tonic-gate 		return;
1577c478bd9Sstevel@tonic-gate 	qsort(yycgidtbl, ncgidtbl, sizeof (lchar), cmplc);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate /*
1617c478bd9Sstevel@tonic-gate  * int yycgid(wchar_t c)
1627c478bd9Sstevel@tonic-gate  *	Takes c and returns its character group id, determind by the
1637c478bd9Sstevel@tonic-gate  *	following algorithm.  The program also uses the binary search
1647c478bd9Sstevel@tonic-gate  *	algorithm, generalized from Knuth (6.2.1) Algorithm B.
1657c478bd9Sstevel@tonic-gate  *
1667c478bd9Sstevel@tonic-gate  *	This function computes the "character group id" based on
1677c478bd9Sstevel@tonic-gate  *	a table yycgidtbl of which each lchar entry is pre-sorted
1687c478bd9Sstevel@tonic-gate  *	in ascending sequence  The number of valid entries is given
1697c478bd9Sstevel@tonic-gate  *	by YYNCGIDTBL.  There is no duplicate entries in yycgidtbl.
1707c478bd9Sstevel@tonic-gate  *		const int YYNCGIDTBL;
1717c478bd9Sstevel@tonic-gate  *		lchar	yycgidtbl[YYNCGIDTBL];
1727c478bd9Sstevel@tonic-gate  *
1737c478bd9Sstevel@tonic-gate  *	yycgidtbl[0] is guaranteed to have zero.
1747c478bd9Sstevel@tonic-gate  *
1757c478bd9Sstevel@tonic-gate  *	For given c, yycgid(c) returns:
1767c478bd9Sstevel@tonic-gate  *		2*i	iff yycgidtbl[i] == lc
1777c478bd9Sstevel@tonic-gate  *		2*i+1	iff yycgidtbl[i] < lc < yycgidtbl[i+1]
1787c478bd9Sstevel@tonic-gate  *		YYNCGIDTBL*2-1
1797c478bd9Sstevel@tonic-gate  *			iff yycgidtbl[YYNCGIDTBL-1] < lc
1807c478bd9Sstevel@tonic-gate  *	where lc=linearize(c).
1817c478bd9Sstevel@tonic-gate  *
1827c478bd9Sstevel@tonic-gate  *	Some interesting properties.:
1837c478bd9Sstevel@tonic-gate  *	1.  For any c, 0 <= yycgid(c) <= 2*YYNCGIDTBL-1
1847c478bd9Sstevel@tonic-gate  *	2.  yycgid(c) == 0  iff  c == 0.
1857c478bd9Sstevel@tonic-gate  *	3.  For any wchar_t c and d, if linearize(c) < linearize(d) then
1867c478bd9Sstevel@tonic-gate  *	    yycgid(c) <= yycgid(d).
1877c478bd9Sstevel@tonic-gate  *	4.  For any wchar_t c and d, if yycgid(c) < yycgid(d) then
1887c478bd9Sstevel@tonic-gate  *	    linearize(c) < linearize(d).
1897c478bd9Sstevel@tonic-gate  */
1907c478bd9Sstevel@tonic-gate #define	YYNCGIDTBL ncgidtbl
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate int
1937c478bd9Sstevel@tonic-gate yycgid(wchar_t c)
1947c478bd9Sstevel@tonic-gate {
1957c478bd9Sstevel@tonic-gate 	int first = 0;
1967c478bd9Sstevel@tonic-gate 	int last = YYNCGIDTBL - 1;
1977c478bd9Sstevel@tonic-gate 	lchar lc;
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 	/*
2007c478bd9Sstevel@tonic-gate 	 * In ASCII compat. mode, each character forms a "group" and the
2017c478bd9Sstevel@tonic-gate 	 * group-id is itself...
2027c478bd9Sstevel@tonic-gate 	 */
2037c478bd9Sstevel@tonic-gate 	if (!handleeuc)
2047c478bd9Sstevel@tonic-gate 		return (c);
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate 	lc = linearize(c);
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 	/* An exceptional case: yycgidtbl[YYNCGIDTBL-1] < lc */
2097c478bd9Sstevel@tonic-gate 	if (yycgidtbl[YYNCGIDTBL - 1] < lc)
2107c478bd9Sstevel@tonic-gate 		return (YYNCGIDTBL*2 - 1);
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	while (last >= 0) {
2137c478bd9Sstevel@tonic-gate 		int i = (first+last)/2;
2147c478bd9Sstevel@tonic-gate 		if (lc == yycgidtbl[i])
2157c478bd9Sstevel@tonic-gate 			return (2*i);	/* lc exactly matches an element. */
2167c478bd9Sstevel@tonic-gate 		else if (yycgidtbl[i] < lc) {
217*67298654Sdamico 			if (lc < yycgidtbl[i+1]) {
218*67298654Sdamico 				/* lc is in between two elements */
219*67298654Sdamico 				return (2*i+1);
220*67298654Sdamico 			}
2217c478bd9Sstevel@tonic-gate 			else
2227c478bd9Sstevel@tonic-gate 				first = i + 1;
2237c478bd9Sstevel@tonic-gate 		} else
2247c478bd9Sstevel@tonic-gate 			last = i - 1;
2257c478bd9Sstevel@tonic-gate 	}
2267c478bd9Sstevel@tonic-gate 	error(
2277c478bd9Sstevel@tonic-gate 	"system error in yycgid():binary search failed for c=0x%04x\n", c);
2287c478bd9Sstevel@tonic-gate 	return (0);
2297c478bd9Sstevel@tonic-gate }
2307c478bd9Sstevel@tonic-gate 
2317c478bd9Sstevel@tonic-gate /*
2327c478bd9Sstevel@tonic-gate  * repbycgid --- replaces each character in the parsing tree by its
2337c478bd9Sstevel@tonic-gate  * character group id.   This, however, should be called even in
2347c478bd9Sstevel@tonic-gate  * the ASCII compat. mode to process DOT nodes and to call cclinter()
2357c478bd9Sstevel@tonic-gate  * for the DOT and CCL nodes.
2367c478bd9Sstevel@tonic-gate  */
2377c478bd9Sstevel@tonic-gate void
2387c478bd9Sstevel@tonic-gate repbycgid(void)
2397c478bd9Sstevel@tonic-gate {
2407c478bd9Sstevel@tonic-gate 	int i, c;
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate 	for (i = 0; i < tptr; ++i) {
2437c478bd9Sstevel@tonic-gate 		c = name[i];
2447c478bd9Sstevel@tonic-gate 		if (!ISOPERATOR(c)) {
2457c478bd9Sstevel@tonic-gate 		/* If not an operator, it must be a char.  */
2467c478bd9Sstevel@tonic-gate 			name[i] = yycgid((wchar_t)c); /* So replace it. */
2477c478bd9Sstevel@tonic-gate #ifdef DEBUG
2487c478bd9Sstevel@tonic-gate 			if (debug) {
2497c478bd9Sstevel@tonic-gate 				printf("name[%d]:'%c'->%d;\n", i, c, name[i]);
2507c478bd9Sstevel@tonic-gate 			}
2517c478bd9Sstevel@tonic-gate #endif
2527c478bd9Sstevel@tonic-gate 		} else if (c == RSTR) {
2537c478bd9Sstevel@tonic-gate 			c = right[i];
2547c478bd9Sstevel@tonic-gate 			right[i] = yycgid((wchar_t)c);
2557c478bd9Sstevel@tonic-gate #ifdef DEBUG
2567c478bd9Sstevel@tonic-gate 			if (debug) {
2577c478bd9Sstevel@tonic-gate 				printf(
258*67298654Sdamico 				    "name[%d].right:'%c'->%d;\n",
259*67298654Sdamico 				    i, c, right[i]);
2607c478bd9Sstevel@tonic-gate 			}
2617c478bd9Sstevel@tonic-gate #endif
2627c478bd9Sstevel@tonic-gate 		} else if ((c == RCCL) || (c == RNCCL)) {
2637c478bd9Sstevel@tonic-gate 			CHR cc, *s;
2647c478bd9Sstevel@tonic-gate 			int j;
2657c478bd9Sstevel@tonic-gate 			CHR ccltoken[CCLSIZE];
2667c478bd9Sstevel@tonic-gate 			CHR *ccp;
2677c478bd9Sstevel@tonic-gate 			int m;
2687c478bd9Sstevel@tonic-gate 			/*
2697c478bd9Sstevel@tonic-gate 			 * This node represetns a character class RE [ccccc]
2707c478bd9Sstevel@tonic-gate 			 * s points to the string of characters that forms
2717c478bd9Sstevel@tonic-gate 			 * the class and/or a special prefix notation
2727c478bd9Sstevel@tonic-gate 			 * <RANGE>XY which corresponds to the RE X-Y,
2737c478bd9Sstevel@tonic-gate 			 * characters in the range of X and Y.  Here,
2747c478bd9Sstevel@tonic-gate 			 * X <= Y is guranteed.
2757c478bd9Sstevel@tonic-gate 			 * We transform these characters into a string
2767c478bd9Sstevel@tonic-gate 			 * of sorted character group ids.
2777c478bd9Sstevel@tonic-gate 			 *
2787c478bd9Sstevel@tonic-gate 			 * There is another mechanism of packing tables
2797c478bd9Sstevel@tonic-gate 			 * that is inherited from the ASCII lex.  Call of
2807c478bd9Sstevel@tonic-gate 			 * cclinter() is required for this packing.
2817c478bd9Sstevel@tonic-gate 			 * This used to be done as yylex() reads the lex
2827c478bd9Sstevel@tonic-gate 			 * rules but we have to do this here because the
2837c478bd9Sstevel@tonic-gate 			 * transition table is made to work on the char-group
2847c478bd9Sstevel@tonic-gate 			 * ids and the mapping cannot be determined until
2857c478bd9Sstevel@tonic-gate 			 * the entire file is read.
2867c478bd9Sstevel@tonic-gate 			 */
2877c478bd9Sstevel@tonic-gate #ifdef DEBUG
2887c478bd9Sstevel@tonic-gate 			if (debug) {
2897c478bd9Sstevel@tonic-gate 				printf("name[%d]:R[N]CCL of \"", i);
2907c478bd9Sstevel@tonic-gate 				strpt(left[i]);
2917c478bd9Sstevel@tonic-gate 				printf(" -> {");
2927c478bd9Sstevel@tonic-gate 			}
2937c478bd9Sstevel@tonic-gate #endif
2947c478bd9Sstevel@tonic-gate 			/* Prepare symbol[] for cclinter(). */
2957c478bd9Sstevel@tonic-gate 			for (j = 0; j < ncg; ++j)
2967c478bd9Sstevel@tonic-gate 				symbol[j] = FALSE;
2977c478bd9Sstevel@tonic-gate 
2987c478bd9Sstevel@tonic-gate 			s = (CHR *) left[i];
2997c478bd9Sstevel@tonic-gate 			while (cc = *s++) {
3007c478bd9Sstevel@tonic-gate 				if (cc == RANGE) {
3017c478bd9Sstevel@tonic-gate 					int	low, high, i;
3027c478bd9Sstevel@tonic-gate 					/*
3037c478bd9Sstevel@tonic-gate 					 * Special form: <RANGE>XY
3047c478bd9Sstevel@tonic-gate 					 * This means the range X-Y.
3057c478bd9Sstevel@tonic-gate 					 * We mark all symbols[]
3067c478bd9Sstevel@tonic-gate 					 * elements for yycgid(X) thru
3077c478bd9Sstevel@tonic-gate 					 * yycgid(Y), inclusively.
3087c478bd9Sstevel@tonic-gate 					 */
3097c478bd9Sstevel@tonic-gate 					low = yycgid(*s++);
3107c478bd9Sstevel@tonic-gate 					high = yycgid(*s++);
3117c478bd9Sstevel@tonic-gate 					for (i = low; i <= high; ++i)
3127c478bd9Sstevel@tonic-gate 						setsymbol(i);
3137c478bd9Sstevel@tonic-gate 				} else {
3147c478bd9Sstevel@tonic-gate 					setsymbol(yycgid(cc));
3157c478bd9Sstevel@tonic-gate 				}
3167c478bd9Sstevel@tonic-gate 			}
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 			/* Now make a transformed string of cgids. */
3197c478bd9Sstevel@tonic-gate 			s = ccptr;
3207c478bd9Sstevel@tonic-gate 			m = 0;
3217c478bd9Sstevel@tonic-gate 			for (j = 0; j < ncg; ++j)
3227c478bd9Sstevel@tonic-gate 				if (symbol[j]) {
3237c478bd9Sstevel@tonic-gate 					ccltoken[m++] = (CHR)j;
3247c478bd9Sstevel@tonic-gate #ifdef DEBUG
3257c478bd9Sstevel@tonic-gate 					if (debug) printf("%d, ", j);
3267c478bd9Sstevel@tonic-gate #endif
3277c478bd9Sstevel@tonic-gate 				}
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate #ifdef DEBUG
3307c478bd9Sstevel@tonic-gate 			if (debug) printf("}\n");
3317c478bd9Sstevel@tonic-gate #endif
3327c478bd9Sstevel@tonic-gate 			ccltoken[m] = 0;
3337c478bd9Sstevel@tonic-gate 			ccp = ccl;
3347c478bd9Sstevel@tonic-gate 			while (ccp < ccptr && scomp(ccltoken, ccp) != 0)
3357c478bd9Sstevel@tonic-gate 				ccp++;
3367c478bd9Sstevel@tonic-gate 			if (ccp < ccptr) {  /* character class found in ccl */
3377c478bd9Sstevel@tonic-gate 				left[i] = (int)ccp;
3387c478bd9Sstevel@tonic-gate 			} else { /* not in ccl, add it */
3397c478bd9Sstevel@tonic-gate 				left[i] = (int)ccptr;
3407c478bd9Sstevel@tonic-gate 				scopy(ccltoken, ccptr);
3417c478bd9Sstevel@tonic-gate 				ccptr += slength(ccltoken) + 1;
3427c478bd9Sstevel@tonic-gate 				if (ccptr > ccl + CCLSIZE)
343*67298654Sdamico 					error(
344*67298654Sdamico 					"Too many large character classes");
3457c478bd9Sstevel@tonic-gate 			}
3467c478bd9Sstevel@tonic-gate 			cclinter(c == RCCL);
3477c478bd9Sstevel@tonic-gate 		} else if (c == DOT) {
3487c478bd9Sstevel@tonic-gate 			if (psave == 0) { /* First DOT node. */
3497c478bd9Sstevel@tonic-gate 				int j, nlid;
3507c478bd9Sstevel@tonic-gate 				/*
3517c478bd9Sstevel@tonic-gate 				 * Make symbol[k]=TRUE for all k
3527c478bd9Sstevel@tonic-gate 				 *  except k == yycgid('\n').
3537c478bd9Sstevel@tonic-gate 				 */
3547c478bd9Sstevel@tonic-gate 				nlid = yycgid('\n');
3557c478bd9Sstevel@tonic-gate 				psave = ccptr;
3567c478bd9Sstevel@tonic-gate 				for (j = 1; j < ncg; ++j) {
3577c478bd9Sstevel@tonic-gate 					if (j == nlid) {
3587c478bd9Sstevel@tonic-gate 						symbol[j] = FALSE;
3597c478bd9Sstevel@tonic-gate 					} else {
3607c478bd9Sstevel@tonic-gate 						symbol[j] = TRUE;
3617c478bd9Sstevel@tonic-gate 						*ccptr++ = (CHR) j;
3627c478bd9Sstevel@tonic-gate 					}
3637c478bd9Sstevel@tonic-gate 				}
3647c478bd9Sstevel@tonic-gate 				*ccptr++ = 0;
3657c478bd9Sstevel@tonic-gate 				if (ccptr > ccl + CCLSIZE)
366*67298654Sdamico 					error(
367*67298654Sdamico 					"Too many large character classes");
3687c478bd9Sstevel@tonic-gate 			}
3697c478bd9Sstevel@tonic-gate 			/* Mimic mn1(RCCL,psave)... */
3707c478bd9Sstevel@tonic-gate 			name[i] = RCCL;
3717c478bd9Sstevel@tonic-gate 			left[i] = (int)psave;
3727c478bd9Sstevel@tonic-gate 			cclinter(1);
3737c478bd9Sstevel@tonic-gate 		}
3747c478bd9Sstevel@tonic-gate 	}
3757c478bd9Sstevel@tonic-gate #ifdef DEBUG
3767c478bd9Sstevel@tonic-gate 	if (debug) {
3777c478bd9Sstevel@tonic-gate 		printf("treedump after repbycgid().\n");
3787c478bd9Sstevel@tonic-gate 		treedump();
3797c478bd9Sstevel@tonic-gate 	}
3807c478bd9Sstevel@tonic-gate #endif
3817c478bd9Sstevel@tonic-gate }
3827c478bd9Sstevel@tonic-gate 
3837c478bd9Sstevel@tonic-gate static void
3847c478bd9Sstevel@tonic-gate setsymbol(int i)
3857c478bd9Sstevel@tonic-gate {
3867c478bd9Sstevel@tonic-gate 	if (i > sizeof (symbol))
3877c478bd9Sstevel@tonic-gate 		error("setsymbol: (SYSERR) %d out of range", i);
3887c478bd9Sstevel@tonic-gate 	symbol[i] = TRUE;
3897c478bd9Sstevel@tonic-gate }
390