17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 567298654Sdamico * Common Development and Distribution License (the "License"). 667298654Sdamico * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*1dd08564Sab196087 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate /* 297c478bd9Sstevel@tonic-gate * sub3.c ... ALE enhancement. 307c478bd9Sstevel@tonic-gate * Since a typical Asian language has a huge character set, it is not 317c478bd9Sstevel@tonic-gate * ideal to index an array by a character code itself, which requires 327c478bd9Sstevel@tonic-gate * as large as 2**16 entries per array. 337c478bd9Sstevel@tonic-gate * To get arround this problem, we identify a set of characters that 347c478bd9Sstevel@tonic-gate * causes the same transition on all states and call it character group. 357c478bd9Sstevel@tonic-gate * Every character in a same character group has a unique number called 367c478bd9Sstevel@tonic-gate * character group id. A function yycgid(c) maps the character c (in process 377c478bd9Sstevel@tonic-gate * code) to the id. This mapping is determined by analyzing all regular 387c478bd9Sstevel@tonic-gate * expressions in the lex program. 397c478bd9Sstevel@tonic-gate * 407c478bd9Sstevel@tonic-gate */ 417c478bd9Sstevel@tonic-gate #include <stdlib.h> 427c478bd9Sstevel@tonic-gate #include <widec.h> 437c478bd9Sstevel@tonic-gate #include <search.h> 4467298654Sdamico #include "ldefs.h" 457c478bd9Sstevel@tonic-gate 467c478bd9Sstevel@tonic-gate /* 477c478bd9Sstevel@tonic-gate * "lchar" stands for linearized character. It is a variant of 487c478bd9Sstevel@tonic-gate * process code. AT&T's 16-bit process code has a drawback in which 497c478bd9Sstevel@tonic-gate * for three three process code C, D and E where C <= D <= E, 507c478bd9Sstevel@tonic-gate * codeset(C)==codeset(E) does not mean codeset(D)==codeset(C). 517c478bd9Sstevel@tonic-gate * In other words, four codesets alternates as the magnitude 527c478bd9Sstevel@tonic-gate * of character increases. 537c478bd9Sstevel@tonic-gate * The lchar representation holds this property: 547c478bd9Sstevel@tonic-gate * If three lchar C', D' and E' have the relationship C' < D' < E' and 557c478bd9Sstevel@tonic-gate * codeset(C') == codeset(E') then D' is guaranteed to belong to 567c478bd9Sstevel@tonic-gate * the same codeset as C' and E'. 577c478bd9Sstevel@tonic-gate * lchar is implemented as 32 bit entities and the function linearize() 587c478bd9Sstevel@tonic-gate * that maps a wchar_t to lchar is defined below. There is no 597c478bd9Sstevel@tonic-gate * reverse function for it though. 607c478bd9Sstevel@tonic-gate * The 32-bit process code by AT&T, used only for Taiwanese version at the 617c478bd9Sstevel@tonic-gate * time of wrting, has no such problem and we use it as it is. 627c478bd9Sstevel@tonic-gate */ 637c478bd9Sstevel@tonic-gate 647c478bd9Sstevel@tonic-gate lchar yycgidtbl[MAXNCG] = { 657c478bd9Sstevel@tonic-gate 0, /* For ease of computation of the id. */ 667c478bd9Sstevel@tonic-gate '\n', /* Newline is always special because '.' exclude it. */ 677c478bd9Sstevel@tonic-gate 0x000000ff, /* The upper limit of codeset 0. */ 687c478bd9Sstevel@tonic-gate 0x20ffffff, /* The upper limit of codeset 2. */ 697c478bd9Sstevel@tonic-gate 0x40ffffff /* The upper limit of codeset 3. */ 707c478bd9Sstevel@tonic-gate /* 0x60ffffff The upper limit of codeset 1. */ 717c478bd9Sstevel@tonic-gate /* Above assumes the number of significant bits of wchar_t is <= 24. */ 727c478bd9Sstevel@tonic-gate }; 737c478bd9Sstevel@tonic-gate int ncgidtbl = 5; /* # elements in yycgidtbl. */ 747c478bd9Sstevel@tonic-gate int ncg; /* Should set to ncgidtbl*2; this is the largest value yycgid() */ 757c478bd9Sstevel@tonic-gate /* returns plus 1. */ 767c478bd9Sstevel@tonic-gate 777c478bd9Sstevel@tonic-gate static void setsymbol(int i); 787c478bd9Sstevel@tonic-gate 797c478bd9Sstevel@tonic-gate /* 807c478bd9Sstevel@tonic-gate * For given 16-bit wchar_t (See NOTE), lchar is computed as illustrated below: 817c478bd9Sstevel@tonic-gate * 827c478bd9Sstevel@tonic-gate * wc: axxxxxxbyyyyyyy 837c478bd9Sstevel@tonic-gate * 847c478bd9Sstevel@tonic-gate * returns: 0ab0000000000000axxxxxxxbyyyyyyy 857c478bd9Sstevel@tonic-gate * 867c478bd9Sstevel@tonic-gate * linearize() doesn't do any if compiled with 32-bit wchar_t, use of 877c478bd9Sstevel@tonic-gate * which is flagged with LONG_WCHAR_T macro. 887c478bd9Sstevel@tonic-gate * NOTE: 897c478bd9Sstevel@tonic-gate * The implementation is highly depends on the process code representation. 907c478bd9Sstevel@tonic-gate * This function should be modified when 32-bit process code is used. 917c478bd9Sstevel@tonic-gate * There is no need to keep 'a' and 'b' bits in the lower half of lchar. 927c478bd9Sstevel@tonic-gate * You can actually omit these and squeeze the xxxxxx part one bit right. 937c478bd9Sstevel@tonic-gate * We don't do that here just in sake of speed. 947c478bd9Sstevel@tonic-gate */ 957c478bd9Sstevel@tonic-gate lchar 967c478bd9Sstevel@tonic-gate linearize(wchar_t wc) 977c478bd9Sstevel@tonic-gate { 987c478bd9Sstevel@tonic-gate #ifdef LONG_WCHAR_T 997c478bd9Sstevel@tonic-gate return ((lchar)wc); /* Don't do anything. */ 1007c478bd9Sstevel@tonic-gate #else 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate lchar prefix; 1037c478bd9Sstevel@tonic-gate switch (wc&0x8080) { 1047c478bd9Sstevel@tonic-gate case 0x0000: prefix = 0x00000000; break; 1057c478bd9Sstevel@tonic-gate case 0x0080: prefix = 0x20000000; break; 1067c478bd9Sstevel@tonic-gate case 0x8000: prefix = 0x40000000; break; 1077c478bd9Sstevel@tonic-gate case 0x8080: prefix = 0x60000000; break; 1087c478bd9Sstevel@tonic-gate } 1097c478bd9Sstevel@tonic-gate return (prefix|wc); 1107c478bd9Sstevel@tonic-gate #endif 1117c478bd9Sstevel@tonic-gate } 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate /* compare liniear characters pointed to by pc1 and pc2 */ 1147c478bd9Sstevel@tonic-gate int 1157c478bd9Sstevel@tonic-gate cmplc(const void *arg1, const void *arg2) 1167c478bd9Sstevel@tonic-gate { 1177c478bd9Sstevel@tonic-gate lchar *pc1 = (lchar *)arg1; 1187c478bd9Sstevel@tonic-gate lchar *pc2 = (lchar *)arg2; 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate if (*pc1 > *pc2) 1217c478bd9Sstevel@tonic-gate return (1); 1227c478bd9Sstevel@tonic-gate else if (*pc1 == *pc2) 1237c478bd9Sstevel@tonic-gate return (0); 1247c478bd9Sstevel@tonic-gate else 1257c478bd9Sstevel@tonic-gate return (-1); 1267c478bd9Sstevel@tonic-gate } 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate void 1297c478bd9Sstevel@tonic-gate remch(wchar_t c) 1307c478bd9Sstevel@tonic-gate { 1317c478bd9Sstevel@tonic-gate lchar lc = linearize(c); 132*1dd08564Sab196087 size_t local_ncgidtbl; 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate /* 1357c478bd9Sstevel@tonic-gate * User-friendliness consideration: 1367c478bd9Sstevel@tonic-gate * Make sure no EUC chars are used in reg. exp. 1377c478bd9Sstevel@tonic-gate */ 1387c478bd9Sstevel@tonic-gate if (!handleeuc) { 1397c478bd9Sstevel@tonic-gate if (!isascii(c)) 1407c478bd9Sstevel@tonic-gate if (iswprint(c)) 1417c478bd9Sstevel@tonic-gate warning( 1427c478bd9Sstevel@tonic-gate "Non-ASCII character '%wc' in pattern; use -w or -e lex option.", c); 1437c478bd9Sstevel@tonic-gate else warning( 1447c478bd9Sstevel@tonic-gate "Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c); 1457c478bd9Sstevel@tonic-gate /* In any case, we don't need to construct ncgidtbl[]. */ 1467c478bd9Sstevel@tonic-gate return; 1477c478bd9Sstevel@tonic-gate } 1487c478bd9Sstevel@tonic-gate 149*1dd08564Sab196087 /* 150*1dd08564Sab196087 * lsearch wants ncgidtbl to be size_t, but it is int. Hence, 151*1dd08564Sab196087 * the use of local_ncgidtbl to satisfy the calling interface. 152*1dd08564Sab196087 */ 153*1dd08564Sab196087 local_ncgidtbl = ncgidtbl; 154*1dd08564Sab196087 (void) lsearch(&lc, yycgidtbl, 155*1dd08564Sab196087 &local_ncgidtbl, sizeof (lchar), cmplc); 156*1dd08564Sab196087 ncgidtbl = (int)local_ncgidtbl; 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate void 1607c478bd9Sstevel@tonic-gate sortcgidtbl(void) 1617c478bd9Sstevel@tonic-gate { 1627c478bd9Sstevel@tonic-gate if (!handleeuc) 1637c478bd9Sstevel@tonic-gate return; 1647c478bd9Sstevel@tonic-gate qsort(yycgidtbl, ncgidtbl, sizeof (lchar), cmplc); 1657c478bd9Sstevel@tonic-gate } 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate /* 1687c478bd9Sstevel@tonic-gate * int yycgid(wchar_t c) 1697c478bd9Sstevel@tonic-gate * Takes c and returns its character group id, determind by the 1707c478bd9Sstevel@tonic-gate * following algorithm. The program also uses the binary search 1717c478bd9Sstevel@tonic-gate * algorithm, generalized from Knuth (6.2.1) Algorithm B. 1727c478bd9Sstevel@tonic-gate * 1737c478bd9Sstevel@tonic-gate * This function computes the "character group id" based on 1747c478bd9Sstevel@tonic-gate * a table yycgidtbl of which each lchar entry is pre-sorted 1757c478bd9Sstevel@tonic-gate * in ascending sequence The number of valid entries is given 1767c478bd9Sstevel@tonic-gate * by YYNCGIDTBL. There is no duplicate entries in yycgidtbl. 1777c478bd9Sstevel@tonic-gate * const int YYNCGIDTBL; 1787c478bd9Sstevel@tonic-gate * lchar yycgidtbl[YYNCGIDTBL]; 1797c478bd9Sstevel@tonic-gate * 1807c478bd9Sstevel@tonic-gate * yycgidtbl[0] is guaranteed to have zero. 1817c478bd9Sstevel@tonic-gate * 1827c478bd9Sstevel@tonic-gate * For given c, yycgid(c) returns: 1837c478bd9Sstevel@tonic-gate * 2*i iff yycgidtbl[i] == lc 1847c478bd9Sstevel@tonic-gate * 2*i+1 iff yycgidtbl[i] < lc < yycgidtbl[i+1] 1857c478bd9Sstevel@tonic-gate * YYNCGIDTBL*2-1 1867c478bd9Sstevel@tonic-gate * iff yycgidtbl[YYNCGIDTBL-1] < lc 1877c478bd9Sstevel@tonic-gate * where lc=linearize(c). 1887c478bd9Sstevel@tonic-gate * 1897c478bd9Sstevel@tonic-gate * Some interesting properties.: 1907c478bd9Sstevel@tonic-gate * 1. For any c, 0 <= yycgid(c) <= 2*YYNCGIDTBL-1 1917c478bd9Sstevel@tonic-gate * 2. yycgid(c) == 0 iff c == 0. 1927c478bd9Sstevel@tonic-gate * 3. For any wchar_t c and d, if linearize(c) < linearize(d) then 1937c478bd9Sstevel@tonic-gate * yycgid(c) <= yycgid(d). 1947c478bd9Sstevel@tonic-gate * 4. For any wchar_t c and d, if yycgid(c) < yycgid(d) then 1957c478bd9Sstevel@tonic-gate * linearize(c) < linearize(d). 1967c478bd9Sstevel@tonic-gate */ 1977c478bd9Sstevel@tonic-gate #define YYNCGIDTBL ncgidtbl 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate int 2007c478bd9Sstevel@tonic-gate yycgid(wchar_t c) 2017c478bd9Sstevel@tonic-gate { 2027c478bd9Sstevel@tonic-gate int first = 0; 2037c478bd9Sstevel@tonic-gate int last = YYNCGIDTBL - 1; 2047c478bd9Sstevel@tonic-gate lchar lc; 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate /* 2077c478bd9Sstevel@tonic-gate * In ASCII compat. mode, each character forms a "group" and the 2087c478bd9Sstevel@tonic-gate * group-id is itself... 2097c478bd9Sstevel@tonic-gate */ 2107c478bd9Sstevel@tonic-gate if (!handleeuc) 2117c478bd9Sstevel@tonic-gate return (c); 2127c478bd9Sstevel@tonic-gate 2137c478bd9Sstevel@tonic-gate lc = linearize(c); 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* An exceptional case: yycgidtbl[YYNCGIDTBL-1] < lc */ 2167c478bd9Sstevel@tonic-gate if (yycgidtbl[YYNCGIDTBL - 1] < lc) 2177c478bd9Sstevel@tonic-gate return (YYNCGIDTBL*2 - 1); 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate while (last >= 0) { 2207c478bd9Sstevel@tonic-gate int i = (first+last)/2; 2217c478bd9Sstevel@tonic-gate if (lc == yycgidtbl[i]) 2227c478bd9Sstevel@tonic-gate return (2*i); /* lc exactly matches an element. */ 2237c478bd9Sstevel@tonic-gate else if (yycgidtbl[i] < lc) { 22467298654Sdamico if (lc < yycgidtbl[i+1]) { 22567298654Sdamico /* lc is in between two elements */ 22667298654Sdamico return (2*i+1); 22767298654Sdamico } 2287c478bd9Sstevel@tonic-gate else 2297c478bd9Sstevel@tonic-gate first = i + 1; 2307c478bd9Sstevel@tonic-gate } else 2317c478bd9Sstevel@tonic-gate last = i - 1; 2327c478bd9Sstevel@tonic-gate } 2337c478bd9Sstevel@tonic-gate error( 2347c478bd9Sstevel@tonic-gate "system error in yycgid():binary search failed for c=0x%04x\n", c); 2357c478bd9Sstevel@tonic-gate return (0); 2367c478bd9Sstevel@tonic-gate } 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate /* 2397c478bd9Sstevel@tonic-gate * repbycgid --- replaces each character in the parsing tree by its 2407c478bd9Sstevel@tonic-gate * character group id. This, however, should be called even in 2417c478bd9Sstevel@tonic-gate * the ASCII compat. mode to process DOT nodes and to call cclinter() 2427c478bd9Sstevel@tonic-gate * for the DOT and CCL nodes. 2437c478bd9Sstevel@tonic-gate */ 2447c478bd9Sstevel@tonic-gate void 2457c478bd9Sstevel@tonic-gate repbycgid(void) 2467c478bd9Sstevel@tonic-gate { 2477c478bd9Sstevel@tonic-gate int i, c; 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate for (i = 0; i < tptr; ++i) { 2507c478bd9Sstevel@tonic-gate c = name[i]; 2517c478bd9Sstevel@tonic-gate if (!ISOPERATOR(c)) { 2527c478bd9Sstevel@tonic-gate /* If not an operator, it must be a char. */ 2537c478bd9Sstevel@tonic-gate name[i] = yycgid((wchar_t)c); /* So replace it. */ 2547c478bd9Sstevel@tonic-gate #ifdef DEBUG 2557c478bd9Sstevel@tonic-gate if (debug) { 2567c478bd9Sstevel@tonic-gate printf("name[%d]:'%c'->%d;\n", i, c, name[i]); 2577c478bd9Sstevel@tonic-gate } 2587c478bd9Sstevel@tonic-gate #endif 2597c478bd9Sstevel@tonic-gate } else if (c == RSTR) { 2607c478bd9Sstevel@tonic-gate c = right[i]; 2617c478bd9Sstevel@tonic-gate right[i] = yycgid((wchar_t)c); 2627c478bd9Sstevel@tonic-gate #ifdef DEBUG 2637c478bd9Sstevel@tonic-gate if (debug) { 2647c478bd9Sstevel@tonic-gate printf( 26567298654Sdamico "name[%d].right:'%c'->%d;\n", 26667298654Sdamico i, c, right[i]); 2677c478bd9Sstevel@tonic-gate } 2687c478bd9Sstevel@tonic-gate #endif 2697c478bd9Sstevel@tonic-gate } else if ((c == RCCL) || (c == RNCCL)) { 2707c478bd9Sstevel@tonic-gate CHR cc, *s; 2717c478bd9Sstevel@tonic-gate int j; 2727c478bd9Sstevel@tonic-gate CHR ccltoken[CCLSIZE]; 2737c478bd9Sstevel@tonic-gate CHR *ccp; 2747c478bd9Sstevel@tonic-gate int m; 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * This node represetns a character class RE [ccccc] 2777c478bd9Sstevel@tonic-gate * s points to the string of characters that forms 2787c478bd9Sstevel@tonic-gate * the class and/or a special prefix notation 2797c478bd9Sstevel@tonic-gate * <RANGE>XY which corresponds to the RE X-Y, 2807c478bd9Sstevel@tonic-gate * characters in the range of X and Y. Here, 2817c478bd9Sstevel@tonic-gate * X <= Y is guranteed. 2827c478bd9Sstevel@tonic-gate * We transform these characters into a string 2837c478bd9Sstevel@tonic-gate * of sorted character group ids. 2847c478bd9Sstevel@tonic-gate * 2857c478bd9Sstevel@tonic-gate * There is another mechanism of packing tables 2867c478bd9Sstevel@tonic-gate * that is inherited from the ASCII lex. Call of 2877c478bd9Sstevel@tonic-gate * cclinter() is required for this packing. 2887c478bd9Sstevel@tonic-gate * This used to be done as yylex() reads the lex 2897c478bd9Sstevel@tonic-gate * rules but we have to do this here because the 2907c478bd9Sstevel@tonic-gate * transition table is made to work on the char-group 2917c478bd9Sstevel@tonic-gate * ids and the mapping cannot be determined until 2927c478bd9Sstevel@tonic-gate * the entire file is read. 2937c478bd9Sstevel@tonic-gate */ 2947c478bd9Sstevel@tonic-gate #ifdef DEBUG 2957c478bd9Sstevel@tonic-gate if (debug) { 2967c478bd9Sstevel@tonic-gate printf("name[%d]:R[N]CCL of \"", i); 2977c478bd9Sstevel@tonic-gate strpt(left[i]); 2987c478bd9Sstevel@tonic-gate printf(" -> {"); 2997c478bd9Sstevel@tonic-gate } 3007c478bd9Sstevel@tonic-gate #endif 3017c478bd9Sstevel@tonic-gate /* Prepare symbol[] for cclinter(). */ 3027c478bd9Sstevel@tonic-gate for (j = 0; j < ncg; ++j) 3037c478bd9Sstevel@tonic-gate symbol[j] = FALSE; 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate s = (CHR *) left[i]; 3067c478bd9Sstevel@tonic-gate while (cc = *s++) { 3077c478bd9Sstevel@tonic-gate if (cc == RANGE) { 3087c478bd9Sstevel@tonic-gate int low, high, i; 3097c478bd9Sstevel@tonic-gate /* 3107c478bd9Sstevel@tonic-gate * Special form: <RANGE>XY 3117c478bd9Sstevel@tonic-gate * This means the range X-Y. 3127c478bd9Sstevel@tonic-gate * We mark all symbols[] 3137c478bd9Sstevel@tonic-gate * elements for yycgid(X) thru 3147c478bd9Sstevel@tonic-gate * yycgid(Y), inclusively. 3157c478bd9Sstevel@tonic-gate */ 3167c478bd9Sstevel@tonic-gate low = yycgid(*s++); 3177c478bd9Sstevel@tonic-gate high = yycgid(*s++); 3187c478bd9Sstevel@tonic-gate for (i = low; i <= high; ++i) 3197c478bd9Sstevel@tonic-gate setsymbol(i); 3207c478bd9Sstevel@tonic-gate } else { 3217c478bd9Sstevel@tonic-gate setsymbol(yycgid(cc)); 3227c478bd9Sstevel@tonic-gate } 3237c478bd9Sstevel@tonic-gate } 3247c478bd9Sstevel@tonic-gate 3257c478bd9Sstevel@tonic-gate /* Now make a transformed string of cgids. */ 3267c478bd9Sstevel@tonic-gate s = ccptr; 3277c478bd9Sstevel@tonic-gate m = 0; 3287c478bd9Sstevel@tonic-gate for (j = 0; j < ncg; ++j) 3297c478bd9Sstevel@tonic-gate if (symbol[j]) { 3307c478bd9Sstevel@tonic-gate ccltoken[m++] = (CHR)j; 3317c478bd9Sstevel@tonic-gate #ifdef DEBUG 3327c478bd9Sstevel@tonic-gate if (debug) printf("%d, ", j); 3337c478bd9Sstevel@tonic-gate #endif 3347c478bd9Sstevel@tonic-gate } 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate #ifdef DEBUG 3377c478bd9Sstevel@tonic-gate if (debug) printf("}\n"); 3387c478bd9Sstevel@tonic-gate #endif 3397c478bd9Sstevel@tonic-gate ccltoken[m] = 0; 3407c478bd9Sstevel@tonic-gate ccp = ccl; 3417c478bd9Sstevel@tonic-gate while (ccp < ccptr && scomp(ccltoken, ccp) != 0) 3427c478bd9Sstevel@tonic-gate ccp++; 3437c478bd9Sstevel@tonic-gate if (ccp < ccptr) { /* character class found in ccl */ 3447c478bd9Sstevel@tonic-gate left[i] = (int)ccp; 3457c478bd9Sstevel@tonic-gate } else { /* not in ccl, add it */ 3467c478bd9Sstevel@tonic-gate left[i] = (int)ccptr; 3477c478bd9Sstevel@tonic-gate scopy(ccltoken, ccptr); 3487c478bd9Sstevel@tonic-gate ccptr += slength(ccltoken) + 1; 3497c478bd9Sstevel@tonic-gate if (ccptr > ccl + CCLSIZE) 35067298654Sdamico error( 35167298654Sdamico "Too many large character classes"); 3527c478bd9Sstevel@tonic-gate } 3537c478bd9Sstevel@tonic-gate cclinter(c == RCCL); 3547c478bd9Sstevel@tonic-gate } else if (c == DOT) { 3557c478bd9Sstevel@tonic-gate if (psave == 0) { /* First DOT node. */ 3567c478bd9Sstevel@tonic-gate int j, nlid; 3577c478bd9Sstevel@tonic-gate /* 3587c478bd9Sstevel@tonic-gate * Make symbol[k]=TRUE for all k 3597c478bd9Sstevel@tonic-gate * except k == yycgid('\n'). 3607c478bd9Sstevel@tonic-gate */ 3617c478bd9Sstevel@tonic-gate nlid = yycgid('\n'); 3627c478bd9Sstevel@tonic-gate psave = ccptr; 3637c478bd9Sstevel@tonic-gate for (j = 1; j < ncg; ++j) { 3647c478bd9Sstevel@tonic-gate if (j == nlid) { 3657c478bd9Sstevel@tonic-gate symbol[j] = FALSE; 3667c478bd9Sstevel@tonic-gate } else { 3677c478bd9Sstevel@tonic-gate symbol[j] = TRUE; 3687c478bd9Sstevel@tonic-gate *ccptr++ = (CHR) j; 3697c478bd9Sstevel@tonic-gate } 3707c478bd9Sstevel@tonic-gate } 3717c478bd9Sstevel@tonic-gate *ccptr++ = 0; 3727c478bd9Sstevel@tonic-gate if (ccptr > ccl + CCLSIZE) 37367298654Sdamico error( 37467298654Sdamico "Too many large character classes"); 3757c478bd9Sstevel@tonic-gate } 3767c478bd9Sstevel@tonic-gate /* Mimic mn1(RCCL,psave)... */ 3777c478bd9Sstevel@tonic-gate name[i] = RCCL; 3787c478bd9Sstevel@tonic-gate left[i] = (int)psave; 3797c478bd9Sstevel@tonic-gate cclinter(1); 3807c478bd9Sstevel@tonic-gate } 3817c478bd9Sstevel@tonic-gate } 3827c478bd9Sstevel@tonic-gate #ifdef DEBUG 3837c478bd9Sstevel@tonic-gate if (debug) { 3847c478bd9Sstevel@tonic-gate printf("treedump after repbycgid().\n"); 3857c478bd9Sstevel@tonic-gate treedump(); 3867c478bd9Sstevel@tonic-gate } 3877c478bd9Sstevel@tonic-gate #endif 3887c478bd9Sstevel@tonic-gate } 3897c478bd9Sstevel@tonic-gate 3907c478bd9Sstevel@tonic-gate static void 3917c478bd9Sstevel@tonic-gate setsymbol(int i) 3927c478bd9Sstevel@tonic-gate { 3937c478bd9Sstevel@tonic-gate if (i > sizeof (symbol)) 3947c478bd9Sstevel@tonic-gate error("setsymbol: (SYSERR) %d out of range", i); 3957c478bd9Sstevel@tonic-gate symbol[i] = TRUE; 3967c478bd9Sstevel@tonic-gate } 397