1*7e382390SJung-uk Kim /* ccl - routines for character classes */
2*7e382390SJung-uk Kim
3*7e382390SJung-uk Kim /* Copyright (c) 1990 The Regents of the University of California. */
4*7e382390SJung-uk Kim /* All rights reserved. */
5*7e382390SJung-uk Kim
6*7e382390SJung-uk Kim /* This code is derived from software contributed to Berkeley by */
7*7e382390SJung-uk Kim /* Vern Paxson. */
8*7e382390SJung-uk Kim
9*7e382390SJung-uk Kim /* The United States Government has rights in this work pursuant */
10*7e382390SJung-uk Kim /* to contract no. DE-AC03-76SF00098 between the United States */
11*7e382390SJung-uk Kim /* Department of Energy and the University of California. */
12*7e382390SJung-uk Kim
13*7e382390SJung-uk Kim /* This file is part of flex. */
14*7e382390SJung-uk Kim
15*7e382390SJung-uk Kim /* Redistribution and use in source and binary forms, with or without */
16*7e382390SJung-uk Kim /* modification, are permitted provided that the following conditions */
17*7e382390SJung-uk Kim /* are met: */
18*7e382390SJung-uk Kim
19*7e382390SJung-uk Kim /* 1. Redistributions of source code must retain the above copyright */
20*7e382390SJung-uk Kim /* notice, this list of conditions and the following disclaimer. */
21*7e382390SJung-uk Kim /* 2. Redistributions in binary form must reproduce the above copyright */
22*7e382390SJung-uk Kim /* notice, this list of conditions and the following disclaimer in the */
23*7e382390SJung-uk Kim /* documentation and/or other materials provided with the distribution. */
24*7e382390SJung-uk Kim
25*7e382390SJung-uk Kim /* Neither the name of the University nor the names of its contributors */
26*7e382390SJung-uk Kim /* may be used to endorse or promote products derived from this software */
27*7e382390SJung-uk Kim /* without specific prior written permission. */
28*7e382390SJung-uk Kim
29*7e382390SJung-uk Kim /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*7e382390SJung-uk Kim /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*7e382390SJung-uk Kim /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*7e382390SJung-uk Kim /* PURPOSE. */
33*7e382390SJung-uk Kim
34*7e382390SJung-uk Kim #include "flexdef.h"
35*7e382390SJung-uk Kim
36*7e382390SJung-uk Kim /* return true if the chr is in the ccl. Takes negation into account. */
37*7e382390SJung-uk Kim static bool
ccl_contains(const int cclp,const int ch)38*7e382390SJung-uk Kim ccl_contains (const int cclp, const int ch)
39*7e382390SJung-uk Kim {
40*7e382390SJung-uk Kim int ind, len, i;
41*7e382390SJung-uk Kim
42*7e382390SJung-uk Kim len = ccllen[cclp];
43*7e382390SJung-uk Kim ind = cclmap[cclp];
44*7e382390SJung-uk Kim
45*7e382390SJung-uk Kim for (i = 0; i < len; ++i)
46*7e382390SJung-uk Kim if (ccltbl[ind + i] == ch)
47*7e382390SJung-uk Kim return !cclng[cclp];
48*7e382390SJung-uk Kim
49*7e382390SJung-uk Kim return cclng[cclp];
50*7e382390SJung-uk Kim }
51*7e382390SJung-uk Kim
52*7e382390SJung-uk Kim
53*7e382390SJung-uk Kim /* ccladd - add a single character to a ccl */
54*7e382390SJung-uk Kim
ccladd(int cclp,int ch)55*7e382390SJung-uk Kim void ccladd (int cclp, int ch)
56*7e382390SJung-uk Kim {
57*7e382390SJung-uk Kim int ind, len, newpos, i;
58*7e382390SJung-uk Kim
59*7e382390SJung-uk Kim check_char (ch);
60*7e382390SJung-uk Kim
61*7e382390SJung-uk Kim len = ccllen[cclp];
62*7e382390SJung-uk Kim ind = cclmap[cclp];
63*7e382390SJung-uk Kim
64*7e382390SJung-uk Kim /* check to see if the character is already in the ccl */
65*7e382390SJung-uk Kim
66*7e382390SJung-uk Kim for (i = 0; i < len; ++i)
67*7e382390SJung-uk Kim if (ccltbl[ind + i] == ch)
68*7e382390SJung-uk Kim return;
69*7e382390SJung-uk Kim
70*7e382390SJung-uk Kim /* mark newlines */
71*7e382390SJung-uk Kim if (ch == nlch)
72*7e382390SJung-uk Kim ccl_has_nl[cclp] = true;
73*7e382390SJung-uk Kim
74*7e382390SJung-uk Kim newpos = ind + len;
75*7e382390SJung-uk Kim
76*7e382390SJung-uk Kim if (newpos >= current_max_ccl_tbl_size) {
77*7e382390SJung-uk Kim current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
78*7e382390SJung-uk Kim
79*7e382390SJung-uk Kim ++num_reallocs;
80*7e382390SJung-uk Kim
81*7e382390SJung-uk Kim ccltbl = reallocate_Character_array (ccltbl,
82*7e382390SJung-uk Kim current_max_ccl_tbl_size);
83*7e382390SJung-uk Kim }
84*7e382390SJung-uk Kim
85*7e382390SJung-uk Kim ccllen[cclp] = len + 1;
86*7e382390SJung-uk Kim ccltbl[newpos] = (unsigned char) ch;
87*7e382390SJung-uk Kim }
88*7e382390SJung-uk Kim
89*7e382390SJung-uk Kim /* dump_cclp - same thing as list_character_set, but for cclps. */
90*7e382390SJung-uk Kim
dump_cclp(FILE * file,int cclp)91*7e382390SJung-uk Kim static void dump_cclp (FILE* file, int cclp)
92*7e382390SJung-uk Kim {
93*7e382390SJung-uk Kim int i;
94*7e382390SJung-uk Kim
95*7e382390SJung-uk Kim putc ('[', file);
96*7e382390SJung-uk Kim
97*7e382390SJung-uk Kim for (i = 0; i < csize; ++i) {
98*7e382390SJung-uk Kim if (ccl_contains(cclp, i)){
99*7e382390SJung-uk Kim int start_char = i;
100*7e382390SJung-uk Kim
101*7e382390SJung-uk Kim putc (' ', file);
102*7e382390SJung-uk Kim
103*7e382390SJung-uk Kim fputs (readable_form (i), file);
104*7e382390SJung-uk Kim
105*7e382390SJung-uk Kim while (++i < csize && ccl_contains(cclp,i)) ;
106*7e382390SJung-uk Kim
107*7e382390SJung-uk Kim if (i - 1 > start_char)
108*7e382390SJung-uk Kim /* this was a run */
109*7e382390SJung-uk Kim fprintf (file, "-%s",
110*7e382390SJung-uk Kim readable_form (i - 1));
111*7e382390SJung-uk Kim
112*7e382390SJung-uk Kim putc (' ', file);
113*7e382390SJung-uk Kim }
114*7e382390SJung-uk Kim }
115*7e382390SJung-uk Kim
116*7e382390SJung-uk Kim putc (']', file);
117*7e382390SJung-uk Kim }
118*7e382390SJung-uk Kim
119*7e382390SJung-uk Kim
120*7e382390SJung-uk Kim
121*7e382390SJung-uk Kim /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
122*7e382390SJung-uk Kim int
ccl_set_diff(int a,int b)123*7e382390SJung-uk Kim ccl_set_diff (int a, int b)
124*7e382390SJung-uk Kim {
125*7e382390SJung-uk Kim int d, ch;
126*7e382390SJung-uk Kim
127*7e382390SJung-uk Kim /* create new class */
128*7e382390SJung-uk Kim d = cclinit();
129*7e382390SJung-uk Kim
130*7e382390SJung-uk Kim /* In order to handle negation, we spin through all possible chars,
131*7e382390SJung-uk Kim * addding each char in a that is not in b.
132*7e382390SJung-uk Kim * (This could be O(n^2), but n is small and bounded.)
133*7e382390SJung-uk Kim */
134*7e382390SJung-uk Kim for ( ch = 0; ch < csize; ++ch )
135*7e382390SJung-uk Kim if (ccl_contains (a, ch) && !ccl_contains(b, ch))
136*7e382390SJung-uk Kim ccladd (d, ch);
137*7e382390SJung-uk Kim
138*7e382390SJung-uk Kim /* debug */
139*7e382390SJung-uk Kim if (0){
140*7e382390SJung-uk Kim fprintf(stderr, "ccl_set_diff (");
141*7e382390SJung-uk Kim fprintf(stderr, "\n ");
142*7e382390SJung-uk Kim dump_cclp (stderr, a);
143*7e382390SJung-uk Kim fprintf(stderr, "\n ");
144*7e382390SJung-uk Kim dump_cclp (stderr, b);
145*7e382390SJung-uk Kim fprintf(stderr, "\n ");
146*7e382390SJung-uk Kim dump_cclp (stderr, d);
147*7e382390SJung-uk Kim fprintf(stderr, "\n)\n");
148*7e382390SJung-uk Kim }
149*7e382390SJung-uk Kim return d;
150*7e382390SJung-uk Kim }
151*7e382390SJung-uk Kim
152*7e382390SJung-uk Kim /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
153*7e382390SJung-uk Kim int
ccl_set_union(int a,int b)154*7e382390SJung-uk Kim ccl_set_union (int a, int b)
155*7e382390SJung-uk Kim {
156*7e382390SJung-uk Kim int d, i;
157*7e382390SJung-uk Kim
158*7e382390SJung-uk Kim /* create new class */
159*7e382390SJung-uk Kim d = cclinit();
160*7e382390SJung-uk Kim
161*7e382390SJung-uk Kim /* Add all of a */
162*7e382390SJung-uk Kim for (i = 0; i < ccllen[a]; ++i)
163*7e382390SJung-uk Kim ccladd (d, ccltbl[cclmap[a] + i]);
164*7e382390SJung-uk Kim
165*7e382390SJung-uk Kim /* Add all of b */
166*7e382390SJung-uk Kim for (i = 0; i < ccllen[b]; ++i)
167*7e382390SJung-uk Kim ccladd (d, ccltbl[cclmap[b] + i]);
168*7e382390SJung-uk Kim
169*7e382390SJung-uk Kim /* debug */
170*7e382390SJung-uk Kim if (0){
171*7e382390SJung-uk Kim fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
172*7e382390SJung-uk Kim fprintf(stderr, "\n ");
173*7e382390SJung-uk Kim dump_cclp (stderr, a);
174*7e382390SJung-uk Kim fprintf(stderr, "\n ");
175*7e382390SJung-uk Kim dump_cclp (stderr, b);
176*7e382390SJung-uk Kim fprintf(stderr, "\n ");
177*7e382390SJung-uk Kim dump_cclp (stderr, d);
178*7e382390SJung-uk Kim fprintf(stderr, "\n)\n");
179*7e382390SJung-uk Kim }
180*7e382390SJung-uk Kim return d;
181*7e382390SJung-uk Kim }
182*7e382390SJung-uk Kim
183*7e382390SJung-uk Kim
184*7e382390SJung-uk Kim /* cclinit - return an empty ccl */
185*7e382390SJung-uk Kim
cclinit(void)186*7e382390SJung-uk Kim int cclinit (void)
187*7e382390SJung-uk Kim {
188*7e382390SJung-uk Kim if (++lastccl >= current_maxccls) {
189*7e382390SJung-uk Kim current_maxccls += MAX_CCLS_INCREMENT;
190*7e382390SJung-uk Kim
191*7e382390SJung-uk Kim ++num_reallocs;
192*7e382390SJung-uk Kim
193*7e382390SJung-uk Kim cclmap =
194*7e382390SJung-uk Kim reallocate_integer_array (cclmap, current_maxccls);
195*7e382390SJung-uk Kim ccllen =
196*7e382390SJung-uk Kim reallocate_integer_array (ccllen, current_maxccls);
197*7e382390SJung-uk Kim cclng = reallocate_integer_array (cclng, current_maxccls);
198*7e382390SJung-uk Kim ccl_has_nl =
199*7e382390SJung-uk Kim reallocate_bool_array (ccl_has_nl,
200*7e382390SJung-uk Kim current_maxccls);
201*7e382390SJung-uk Kim }
202*7e382390SJung-uk Kim
203*7e382390SJung-uk Kim if (lastccl == 1)
204*7e382390SJung-uk Kim /* we're making the first ccl */
205*7e382390SJung-uk Kim cclmap[lastccl] = 0;
206*7e382390SJung-uk Kim
207*7e382390SJung-uk Kim else
208*7e382390SJung-uk Kim /* The new pointer is just past the end of the last ccl.
209*7e382390SJung-uk Kim * Since the cclmap points to the \first/ character of a
210*7e382390SJung-uk Kim * ccl, adding the length of the ccl to the cclmap pointer
211*7e382390SJung-uk Kim * will produce a cursor to the first free space.
212*7e382390SJung-uk Kim */
213*7e382390SJung-uk Kim cclmap[lastccl] =
214*7e382390SJung-uk Kim cclmap[lastccl - 1] + ccllen[lastccl - 1];
215*7e382390SJung-uk Kim
216*7e382390SJung-uk Kim ccllen[lastccl] = 0;
217*7e382390SJung-uk Kim cclng[lastccl] = 0; /* ccl's start out life un-negated */
218*7e382390SJung-uk Kim ccl_has_nl[lastccl] = false;
219*7e382390SJung-uk Kim
220*7e382390SJung-uk Kim return lastccl;
221*7e382390SJung-uk Kim }
222*7e382390SJung-uk Kim
223*7e382390SJung-uk Kim
224*7e382390SJung-uk Kim /* cclnegate - negate the given ccl */
225*7e382390SJung-uk Kim
cclnegate(int cclp)226*7e382390SJung-uk Kim void cclnegate (int cclp)
227*7e382390SJung-uk Kim {
228*7e382390SJung-uk Kim cclng[cclp] = 1;
229*7e382390SJung-uk Kim ccl_has_nl[cclp] = !ccl_has_nl[cclp];
230*7e382390SJung-uk Kim }
231*7e382390SJung-uk Kim
232*7e382390SJung-uk Kim
233*7e382390SJung-uk Kim /* list_character_set - list the members of a set of characters in CCL form
234*7e382390SJung-uk Kim *
235*7e382390SJung-uk Kim * Writes to the given file a character-class representation of those
236*7e382390SJung-uk Kim * characters present in the given CCL. A character is present if it
237*7e382390SJung-uk Kim * has a non-zero value in the cset array.
238*7e382390SJung-uk Kim */
239*7e382390SJung-uk Kim
list_character_set(FILE * file,int cset[])240*7e382390SJung-uk Kim void list_character_set (FILE *file, int cset[])
241*7e382390SJung-uk Kim {
242*7e382390SJung-uk Kim int i;
243*7e382390SJung-uk Kim
244*7e382390SJung-uk Kim putc ('[', file);
245*7e382390SJung-uk Kim
246*7e382390SJung-uk Kim for (i = 0; i < csize; ++i) {
247*7e382390SJung-uk Kim if (cset[i]) {
248*7e382390SJung-uk Kim int start_char = i;
249*7e382390SJung-uk Kim
250*7e382390SJung-uk Kim putc (' ', file);
251*7e382390SJung-uk Kim
252*7e382390SJung-uk Kim fputs (readable_form (i), file);
253*7e382390SJung-uk Kim
254*7e382390SJung-uk Kim while (++i < csize && cset[i]) ;
255*7e382390SJung-uk Kim
256*7e382390SJung-uk Kim if (i - 1 > start_char)
257*7e382390SJung-uk Kim /* this was a run */
258*7e382390SJung-uk Kim fprintf (file, "-%s",
259*7e382390SJung-uk Kim readable_form (i - 1));
260*7e382390SJung-uk Kim
261*7e382390SJung-uk Kim putc (' ', file);
262*7e382390SJung-uk Kim }
263*7e382390SJung-uk Kim }
264*7e382390SJung-uk Kim
265*7e382390SJung-uk Kim putc (']', file);
266*7e382390SJung-uk Kim }
267*7e382390SJung-uk Kim
268*7e382390SJung-uk Kim /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
269*7e382390SJung-uk Kim * scanner. Specifically, if a lowercase or uppercase character, x, is in the
270*7e382390SJung-uk Kim * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
271*7e382390SJung-uk Kim * be in the range. If not, then this range is ambiguous, and the function
272*7e382390SJung-uk Kim * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
273*7e382390SJung-uk Kim * [a-z] will be labeled ambiguous because it does not include [A-Z].
274*7e382390SJung-uk Kim *
275*7e382390SJung-uk Kim * @param c1 the lower end of the range
276*7e382390SJung-uk Kim * @param c2 the upper end of the range
277*7e382390SJung-uk Kim * @return true if [c1-c2] is not ambiguous for a caseless scanner.
278*7e382390SJung-uk Kim */
range_covers_case(int c1,int c2)279*7e382390SJung-uk Kim bool range_covers_case (int c1, int c2)
280*7e382390SJung-uk Kim {
281*7e382390SJung-uk Kim int i, o;
282*7e382390SJung-uk Kim
283*7e382390SJung-uk Kim for (i = c1; i <= c2; i++) {
284*7e382390SJung-uk Kim if (has_case (i)) {
285*7e382390SJung-uk Kim o = reverse_case (i);
286*7e382390SJung-uk Kim if (o < c1 || c2 < o)
287*7e382390SJung-uk Kim return false;
288*7e382390SJung-uk Kim }
289*7e382390SJung-uk Kim }
290*7e382390SJung-uk Kim return true;
291*7e382390SJung-uk Kim }
292*7e382390SJung-uk Kim
293*7e382390SJung-uk Kim /** Reverse the case of a character, if possible.
294*7e382390SJung-uk Kim * @return c if case-reversal does not apply.
295*7e382390SJung-uk Kim */
reverse_case(int c)296*7e382390SJung-uk Kim int reverse_case (int c)
297*7e382390SJung-uk Kim {
298*7e382390SJung-uk Kim return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
299*7e382390SJung-uk Kim }
300*7e382390SJung-uk Kim
301*7e382390SJung-uk Kim /** Return true if c is uppercase or lowercase. */
has_case(int c)302*7e382390SJung-uk Kim bool has_case (int c)
303*7e382390SJung-uk Kim {
304*7e382390SJung-uk Kim return (isupper (c) || islower (c)) ? true : false;
305*7e382390SJung-uk Kim }
306