1 /* ccl - routines for character classes */ 2 3 /* Copyright (c) 1990 The Regents of the University of California. */ 4 /* All rights reserved. */ 5 6 /* This code is derived from software contributed to Berkeley by */ 7 /* Vern Paxson. */ 8 9 /* The United States Government has rights in this work pursuant */ 10 /* to contract no. DE-AC03-76SF00098 between the United States */ 11 /* Department of Energy and the University of California. */ 12 13 /* This file is part of flex. */ 14 15 /* Redistribution and use in source and binary forms, with or without */ 16 /* modification, are permitted provided that the following conditions */ 17 /* are met: */ 18 19 /* 1. Redistributions of source code must retain the above copyright */ 20 /* notice, this list of conditions and the following disclaimer. */ 21 /* 2. Redistributions in binary form must reproduce the above copyright */ 22 /* notice, this list of conditions and the following disclaimer in the */ 23 /* documentation and/or other materials provided with the distribution. */ 24 25 /* Neither the name of the University nor the names of its contributors */ 26 /* may be used to endorse or promote products derived from this software */ 27 /* without specific prior written permission. */ 28 29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32 /* PURPOSE. */ 33 34 #include "flexdef.h" 35 36 /* return true if the chr is in the ccl. Takes negation into account. */ 37 static bool 38 ccl_contains (const int cclp, const int ch) 39 { 40 int ind, len, i; 41 42 len = ccllen[cclp]; 43 ind = cclmap[cclp]; 44 45 for (i = 0; i < len; ++i) 46 if (ccltbl[ind + i] == ch) 47 return !cclng[cclp]; 48 49 return cclng[cclp]; 50 } 51 52 53 /* ccladd - add a single character to a ccl */ 54 55 void ccladd (int cclp, int ch) 56 { 57 int ind, len, newpos, i; 58 59 check_char (ch); 60 61 len = ccllen[cclp]; 62 ind = cclmap[cclp]; 63 64 /* check to see if the character is already in the ccl */ 65 66 for (i = 0; i < len; ++i) 67 if (ccltbl[ind + i] == ch) 68 return; 69 70 /* mark newlines */ 71 if (ch == nlch) 72 ccl_has_nl[cclp] = true; 73 74 newpos = ind + len; 75 76 if (newpos >= current_max_ccl_tbl_size) { 77 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; 78 79 ++num_reallocs; 80 81 ccltbl = reallocate_Character_array (ccltbl, 82 current_max_ccl_tbl_size); 83 } 84 85 ccllen[cclp] = len + 1; 86 ccltbl[newpos] = (unsigned char) ch; 87 } 88 89 /* dump_cclp - same thing as list_character_set, but for cclps. */ 90 91 static void dump_cclp (FILE* file, int cclp) 92 { 93 int i; 94 95 putc ('[', file); 96 97 for (i = 0; i < csize; ++i) { 98 if (ccl_contains(cclp, i)){ 99 int start_char = i; 100 101 putc (' ', file); 102 103 fputs (readable_form (i), file); 104 105 while (++i < csize && ccl_contains(cclp,i)) ; 106 107 if (i - 1 > start_char) 108 /* this was a run */ 109 fprintf (file, "-%s", 110 readable_form (i - 1)); 111 112 putc (' ', file); 113 } 114 } 115 116 putc (']', file); 117 } 118 119 120 121 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ 122 int 123 ccl_set_diff (int a, int b) 124 { 125 int d, ch; 126 127 /* create new class */ 128 d = cclinit(); 129 130 /* In order to handle negation, we spin through all possible chars, 131 * addding each char in a that is not in b. 132 * (This could be O(n^2), but n is small and bounded.) 133 */ 134 for ( ch = 0; ch < csize; ++ch ) 135 if (ccl_contains (a, ch) && !ccl_contains(b, ch)) 136 ccladd (d, ch); 137 138 /* debug */ 139 if (0){ 140 fprintf(stderr, "ccl_set_diff ("); 141 fprintf(stderr, "\n "); 142 dump_cclp (stderr, a); 143 fprintf(stderr, "\n "); 144 dump_cclp (stderr, b); 145 fprintf(stderr, "\n "); 146 dump_cclp (stderr, d); 147 fprintf(stderr, "\n)\n"); 148 } 149 return d; 150 } 151 152 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */ 153 int 154 ccl_set_union (int a, int b) 155 { 156 int d, i; 157 158 /* create new class */ 159 d = cclinit(); 160 161 /* Add all of a */ 162 for (i = 0; i < ccllen[a]; ++i) 163 ccladd (d, ccltbl[cclmap[a] + i]); 164 165 /* Add all of b */ 166 for (i = 0; i < ccllen[b]; ++i) 167 ccladd (d, ccltbl[cclmap[b] + i]); 168 169 /* debug */ 170 if (0){ 171 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); 172 fprintf(stderr, "\n "); 173 dump_cclp (stderr, a); 174 fprintf(stderr, "\n "); 175 dump_cclp (stderr, b); 176 fprintf(stderr, "\n "); 177 dump_cclp (stderr, d); 178 fprintf(stderr, "\n)\n"); 179 } 180 return d; 181 } 182 183 184 /* cclinit - return an empty ccl */ 185 186 int cclinit (void) 187 { 188 if (++lastccl >= current_maxccls) { 189 current_maxccls += MAX_CCLS_INCREMENT; 190 191 ++num_reallocs; 192 193 cclmap = 194 reallocate_integer_array (cclmap, current_maxccls); 195 ccllen = 196 reallocate_integer_array (ccllen, current_maxccls); 197 cclng = reallocate_integer_array (cclng, current_maxccls); 198 ccl_has_nl = 199 reallocate_bool_array (ccl_has_nl, 200 current_maxccls); 201 } 202 203 if (lastccl == 1) 204 /* we're making the first ccl */ 205 cclmap[lastccl] = 0; 206 207 else 208 /* The new pointer is just past the end of the last ccl. 209 * Since the cclmap points to the \first/ character of a 210 * ccl, adding the length of the ccl to the cclmap pointer 211 * will produce a cursor to the first free space. 212 */ 213 cclmap[lastccl] = 214 cclmap[lastccl - 1] + ccllen[lastccl - 1]; 215 216 ccllen[lastccl] = 0; 217 cclng[lastccl] = 0; /* ccl's start out life un-negated */ 218 ccl_has_nl[lastccl] = false; 219 220 return lastccl; 221 } 222 223 224 /* cclnegate - negate the given ccl */ 225 226 void cclnegate (int cclp) 227 { 228 cclng[cclp] = 1; 229 ccl_has_nl[cclp] = !ccl_has_nl[cclp]; 230 } 231 232 233 /* list_character_set - list the members of a set of characters in CCL form 234 * 235 * Writes to the given file a character-class representation of those 236 * characters present in the given CCL. A character is present if it 237 * has a non-zero value in the cset array. 238 */ 239 240 void list_character_set (FILE *file, int cset[]) 241 { 242 int i; 243 244 putc ('[', file); 245 246 for (i = 0; i < csize; ++i) { 247 if (cset[i]) { 248 int start_char = i; 249 250 putc (' ', file); 251 252 fputs (readable_form (i), file); 253 254 while (++i < csize && cset[i]) ; 255 256 if (i - 1 > start_char) 257 /* this was a run */ 258 fprintf (file, "-%s", 259 readable_form (i - 1)); 260 261 putc (' ', file); 262 } 263 } 264 265 putc (']', file); 266 } 267 268 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive 269 * scanner. Specifically, if a lowercase or uppercase character, x, is in the 270 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also 271 * be in the range. If not, then this range is ambiguous, and the function 272 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that 273 * [a-z] will be labeled ambiguous because it does not include [A-Z]. 274 * 275 * @param c1 the lower end of the range 276 * @param c2 the upper end of the range 277 * @return true if [c1-c2] is not ambiguous for a caseless scanner. 278 */ 279 bool range_covers_case (int c1, int c2) 280 { 281 int i, o; 282 283 for (i = c1; i <= c2; i++) { 284 if (has_case (i)) { 285 o = reverse_case (i); 286 if (o < c1 || c2 < o) 287 return false; 288 } 289 } 290 return true; 291 } 292 293 /** Reverse the case of a character, if possible. 294 * @return c if case-reversal does not apply. 295 */ 296 int reverse_case (int c) 297 { 298 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); 299 } 300 301 /** Return true if c is uppercase or lowercase. */ 302 bool has_case (int c) 303 { 304 return (isupper (c) || islower (c)) ? true : false; 305 } 306