Lines Matching +full:cs +full:- +full:number

1 /*-
53 /* character-class table */
68 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
73 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
75 { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
85 /* character-name table */
106 { "vertical-tab", '\v' },
108 { "form-feed", '\f' },
110 { "carriage-return", '\r' },
134 { "exclamation-mark", '!' },
135 { "quotation-mark", '"' },
136 { "number-sign", '#' },
137 { "dollar-sign", '$' },
138 { "percent-sign", '%' },
141 { "left-parenthesis", '(' },
142 { "right-parenthesis", ')' },
144 { "plus-sign", '+' },
146 { "hyphen", '-' },
147 { "hyphen-minus", '-' },
149 { "full-stop", '.' },
164 { "less-than-sign", '<' },
165 { "equals-sign", '=' },
166 { "greater-than-sign", '>' },
167 { "question-mark", '?' },
168 { "commercial-at", '@' },
169 { "left-square-bracket", '[' },
171 { "reverse-solidus", '\\' },
172 { "right-square-bracket", ']' },
174 { "circumflex-accent", '^' },
176 { "low-line", '_' },
177 { "grave-accent", '`' },
178 { "left-brace", '{' },
179 { "left-curly-bracket", '{' },
180 { "vertical-line", '|' },
181 { "right-brace", '}' },
182 { "right-curly-bracket", '}' },
194 const char *end; /* end of string (-> NUL normally) */
199 int ncsalloc; /* number of csets allocated */
201 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
202 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
203 sopno pend[NPAREN]; /* -> ) ([0] unused) */
250 #define PEEK() (*p->next)
251 #define PEEK2() (*(p->next+1))
252 #define MORE() (p->end - p->next > 0)
253 #define MORE2() (p->end - p->next > 1)
258 #define NEXT() (p->next++)
259 #define NEXT2() (p->next += 2)
260 #define NEXTn(n) (p->next += (n))
261 #define GETNEXT() (*p->next++)
268 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
269 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
270 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
271 #define HERE() (p->slen)
272 #define THERE() (p->slen - 1)
273 #define THERETHERE() (p->slen - 2)
274 #define DROP(n) (p->slen -= (n))
290 - llvm_regcomp - interface for parser and compilation
311 if (preg->re_endp < pattern) in llvm_regcomp()
313 len = preg->re_endp - pattern; in llvm_regcomp()
319 (NC-1)*sizeof(cat_t)); in llvm_regcomp()
322 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ in llvm_regcomp()
323 p->strip = (sop *)calloc(p->ssize, sizeof(sop)); in llvm_regcomp()
324 p->slen = 0; in llvm_regcomp()
325 if (p->strip == NULL) { in llvm_regcomp()
331 p->g = g; in llvm_regcomp()
332 p->next = pattern; in llvm_regcomp()
333 p->end = p->next + len; in llvm_regcomp()
334 p->error = 0; in llvm_regcomp()
335 p->ncsalloc = 0; in llvm_regcomp()
337 p->pbegin[i] = 0; in llvm_regcomp()
338 p->pend[i] = 0; in llvm_regcomp()
340 g->csetsize = NC; in llvm_regcomp()
341 g->sets = NULL; in llvm_regcomp()
342 g->setbits = NULL; in llvm_regcomp()
343 g->ncsets = 0; in llvm_regcomp()
344 g->cflags = cflags; in llvm_regcomp()
345 g->iflags = 0; in llvm_regcomp()
346 g->nbol = 0; in llvm_regcomp()
347 g->neol = 0; in llvm_regcomp()
348 g->must = NULL; in llvm_regcomp()
349 g->mlen = 0; in llvm_regcomp()
350 g->nsub = 0; in llvm_regcomp()
351 g->ncategories = 1; /* category 0 is "everything else" */ in llvm_regcomp()
352 g->categories = &g->catspace[-(CHAR_MIN)]; in llvm_regcomp()
353 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); in llvm_regcomp()
354 g->backrefs = 0; in llvm_regcomp()
358 g->firststate = THERE(); in llvm_regcomp()
366 g->laststate = THERE(); in llvm_regcomp()
372 g->nplus = pluscount(p, g); in llvm_regcomp()
373 g->magic = MAGIC2; in llvm_regcomp()
374 preg->re_nsub = g->nsub; in llvm_regcomp()
375 preg->re_g = g; in llvm_regcomp()
376 preg->re_magic = MAGIC1; in llvm_regcomp()
379 if (g->iflags&REGEX_BAD) in llvm_regcomp()
384 if (p->error != 0) /* lose */ in llvm_regcomp()
386 return(p->error); in llvm_regcomp()
390 - p_ere - ERE parser top level, concatenation and alternation
424 if (!first) { /* tail-end fixups */ in p_ere()
433 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
453 p->g->nsub++; in p_ere_exp()
454 subno = p->g->nsub; in p_ere_exp()
456 p->pbegin[subno] = HERE(); in p_ere_exp()
461 p->pend[subno] = HERE(); in p_ere_exp()
462 assert(p->pend[subno] != 0); in p_ere_exp()
472 * other 1003.2 regular-expression reviewers noticed it at in p_ere_exp()
481 p->g->iflags |= USEBOL; in p_ere_exp()
482 p->g->nbol++; in p_ere_exp()
487 p->g->iflags |= USEEOL; in p_ere_exp()
488 p->g->neol++; in p_ere_exp()
499 if (p->g->cflags&REG_NEWLINE) in p_ere_exp()
511 /* \[0-9] is taken to be a back-reference to a previously specified in p_ere_exp()
512 * matching group. backrefnum will hold the number. The matching in p_ere_exp()
516 backrefnum = c - '0'; in p_ere_exp()
517 if (p->pend[backrefnum] == 0) { in p_ere_exp()
523 * that marks a back-reference to the parse structure. in p_ere_exp()
525 assert(backrefnum <= p->g->nsub); in p_ere_exp()
527 assert(p->pbegin[backrefnum] != 0); in p_ere_exp()
528 assert(OP(p->strip[p->pbegin[backrefnum]]) == OLPAREN); in p_ere_exp()
529 assert(OP(p->strip[p->pend[backrefnum]]) == ORPAREN); in p_ere_exp()
530 (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]); in p_ere_exp()
532 p->g->backrefs = 1; in p_ere_exp()
584 } else /* single number with comma */ in p_ere_exp()
586 } else /* just a single number */ in p_ere_exp()
608 - p_str - string (no metacharacters) "parser"
619 - p_bre - BRE parser top level, anchoring and concatenation
639 p->g->iflags |= USEBOL; in p_bre()
640 p->g->nbol++; in p_bre()
649 p->g->iflags |= USEEOL; in p_bre()
650 p->g->neol++; in p_bre()
657 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
681 if (p->g->cflags&REG_NEWLINE) in p_simp_re()
693 p->g->nsub++; in p_simp_re()
694 subno = p->g->nsub; in p_simp_re()
696 p->pbegin[subno] = HERE(); in p_simp_re()
702 p->pend[subno] = HERE(); in p_simp_re()
703 assert(p->pend[subno] != 0); in p_simp_re()
708 case BACKSL|')': /* should not get here -- must be user */ in p_simp_re()
721 i = (c&~BACKSL) - '0'; in p_simp_re()
723 if (p->pend[i] != 0) { in p_simp_re()
724 assert(i <= p->g->nsub); in p_simp_re()
726 assert(p->pbegin[i] != 0); in p_simp_re()
727 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); in p_simp_re()
728 assert(OP(p->strip[p->pend[i]]) == ORPAREN); in p_simp_re()
729 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); in p_simp_re()
733 p->g->backrefs = 1; in p_simp_re()
755 } else /* single number with comma */ in p_simp_re()
757 } else /* just a single number */ in p_simp_re()
773 - p_count - parse a repetition count
782 count = count*10 + (GETNEXT() - '0'); in p_count()
791 - p_bracket - parse a bracketed character list
799 cset *cs; in p_bracket() local
802 /* Dept of Truly Sickening Special-Case Kludges */ in p_bracket()
803 if (p->end - p->next > 5) { in p_bracket()
804 if (strncmp(p->next, "[:<:]]", 6) == 0) { in p_bracket()
809 if (strncmp(p->next, "[:>:]]", 6) == 0) { in p_bracket()
816 if ((cs = allocset(p)) == NULL) { in p_bracket()
824 CHadd(cs, ']'); in p_bracket()
825 else if (EAT('-')) in p_bracket()
826 CHadd(cs, '-'); in p_bracket()
827 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) in p_bracket()
828 p_b_term(p, cs); in p_bracket()
829 if (EAT('-')) in p_bracket()
830 CHadd(cs, '-'); in p_bracket()
833 if (p->error != 0) { /* don't mess things up further */ in p_bracket()
834 freeset(p, cs); in p_bracket()
838 if (p->g->cflags&REG_ICASE) { in p_bracket()
842 for (i = p->g->csetsize - 1; i >= 0; i--) in p_bracket()
843 if (CHIN(cs, i) && isalpha(i)) { in p_bracket()
846 CHadd(cs, ci); in p_bracket()
848 if (cs->multis != NULL) in p_bracket()
849 mccase(p, cs); in p_bracket()
854 for (i = p->g->csetsize - 1; i >= 0; i--) in p_bracket()
855 if (CHIN(cs, i)) in p_bracket()
856 CHsub(cs, i); in p_bracket()
858 CHadd(cs, i); in p_bracket()
859 if (p->g->cflags&REG_NEWLINE) in p_bracket()
860 CHsub(cs, '\n'); in p_bracket()
861 if (cs->multis != NULL) in p_bracket()
862 mcinvert(p, cs); in p_bracket()
865 assert(cs->multis == NULL); /* xxx */ in p_bracket()
867 if (nch(p, cs) == 1) { /* optimize singleton sets */ in p_bracket()
868 ordinary(p, firstch(p, cs)); in p_bracket()
869 freeset(p, cs); in p_bracket()
871 EMIT(OANYOF, freezeset(p, cs)); in p_bracket()
875 - p_b_term - parse one term of a bracketed character list
878 p_b_term(struct parse *p, cset *cs) in p_b_term() argument
889 case '-': in p_b_term()
903 REQUIRE(c != '-' && c != ']', REG_ECTYPE); in p_b_term()
904 p_b_cclass(p, cs); in p_b_term()
912 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); in p_b_term()
913 p_b_eclass(p, cs); in p_b_term()
920 if (SEE('-') && MORE2() && PEEK2() != ']') { in p_b_term()
923 if (EAT('-')) in p_b_term()
924 finish = '-'; in p_b_term()
932 CHadd(cs, i); in p_b_term()
938 - p_b_cclass - parse a character-class name and deal with it
941 p_b_cclass(struct parse *p, cset *cs) in p_b_cclass() argument
943 const char *sp = p->next; in p_b_cclass()
951 len = p->next - sp; in p_b_cclass()
952 for (cp = cclasses; cp->name != NULL; cp++) in p_b_cclass()
953 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') in p_b_cclass()
955 if (cp->name == NULL) { in p_b_cclass()
961 u = cp->chars; in p_b_cclass()
963 CHadd(cs, c); in p_b_cclass()
964 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) in p_b_cclass()
965 MCadd(p, cs, u); in p_b_cclass()
969 - p_b_eclass - parse an equivalence-class name and deal with it
974 p_b_eclass(struct parse *p, cset *cs) in p_b_eclass() argument
979 CHadd(cs, c); in p_b_eclass()
983 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
1001 - p_b_coll_elem - parse a collating-element name and look it up
1007 const char *sp = p->next; in p_b_coll_elem()
1017 len = p->next - sp; in p_b_coll_elem()
1018 for (cp = cnames; cp->name != NULL; cp++) in p_b_coll_elem()
1019 if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len) in p_b_coll_elem()
1020 return(cp->code); /* known name */ in p_b_coll_elem()
1028 - othercase - return the case counterpart of an alphabetic
1044 - bothcases - emit a dualcase version of a two-case character
1051 const char *oldnext = p->next; in bothcases()
1052 const char *oldend = p->end; in bothcases()
1057 p->next = bracket; in bothcases()
1058 p->end = bracket+2; in bothcases()
1063 assert(p->next == bracket+2); in bothcases()
1064 p->next = oldnext; in bothcases()
1065 p->end = oldend; in bothcases()
1069 - ordinary - emit an ordinary character
1074 cat_t *cap = p->g->categories; in ordinary()
1076 if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) in ordinary()
1081 cap[ch] = p->g->ncategories++; in ordinary()
1086 - nonnewline - emit REG_NEWLINE version of OANY
1093 const char *oldnext = p->next; in nonnewline()
1094 const char *oldend = p->end; in nonnewline()
1097 p->next = bracket; in nonnewline()
1098 p->end = bracket+3; in nonnewline()
1100 assert(p->next == bracket+3); in nonnewline()
1101 p->next = oldnext; in nonnewline()
1102 p->end = oldend; in nonnewline()
1106 - repeat - generate code for a bounded repetition, recursively if needed
1111 int from, /* repeated from this number */ in repeat()
1112 int to) /* to this number of times (maybe INFINITY) */ in repeat()
1121 if (p->error != 0) /* head off possible runaway recursion */ in repeat()
1128 DROP(finish-start); /* drop the operand */ in repeat()
1145 case REP(1, N): /* as x?x{1,n-1} */ in repeat()
1155 repeat(p, copy, 1, to-1); in repeat()
1161 case REP(N, N): /* as xx{m-1,n-1} */ in repeat()
1163 repeat(p, copy, from-1, to-1); in repeat()
1165 case REP(N, INF): /* as xx{n-1,INF} */ in repeat()
1167 repeat(p, copy, from-1, to); in repeat()
1176 - seterr - set an error condition
1181 if (p->error == 0) /* keep earliest error condition */ in seterr()
1182 p->error = e; in seterr()
1183 p->next = nuls; /* try to bring things to a halt */ in seterr()
1184 p->end = nuls; in seterr()
1185 return(0); /* make the return value well-defined */ in seterr()
1189 - allocset - allocate a set of characters for []
1194 int no = p->g->ncsets++; in allocset()
1197 cset *cs; in allocset() local
1198 size_t css = (size_t)p->g->csetsize; in allocset()
1201 if (no >= p->ncsalloc) { /* need another column of space */ in allocset()
1204 p->ncsalloc += CHAR_BIT; in allocset()
1205 nc = p->ncsalloc; in allocset()
1211 ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); in allocset()
1214 p->g->sets = ptr; in allocset()
1216 ptr = (uch *)realloc((char *)p->g->setbits, nbytes); in allocset()
1219 p->g->setbits = ptr; in allocset()
1222 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); in allocset()
1224 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); in allocset()
1227 if (p->g->sets == NULL || p->g->setbits == NULL) in allocset()
1230 cs = &p->g->sets[no]; in allocset()
1231 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); in allocset()
1232 cs->mask = 1 << ((no) % CHAR_BIT); in allocset()
1233 cs->hash = 0; in allocset()
1234 cs->smultis = 0; in allocset()
1235 cs->multis = NULL; in allocset()
1237 return(cs); in allocset()
1239 free(p->g->sets); in allocset()
1240 p->g->sets = NULL; in allocset()
1241 free(p->g->setbits); in allocset()
1242 p->g->setbits = NULL; in allocset()
1250 - freeset - free a now-unused set
1253 freeset(struct parse *p, cset *cs) in freeset() argument
1256 cset *top = &p->g->sets[p->g->ncsets]; in freeset()
1257 size_t css = (size_t)p->g->csetsize; in freeset()
1260 CHsub(cs, i); in freeset()
1261 if (cs == top-1) /* recover only the easy case */ in freeset()
1262 p->g->ncsets--; in freeset()
1266 - freezeset - final processing on a set of characters
1271 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1274 static int /* set number */
1275 freezeset(struct parse *p, cset *cs) in freezeset() argument
1277 uch h = cs->hash; in freezeset()
1279 cset *top = &p->g->sets[p->g->ncsets]; in freezeset()
1281 size_t css = (size_t)p->g->csetsize; in freezeset()
1284 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) in freezeset()
1285 if (cs2->hash == h && cs2 != cs) { in freezeset()
1288 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) in freezeset()
1295 freeset(p, cs); in freezeset()
1296 cs = cs2; in freezeset()
1299 return((int)(cs - p->g->sets)); in freezeset()
1303 - firstch - return first character in a set (which must have at least one)
1306 firstch(struct parse *p, cset *cs) in firstch() argument
1309 size_t css = (size_t)p->g->csetsize; in firstch()
1312 if (CHIN(cs, i)) in firstch()
1319 - nch - number of characters in a set
1322 nch(struct parse *p, cset *cs) in nch() argument
1325 size_t css = (size_t)p->g->csetsize; in nch()
1329 if (CHIN(cs, i)) in nch()
1335 - mcadd - add a collating element to a cset
1338 mcadd( struct parse *p, cset *cs, const char *cp) in mcadd() argument
1340 size_t oldend = cs->smultis; in mcadd()
1343 cs->smultis += strlen(cp) + 1; in mcadd()
1344 np = realloc(cs->multis, cs->smultis); in mcadd()
1346 if (cs->multis) in mcadd()
1347 free(cs->multis); in mcadd()
1348 cs->multis = NULL; in mcadd()
1352 cs->multis = np; in mcadd()
1354 llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); in mcadd()
1358 - mcinvert - invert the list of collating elements in a cset
1365 mcinvert(struct parse *p, cset *cs) in mcinvert() argument
1367 assert(cs->multis == NULL); /* xxx */ in mcinvert()
1371 - mccase - add case counterparts of the list of collating elements in a cset
1378 mccase(struct parse *p, cset *cs) in mccase() argument
1380 assert(cs->multis == NULL); /* xxx */ in mccase()
1384 - isinsets - is this character in any sets?
1391 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; in isinsets()
1394 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) in isinsets()
1401 - samesets - are these two characters in exactly the same sets?
1408 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; in samesets()
1412 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) in samesets()
1419 - categorize - sort out character categories
1424 cat_t *cats = g->categories; in categorize()
1430 if (p->error != 0) in categorize()
1435 cat = g->ncategories++; in categorize()
1444 - dupl - emit a duplicate of a bunch of sops
1452 sopno len = finish - start; in dupl()
1457 enlarge(p, p->ssize + len); /* this many unexpected additions */ in dupl()
1458 assert(p->ssize >= p->slen + len); in dupl()
1459 (void) memmove((char *)(p->strip + p->slen), in dupl()
1460 (char *)(p->strip + start), (size_t)len*sizeof(sop)); in dupl()
1461 p->slen += len; in dupl()
1466 - doemit - emit a strip operator
1469 * hard-case backup, but it's just too big and messy unless there are
1476 if (p->error != 0) in doemit()
1483 if (p->slen >= p->ssize) in doemit()
1484 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ in doemit()
1485 assert(p->slen < p->ssize); in doemit()
1488 p->strip[p->slen++] = SOP(op, opnd); in doemit()
1492 - doinsert - insert a sop into the strip
1502 if (p->error != 0) in doinsert()
1508 s = p->strip[sn]; in doinsert()
1513 if (p->pbegin[i] >= pos) { in doinsert()
1514 p->pbegin[i]++; in doinsert()
1516 if (p->pend[i] >= pos) { in doinsert()
1517 p->pend[i]++; in doinsert()
1521 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], in doinsert()
1522 (HERE()-pos-1)*sizeof(sop)); in doinsert()
1523 p->strip[pos] = s; in doinsert()
1527 - dofwd - complete a forward reference
1533 if (p->error != 0) in dofwd()
1537 p->strip[pos] = OP(p->strip[pos]) | value; in dofwd()
1541 - enlarge - enlarge the strip
1548 if (p->ssize >= size) in enlarge()
1556 sp = (sop *)realloc(p->strip, size*sizeof(sop)); in enlarge()
1561 p->strip = sp; in enlarge()
1562 p->ssize = size; in enlarge()
1566 - stripsnug - compact the strip
1571 g->nstates = p->slen; in stripsnug()
1572 if ((uintptr_t)p->slen > SIZE_MAX / sizeof(sop)) { in stripsnug()
1573 g->strip = p->strip; in stripsnug()
1578 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); in stripsnug()
1579 if (g->strip == NULL) { in stripsnug()
1581 g->strip = p->strip; in stripsnug()
1586 - findmust - fill in must and mlen with longest mandatory literal string
1606 if (p->error != 0) in findmust()
1611 scan = g->strip + 1; in findmust()
1617 newstart = scan - 1; in findmust()
1626 scan--; in findmust()
1633 g->iflags |= REGEX_BAD; in findmust()
1639 if (newlen > g->mlen) { /* ends one */ in findmust()
1641 g->mlen = newlen; in findmust()
1648 if (g->mlen == 0) /* there isn't one */ in findmust()
1652 g->must = malloc((size_t)g->mlen + 1); in findmust()
1653 if (g->must == NULL) { /* argh; just forget it */ in findmust()
1654 g->mlen = 0; in findmust()
1657 cp = g->must; in findmust()
1659 for (i = g->mlen; i > 0; i--) { in findmust()
1662 assert(cp < g->must + g->mlen); in findmust()
1665 assert(cp == g->must + g->mlen); in findmust()
1670 - pluscount - count + nesting
1680 if (p->error != 0) in pluscount()
1683 scan = g->strip + 1; in pluscount()
1693 plusnest--; in pluscount()
1698 g->iflags |= REGEX_BAD; in pluscount()