1 %{ 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License, Version 1.0 only 7 * (the "License"). You may not use this file except in compliance 8 * with the License. 9 * 10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 11 * or http://www.opensolaris.org/os/licensing. 12 * See the License for the specific language governing permissions 13 * and limitations under the License. 14 * 15 * When distributing Covered Code, include this CDDL HEADER in each 16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 17 * If applicable, add the following below this CDDL HEADER, with the 18 * fields enclosed by brackets "[]" replaced with your own identifying 19 * information: Portions Copyright [yyyy] [name of copyright owner] 20 * 21 * CDDL HEADER END 22 */ 23 %} 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 30 /* All Rights Reserved */ 31 32 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 33 /* All Rights Reserved */ 34 35 %{ 36 #pragma ident "%Z%%M% %I% %E% SMI" 37 %} 38 39 /* 40 * egrep -- print lines containing (or not containing) a regular expression 41 * 42 * status returns: 43 * 0 - ok, and some matches 44 * 1 - ok, but no matches 45 * 2 - some error; matches irrelevant 46 */ 47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST 48 %left OR 49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '(' 50 %left CAT 51 %left STAR PLUS QUEST 52 53 %{ 54 #include <stdio.h> 55 #include <ctype.h> 56 #include <memory.h> 57 #include <wchar.h> 58 #include <wctype.h> 59 #include <widec.h> 60 #include <stdlib.h> 61 #include <limits.h> 62 #include <locale.h> 63 64 #define BLKSIZE 512 /* size of reported disk blocks */ 65 #define EBUFSIZ 8192 66 #define MAXLIN 350 67 #define NCHARS 256 68 #define MAXPOS 4000 69 #define NSTATES 64 70 #define FINAL -1 71 #define RIGHT '\n' /* serves as record separator and as $ */ 72 #define LEFT '\n' /* beginning of line */ 73 int gotofn[NSTATES][NCHARS]; 74 int state[NSTATES]; 75 int out[NSTATES]; 76 int line = 1; 77 int *name; 78 int *left; 79 int *right; 80 int *parent; 81 int *foll; 82 int *positions; 83 char *chars; 84 wchar_t *lower; 85 wchar_t *upper; 86 int maxlin, maxclin, maxwclin, maxpos; 87 int nxtpos = 0; 88 int inxtpos; 89 int nxtchar = 0; 90 int *tmpstat; 91 int *initstat; 92 int istat; 93 int nstate = 1; 94 int xstate; 95 int count; 96 int icount; 97 char *input; 98 99 100 wchar_t lyylval; 101 wchar_t nextch(); 102 wchar_t maxmin(); 103 int compare(); 104 void overflo(); 105 106 char reinit = 0; 107 108 long long lnum; 109 int bflag; 110 int cflag; 111 int eflag; 112 int fflag; 113 int hflag; 114 int iflag; 115 int lflag; 116 int nflag; 117 int sflag; 118 int vflag; 119 int nfile; 120 long long blkno; 121 long long tln; 122 int nsucc; 123 int badbotch; 124 extern char *optarg; 125 extern int optind; 126 127 int f; 128 FILE *expfile; 129 %} 130 131 %% 132 s: t 133 { 134 unary(FINAL, $1); 135 line--; 136 } 137 ; 138 t: b r 139 { $$ = node(CAT, $1, $2); } 140 | OR b r OR 141 { $$ = node(CAT, $2, $3); } 142 | OR b r 143 { $$ = node(CAT, $2, $3); } 144 | b r OR 145 { $$ = node(CAT, $1, $2); } 146 ; 147 b: 148 { /* if(multibyte) 149 $$ = mdotenter(); 150 else */ 151 $$ = enter(DOT); 152 $$ = unary(STAR, $$); 153 } 154 ; 155 r: CHAR 156 { $$ = iflag && isalpha($1) ? 157 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); } 158 | MCHAR 159 { $$ = (iflag && iswalpha(lyylval)) ? 160 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) : 161 mchar(lyylval); } 162 | DOT 163 { if(multibyte) 164 $$ = mdotenter(); 165 else 166 $$ = enter(DOT); 167 } 168 | CCL 169 { $$ = cclenter(CCL); } 170 | NCCL 171 { $$ = cclenter(NCCL); } 172 | MCCL 173 { $$ = ccl(CCL); } 174 | NMCCL 175 { $$ = ccl(NCCL); } 176 ; 177 178 r: r OR r 179 { $$ = node(OR, $1, $3); } 180 | r r %prec CAT 181 { $$ = node(CAT, $1, $2); } 182 | r STAR 183 { $$ = unary(STAR, $1); } 184 | r PLUS 185 { $$ = unary(PLUS, $1); } 186 | r QUEST 187 { $$ = unary(QUEST, $1); } 188 | '(' r ')' 189 { $$ = $2; } 190 | error 191 ; 192 193 %% 194 void add(int *, int); 195 void clearg(void); 196 void execute(char *); 197 void follow(int); 198 int mgetc(void); 199 void synerror(void); 200 201 202 void 203 yyerror(char *s) 204 { 205 fprintf(stderr, "egrep: %s\n", s); 206 exit(2); 207 } 208 209 int 210 yylex(void) 211 { 212 extern int yylval; 213 int cclcnt, x, ccount, oldccount; 214 wchar_t c, lc; 215 216 c = nextch(); 217 switch(c) { 218 case '^': 219 yylval = LEFT; 220 return(CHAR); 221 case '$': 222 c = RIGHT; 223 goto defchar; 224 case '|': return (OR); 225 case '*': return (STAR); 226 case '+': return (PLUS); 227 case '?': return (QUEST); 228 case '(': return (c); 229 case ')': return (c); 230 case '.': return(DOT); 231 case '\0': return (0); 232 case RIGHT: return (OR); 233 case '[': 234 x = (multibyte ? MCCL : CCL); 235 cclcnt = 0; 236 count = nxtchar++; 237 if ((c = nextch()) == '^') { 238 x = (multibyte ? NMCCL : NCCL); 239 c = nextch(); 240 } 241 lc = 0; 242 do { 243 if (iflag && iswalpha(c)) 244 c = towlower(c); 245 if (c == '\0') synerror(); 246 if (c == '-' && cclcnt > 0 && lc != 0) { 247 if ((c = nextch()) != 0) { 248 if(c == ']') { 249 chars[nxtchar++] = '-'; 250 cclcnt++; 251 break; 252 } 253 if (iflag && iswalpha(c)) 254 c = towlower(c); 255 if (!multibyte || 256 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) && 257 lc < c && 258 !iswcntrl(c) && !iswcntrl(lc)) { 259 if (nxtchar >= maxclin) 260 if (allocchars() == 0) 261 overflo(); 262 chars[nxtchar++] = '-'; 263 cclcnt++; 264 } 265 } 266 } 267 ccount = oldccount = nxtchar; 268 if(ccount + MB_LEN_MAX >= maxclin) 269 if(allocchars() == 0) 270 overflo(); 271 ccount += wctomb(&chars[ccount], c); 272 cclcnt += ccount - oldccount; 273 nxtchar += ccount - oldccount; 274 lc = c; 275 } while ((c = nextch()) != ']'); 276 chars[count] = cclcnt; 277 return(x); 278 279 case '\\': 280 if ((c = nextch()) == '\0') synerror(); 281 defchar: 282 default: 283 if (c <= 0177) { 284 yylval = c; 285 return (CHAR); 286 } else { 287 lyylval = c; 288 return (MCHAR); 289 } 290 } 291 } 292 293 wchar_t 294 nextch(void) 295 { 296 wchar_t lc; 297 char multic[MB_LEN_MAX]; 298 int length, d; 299 if (fflag) { 300 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0) 301 synerror(); 302 if(length == 0) 303 lc = '\0'; 304 } 305 else { 306 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1) 307 synerror(); 308 if(length == 0) 309 return(0); 310 input += length; 311 } 312 return(lc); 313 } 314 315 int 316 mgetc(void) 317 { 318 return(getc(expfile)); 319 } 320 321 void 322 synerror(void) 323 { 324 fprintf(stderr, gettext("egrep: syntax error\n")); 325 exit(2); 326 } 327 328 int 329 enter(int x) 330 { 331 if(line >= maxlin) 332 if(alloctree() == 0) 333 overflo(); 334 name[line] = x; 335 left[line] = 0; 336 right[line] = 0; 337 return(line++); 338 } 339 340 int 341 cclenter(int x) 342 { 343 int linno; 344 linno = enter(x); 345 right[linno] = count; 346 return (linno); 347 } 348 349 int 350 node(int x, int l, int r) 351 { 352 if(line >= maxlin) 353 if(alloctree() == 0) 354 overflo(); 355 name[line] = x; 356 left[line] = l; 357 right[line] = r; 358 parent[l] = line; 359 parent[r] = line; 360 return(line++); 361 } 362 363 int 364 unary(int x, int d) 365 { 366 if(line >= maxlin) 367 if(alloctree() == 0) 368 overflo(); 369 name[line] = x; 370 left[line] = d; 371 right[line] = 0; 372 parent[d] = line; 373 return(line++); 374 } 375 376 int 377 allocchars(void) 378 { 379 maxclin += MAXLIN; 380 if((chars = realloc(chars, maxclin)) == (char *)0) 381 return 0; 382 return 1; 383 } 384 385 int 386 alloctree(void) 387 { 388 maxlin += MAXLIN; 389 if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0) 390 return 0; 391 if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0) 392 return 0; 393 if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0) 394 return 0; 395 if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0) 396 return 0; 397 if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0) 398 return 0; 399 if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0) 400 return 0; 401 if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0) 402 return 0; 403 return 1; 404 } 405 406 void 407 overflo(void) 408 { 409 fprintf(stderr, gettext("egrep: regular expression too long\n")); 410 exit(2); 411 } 412 413 void 414 cfoll(int v) 415 { 416 int i; 417 if (left[v] == 0) { 418 count = 0; 419 for (i=1; i<=line; i++) tmpstat[i] = 0; 420 follow(v); 421 add(foll, v); 422 } 423 else if (right[v] == 0) cfoll(left[v]); 424 else { 425 cfoll(left[v]); 426 cfoll(right[v]); 427 } 428 } 429 430 void 431 cgotofn(void) 432 { 433 int i; 434 count = 0; 435 inxtpos = nxtpos; 436 for (i=3; i<=line; i++) tmpstat[i] = 0; 437 if (cstate(line-1)==0) { 438 tmpstat[line] = 1; 439 count++; 440 out[1] = 1; 441 } 442 for (i=3; i<=line; i++) initstat[i] = tmpstat[i]; 443 count--; /*leave out position 1 */ 444 icount = count; 445 tmpstat[1] = 0; 446 add(state, 1); 447 istat = nxtst(1, LEFT); 448 } 449 450 int 451 nxtst(int s, int c) 452 { 453 int i, num, k; 454 int pos, curpos, number, newpos; 455 num = positions[state[s]]; 456 count = icount; 457 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 458 pos = state[s] + 1; 459 for (i=0; i<num; i++) { 460 curpos = positions[pos]; 461 k = name[curpos]; 462 if (k >= 0) 463 if ( 464 (k == c) 465 || (k == DOT && dot(c)) 466 || (k == MDOT && mdot(c)) 467 || (k == CCL && dot(c) && member(c, right[curpos], 1)) 468 || (k == NCCL && dot(c) && member(c, right[curpos], 0)) 469 || (k == MCCL && mdot(c) && member(c, right[curpos], 1)) 470 ) { 471 number = positions[foll[curpos]]; 472 newpos = foll[curpos] + 1; 473 for (k=0; k<number; k++) { 474 if (tmpstat[positions[newpos]] != 1) { 475 tmpstat[positions[newpos]] = 1; 476 count++; 477 } 478 newpos++; 479 } 480 } 481 pos++; 482 } 483 if (notin(nstate)) { 484 if (++nstate >= NSTATES) { 485 for (i=1; i<NSTATES; i++) 486 out[i] = 0; 487 for (i=1; i<NSTATES; i++) 488 for (k=0; k<NCHARS; k++) 489 gotofn[i][k] = 0; 490 nstate = 1; 491 nxtpos = inxtpos; 492 reinit = 1; 493 add(state, nstate); 494 if (tmpstat[line] == 1) out[nstate] = 1; 495 return nstate; 496 } 497 add(state, nstate); 498 if (tmpstat[line] == 1) out[nstate] = 1; 499 gotofn[s][c] = nstate; 500 return nstate; 501 } 502 else { 503 gotofn[s][c] = xstate; 504 return xstate; 505 } 506 } 507 508 509 int 510 cstate(int v) 511 { 512 int b; 513 if (left[v] == 0) { 514 if (tmpstat[v] != 1) { 515 tmpstat[v] = 1; 516 count++; 517 } 518 return(1); 519 } 520 else if (right[v] == 0) { 521 if (cstate(left[v]) == 0) return (0); 522 else if (name[v] == PLUS) return (1); 523 else return (0); 524 } 525 else if (name[v] == CAT) { 526 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 527 else return (1); 528 } 529 else { /* name[v] == OR */ 530 b = cstate(right[v]); 531 if (cstate(left[v]) == 0 || b == 0) return (0); 532 else return (1); 533 } 534 } 535 536 537 int 538 dot(int c) 539 { 540 if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3)) 541 return(0); 542 if(c == RIGHT || c == LEFT) 543 return(0); 544 return(1); 545 } 546 547 int 548 mdot(int c) 549 { 550 if(c >= 0200 && !iscntrl(c)) 551 return(1); 552 return(0); 553 } 554 555 int 556 member(int symb, int set, int torf) 557 { 558 int i, num, pos, c, lc; 559 if(symb == RIGHT || symb == LEFT) 560 return(0); 561 num = chars[set]; 562 pos = set + 1; 563 lc = 0; 564 if(iflag) 565 symb = tolower(symb); 566 for (i=0; i<num; i++) { 567 c = (unsigned char)chars[pos++]; 568 if(c == '-' && lc != 0 && ++i < num) { 569 c = (unsigned char)chars[pos++]; 570 if(lc <= symb && symb <= c) 571 return(torf); 572 } 573 if (symb == c) 574 return (torf); 575 lc = c; 576 } 577 return(!torf); 578 } 579 580 int 581 notin(int n) 582 { 583 int i, j, pos; 584 for (i=1; i<=n; i++) { 585 if (positions[state[i]] == count) { 586 pos = state[i] + 1; 587 for (j=0; j < count; j++) 588 if (tmpstat[positions[pos++]] != 1) goto nxt; 589 xstate = i; 590 return (0); 591 } 592 nxt: ; 593 } 594 return (1); 595 } 596 597 void 598 add(int *array, int n) 599 { 600 int i; 601 if (nxtpos + count >= maxpos) { 602 maxpos += MAXPOS + count; 603 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0) 604 overflo(); 605 } 606 array[n] = nxtpos; 607 positions[nxtpos++] = count; 608 for (i=3; i <= line; i++) { 609 if (tmpstat[i] == 1) { 610 positions[nxtpos++] = i; 611 } 612 } 613 } 614 615 void 616 follow(int v) 617 { 618 int p; 619 if (v == line) return; 620 p = parent[v]; 621 switch(name[p]) { 622 case STAR: 623 case PLUS: cstate(v); 624 follow(p); 625 return; 626 627 case OR: 628 case QUEST: follow(p); 629 return; 630 631 case CAT: if (v == left[p]) { 632 if (cstate(right[p]) == 0) { 633 follow(p); 634 return; 635 } 636 } 637 else follow(p); 638 return; 639 case FINAL: if (tmpstat[line] != 1) { 640 tmpstat[line] = 1; 641 count++; 642 } 643 return; 644 } 645 } 646 647 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..." 648 649 int 650 main(int argc, char **argv) 651 { 652 char c; 653 char nl = '\n'; 654 int errflag = 0; 655 656 (void)setlocale(LC_ALL, ""); 657 658 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 659 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */ 660 #endif 661 (void) textdomain(TEXT_DOMAIN); 662 663 while((c = getopt(argc, argv, "ybcie:f:hlnvs")) != -1) 664 switch(c) { 665 666 case 'b': 667 bflag++; 668 continue; 669 670 case 'c': 671 cflag++; 672 continue; 673 674 case 'e': 675 eflag++; 676 input = optarg; 677 continue; 678 679 case 'f': 680 fflag++; 681 expfile = fopen(optarg, "r"); 682 if(expfile == NULL) { 683 fprintf(stderr, 684 gettext("egrep: can't open %s\n"), optarg); 685 exit(2); 686 } 687 continue; 688 689 case 'h': 690 hflag++; 691 continue; 692 693 case 'y': 694 case 'i': 695 iflag++; 696 continue; 697 698 case 'l': 699 lflag++; 700 continue; 701 702 case 'n': 703 nflag++; 704 continue; 705 706 case 's': 707 sflag++; 708 continue; 709 710 case 'v': 711 vflag++; 712 continue; 713 714 case '?': 715 errflag++; 716 } 717 if (errflag || ((argc <= 0) && !fflag && !eflag)) { 718 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE)); 719 exit(2); 720 } 721 if(!eflag && !fflag) { 722 input = argv[optind]; 723 optind++; 724 } 725 726 argc -= optind; 727 argv = &argv[optind]; 728 729 /* allocate initial space for arrays */ 730 if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 731 overflo(); 732 if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 733 overflo(); 734 if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 735 overflo(); 736 if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 737 overflo(); 738 if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 739 overflo(); 740 if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 741 overflo(); 742 if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 743 overflo(); 744 if((chars = (char *)malloc(MAXLIN)) == (char *)0) 745 overflo(); 746 if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0) 747 overflo(); 748 if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0) 749 overflo(); 750 if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0) 751 overflo(); 752 maxlin = MAXLIN; 753 maxclin = MAXLIN; 754 maxwclin = MAXLIN; 755 maxpos = MAXPOS; 756 757 yyparse(); 758 759 cfoll(line-1); 760 cgotofn(); 761 nfile = argc; 762 if (argc<=0) { 763 execute(0); 764 } 765 else while (--argc >= 0) { 766 if (reinit == 1) clearg(); 767 execute(*argv++); 768 } 769 return (badbotch ? 2 : nsucc==0); 770 } 771 772 void 773 execute(char *file) 774 { 775 char *p; 776 int cstat; 777 wchar_t c; 778 int t; 779 long count; 780 long count1, count2; 781 long nchars; 782 int succ; 783 char *ptr, *ptrend, *lastptr; 784 char *buf; 785 long lBufSiz; 786 FILE *f; 787 int nlflag; 788 789 lBufSiz = EBUFSIZ; 790 if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) { 791 exit (2); /* out of memory - BAIL */ 792 } 793 794 if (file) { 795 if ((f = fopen(file, "r")) == NULL) { 796 fprintf(stderr, 797 gettext("egrep: can't open %s\n"), file); 798 badbotch=1; 799 return; 800 } 801 } else { 802 file = "<stdin>"; 803 f = stdin; 804 } 805 lnum = 1; 806 tln = 0; 807 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) { 808 fclose(f); 809 810 if (cflag) { 811 if (nfile>1 && !hflag) 812 fprintf(stdout, "%s:", file); 813 fprintf(stdout, "%lld\n", tln); 814 } 815 return; 816 } 817 818 blkno = count; 819 ptr = buf; 820 for(;;) { 821 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) { 822 /* 823 move the unused partial record to the head of the buffer 824 */ 825 if (ptr > buf) { 826 count = buf + count - ptr; 827 memmove (buf, ptr, count); 828 ptr = buf; 829 } 830 831 /* 832 Get a bigger buffer if this one is full 833 */ 834 if(count > lBufSiz) { 835 /* 836 expand the buffer 837 */ 838 lBufSiz += EBUFSIZ; 839 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) { 840 exit (2); /* out of memory - BAIL */ 841 } 842 843 ptr = buf; 844 } 845 846 p = buf + count; 847 if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) { 848 count += count1; 849 blkno += count1; 850 continue; 851 } 852 ptrend = ptr + count; 853 nlflag = 0; 854 } else 855 nlflag = 1; 856 *ptrend = '\n'; 857 p = ptr; 858 lastptr = ptr; 859 cstat = istat; 860 succ = 0; 861 for(;;) { 862 if(out[cstat]) { 863 if(multibyte && p > ptr) { 864 wchar_t wchar; 865 int length; 866 char *endptr = p; 867 p = lastptr; 868 while(p < endptr) { 869 length = mbtowc(&wchar, p, MB_LEN_MAX); 870 if(length <= 1) 871 p++; 872 else 873 p += length; 874 } 875 if(p == endptr) { 876 succ = !vflag; 877 break; 878 } 879 cstat = 1; 880 length = mbtowc(&wchar, lastptr, MB_LEN_MAX); 881 if(length <= 1) 882 lastptr++; 883 else 884 lastptr += length; 885 p = lastptr; 886 continue; 887 } 888 succ = !vflag; 889 break; 890 } 891 c = (unsigned char)*p++; 892 if ((t = gotofn[cstat][c]) == 0) 893 cstat = nxtst(cstat, c); 894 else 895 cstat = t; 896 if(c == RIGHT) { 897 if(out[cstat]) { 898 succ = !vflag; 899 break; 900 } 901 succ = vflag; 902 break; 903 } 904 } 905 if(succ) { 906 nsucc = 1; 907 if (cflag) tln++; 908 else if (sflag) 909 ; /* ugh */ 910 else if (lflag) { 911 printf("%s\n", file); 912 fclose(f); 913 return; 914 } 915 else { 916 if (nfile > 1 && !hflag) 917 printf(gettext("%s:"), file); 918 if (bflag) { 919 nchars = blkno - (buf + count - ptrend) - 2; 920 if(nlflag) 921 nchars++; 922 printf("%lld:", nchars/BLKSIZE); 923 } 924 if (nflag) 925 printf("%lld:", lnum); 926 if(nlflag) 927 nchars = ptrend - ptr + 1; 928 else 929 nchars = ptrend - ptr; 930 fwrite(ptr, (size_t)1, (size_t)nchars, stdout); 931 } 932 } 933 if(!nlflag) 934 break; 935 ptr = ptrend + 1; 936 if(ptr >= buf + count) { 937 ptr = buf; 938 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) 939 break; 940 blkno += count; 941 } 942 lnum++; 943 if (reinit == 1) 944 clearg(); 945 } 946 fclose(f); 947 if (cflag) { 948 if (nfile > 1 && !hflag) 949 printf(gettext("%s:"), file); 950 printf("%lld\n", tln); 951 } 952 } 953 954 void 955 clearg(void) 956 { 957 int i, k; 958 for (i=1; i<=nstate; i++) 959 out[i] = 0; 960 for (i=1; i<=nstate; i++) 961 for (k=0; k<NCHARS; k++) 962 gotofn[i][k] = 0; 963 nstate = 1; 964 nxtpos = inxtpos; 965 reinit = 0; 966 count = 0; 967 for (i=3; i<=line; i++) tmpstat[i] = 0; 968 if (cstate(line-1)==0) { 969 tmpstat[line] = 1; 970 count++; 971 out[1] = 1; 972 } 973 for (i=3; i<=line; i++) initstat[i] = tmpstat[i]; 974 count--; /*leave out position 1 */ 975 icount = count; 976 tmpstat[1] = 0; 977 add(state, 1); 978 istat = nxtst(1, LEFT); 979 } 980 981 int 982 mdotenter(void) 983 { 984 int i, x1, x2; 985 x1 = enter(DOT); 986 x2 = enter(MDOT); 987 for(i = 1; i < (int) eucw1; i++) 988 x2 = node(CAT, x2, enter(MDOT)); 989 x1 = node(OR, x1, x2); 990 if(eucw2) { 991 x2 = enter('\216'); 992 for(i = 1; i <= (int) eucw2; i++) 993 x2 = node(CAT, x2, enter(MDOT)); 994 x1 = node(OR, x1, x2); 995 } 996 if(eucw3) { 997 x2 = enter('\217'); 998 for(i = 1; i <= (int) eucw3; i++) 999 x2 = node(CAT, x2, enter(MDOT)); 1000 x1 = node(OR, x1, x2); 1001 } 1002 return(x1); 1003 } 1004 1005 int 1006 mchar(wchar_t c) 1007 { 1008 char multichar[MB_LEN_MAX+1]; 1009 char *p; 1010 int x1, lc, length; 1011 1012 length = wctomb(multichar, c); 1013 p = multichar; 1014 *(p + length) = '\0'; 1015 x1 = enter((unsigned char)*p++); 1016 while(lc = (unsigned char)*p++) 1017 x1 = node(CAT, x1, enter(lc)); 1018 return(x1); 1019 } 1020 1021 int 1022 ccl(int type) 1023 { 1024 wchar_t c, lc; 1025 char multic1[MB_LEN_MAX]; 1026 char multic2[MB_LEN_MAX]; 1027 int x1, x2, length, current, last, cclcnt; 1028 x2 = 0; 1029 current = 0; 1030 last = genrange(type); 1031 nxtchar = count + 1; 1032 cclcnt = 0; 1033 /* create usual character class for single byte characters */ 1034 while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) { 1035 cclcnt++; 1036 chars[nxtchar++] = c; 1037 if(lower[current] != upper[current]) { 1038 chars[nxtchar++] = '-'; 1039 chars[nxtchar++] = upper[current]; 1040 cclcnt += 2; 1041 } 1042 current++; 1043 } 1044 1045 if(cclcnt) 1046 chars[count] = cclcnt; 1047 else 1048 nxtchar = count; 1049 if(current > 0) 1050 /* single byte part of character class */ 1051 x2 = cclenter(type); 1052 else if(type == NCCL) 1053 /* all single byte characters match */ 1054 x2 = enter(DOT); 1055 while(current <= last) { 1056 if(upper[current] == lower[current]) 1057 x1 = mchar(lower[current]); 1058 else { 1059 length = wctomb(multic1, lower[current]); 1060 wctomb(multic2, upper[current]); 1061 x1 = range((unsigned char *)multic1, 1062 (unsigned char *)multic2, length); 1063 } 1064 if(x2) 1065 x2 = node(OR, x2, x1); 1066 else 1067 x2 = x1; 1068 current++; 1069 } 1070 return x2; 1071 } 1072 1073 int 1074 range(unsigned char *p1, unsigned char *p2, int length) 1075 { 1076 char multic[MB_LEN_MAX+1]; 1077 char *p; 1078 int i, x1, x2; 1079 if(length == 1) 1080 return(classenter(*p1, *p2)); 1081 if(p1[0] == p2[0]) 1082 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1))); 1083 p = multic; 1084 for(i = 1; i < length; i++) 1085 *p++ = 0377; 1086 x1 = node(CAT, enter(p1[0]), 1087 range(p1+1, (unsigned char *)multic, length - 1)); 1088 if((unsigned char)(p1[0] + 1) < p2[0]) { 1089 x2 = classenter(p1[0] + 1, p2[0] - 1); 1090 for(i = 1; i < length; i++) 1091 x2 = node(CAT, x2, enter(MDOT)); 1092 x1 = node(OR, x1, x2); 1093 } 1094 p = multic; 1095 for(i = 1; i < length; i++) 1096 *p++ = 0200; 1097 x2 = node(CAT, enter(p2[0]), 1098 range((unsigned char *)multic, p2+1, length - 1)); 1099 return node(OR, x1, x2); 1100 } 1101 1102 int 1103 classenter(int x1, int x2) 1104 { 1105 static int max, min; 1106 if(!max) { 1107 int i; 1108 for(i = 0200; i <= 0377; i++) 1109 if(!iscntrl(i)) 1110 break; 1111 min = i; 1112 for(i = 0377; i >= 0200; i--) 1113 if(!iscntrl(i)) 1114 break; 1115 max = i; 1116 } 1117 if(x1 <= min && x2 >= max) 1118 return enter(MDOT); 1119 if(nxtchar + 4 >= maxclin) 1120 if(allocchars() == 0) 1121 overflo(); 1122 count = nxtchar++; 1123 chars[nxtchar++] = x1; 1124 chars[nxtchar++] = '-'; 1125 chars[nxtchar++] = x2; 1126 chars[count] = 3; 1127 return cclenter(MCCL); 1128 } 1129 1130 int 1131 genrange(int type) 1132 { 1133 char *p, *endp; 1134 int current, nel, i, last, length; 1135 wchar_t c, lc; 1136 1137 current = 0; 1138 p = &chars[count+1]; 1139 endp = &chars[count+1] + chars[count]; 1140 lc = 0; 1141 1142 /* convert character class into union of ranges */ 1143 while(p < endp) { 1144 length = mbtowc(&c, p, MB_LEN_MAX); 1145 p += length; 1146 if(c == '-' && lc != 0) { 1147 length = mbtowc(&c, p, MB_LEN_MAX); 1148 upper[current-1] = c; 1149 p += length; 1150 } else { 1151 lower[current] = c; 1152 upper[current++] = c; 1153 } 1154 lc = c; 1155 } 1156 nel = current; 1157 /* sort lower and upper bounds of ranges */ 1158 qsort((char *)lower, nel, sizeof(wchar_t), compare); 1159 qsort((char *)upper, nel, sizeof(wchar_t), compare); 1160 last = current - 1; 1161 current = 0; 1162 /* combine overlapping or adjacent ranges */ 1163 for(i = 0; i < last; i++) 1164 if(upper[i] >= lower[i+1] - 1) 1165 upper[current] = upper[i+1]; 1166 else { 1167 lower[++current] = lower[i+1]; 1168 upper[current] = upper[i+1]; 1169 } 1170 if(type == NCCL) { 1171 /* find complement of character class */ 1172 int j, next; 1173 i = 0; 1174 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c)) 1175 i++; 1176 if(i > current) { 1177 /* match all multibyte characters */ 1178 if(eucw2) { 1179 lower[i] = maxmin(WCHAR_CS2, 0); 1180 upper[i++] = maxmin(WCHAR_CS2, 1); 1181 } 1182 if(eucw3) { 1183 lower[i] = maxmin(WCHAR_CS3, 0); 1184 upper[i++] = maxmin(WCHAR_CS3, 1); 1185 } 1186 lower[i] = maxmin(WCHAR_CS1, 0); 1187 upper[i++] = maxmin(WCHAR_CS1, 1); 1188 return i - 1; 1189 } 1190 next = current + 1; 1191 if(next + current + 2 >= maxwclin) { 1192 maxwclin += MAXLIN + next + current + 2; 1193 if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 || 1194 (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0) 1195 overflo(); 1196 } 1197 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) { 1198 lower[next] = maxmin(WCHAR_CS2, 0); 1199 if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) { 1200 upper[next++] = maxmin(WCHAR_CS2, 1); 1201 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) { 1202 lower[next] = maxmin(WCHAR_CS3, 0); 1203 upper[next++] = maxmin(WCHAR_CS3, 1); 1204 } 1205 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) { 1206 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0); 1207 upper[next++] = lower[i] - 1; 1208 } 1209 } else 1210 upper[next++] = lower[i] - 1; 1211 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) { 1212 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0); 1213 upper[next++] = lower[i] - 1; 1214 } 1215 for(j = i; j < current; j++) { 1216 if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) { 1217 lower[next] = upper[j] + 1; 1218 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) { 1219 upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1); 1220 if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) { 1221 lower[next] = maxmin(WCHAR_CS3, 0); 1222 upper[next++] = maxmin(WCHAR_CS3, 1); 1223 } 1224 if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) { 1225 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0); 1226 upper[next++] = lower[j+1] - 1; 1227 } 1228 } else 1229 upper[next++] = lower[j+1] - 1; 1230 } else if(lower[j+1] > maxmin(lower[j+1], 0)) { 1231 lower[next] = maxmin(lower[j+1], 0); 1232 upper[next++] = lower[j+1] - 1; 1233 } 1234 } 1235 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) { 1236 lower[next] = upper[current] + 1; 1237 upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1); 1238 } 1239 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) { 1240 if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) { 1241 lower[next] = maxmin(WCHAR_CS3, 0); 1242 upper[next++] = maxmin(WCHAR_CS3, 1); 1243 } 1244 lower[next] = maxmin(WCHAR_CS1, 0); 1245 upper[next++] = maxmin(WCHAR_CS1, 1); 1246 } 1247 for(j = current + 1; j < next; j++) { 1248 lower[i] = lower[j]; 1249 upper[i++] = upper[j]; 1250 } 1251 current = i - 1; 1252 } 1253 return(current); 1254 } 1255 1256 int 1257 compare(wchar_t *c, wchar_t *d) 1258 { 1259 if(*c < *d) 1260 return -1; 1261 if(*c == *d) 1262 return 0; 1263 return 1; 1264 } 1265 1266 wchar_t 1267 maxmin(wchar_t c, int flag) 1268 { 1269 static wchar_t minmax1[2], minmax2[2], minmax3[2]; 1270 1271 if(!minmax1[0]) { 1272 /* compute min and max process codes for all code sets */ 1273 int length, i; 1274 char multic[MB_LEN_MAX], minmax[2]; 1275 for(i = 0377; i >= 0200; i--) 1276 if(!iscntrl(i)) 1277 break; 1278 minmax[1] = i; 1279 for(i = 0240; i <= 0377; i++) 1280 if(!iscntrl(i)) 1281 break; 1282 minmax[0] = i; 1283 for(i = 0; i <= 1; i++) { 1284 length = MB_LEN_MAX; 1285 while(length--) 1286 multic[length] = minmax[i]; 1287 mbtowc(&minmax1[i], multic, MB_LEN_MAX); 1288 if(eucw2) { 1289 multic[0] = SS2; 1290 mbtowc(&minmax2[i], multic, MB_LEN_MAX); 1291 } 1292 if(eucw3) { 1293 multic[0] = SS3; 1294 mbtowc(&minmax3[i], multic, MB_LEN_MAX); 1295 } 1296 } 1297 } 1298 switch(c) { 1299 case WCHAR_CS1: return minmax1[flag]; 1300 case WCHAR_CS2: return minmax2[flag]; 1301 case WCHAR_CS3: return minmax3[flag]; 1302 } 1303 1304 /* NOTREACHED */ 1305 return (0); 1306 } 1307