1da2e3ebdSchin /*********************************************************************** 2da2e3ebdSchin * * 3da2e3ebdSchin * This software is part of the ast package * 4*34f9b3eeSRoland Mainz * Copyright (c) 1992-2009 AT&T Intellectual Property * 5da2e3ebdSchin * and is licensed under the * 6da2e3ebdSchin * Common Public License, Version 1.0 * 77c2fbfb3SApril Chin * by AT&T Intellectual Property * 8da2e3ebdSchin * * 9da2e3ebdSchin * A copy of the License is available at * 10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt * 11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12da2e3ebdSchin * * 13da2e3ebdSchin * Information and Software Systems Research * 14da2e3ebdSchin * AT&T Research * 15da2e3ebdSchin * Florham Park NJ * 16da2e3ebdSchin * * 17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> * 18da2e3ebdSchin * David Korn <dgk@research.att.com> * 19da2e3ebdSchin * * 20da2e3ebdSchin ***********************************************************************/ 21da2e3ebdSchin #pragma prototyped 22da2e3ebdSchin /* 23da2e3ebdSchin * David Korn 24da2e3ebdSchin * AT&T Bell Laboratories 25da2e3ebdSchin * 26da2e3ebdSchin * library interface for word count 27da2e3ebdSchin */ 28da2e3ebdSchin 29da2e3ebdSchin #include <cmd.h> 30da2e3ebdSchin #include <wc.h> 31da2e3ebdSchin #include <ctype.h> 32da2e3ebdSchin 337c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype 34da2e3ebdSchin 35da2e3ebdSchin #include <wchar.h> 36da2e3ebdSchin #include <wctype.h> 37*34f9b3eeSRoland Mainz #include <lc.h> 38da2e3ebdSchin 39da2e3ebdSchin #else 40da2e3ebdSchin 41da2e3ebdSchin #ifndef iswspace 42da2e3ebdSchin #define iswspace(x) isspace(x) 43da2e3ebdSchin #endif 44da2e3ebdSchin 45da2e3ebdSchin #endif 46da2e3ebdSchin 47*34f9b3eeSRoland Mainz #define WC_SP 0x08 48*34f9b3eeSRoland Mainz #define WC_NL 0x10 49*34f9b3eeSRoland Mainz #define WC_MB 0x20 50*34f9b3eeSRoland Mainz #define WC_ERR 0x40 51*34f9b3eeSRoland Mainz 52*34f9b3eeSRoland Mainz #define eol(c) ((c)&WC_NL) 53*34f9b3eeSRoland Mainz #define mbc(c) ((c)&WC_MB) 54*34f9b3eeSRoland Mainz #define spc(c) ((c)&WC_SP) 55*34f9b3eeSRoland Mainz #define mbwc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n) 56da2e3ebdSchin 57da2e3ebdSchin Wc_t* wc_init(int mode) 58da2e3ebdSchin { 59da2e3ebdSchin register int n; 60da2e3ebdSchin register int w; 61da2e3ebdSchin Wc_t* wp; 62da2e3ebdSchin 63da2e3ebdSchin if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 64*34f9b3eeSRoland Mainz return 0; 65*34f9b3eeSRoland Mainz if (!mbwide()) 66*34f9b3eeSRoland Mainz wp->mb = 0; 67*34f9b3eeSRoland Mainz #if _hdr_wchar && _hdr_wctype && _lib_iswctype 68*34f9b3eeSRoland Mainz else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8)) 69*34f9b3eeSRoland Mainz wp->mb = 1; 70*34f9b3eeSRoland Mainz #endif 71*34f9b3eeSRoland Mainz else 72*34f9b3eeSRoland Mainz wp->mb = -1; 73da2e3ebdSchin w = mode & WC_WORDS; 74da2e3ebdSchin for (n = (1<<CHAR_BIT); --n >= 0;) 75*34f9b3eeSRoland Mainz wp->type[n] = (w && isspace(n)) ? WC_SP : 0; 76*34f9b3eeSRoland Mainz wp->type['\n'] = WC_SP|WC_NL; 77*34f9b3eeSRoland Mainz if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0) 78*34f9b3eeSRoland Mainz { 79*34f9b3eeSRoland Mainz for (n = 0; n < 64; n++) 80*34f9b3eeSRoland Mainz { 81*34f9b3eeSRoland Mainz wp->type[0x80+n] |= WC_MB; 82*34f9b3eeSRoland Mainz if (n<32) 83*34f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+1; 84*34f9b3eeSRoland Mainz else if (n<48) 85*34f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+2; 86*34f9b3eeSRoland Mainz else if (n<56) 87*34f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+3; 88*34f9b3eeSRoland Mainz else if (n<60) 89*34f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+4; 90*34f9b3eeSRoland Mainz else if (n<62) 91*34f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+5; 92*34f9b3eeSRoland Mainz } 93*34f9b3eeSRoland Mainz wp->type[0xc0] = WC_MB|WC_ERR; 94*34f9b3eeSRoland Mainz wp->type[0xc1] = WC_MB|WC_ERR; 95*34f9b3eeSRoland Mainz wp->type[0xfe] = WC_MB|WC_ERR; 96*34f9b3eeSRoland Mainz wp->type[0xff] = WC_MB|WC_ERR; 97*34f9b3eeSRoland Mainz } 98*34f9b3eeSRoland Mainz wp->mode = mode; 99*34f9b3eeSRoland Mainz return wp; 100*34f9b3eeSRoland Mainz } 101*34f9b3eeSRoland Mainz 102*34f9b3eeSRoland Mainz static int invalid(const char *file, int nlines) 103*34f9b3eeSRoland Mainz { 104*34f9b3eeSRoland Mainz error_info.file = (char*)file; 105*34f9b3eeSRoland Mainz error_info.line = nlines; 106*34f9b3eeSRoland Mainz error(ERROR_SYSTEM|1, "invalid multibyte character"); 107*34f9b3eeSRoland Mainz error_info.file = 0; 108*34f9b3eeSRoland Mainz error_info.line = 0; 109*34f9b3eeSRoland Mainz return nlines; 110*34f9b3eeSRoland Mainz } 111*34f9b3eeSRoland Mainz 112*34f9b3eeSRoland Mainz /* 113*34f9b3eeSRoland Mainz * handle utf space characters 114*34f9b3eeSRoland Mainz */ 115*34f9b3eeSRoland Mainz 116*34f9b3eeSRoland Mainz static int chkstate(int state, register unsigned int c) 117*34f9b3eeSRoland Mainz { 118*34f9b3eeSRoland Mainz switch(state) 119*34f9b3eeSRoland Mainz { 120*34f9b3eeSRoland Mainz case 1: 121*34f9b3eeSRoland Mainz state = (c==0x9a?4:0); 122*34f9b3eeSRoland Mainz break; 123*34f9b3eeSRoland Mainz case 2: 124*34f9b3eeSRoland Mainz state = ((c==0x80||c==0x81)?6+(c&1):0); 125*34f9b3eeSRoland Mainz break; 126*34f9b3eeSRoland Mainz case 3: 127*34f9b3eeSRoland Mainz state = (c==0x80?5:0); 128*34f9b3eeSRoland Mainz break; 129*34f9b3eeSRoland Mainz case 4: 130*34f9b3eeSRoland Mainz state = (c==0x80?10:0); 131*34f9b3eeSRoland Mainz break; 132*34f9b3eeSRoland Mainz case 5: 133*34f9b3eeSRoland Mainz state = (c==0x80?10:0); 134*34f9b3eeSRoland Mainz break; 135*34f9b3eeSRoland Mainz case 6: 136*34f9b3eeSRoland Mainz state = 0; 137*34f9b3eeSRoland Mainz if(c==0xa0 || c==0xa1) 138*34f9b3eeSRoland Mainz return(10); 139*34f9b3eeSRoland Mainz else if((c&0xf0)== 0x80) 140*34f9b3eeSRoland Mainz { 141*34f9b3eeSRoland Mainz if((c&=0xf)==7) 142*34f9b3eeSRoland Mainz return(iswspace(0x2007)?10:0); 143*34f9b3eeSRoland Mainz if(c<=0xb) 144*34f9b3eeSRoland Mainz return(10); 145*34f9b3eeSRoland Mainz } 146*34f9b3eeSRoland Mainz else if(c==0xaf && iswspace(0x202f)) 147*34f9b3eeSRoland Mainz return(10); 148*34f9b3eeSRoland Mainz break; 149*34f9b3eeSRoland Mainz case 7: 150*34f9b3eeSRoland Mainz state = (c==0x9f?10:0); 151*34f9b3eeSRoland Mainz break; 152*34f9b3eeSRoland Mainz case 8: 153*34f9b3eeSRoland Mainz return (iswspace(c)?10:0); 154*34f9b3eeSRoland Mainz } 155*34f9b3eeSRoland Mainz return state; 156da2e3ebdSchin } 157da2e3ebdSchin 158da2e3ebdSchin /* 159da2e3ebdSchin * compute the line, word, and character count for file <fd> 160da2e3ebdSchin */ 161*34f9b3eeSRoland Mainz 162da2e3ebdSchin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 163da2e3ebdSchin { 164*34f9b3eeSRoland Mainz register char* type = wp->type; 165da2e3ebdSchin register unsigned char* cp; 166da2e3ebdSchin register Sfoff_t nchars; 167da2e3ebdSchin register Sfoff_t nwords; 168da2e3ebdSchin register Sfoff_t nlines; 169*34f9b3eeSRoland Mainz register Sfoff_t eline = -1; 170*34f9b3eeSRoland Mainz register Sfoff_t longest = 0; 171da2e3ebdSchin register ssize_t c; 172da2e3ebdSchin register unsigned char* endbuff; 173*34f9b3eeSRoland Mainz register int lasttype = WC_SP; 174da2e3ebdSchin unsigned int lastchar; 175*34f9b3eeSRoland Mainz ssize_t n; 176*34f9b3eeSRoland Mainz ssize_t o; 177da2e3ebdSchin unsigned char* buff; 178da2e3ebdSchin wchar_t x; 179*34f9b3eeSRoland Mainz unsigned char side[32]; 180da2e3ebdSchin 181da2e3ebdSchin sfset(fd,SF_WRITE,1); 182da2e3ebdSchin nlines = nwords = nchars = 0; 183da2e3ebdSchin wp->longest = 0; 184*34f9b3eeSRoland Mainz if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS))) 185da2e3ebdSchin { 186da2e3ebdSchin cp = buff = endbuff = 0; 187da2e3ebdSchin for (;;) 188da2e3ebdSchin { 189*34f9b3eeSRoland Mainz if (cp >= endbuff || (n = mbwc(x, cp, endbuff-cp)) < 0) 190*34f9b3eeSRoland Mainz { 191*34f9b3eeSRoland Mainz if ((o = endbuff-cp) < sizeof(side)) 192da2e3ebdSchin { 193da2e3ebdSchin if (buff) 194da2e3ebdSchin { 195*34f9b3eeSRoland Mainz if (o) 196*34f9b3eeSRoland Mainz memcpy(side, cp, o); 197*34f9b3eeSRoland Mainz mbinit(); 198da2e3ebdSchin } 199*34f9b3eeSRoland Mainz else 200*34f9b3eeSRoland Mainz o = 0; 201*34f9b3eeSRoland Mainz cp = side + o; 202*34f9b3eeSRoland Mainz if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0) 203da2e3ebdSchin { 204da2e3ebdSchin if ((nchars - longest) > wp->longest) 205da2e3ebdSchin wp->longest = nchars - longest; 206*34f9b3eeSRoland Mainz break; 207*34f9b3eeSRoland Mainz } 208*34f9b3eeSRoland Mainz if ((c = sizeof(side) - o) > n) 209*34f9b3eeSRoland Mainz c = n; 210*34f9b3eeSRoland Mainz if (c) 211*34f9b3eeSRoland Mainz memcpy(cp, buff, c); 212*34f9b3eeSRoland Mainz endbuff = buff + n; 213*34f9b3eeSRoland Mainz cp = side; 214*34f9b3eeSRoland Mainz x = mbchar(cp); 215*34f9b3eeSRoland Mainz if ((cp-side) < o) 216*34f9b3eeSRoland Mainz { 217*34f9b3eeSRoland Mainz cp = buff; 218*34f9b3eeSRoland Mainz nchars += (cp-side) - 1; 219*34f9b3eeSRoland Mainz } 220*34f9b3eeSRoland Mainz else 221*34f9b3eeSRoland Mainz cp = buff + (cp-side) - o; 222*34f9b3eeSRoland Mainz } 223*34f9b3eeSRoland Mainz else 224*34f9b3eeSRoland Mainz { 225*34f9b3eeSRoland Mainz cp++; 226*34f9b3eeSRoland Mainz x = -1; 227*34f9b3eeSRoland Mainz } 228*34f9b3eeSRoland Mainz if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET)) 229*34f9b3eeSRoland Mainz eline = invalid(file, nlines); 230*34f9b3eeSRoland Mainz } 231*34f9b3eeSRoland Mainz else 232*34f9b3eeSRoland Mainz cp += n ? n : 1; 233*34f9b3eeSRoland Mainz if (x == '\n') 234*34f9b3eeSRoland Mainz { 235*34f9b3eeSRoland Mainz if ((nchars - longest) > wp->longest) 236*34f9b3eeSRoland Mainz wp->longest = nchars - longest; 237*34f9b3eeSRoland Mainz longest = nchars + 1; 238da2e3ebdSchin nlines++; 239da2e3ebdSchin lasttype = 1; 240da2e3ebdSchin } 241da2e3ebdSchin else if (iswspace(x)) 242da2e3ebdSchin lasttype = 1; 243da2e3ebdSchin else if (lasttype) 244da2e3ebdSchin { 245da2e3ebdSchin lasttype = 0; 246da2e3ebdSchin nwords++; 247da2e3ebdSchin } 248*34f9b3eeSRoland Mainz nchars++; 249*34f9b3eeSRoland Mainz } 250*34f9b3eeSRoland Mainz } 251*34f9b3eeSRoland Mainz else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 252*34f9b3eeSRoland Mainz { 253*34f9b3eeSRoland Mainz if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 254*34f9b3eeSRoland Mainz { 255*34f9b3eeSRoland Mainz while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 256*34f9b3eeSRoland Mainz { 257*34f9b3eeSRoland Mainz nchars += c; 258*34f9b3eeSRoland Mainz endbuff = cp + c; 259*34f9b3eeSRoland Mainz if (*--endbuff == '\n') 260*34f9b3eeSRoland Mainz nlines++; 261*34f9b3eeSRoland Mainz else 262*34f9b3eeSRoland Mainz *endbuff = '\n'; 263*34f9b3eeSRoland Mainz for (;;) 264*34f9b3eeSRoland Mainz if (*cp++ == '\n') 265*34f9b3eeSRoland Mainz { 266*34f9b3eeSRoland Mainz if (cp > endbuff) 267*34f9b3eeSRoland Mainz break; 268*34f9b3eeSRoland Mainz nlines++; 269*34f9b3eeSRoland Mainz } 270da2e3ebdSchin } 271da2e3ebdSchin } 272da2e3ebdSchin else 273da2e3ebdSchin { 274*34f9b3eeSRoland Mainz while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 275da2e3ebdSchin { 276da2e3ebdSchin nchars += c; 277da2e3ebdSchin /* check to see whether first character terminates word */ 278da2e3ebdSchin if (c==1) 279da2e3ebdSchin { 280*34f9b3eeSRoland Mainz if (eol(lasttype)) 281da2e3ebdSchin nlines++; 282*34f9b3eeSRoland Mainz if ((c = type[*cp]) && !lasttype) 283da2e3ebdSchin nwords++; 284da2e3ebdSchin lasttype = c; 285da2e3ebdSchin continue; 286da2e3ebdSchin } 287*34f9b3eeSRoland Mainz if (!lasttype && type[*cp]) 288da2e3ebdSchin nwords++; 289da2e3ebdSchin lastchar = cp[--c]; 290*34f9b3eeSRoland Mainz *(endbuff = cp+c) = '\n'; 291da2e3ebdSchin c = lasttype; 292da2e3ebdSchin /* process each buffer */ 293da2e3ebdSchin for (;;) 294da2e3ebdSchin { 295da2e3ebdSchin /* process spaces and new-lines */ 296*34f9b3eeSRoland Mainz do 297da2e3ebdSchin { 298*34f9b3eeSRoland Mainz if (eol(c)) 299da2e3ebdSchin for (;;) 300da2e3ebdSchin { 301da2e3ebdSchin /* check for end of buffer */ 302da2e3ebdSchin if (cp > endbuff) 303*34f9b3eeSRoland Mainz goto beob; 304da2e3ebdSchin nlines++; 305da2e3ebdSchin if (*cp != '\n') 306da2e3ebdSchin break; 307da2e3ebdSchin cp++; 308da2e3ebdSchin } 309*34f9b3eeSRoland Mainz } while (c = type[*cp++]); 310da2e3ebdSchin /* skip over word characters */ 311*34f9b3eeSRoland Mainz while (!(c = type[*cp++])); 312da2e3ebdSchin nwords++; 313da2e3ebdSchin } 314*34f9b3eeSRoland Mainz beob: 315da2e3ebdSchin if ((cp -= 2) >= buff) 316*34f9b3eeSRoland Mainz c = type[*cp]; 317da2e3ebdSchin else 318da2e3ebdSchin c = lasttype; 319*34f9b3eeSRoland Mainz lasttype = type[lastchar]; 320da2e3ebdSchin /* see if was in word */ 321da2e3ebdSchin if (!c && !lasttype) 322da2e3ebdSchin nwords--; 323da2e3ebdSchin } 324*34f9b3eeSRoland Mainz if (eol(lasttype)) 325da2e3ebdSchin nlines++; 326da2e3ebdSchin else if (!lasttype) 327da2e3ebdSchin nwords++; 328da2e3ebdSchin } 329*34f9b3eeSRoland Mainz } 330*34f9b3eeSRoland Mainz else 331*34f9b3eeSRoland Mainz { 332*34f9b3eeSRoland Mainz int lineoff=0; 333*34f9b3eeSRoland Mainz int skip=0; 334*34f9b3eeSRoland Mainz int adjust=0; 335*34f9b3eeSRoland Mainz int state=0; 336*34f9b3eeSRoland Mainz int oldc; 337*34f9b3eeSRoland Mainz int xspace; 338*34f9b3eeSRoland Mainz int wasspace = 1; 339*34f9b3eeSRoland Mainz unsigned char* start; 340*34f9b3eeSRoland Mainz 341*34f9b3eeSRoland Mainz lastchar = 0; 342*34f9b3eeSRoland Mainz start = (endbuff = side) + 1; 343*34f9b3eeSRoland Mainz xspace = iswspace(0xa0) || iswspace(0x85); 344*34f9b3eeSRoland Mainz while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 345*34f9b3eeSRoland Mainz { 346*34f9b3eeSRoland Mainz nchars += c; 347*34f9b3eeSRoland Mainz start = cp-lineoff; 348*34f9b3eeSRoland Mainz /* check to see whether first character terminates word */ 349*34f9b3eeSRoland Mainz if(c==1) 350*34f9b3eeSRoland Mainz { 351*34f9b3eeSRoland Mainz if(eol(lasttype)) 352*34f9b3eeSRoland Mainz nlines++; 353*34f9b3eeSRoland Mainz if((c = type[*cp]) && !lasttype) 354*34f9b3eeSRoland Mainz nwords++; 355*34f9b3eeSRoland Mainz lasttype = c; 356*34f9b3eeSRoland Mainz endbuff = start; 357*34f9b3eeSRoland Mainz continue; 358*34f9b3eeSRoland Mainz } 359*34f9b3eeSRoland Mainz lastchar = cp[--c]; 360*34f9b3eeSRoland Mainz endbuff = cp+c; 361*34f9b3eeSRoland Mainz cp[c] = '\n'; 362*34f9b3eeSRoland Mainz if(mbc(lasttype)) 363*34f9b3eeSRoland Mainz { 364*34f9b3eeSRoland Mainz c = lasttype; 365*34f9b3eeSRoland Mainz goto mbyte; 366*34f9b3eeSRoland Mainz } 367*34f9b3eeSRoland Mainz if(!lasttype && spc(type[*cp])) 368*34f9b3eeSRoland Mainz nwords++; 369*34f9b3eeSRoland Mainz c = lasttype; 370*34f9b3eeSRoland Mainz /* process each buffer */ 371*34f9b3eeSRoland Mainz for (;;) 372*34f9b3eeSRoland Mainz { 373*34f9b3eeSRoland Mainz /* process spaces and new-lines */ 374*34f9b3eeSRoland Mainz spaces: 375*34f9b3eeSRoland Mainz do 376*34f9b3eeSRoland Mainz { 377*34f9b3eeSRoland Mainz if (eol(c)) 378*34f9b3eeSRoland Mainz { 379*34f9b3eeSRoland Mainz /* check for end of buffer */ 380*34f9b3eeSRoland Mainz if (cp > endbuff) 381*34f9b3eeSRoland Mainz goto eob; 382*34f9b3eeSRoland Mainz if(wp->mode&WC_LONGEST) 383*34f9b3eeSRoland Mainz { 384*34f9b3eeSRoland Mainz if((cp-start)-adjust > longest) 385*34f9b3eeSRoland Mainz longest = (cp-start)-adjust-1; 386*34f9b3eeSRoland Mainz start = cp; 387*34f9b3eeSRoland Mainz } 388*34f9b3eeSRoland Mainz nlines++; 389*34f9b3eeSRoland Mainz nchars -= adjust; 390*34f9b3eeSRoland Mainz adjust = 0; 391*34f9b3eeSRoland Mainz } 392*34f9b3eeSRoland Mainz } while (spc(c = type[*cp++])); 393*34f9b3eeSRoland Mainz wasspace=1; 394*34f9b3eeSRoland Mainz if(mbc(c)) 395*34f9b3eeSRoland Mainz { 396*34f9b3eeSRoland Mainz mbyte: 397*34f9b3eeSRoland Mainz do 398*34f9b3eeSRoland Mainz { 399*34f9b3eeSRoland Mainz if(c&WC_ERR) 400*34f9b3eeSRoland Mainz goto err; 401*34f9b3eeSRoland Mainz if(skip && (c&7)) 402*34f9b3eeSRoland Mainz break; 403*34f9b3eeSRoland Mainz if(!skip) 404*34f9b3eeSRoland Mainz { 405*34f9b3eeSRoland Mainz if(!(c&7)) 406*34f9b3eeSRoland Mainz { 407*34f9b3eeSRoland Mainz skip=1; 408*34f9b3eeSRoland Mainz break; 409*34f9b3eeSRoland Mainz } 410*34f9b3eeSRoland Mainz skip = (c&7); 411*34f9b3eeSRoland Mainz adjust += skip; 412*34f9b3eeSRoland Mainz state = 0; 413*34f9b3eeSRoland Mainz if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3))) 414*34f9b3eeSRoland Mainz oldc = *cp; 415*34f9b3eeSRoland Mainz else if(xspace && cp[-1]==0xc2) 416*34f9b3eeSRoland Mainz { 417*34f9b3eeSRoland Mainz state = 8; 418*34f9b3eeSRoland Mainz oldc = *cp; 419*34f9b3eeSRoland Mainz } 420*34f9b3eeSRoland Mainz } 421*34f9b3eeSRoland Mainz else 422*34f9b3eeSRoland Mainz { 423*34f9b3eeSRoland Mainz skip--; 424*34f9b3eeSRoland Mainz if(state && (state=chkstate(state,oldc))) 425*34f9b3eeSRoland Mainz { 426*34f9b3eeSRoland Mainz if(state==10) 427*34f9b3eeSRoland Mainz { 428*34f9b3eeSRoland Mainz if(!wasspace) 429*34f9b3eeSRoland Mainz nwords++; 430*34f9b3eeSRoland Mainz wasspace = 1; 431*34f9b3eeSRoland Mainz state=0; 432*34f9b3eeSRoland Mainz goto spaces; 433*34f9b3eeSRoland Mainz } 434*34f9b3eeSRoland Mainz oldc = *cp; 435*34f9b3eeSRoland Mainz } 436*34f9b3eeSRoland Mainz } 437*34f9b3eeSRoland Mainz } while (mbc(c = type[*cp++])); 438*34f9b3eeSRoland Mainz wasspace = 0; 439*34f9b3eeSRoland Mainz if(skip) 440*34f9b3eeSRoland Mainz { 441*34f9b3eeSRoland Mainz if(eol(c) && (cp > endbuff)) 442*34f9b3eeSRoland Mainz goto eob; 443*34f9b3eeSRoland Mainz err: 444*34f9b3eeSRoland Mainz skip = 0; 445*34f9b3eeSRoland Mainz state = 0; 446*34f9b3eeSRoland Mainz if(eline!=nlines && !(wp->mode & WC_QUIET)) 447*34f9b3eeSRoland Mainz eline = invalid(file, nlines); 448*34f9b3eeSRoland Mainz while(mbc(c) && ((c|WC_ERR) || (c&7)==0)) 449*34f9b3eeSRoland Mainz c=type[*cp++]; 450*34f9b3eeSRoland Mainz if(eol(c) && (cp > endbuff)) 451*34f9b3eeSRoland Mainz { 452*34f9b3eeSRoland Mainz c = WC_MB|WC_ERR; 453*34f9b3eeSRoland Mainz goto eob; 454*34f9b3eeSRoland Mainz } 455*34f9b3eeSRoland Mainz if(mbc(c)) 456*34f9b3eeSRoland Mainz goto mbyte; 457*34f9b3eeSRoland Mainz else if(c&WC_SP) 458*34f9b3eeSRoland Mainz goto spaces; 459*34f9b3eeSRoland Mainz } 460*34f9b3eeSRoland Mainz if(spc(c)) 461*34f9b3eeSRoland Mainz { 462*34f9b3eeSRoland Mainz nwords++; 463*34f9b3eeSRoland Mainz continue; 464*34f9b3eeSRoland Mainz } 465*34f9b3eeSRoland Mainz } 466*34f9b3eeSRoland Mainz /* skip over word characters */ 467*34f9b3eeSRoland Mainz while(!(c = type[*cp++])); 468*34f9b3eeSRoland Mainz if(mbc(c)) 469*34f9b3eeSRoland Mainz goto mbyte; 470*34f9b3eeSRoland Mainz nwords++; 471*34f9b3eeSRoland Mainz } 472*34f9b3eeSRoland Mainz eob: 473*34f9b3eeSRoland Mainz lineoff = cp-start; 474*34f9b3eeSRoland Mainz if((cp -= 2) >= buff) 475*34f9b3eeSRoland Mainz c = type[*cp]; 476*34f9b3eeSRoland Mainz else 477*34f9b3eeSRoland Mainz c = lasttype; 478*34f9b3eeSRoland Mainz lasttype = type[lastchar]; 479*34f9b3eeSRoland Mainz /* see if was in word */ 480*34f9b3eeSRoland Mainz if(!c && !lasttype) 481*34f9b3eeSRoland Mainz nwords--; 482*34f9b3eeSRoland Mainz } 483*34f9b3eeSRoland Mainz if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest) 484*34f9b3eeSRoland Mainz longest = (endbuff + 1 - start) - adjust - (lastchar == '\n'); 485*34f9b3eeSRoland Mainz wp->longest = longest; 486*34f9b3eeSRoland Mainz if (eol(lasttype)) 487*34f9b3eeSRoland Mainz nlines++; 488*34f9b3eeSRoland Mainz else if (!lasttype) 489*34f9b3eeSRoland Mainz nwords++; 490*34f9b3eeSRoland Mainz nchars -= adjust; 491*34f9b3eeSRoland Mainz } 492da2e3ebdSchin wp->chars = nchars; 493da2e3ebdSchin wp->words = nwords; 494da2e3ebdSchin wp->lines = nlines; 495*34f9b3eeSRoland Mainz return 0; 496da2e3ebdSchin } 497*34f9b3eeSRoland Mainz 498