/*********************************************************************** * * * This software is part of the ast package * * Copyright (c) 1992-2008 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * * * * A copy of the License is available at * * http://www.opensource.org/licenses/cpl1.0.txt * * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * * * * Information and Software Systems Research * * AT&T Research * * Florham Park NJ * * * * Glenn Fowler * * David Korn * * * ***********************************************************************/ #pragma prototyped /* * David Korn * AT&T Bell Laboratories * * library interface for word count */ #include #include #include #if _hdr_wchar && _hdr_wctype && _lib_iswctype #include #include #else #ifndef iswspace #define iswspace(x) isspace(x) #endif #endif #define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) #define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) Wc_t *wc_init(int mode) { register int n; register int w; Wc_t* wp; if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) return(0); wp->mode = mode; w = mode & WC_WORDS; for(n=(1<=0;) wp->space[n] = w ? !!isspace(n) : 0; wp->space['\n'] = -1; return(wp); } /* * compute the line, word, and character count for file */ int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) { register signed char *space = wp->space; register unsigned char *cp; register Sfoff_t nchars; register Sfoff_t nwords; register Sfoff_t nlines; register Sfoff_t eline; register Sfoff_t longest; register ssize_t c; register unsigned char *endbuff; register int lasttype = 1; unsigned int lastchar; unsigned char *buff; wchar_t x; sfset(fd,SF_WRITE,1); nlines = nwords = nchars = 0; wp->longest = 0; if (wp->mode & (WC_LONGEST|WC_MBYTE)) { longest = 0; eline = -1; cp = buff = endbuff = 0; for (;;) { if (!mbok(cp, endbuff-cp)) { if (buff) sfread(fd, buff, cp-buff); if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) break; endbuff = (cp = buff) + sfvalue(fd); } nchars++; x = mbchar(cp); if (x == -1) { if (eline != nlines && !(wp->mode & WC_QUIET)) { error_info.file = (char*)file; error_info.line = eline = nlines; error(ERROR_SYSTEM|1, "invalid multibyte character"); error_info.file = 0; error_info.line = 0; } } else if (x == '\n') { if ((nchars - longest) > wp->longest) wp->longest = nchars - longest; longest = nchars; nlines++; lasttype = 1; } else if (iswspace(x)) lasttype = 1; else if (lasttype) { lasttype = 0; nwords++; } } } else { for (;;) { /* fill next buffer and check for end-of-file */ if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) break; sfread(fd,(char*)(cp=buff),c); nchars += c; /* check to see whether first character terminates word */ if(c==1) { if(endline(lasttype)) nlines++; if((c = space[*cp]) && !lasttype) nwords++; lasttype = c; continue; } if(!lasttype && space[*cp]) nwords++; lastchar = cp[--c]; cp[c] = '\n'; endbuff = cp+c; c = lasttype; /* process each buffer */ for (;;) { /* process spaces and new-lines */ do if (endline(c)) { for (;;) { /* check for end of buffer */ if (cp > endbuff) goto eob; nlines++; if (*cp != '\n') break; cp++; } } while (c = space[*cp++]); /* skip over word characters */ while(!(c = space[*cp++])); nwords++; } eob: if((cp -= 2) >= buff) c = space[*cp]; else c = lasttype; lasttype = space[lastchar]; /* see if was in word */ if(!c && !lasttype) nwords--; } if(endline(lasttype)) nlines++; else if(!lasttype) nwords++; } wp->chars = nchars; wp->words = nwords; wp->lines = nlines; return(0); }