1da2e3ebdSchin /*********************************************************************** 2da2e3ebdSchin * * 3da2e3ebdSchin * This software is part of the ast package * 4*7c2fbfb3SApril Chin * Copyright (c) 1992-2008 AT&T Intellectual Property * 5da2e3ebdSchin * and is licensed under the * 6da2e3ebdSchin * Common Public License, Version 1.0 * 7*7c2fbfb3SApril Chin * by AT&T Intellectual Property * 8da2e3ebdSchin * * 9da2e3ebdSchin * A copy of the License is available at * 10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt * 11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12da2e3ebdSchin * * 13da2e3ebdSchin * Information and Software Systems Research * 14da2e3ebdSchin * AT&T Research * 15da2e3ebdSchin * Florham Park NJ * 16da2e3ebdSchin * * 17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> * 18da2e3ebdSchin * David Korn <dgk@research.att.com> * 19da2e3ebdSchin * * 20da2e3ebdSchin ***********************************************************************/ 21da2e3ebdSchin #pragma prototyped 22da2e3ebdSchin /* 23da2e3ebdSchin * David Korn 24da2e3ebdSchin * AT&T Bell Laboratories 25da2e3ebdSchin * 26da2e3ebdSchin * library interface for word count 27da2e3ebdSchin */ 28da2e3ebdSchin 29da2e3ebdSchin #include <cmd.h> 30da2e3ebdSchin #include <wc.h> 31da2e3ebdSchin #include <ctype.h> 32da2e3ebdSchin 33*7c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype 34da2e3ebdSchin 35da2e3ebdSchin #include <wchar.h> 36da2e3ebdSchin #include <wctype.h> 37da2e3ebdSchin 38da2e3ebdSchin #else 39da2e3ebdSchin 40da2e3ebdSchin #ifndef iswspace 41da2e3ebdSchin #define iswspace(x) isspace(x) 42da2e3ebdSchin #endif 43da2e3ebdSchin 44da2e3ebdSchin #endif 45da2e3ebdSchin 46da2e3ebdSchin #define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) 47da2e3ebdSchin #define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) 48da2e3ebdSchin 49da2e3ebdSchin Wc_t *wc_init(int mode) 50da2e3ebdSchin { 51da2e3ebdSchin register int n; 52da2e3ebdSchin register int w; 53da2e3ebdSchin Wc_t* wp; 54da2e3ebdSchin 55da2e3ebdSchin if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 56da2e3ebdSchin return(0); 57da2e3ebdSchin wp->mode = mode; 58da2e3ebdSchin w = mode & WC_WORDS; 59da2e3ebdSchin for(n=(1<<CHAR_BIT);--n >=0;) 60da2e3ebdSchin wp->space[n] = w ? !!isspace(n) : 0; 61da2e3ebdSchin wp->space['\n'] = -1; 62da2e3ebdSchin return(wp); 63da2e3ebdSchin } 64da2e3ebdSchin 65da2e3ebdSchin /* 66da2e3ebdSchin * compute the line, word, and character count for file <fd> 67da2e3ebdSchin */ 68da2e3ebdSchin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 69da2e3ebdSchin { 70da2e3ebdSchin register signed char *space = wp->space; 71da2e3ebdSchin register unsigned char *cp; 72da2e3ebdSchin register Sfoff_t nchars; 73da2e3ebdSchin register Sfoff_t nwords; 74da2e3ebdSchin register Sfoff_t nlines; 75da2e3ebdSchin register Sfoff_t eline; 76da2e3ebdSchin register Sfoff_t longest; 77da2e3ebdSchin register ssize_t c; 78da2e3ebdSchin register unsigned char *endbuff; 79da2e3ebdSchin register int lasttype = 1; 80da2e3ebdSchin unsigned int lastchar; 81da2e3ebdSchin unsigned char *buff; 82da2e3ebdSchin wchar_t x; 83da2e3ebdSchin 84da2e3ebdSchin sfset(fd,SF_WRITE,1); 85da2e3ebdSchin nlines = nwords = nchars = 0; 86da2e3ebdSchin wp->longest = 0; 87da2e3ebdSchin if (wp->mode & (WC_LONGEST|WC_MBYTE)) 88da2e3ebdSchin { 89da2e3ebdSchin longest = 0; 90da2e3ebdSchin eline = -1; 91da2e3ebdSchin cp = buff = endbuff = 0; 92da2e3ebdSchin for (;;) 93da2e3ebdSchin { 94da2e3ebdSchin if (!mbok(cp, endbuff-cp)) 95da2e3ebdSchin { 96da2e3ebdSchin if (buff) 97da2e3ebdSchin sfread(fd, buff, cp-buff); 98da2e3ebdSchin if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) 99da2e3ebdSchin break; 100da2e3ebdSchin endbuff = (cp = buff) + sfvalue(fd); 101da2e3ebdSchin } 102da2e3ebdSchin nchars++; 103da2e3ebdSchin x = mbchar(cp); 104da2e3ebdSchin if (x == -1) 105da2e3ebdSchin { 106da2e3ebdSchin if (eline != nlines && !(wp->mode & WC_QUIET)) 107da2e3ebdSchin { 108da2e3ebdSchin error_info.file = (char*)file; 109da2e3ebdSchin error_info.line = eline = nlines; 110da2e3ebdSchin error(ERROR_SYSTEM|1, "invalid multibyte character"); 111da2e3ebdSchin error_info.file = 0; 112da2e3ebdSchin error_info.line = 0; 113da2e3ebdSchin } 114da2e3ebdSchin } 115da2e3ebdSchin else if (x == '\n') 116da2e3ebdSchin { 117da2e3ebdSchin if ((nchars - longest) > wp->longest) 118da2e3ebdSchin wp->longest = nchars - longest; 119da2e3ebdSchin longest = nchars; 120da2e3ebdSchin nlines++; 121da2e3ebdSchin lasttype = 1; 122da2e3ebdSchin } 123da2e3ebdSchin else if (iswspace(x)) 124da2e3ebdSchin lasttype = 1; 125da2e3ebdSchin else if (lasttype) 126da2e3ebdSchin { 127da2e3ebdSchin lasttype = 0; 128da2e3ebdSchin nwords++; 129da2e3ebdSchin } 130da2e3ebdSchin } 131da2e3ebdSchin } 132da2e3ebdSchin else 133da2e3ebdSchin { 134da2e3ebdSchin for (;;) 135da2e3ebdSchin { 136da2e3ebdSchin /* fill next buffer and check for end-of-file */ 137da2e3ebdSchin if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) 138da2e3ebdSchin break; 139da2e3ebdSchin sfread(fd,(char*)(cp=buff),c); 140da2e3ebdSchin nchars += c; 141da2e3ebdSchin /* check to see whether first character terminates word */ 142da2e3ebdSchin if(c==1) 143da2e3ebdSchin { 144da2e3ebdSchin if(endline(lasttype)) 145da2e3ebdSchin nlines++; 146da2e3ebdSchin if((c = space[*cp]) && !lasttype) 147da2e3ebdSchin nwords++; 148da2e3ebdSchin lasttype = c; 149da2e3ebdSchin continue; 150da2e3ebdSchin } 151da2e3ebdSchin if(!lasttype && space[*cp]) 152da2e3ebdSchin nwords++; 153da2e3ebdSchin lastchar = cp[--c]; 154da2e3ebdSchin cp[c] = '\n'; 155da2e3ebdSchin endbuff = cp+c; 156da2e3ebdSchin c = lasttype; 157da2e3ebdSchin /* process each buffer */ 158da2e3ebdSchin for (;;) 159da2e3ebdSchin { 160da2e3ebdSchin /* process spaces and new-lines */ 161da2e3ebdSchin do if (endline(c)) 162da2e3ebdSchin { 163da2e3ebdSchin for (;;) 164da2e3ebdSchin { 165da2e3ebdSchin /* check for end of buffer */ 166da2e3ebdSchin if (cp > endbuff) 167da2e3ebdSchin goto eob; 168da2e3ebdSchin nlines++; 169da2e3ebdSchin if (*cp != '\n') 170da2e3ebdSchin break; 171da2e3ebdSchin cp++; 172da2e3ebdSchin } 173da2e3ebdSchin } while (c = space[*cp++]); 174da2e3ebdSchin /* skip over word characters */ 175da2e3ebdSchin while(!(c = space[*cp++])); 176da2e3ebdSchin nwords++; 177da2e3ebdSchin } 178da2e3ebdSchin eob: 179da2e3ebdSchin if((cp -= 2) >= buff) 180da2e3ebdSchin c = space[*cp]; 181da2e3ebdSchin else 182da2e3ebdSchin c = lasttype; 183da2e3ebdSchin lasttype = space[lastchar]; 184da2e3ebdSchin /* see if was in word */ 185da2e3ebdSchin if(!c && !lasttype) 186da2e3ebdSchin nwords--; 187da2e3ebdSchin } 188da2e3ebdSchin if(endline(lasttype)) 189da2e3ebdSchin nlines++; 190da2e3ebdSchin else if(!lasttype) 191da2e3ebdSchin nwords++; 192da2e3ebdSchin } 193da2e3ebdSchin wp->chars = nchars; 194da2e3ebdSchin wp->words = nwords; 195da2e3ebdSchin wp->lines = nlines; 196da2e3ebdSchin return(0); 197da2e3ebdSchin } 198