1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2008 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * David Korn 24 * AT&T Bell Laboratories 25 * 26 * library interface for word count 27 */ 28 29 #include <cmd.h> 30 #include <wc.h> 31 #include <ctype.h> 32 33 #if _hdr_wchar && _hdr_wctype && _lib_iswctype 34 35 #include <wchar.h> 36 #include <wctype.h> 37 38 #else 39 40 #ifndef iswspace 41 #define iswspace(x) isspace(x) 42 #endif 43 44 #endif 45 46 #define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) 47 #define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) 48 49 Wc_t *wc_init(int mode) 50 { 51 register int n; 52 register int w; 53 Wc_t* wp; 54 55 if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 56 return(0); 57 wp->mode = mode; 58 w = mode & WC_WORDS; 59 for(n=(1<<CHAR_BIT);--n >=0;) 60 wp->space[n] = w ? !!isspace(n) : 0; 61 wp->space['\n'] = -1; 62 return(wp); 63 } 64 65 /* 66 * compute the line, word, and character count for file <fd> 67 */ 68 int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 69 { 70 register signed char *space = wp->space; 71 register unsigned char *cp; 72 register Sfoff_t nchars; 73 register Sfoff_t nwords; 74 register Sfoff_t nlines; 75 register Sfoff_t eline; 76 register Sfoff_t longest; 77 register ssize_t c; 78 register unsigned char *endbuff; 79 register int lasttype = 1; 80 unsigned int lastchar; 81 unsigned char *buff; 82 wchar_t x; 83 84 sfset(fd,SF_WRITE,1); 85 nlines = nwords = nchars = 0; 86 wp->longest = 0; 87 if (wp->mode & (WC_LONGEST|WC_MBYTE)) 88 { 89 longest = 0; 90 eline = -1; 91 cp = buff = endbuff = 0; 92 for (;;) 93 { 94 if (!mbok(cp, endbuff-cp)) 95 { 96 if (buff) 97 sfread(fd, buff, cp-buff); 98 if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) 99 break; 100 endbuff = (cp = buff) + sfvalue(fd); 101 } 102 nchars++; 103 x = mbchar(cp); 104 if (x == -1) 105 { 106 if (eline != nlines && !(wp->mode & WC_QUIET)) 107 { 108 error_info.file = (char*)file; 109 error_info.line = eline = nlines; 110 error(ERROR_SYSTEM|1, "invalid multibyte character"); 111 error_info.file = 0; 112 error_info.line = 0; 113 } 114 } 115 else if (x == '\n') 116 { 117 if ((nchars - longest) > wp->longest) 118 wp->longest = nchars - longest; 119 longest = nchars; 120 nlines++; 121 lasttype = 1; 122 } 123 else if (iswspace(x)) 124 lasttype = 1; 125 else if (lasttype) 126 { 127 lasttype = 0; 128 nwords++; 129 } 130 } 131 } 132 else 133 { 134 for (;;) 135 { 136 /* fill next buffer and check for end-of-file */ 137 if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) 138 break; 139 sfread(fd,(char*)(cp=buff),c); 140 nchars += c; 141 /* check to see whether first character terminates word */ 142 if(c==1) 143 { 144 if(endline(lasttype)) 145 nlines++; 146 if((c = space[*cp]) && !lasttype) 147 nwords++; 148 lasttype = c; 149 continue; 150 } 151 if(!lasttype && space[*cp]) 152 nwords++; 153 lastchar = cp[--c]; 154 cp[c] = '\n'; 155 endbuff = cp+c; 156 c = lasttype; 157 /* process each buffer */ 158 for (;;) 159 { 160 /* process spaces and new-lines */ 161 do if (endline(c)) 162 { 163 for (;;) 164 { 165 /* check for end of buffer */ 166 if (cp > endbuff) 167 goto eob; 168 nlines++; 169 if (*cp != '\n') 170 break; 171 cp++; 172 } 173 } while (c = space[*cp++]); 174 /* skip over word characters */ 175 while(!(c = space[*cp++])); 176 nwords++; 177 } 178 eob: 179 if((cp -= 2) >= buff) 180 c = space[*cp]; 181 else 182 c = lasttype; 183 lasttype = space[lastchar]; 184 /* see if was in word */ 185 if(!c && !lasttype) 186 nwords--; 187 } 188 if(endline(lasttype)) 189 nlines++; 190 else if(!lasttype) 191 nwords++; 192 } 193 wp->chars = nchars; 194 wp->words = nwords; 195 wp->lines = nlines; 196 return(0); 197 } 198