/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin * David Korn
24da2e3ebdSchin * AT&T Bell Laboratories
25da2e3ebdSchin *
26da2e3ebdSchin * library interface for word count
27da2e3ebdSchin */
28da2e3ebdSchin
29da2e3ebdSchin #include <cmd.h>
30da2e3ebdSchin #include <wc.h>
31da2e3ebdSchin #include <ctype.h>
32da2e3ebdSchin
337c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype
34da2e3ebdSchin
35da2e3ebdSchin #include <wchar.h>
36da2e3ebdSchin #include <wctype.h>
3734f9b3eeSRoland Mainz #include <lc.h>
38da2e3ebdSchin
39da2e3ebdSchin #else
40da2e3ebdSchin
41da2e3ebdSchin #ifndef iswspace
42da2e3ebdSchin #define iswspace(x) isspace(x)
43da2e3ebdSchin #endif
44da2e3ebdSchin
45da2e3ebdSchin #endif
46da2e3ebdSchin
4734f9b3eeSRoland Mainz #define WC_SP 0x08
4834f9b3eeSRoland Mainz #define WC_NL 0x10
4934f9b3eeSRoland Mainz #define WC_MB 0x20
5034f9b3eeSRoland Mainz #define WC_ERR 0x40
5134f9b3eeSRoland Mainz
5234f9b3eeSRoland Mainz #define eol(c) ((c)&WC_NL)
5334f9b3eeSRoland Mainz #define mbc(c) ((c)&WC_MB)
5434f9b3eeSRoland Mainz #define spc(c) ((c)&WC_SP)
55*3e14f97fSRoger A. Faulkner #define mb2wc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n)
56da2e3ebdSchin
wc_init(int mode)57da2e3ebdSchin Wc_t* wc_init(int mode)
58da2e3ebdSchin {
59da2e3ebdSchin register int n;
60da2e3ebdSchin register int w;
61da2e3ebdSchin Wc_t* wp;
62da2e3ebdSchin
63da2e3ebdSchin if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
6434f9b3eeSRoland Mainz return 0;
6534f9b3eeSRoland Mainz if (!mbwide())
6634f9b3eeSRoland Mainz wp->mb = 0;
6734f9b3eeSRoland Mainz #if _hdr_wchar && _hdr_wctype && _lib_iswctype
6834f9b3eeSRoland Mainz else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
6934f9b3eeSRoland Mainz wp->mb = 1;
7034f9b3eeSRoland Mainz #endif
7134f9b3eeSRoland Mainz else
7234f9b3eeSRoland Mainz wp->mb = -1;
73da2e3ebdSchin w = mode & WC_WORDS;
74da2e3ebdSchin for (n = (1<<CHAR_BIT); --n >= 0;)
7534f9b3eeSRoland Mainz wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
7634f9b3eeSRoland Mainz wp->type['\n'] = WC_SP|WC_NL;
7734f9b3eeSRoland Mainz if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
7834f9b3eeSRoland Mainz {
7934f9b3eeSRoland Mainz for (n = 0; n < 64; n++)
8034f9b3eeSRoland Mainz {
8134f9b3eeSRoland Mainz wp->type[0x80+n] |= WC_MB;
8234f9b3eeSRoland Mainz if (n<32)
8334f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+1;
8434f9b3eeSRoland Mainz else if (n<48)
8534f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+2;
8634f9b3eeSRoland Mainz else if (n<56)
8734f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+3;
8834f9b3eeSRoland Mainz else if (n<60)
8934f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+4;
9034f9b3eeSRoland Mainz else if (n<62)
9134f9b3eeSRoland Mainz wp->type[0xc0+n] |= WC_MB+5;
9234f9b3eeSRoland Mainz }
9334f9b3eeSRoland Mainz wp->type[0xc0] = WC_MB|WC_ERR;
9434f9b3eeSRoland Mainz wp->type[0xc1] = WC_MB|WC_ERR;
9534f9b3eeSRoland Mainz wp->type[0xfe] = WC_MB|WC_ERR;
9634f9b3eeSRoland Mainz wp->type[0xff] = WC_MB|WC_ERR;
9734f9b3eeSRoland Mainz }
9834f9b3eeSRoland Mainz wp->mode = mode;
9934f9b3eeSRoland Mainz return wp;
10034f9b3eeSRoland Mainz }
10134f9b3eeSRoland Mainz
invalid(const char * file,int nlines)10234f9b3eeSRoland Mainz static int invalid(const char *file, int nlines)
10334f9b3eeSRoland Mainz {
10434f9b3eeSRoland Mainz error_info.file = (char*)file;
10534f9b3eeSRoland Mainz error_info.line = nlines;
10634f9b3eeSRoland Mainz error(ERROR_SYSTEM|1, "invalid multibyte character");
10734f9b3eeSRoland Mainz error_info.file = 0;
10834f9b3eeSRoland Mainz error_info.line = 0;
10934f9b3eeSRoland Mainz return nlines;
11034f9b3eeSRoland Mainz }
11134f9b3eeSRoland Mainz
/*
 * handle utf space characters
 *
 * advance the small state machine that recognizes multibyte UTF-8
 * space characters: <state> is the current state and <c> the next
 * byte of the sequence
 *
 * returns 10 when the sequence is a space, 0 when it cannot be one,
 * and otherwise the next state; an unknown <state> is returned as is
 *
 *	1	after 0xe1: 0x9a -> 4
 *	2	after 0xe2: 0x80 -> 6, 0x81 -> 7
 *	3	after 0xe3: 0x80 -> 5
 *	4	0xe1 0x9a: 0x80 is U+1680
 *	5	0xe3 0x80: 0x80 is U+3000
 *	6	0xe2 0x80: U+2000..U+200B, U+2028, U+2029 and, when
 *		iswspace() agrees, U+2007 and U+202F
 *	7	0xe2 0x81: 0x9f is U+205F
 *	8	after 0xc2: defer to iswspace() on the second byte
 */

static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		state = (c==0x9a?4:0);
		break;
	case 2:
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	case 3:
		state = (c==0x80?5:0);
		break;
	case 4:
		state = (c==0x80?10:0);
		break;
	case 5:
		state = (c==0x80?10:0);
		break;
	case 6:
		state = 0;
		/*
		 * U+2028 and U+2029 end in 0xa8 and 0xa9; the bytes
		 * 0xa0 and 0xa1 tested before are U+2020 and U+2021,
		 * which are not spaces
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			if(c<=0xb)
				return(10);
		}
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	case 7:
		state = (c==0x9f?10:0);
		break;
	case 8:
		return (iswspace(c)?10:0);
	}
	return state;
}

158da2e3ebdSchin /*
159da2e3ebdSchin * compute the line, word, and character count for file <fd>
160da2e3ebdSchin */
16134f9b3eeSRoland Mainz
wc_count(Wc_t * wp,Sfio_t * fd,const char * file)162da2e3ebdSchin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
163da2e3ebdSchin {
16434f9b3eeSRoland Mainz register char* type = wp->type;
165da2e3ebdSchin register unsigned char* cp;
166*3e14f97fSRoger A. Faulkner register Sfoff_t nbytes;
167da2e3ebdSchin register Sfoff_t nchars;
168da2e3ebdSchin register Sfoff_t nwords;
169da2e3ebdSchin register Sfoff_t nlines;
17034f9b3eeSRoland Mainz register Sfoff_t eline = -1;
17134f9b3eeSRoland Mainz register Sfoff_t longest = 0;
172da2e3ebdSchin register ssize_t c;
173da2e3ebdSchin register unsigned char* endbuff;
17434f9b3eeSRoland Mainz register int lasttype = WC_SP;
175da2e3ebdSchin unsigned int lastchar;
17634f9b3eeSRoland Mainz ssize_t n;
17734f9b3eeSRoland Mainz ssize_t o;
178da2e3ebdSchin unsigned char* buff;
179da2e3ebdSchin wchar_t x;
18034f9b3eeSRoland Mainz unsigned char side[32];
181da2e3ebdSchin
182da2e3ebdSchin sfset(fd,SF_WRITE,1);
183*3e14f97fSRoger A. Faulkner nlines = nwords = nchars = nbytes = 0;
184da2e3ebdSchin wp->longest = 0;
18534f9b3eeSRoland Mainz if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
186da2e3ebdSchin {
187da2e3ebdSchin cp = buff = endbuff = 0;
188da2e3ebdSchin for (;;)
189da2e3ebdSchin {
190*3e14f97fSRoger A. Faulkner if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
19134f9b3eeSRoland Mainz {
19234f9b3eeSRoland Mainz if ((o = endbuff-cp) < sizeof(side))
193da2e3ebdSchin {
194da2e3ebdSchin if (buff)
195da2e3ebdSchin {
19634f9b3eeSRoland Mainz if (o)
19734f9b3eeSRoland Mainz memcpy(side, cp, o);
19834f9b3eeSRoland Mainz mbinit();
199da2e3ebdSchin }
20034f9b3eeSRoland Mainz else
20134f9b3eeSRoland Mainz o = 0;
20234f9b3eeSRoland Mainz cp = side + o;
20334f9b3eeSRoland Mainz if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
204da2e3ebdSchin {
205da2e3ebdSchin if ((nchars - longest) > wp->longest)
206da2e3ebdSchin wp->longest = nchars - longest;
20734f9b3eeSRoland Mainz break;
20834f9b3eeSRoland Mainz }
209*3e14f97fSRoger A. Faulkner nbytes += n;
21034f9b3eeSRoland Mainz if ((c = sizeof(side) - o) > n)
21134f9b3eeSRoland Mainz c = n;
21234f9b3eeSRoland Mainz if (c)
21334f9b3eeSRoland Mainz memcpy(cp, buff, c);
21434f9b3eeSRoland Mainz endbuff = buff + n;
21534f9b3eeSRoland Mainz cp = side;
21634f9b3eeSRoland Mainz x = mbchar(cp);
21734f9b3eeSRoland Mainz if ((cp-side) < o)
21834f9b3eeSRoland Mainz {
21934f9b3eeSRoland Mainz cp = buff;
22034f9b3eeSRoland Mainz nchars += (cp-side) - 1;
22134f9b3eeSRoland Mainz }
22234f9b3eeSRoland Mainz else
22334f9b3eeSRoland Mainz cp = buff + (cp-side) - o;
22434f9b3eeSRoland Mainz }
22534f9b3eeSRoland Mainz else
22634f9b3eeSRoland Mainz {
22734f9b3eeSRoland Mainz cp++;
22834f9b3eeSRoland Mainz x = -1;
22934f9b3eeSRoland Mainz }
23034f9b3eeSRoland Mainz if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
23134f9b3eeSRoland Mainz eline = invalid(file, nlines);
23234f9b3eeSRoland Mainz }
23334f9b3eeSRoland Mainz else
23434f9b3eeSRoland Mainz cp += n ? n : 1;
23534f9b3eeSRoland Mainz if (x == '\n')
23634f9b3eeSRoland Mainz {
23734f9b3eeSRoland Mainz if ((nchars - longest) > wp->longest)
23834f9b3eeSRoland Mainz wp->longest = nchars - longest;
23934f9b3eeSRoland Mainz longest = nchars + 1;
240da2e3ebdSchin nlines++;
241da2e3ebdSchin lasttype = 1;
242da2e3ebdSchin }
243da2e3ebdSchin else if (iswspace(x))
244da2e3ebdSchin lasttype = 1;
245da2e3ebdSchin else if (lasttype)
246da2e3ebdSchin {
247da2e3ebdSchin lasttype = 0;
248da2e3ebdSchin nwords++;
249da2e3ebdSchin }
25034f9b3eeSRoland Mainz nchars++;
25134f9b3eeSRoland Mainz }
252*3e14f97fSRoger A. Faulkner if (!(wp->mode & WC_MBYTE))
253*3e14f97fSRoger A. Faulkner nchars = nbytes;
25434f9b3eeSRoland Mainz }
25534f9b3eeSRoland Mainz else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25634f9b3eeSRoland Mainz {
25734f9b3eeSRoland Mainz if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25834f9b3eeSRoland Mainz {
25934f9b3eeSRoland Mainz while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
26034f9b3eeSRoland Mainz {
26134f9b3eeSRoland Mainz nchars += c;
26234f9b3eeSRoland Mainz endbuff = cp + c;
26334f9b3eeSRoland Mainz if (*--endbuff == '\n')
26434f9b3eeSRoland Mainz nlines++;
26534f9b3eeSRoland Mainz else
26634f9b3eeSRoland Mainz *endbuff = '\n';
26734f9b3eeSRoland Mainz for (;;)
26834f9b3eeSRoland Mainz if (*cp++ == '\n')
26934f9b3eeSRoland Mainz {
27034f9b3eeSRoland Mainz if (cp > endbuff)
27134f9b3eeSRoland Mainz break;
27234f9b3eeSRoland Mainz nlines++;
27334f9b3eeSRoland Mainz }
274da2e3ebdSchin }
275da2e3ebdSchin }
276da2e3ebdSchin else
277da2e3ebdSchin {
27834f9b3eeSRoland Mainz while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
279da2e3ebdSchin {
280da2e3ebdSchin nchars += c;
281da2e3ebdSchin /* check to see whether first character terminates word */
282da2e3ebdSchin if (c==1)
283da2e3ebdSchin {
28434f9b3eeSRoland Mainz if (eol(lasttype))
285da2e3ebdSchin nlines++;
28634f9b3eeSRoland Mainz if ((c = type[*cp]) && !lasttype)
287da2e3ebdSchin nwords++;
288da2e3ebdSchin lasttype = c;
289da2e3ebdSchin continue;
290da2e3ebdSchin }
29134f9b3eeSRoland Mainz if (!lasttype && type[*cp])
292da2e3ebdSchin nwords++;
293da2e3ebdSchin lastchar = cp[--c];
29434f9b3eeSRoland Mainz *(endbuff = cp+c) = '\n';
295da2e3ebdSchin c = lasttype;
296da2e3ebdSchin /* process each buffer */
297da2e3ebdSchin for (;;)
298da2e3ebdSchin {
299da2e3ebdSchin /* process spaces and new-lines */
30034f9b3eeSRoland Mainz do
301da2e3ebdSchin {
30234f9b3eeSRoland Mainz if (eol(c))
303da2e3ebdSchin for (;;)
304da2e3ebdSchin {
305da2e3ebdSchin /* check for end of buffer */
306da2e3ebdSchin if (cp > endbuff)
30734f9b3eeSRoland Mainz goto beob;
308da2e3ebdSchin nlines++;
309da2e3ebdSchin if (*cp != '\n')
310da2e3ebdSchin break;
311da2e3ebdSchin cp++;
312da2e3ebdSchin }
31334f9b3eeSRoland Mainz } while (c = type[*cp++]);
314da2e3ebdSchin /* skip over word characters */
31534f9b3eeSRoland Mainz while (!(c = type[*cp++]));
316da2e3ebdSchin nwords++;
317da2e3ebdSchin }
31834f9b3eeSRoland Mainz beob:
319da2e3ebdSchin if ((cp -= 2) >= buff)
32034f9b3eeSRoland Mainz c = type[*cp];
321da2e3ebdSchin else
322da2e3ebdSchin c = lasttype;
32334f9b3eeSRoland Mainz lasttype = type[lastchar];
324da2e3ebdSchin /* see if was in word */
325da2e3ebdSchin if (!c && !lasttype)
326da2e3ebdSchin nwords--;
327da2e3ebdSchin }
32834f9b3eeSRoland Mainz if (eol(lasttype))
329da2e3ebdSchin nlines++;
330da2e3ebdSchin else if (!lasttype)
331da2e3ebdSchin nwords++;
332da2e3ebdSchin }
33334f9b3eeSRoland Mainz }
33434f9b3eeSRoland Mainz else
33534f9b3eeSRoland Mainz {
33634f9b3eeSRoland Mainz int lineoff=0;
33734f9b3eeSRoland Mainz int skip=0;
33834f9b3eeSRoland Mainz int adjust=0;
33934f9b3eeSRoland Mainz int state=0;
34034f9b3eeSRoland Mainz int oldc;
34134f9b3eeSRoland Mainz int xspace;
34234f9b3eeSRoland Mainz int wasspace = 1;
34334f9b3eeSRoland Mainz unsigned char* start;
34434f9b3eeSRoland Mainz
34534f9b3eeSRoland Mainz lastchar = 0;
34634f9b3eeSRoland Mainz start = (endbuff = side) + 1;
34734f9b3eeSRoland Mainz xspace = iswspace(0xa0) || iswspace(0x85);
34834f9b3eeSRoland Mainz while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
34934f9b3eeSRoland Mainz {
350*3e14f97fSRoger A. Faulkner nbytes += c;
35134f9b3eeSRoland Mainz nchars += c;
35234f9b3eeSRoland Mainz start = cp-lineoff;
35334f9b3eeSRoland Mainz /* check to see whether first character terminates word */
35434f9b3eeSRoland Mainz if(c==1)
35534f9b3eeSRoland Mainz {
35634f9b3eeSRoland Mainz if(eol(lasttype))
35734f9b3eeSRoland Mainz nlines++;
35834f9b3eeSRoland Mainz if((c = type[*cp]) && !lasttype)
35934f9b3eeSRoland Mainz nwords++;
36034f9b3eeSRoland Mainz lasttype = c;
36134f9b3eeSRoland Mainz endbuff = start;
36234f9b3eeSRoland Mainz continue;
36334f9b3eeSRoland Mainz }
36434f9b3eeSRoland Mainz lastchar = cp[--c];
36534f9b3eeSRoland Mainz endbuff = cp+c;
36634f9b3eeSRoland Mainz cp[c] = '\n';
36734f9b3eeSRoland Mainz if(mbc(lasttype))
36834f9b3eeSRoland Mainz {
36934f9b3eeSRoland Mainz c = lasttype;
37034f9b3eeSRoland Mainz goto mbyte;
37134f9b3eeSRoland Mainz }
37234f9b3eeSRoland Mainz if(!lasttype && spc(type[*cp]))
37334f9b3eeSRoland Mainz nwords++;
37434f9b3eeSRoland Mainz c = lasttype;
37534f9b3eeSRoland Mainz /* process each buffer */
37634f9b3eeSRoland Mainz for (;;)
37734f9b3eeSRoland Mainz {
37834f9b3eeSRoland Mainz /* process spaces and new-lines */
37934f9b3eeSRoland Mainz spaces:
38034f9b3eeSRoland Mainz do
38134f9b3eeSRoland Mainz {
38234f9b3eeSRoland Mainz if (eol(c))
38334f9b3eeSRoland Mainz {
38434f9b3eeSRoland Mainz /* check for end of buffer */
38534f9b3eeSRoland Mainz if (cp > endbuff)
38634f9b3eeSRoland Mainz goto eob;
38734f9b3eeSRoland Mainz if(wp->mode&WC_LONGEST)
38834f9b3eeSRoland Mainz {
38934f9b3eeSRoland Mainz if((cp-start)-adjust > longest)
39034f9b3eeSRoland Mainz longest = (cp-start)-adjust-1;
39134f9b3eeSRoland Mainz start = cp;
39234f9b3eeSRoland Mainz }
39334f9b3eeSRoland Mainz nlines++;
39434f9b3eeSRoland Mainz nchars -= adjust;
39534f9b3eeSRoland Mainz adjust = 0;
39634f9b3eeSRoland Mainz }
39734f9b3eeSRoland Mainz } while (spc(c = type[*cp++]));
39834f9b3eeSRoland Mainz wasspace=1;
39934f9b3eeSRoland Mainz if(mbc(c))
40034f9b3eeSRoland Mainz {
40134f9b3eeSRoland Mainz mbyte:
40234f9b3eeSRoland Mainz do
40334f9b3eeSRoland Mainz {
40434f9b3eeSRoland Mainz if(c&WC_ERR)
40534f9b3eeSRoland Mainz goto err;
40634f9b3eeSRoland Mainz if(skip && (c&7))
40734f9b3eeSRoland Mainz break;
40834f9b3eeSRoland Mainz if(!skip)
40934f9b3eeSRoland Mainz {
41034f9b3eeSRoland Mainz if(!(c&7))
41134f9b3eeSRoland Mainz {
41234f9b3eeSRoland Mainz skip=1;
41334f9b3eeSRoland Mainz break;
41434f9b3eeSRoland Mainz }
41534f9b3eeSRoland Mainz skip = (c&7);
41634f9b3eeSRoland Mainz adjust += skip;
41734f9b3eeSRoland Mainz state = 0;
41834f9b3eeSRoland Mainz if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
41934f9b3eeSRoland Mainz oldc = *cp;
42034f9b3eeSRoland Mainz else if(xspace && cp[-1]==0xc2)
42134f9b3eeSRoland Mainz {
42234f9b3eeSRoland Mainz state = 8;
42334f9b3eeSRoland Mainz oldc = *cp;
42434f9b3eeSRoland Mainz }
42534f9b3eeSRoland Mainz }
42634f9b3eeSRoland Mainz else
42734f9b3eeSRoland Mainz {
42834f9b3eeSRoland Mainz skip--;
42934f9b3eeSRoland Mainz if(state && (state=chkstate(state,oldc)))
43034f9b3eeSRoland Mainz {
43134f9b3eeSRoland Mainz if(state==10)
43234f9b3eeSRoland Mainz {
43334f9b3eeSRoland Mainz if(!wasspace)
43434f9b3eeSRoland Mainz nwords++;
43534f9b3eeSRoland Mainz wasspace = 1;
43634f9b3eeSRoland Mainz state=0;
43734f9b3eeSRoland Mainz goto spaces;
43834f9b3eeSRoland Mainz }
43934f9b3eeSRoland Mainz oldc = *cp;
44034f9b3eeSRoland Mainz }
44134f9b3eeSRoland Mainz }
44234f9b3eeSRoland Mainz } while (mbc(c = type[*cp++]));
44334f9b3eeSRoland Mainz wasspace = 0;
44434f9b3eeSRoland Mainz if(skip)
44534f9b3eeSRoland Mainz {
44634f9b3eeSRoland Mainz if(eol(c) && (cp > endbuff))
44734f9b3eeSRoland Mainz goto eob;
44834f9b3eeSRoland Mainz err:
44934f9b3eeSRoland Mainz skip = 0;
45034f9b3eeSRoland Mainz state = 0;
45134f9b3eeSRoland Mainz if(eline!=nlines && !(wp->mode & WC_QUIET))
45234f9b3eeSRoland Mainz eline = invalid(file, nlines);
45334f9b3eeSRoland Mainz while(mbc(c) && ((c|WC_ERR) || (c&7)==0))
45434f9b3eeSRoland Mainz c=type[*cp++];
45534f9b3eeSRoland Mainz if(eol(c) && (cp > endbuff))
45634f9b3eeSRoland Mainz {
45734f9b3eeSRoland Mainz c = WC_MB|WC_ERR;
45834f9b3eeSRoland Mainz goto eob;
45934f9b3eeSRoland Mainz }
46034f9b3eeSRoland Mainz if(mbc(c))
46134f9b3eeSRoland Mainz goto mbyte;
46234f9b3eeSRoland Mainz else if(c&WC_SP)
46334f9b3eeSRoland Mainz goto spaces;
46434f9b3eeSRoland Mainz }
46534f9b3eeSRoland Mainz if(spc(c))
46634f9b3eeSRoland Mainz {
46734f9b3eeSRoland Mainz nwords++;
46834f9b3eeSRoland Mainz continue;
46934f9b3eeSRoland Mainz }
47034f9b3eeSRoland Mainz }
47134f9b3eeSRoland Mainz /* skip over word characters */
47234f9b3eeSRoland Mainz while(!(c = type[*cp++]));
47334f9b3eeSRoland Mainz if(mbc(c))
47434f9b3eeSRoland Mainz goto mbyte;
47534f9b3eeSRoland Mainz nwords++;
47634f9b3eeSRoland Mainz }
47734f9b3eeSRoland Mainz eob:
47834f9b3eeSRoland Mainz lineoff = cp-start;
47934f9b3eeSRoland Mainz if((cp -= 2) >= buff)
48034f9b3eeSRoland Mainz c = type[*cp];
48134f9b3eeSRoland Mainz else
48234f9b3eeSRoland Mainz c = lasttype;
48334f9b3eeSRoland Mainz lasttype = type[lastchar];
48434f9b3eeSRoland Mainz /* see if was in word */
48534f9b3eeSRoland Mainz if(!c && !lasttype)
48634f9b3eeSRoland Mainz nwords--;
48734f9b3eeSRoland Mainz }
48834f9b3eeSRoland Mainz if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
48934f9b3eeSRoland Mainz longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
49034f9b3eeSRoland Mainz wp->longest = longest;
49134f9b3eeSRoland Mainz if (eol(lasttype))
49234f9b3eeSRoland Mainz nlines++;
49334f9b3eeSRoland Mainz else if (!lasttype)
49434f9b3eeSRoland Mainz nwords++;
495*3e14f97fSRoger A. Faulkner if (wp->mode & WC_MBYTE)
49634f9b3eeSRoland Mainz nchars -= adjust;
497*3e14f97fSRoger A. Faulkner else
498*3e14f97fSRoger A. Faulkner nchars = nbytes;
49934f9b3eeSRoland Mainz }
500da2e3ebdSchin wp->chars = nchars;
501da2e3ebdSchin wp->words = nwords;
502da2e3ebdSchin wp->lines = nlines;
50334f9b3eeSRoland Mainz return 0;
504da2e3ebdSchin }
50534f9b3eeSRoland Mainz
506