xref: /titanic_51/usr/src/lib/libcmd/common/wclib.c (revision 3e14f97f673e8a630f076077de35afdd43dc1587)
1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*3e14f97fSRoger A. Faulkner *          Copyright (c) 1992-2010 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6da2e3ebdSchin *                  Common Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10da2e3ebdSchin *            http://www.opensource.org/licenses/cpl1.0.txt             *
11da2e3ebdSchin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                                                                      *
20da2e3ebdSchin ***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin  * David Korn
24da2e3ebdSchin  * AT&T Bell Laboratories
25da2e3ebdSchin  *
26da2e3ebdSchin  * library interface for word count
27da2e3ebdSchin  */
28da2e3ebdSchin 
29da2e3ebdSchin #include <cmd.h>
30da2e3ebdSchin #include <wc.h>
31da2e3ebdSchin #include <ctype.h>
32da2e3ebdSchin 
337c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype
34da2e3ebdSchin 
35da2e3ebdSchin #include <wchar.h>
36da2e3ebdSchin #include <wctype.h>
3734f9b3eeSRoland Mainz #include <lc.h>
38da2e3ebdSchin 
39da2e3ebdSchin #else
40da2e3ebdSchin 
41da2e3ebdSchin #ifndef iswspace
42da2e3ebdSchin #define iswspace(x)	isspace(x)
43da2e3ebdSchin #endif
44da2e3ebdSchin 
45da2e3ebdSchin #endif
46da2e3ebdSchin 
/*
 * Flag bits kept in each entry of the Wc_t type[] table (one entry per
 * byte value, filled in by wc_init()).  The low three bits of an entry
 * are not flags: for a UTF-8 lead byte wc_init() stores there the number
 * of continuation bytes that follow, and wc_count() reads it with (c&7).
 */
#define	WC_SP		0x08	/* byte is white space (new-line included) */
#define	WC_NL		0x10	/* byte is the new-line character */
#define	WC_MB		0x20	/* byte belongs to a multibyte sequence */
#define	WC_ERR		0x40	/* byte marked invalid by wc_init() */

/* predicates on a type[] entry; each yields the flag bit or 0 */
#define eol(c)		((c)&WC_NL)
#define mbc(c)		((c)&WC_MB)
#define spc(c)		((c)&WC_SP)

/*
 * Convert the multibyte sequence of at most n bytes at p into the wide
 * character w through the ast mb_towc hook.  Arguments and the whole
 * expansion are parenthesized so that any expression may be passed.
 */
#define mb2wc(w,p,n)	((*ast.mb_towc)(&(w),(char*)(p),(n)))
56da2e3ebdSchin 
wc_init(int mode)57da2e3ebdSchin Wc_t* wc_init(int mode)
58da2e3ebdSchin {
59da2e3ebdSchin 	register int	n;
60da2e3ebdSchin 	register int	w;
61da2e3ebdSchin 	Wc_t*		wp;
62da2e3ebdSchin 
63da2e3ebdSchin 	if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
6434f9b3eeSRoland Mainz 		return 0;
6534f9b3eeSRoland Mainz 	if (!mbwide())
6634f9b3eeSRoland Mainz 		wp->mb = 0;
6734f9b3eeSRoland Mainz #if _hdr_wchar && _hdr_wctype && _lib_iswctype
6834f9b3eeSRoland Mainz 	else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
6934f9b3eeSRoland Mainz 		wp->mb = 1;
7034f9b3eeSRoland Mainz #endif
7134f9b3eeSRoland Mainz 	else
7234f9b3eeSRoland Mainz 		wp->mb = -1;
73da2e3ebdSchin 	w = mode & WC_WORDS;
74da2e3ebdSchin 	for (n = (1<<CHAR_BIT); --n >= 0;)
7534f9b3eeSRoland Mainz 		wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
7634f9b3eeSRoland Mainz 	wp->type['\n'] = WC_SP|WC_NL;
7734f9b3eeSRoland Mainz 	if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
7834f9b3eeSRoland Mainz 	{
7934f9b3eeSRoland Mainz 		for (n = 0; n < 64; n++)
8034f9b3eeSRoland Mainz 		{
8134f9b3eeSRoland Mainz 			wp->type[0x80+n] |= WC_MB;
8234f9b3eeSRoland Mainz 			if (n<32)
8334f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+1;
8434f9b3eeSRoland Mainz 			else if (n<48)
8534f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+2;
8634f9b3eeSRoland Mainz 			else if (n<56)
8734f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+3;
8834f9b3eeSRoland Mainz 			else if (n<60)
8934f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+4;
9034f9b3eeSRoland Mainz 			else if (n<62)
9134f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+5;
9234f9b3eeSRoland Mainz 		}
9334f9b3eeSRoland Mainz 		wp->type[0xc0] = WC_MB|WC_ERR;
9434f9b3eeSRoland Mainz 		wp->type[0xc1] = WC_MB|WC_ERR;
9534f9b3eeSRoland Mainz 		wp->type[0xfe] = WC_MB|WC_ERR;
9634f9b3eeSRoland Mainz 		wp->type[0xff] = WC_MB|WC_ERR;
9734f9b3eeSRoland Mainz 	}
9834f9b3eeSRoland Mainz 	wp->mode = mode;
9934f9b3eeSRoland Mainz 	return wp;
10034f9b3eeSRoland Mainz }
10134f9b3eeSRoland Mainz 
invalid(const char * file,int nlines)10234f9b3eeSRoland Mainz static int invalid(const char *file, int nlines)
10334f9b3eeSRoland Mainz {
10434f9b3eeSRoland Mainz 	error_info.file = (char*)file;
10534f9b3eeSRoland Mainz 	error_info.line = nlines;
10634f9b3eeSRoland Mainz 	error(ERROR_SYSTEM|1, "invalid multibyte character");
10734f9b3eeSRoland Mainz 	error_info.file = 0;
10834f9b3eeSRoland Mainz 	error_info.line = 0;
10934f9b3eeSRoland Mainz 	return nlines;
11034f9b3eeSRoland Mainz }
11134f9b3eeSRoland Mainz 
11234f9b3eeSRoland Mainz /*
11334f9b3eeSRoland Mainz  * handle utf space characters
11434f9b3eeSRoland Mainz  */
11534f9b3eeSRoland Mainz 
/*
 * Advance the recognizer for UTF-8 encoded white-space characters.
 *
 * <state> is the current recognizer state and <c> the next byte of the
 * sequence.  The return value is the next state: 0 when the sequence can
 * no longer be a white-space character, 10 when a complete white-space
 * character has been recognized, otherwise an intermediate state.
 *
 * States, as entered from wc_count() and from the transitions below:
 *	1	lead byte E1 seen
 *	2	lead byte E2 seen
 *	3	lead byte E3 seen
 *	4	E1 9A seen	(80 completes U+1680)
 *	5	E3 80 seen	(80 completes U+3000)
 *	6	E2 80 seen	(U+2000 row)
 *	7	E2 81 seen	(9F completes U+205F)
 *	8	lead byte C2 seen; <c> equals the code point (U+0080-U+00BF)
 * Any other state is returned unchanged.
 */
static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		return c==0x9a ? 4 : 0;
	case 2:
		/* 0x80 -> state 6, 0x81 -> state 7 */
		return (c==0x80 || c==0x81) ? 6+(c&1) : 0;
	case 3:
		return c==0x80 ? 5 : 0;
	case 4:
	case 5:
		return c==0x80 ? 10 : 0;
	case 6:
		/*
		 * U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR are
		 * encoded E2 80 A8 and E2 80 A9.  (0xa0/0xa1 would be the
		 * daggers U+2020/U+2021, which are not white space.)
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		if((c&0xf0)==0x80)
		{
			/* U+2000 - U+200F: only 0x0 - 0xb are accepted */
			c &= 0xf;
			if(c==7)
				/* U+2007 only if the locale says so */
				return(iswspace(0x2007)?10:0);
			return(c<=0xb ? 10 : 0);
		}
		/* U+202F only if the locale says so */
		if(c==0xaf && iswspace(0x202f))
			return(10);
		return(0);
	case 7:
		return c==0x9f ? 10 : 0;
	case 8:
		return (iswspace(c)?10:0);
	}
	return state;
}
157da2e3ebdSchin 
158da2e3ebdSchin /*
159da2e3ebdSchin  * compute the line, word, and character count for file <fd>
160da2e3ebdSchin  */
16134f9b3eeSRoland Mainz 
/*
 * Count lines, words and characters on stream <fd>.
 *
 * wp	handle from wc_init(); wp->mode and wp->mb select the algorithm,
 *	and the results are stored in wp->lines, wp->words, wp->chars
 *	and wp->longest
 * fd	stream to read; it is consumed with sfreserve() until that
 *	returns no more data
 * file	name used only when reporting an invalid multibyte character
 *
 * Always returns 0.
 *
 * Three strategies are used:
 *   - wp->mb < 0 with character or word counting: every character is
 *     decoded with mb2wc()/mbchar()
 *   - single byte locale without WC_LONGEST, or UTF-8 with only
 *     line/byte counting: plain byte scan driven by the type[] table
 *   - everything else: byte scan that also follows UTF-8 sequences
 *     through the WC_MB/WC_ERR bits of the type[] table
 * The two byte scans overwrite the last byte of each reserved buffer
 * with a '\n' sentinel so the inner loops need no bounds check.
 */
int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
{
	register char*		type = wp->type;
	register unsigned char*	cp;
	register Sfoff_t	nbytes;
	register Sfoff_t	nchars;
	register Sfoff_t	nwords;
	register Sfoff_t	nlines;
	register Sfoff_t	eline = -1;	/* line of the last reported error */
	register Sfoff_t	longest = 0;
	register ssize_t	c;
	register unsigned char*	endbuff;
	register int		lasttype = WC_SP;
	unsigned int		lastchar;
	ssize_t			n;
	ssize_t			o;
	unsigned char*		buff;
	wchar_t			x;
	unsigned char		side[32];

	/* NOTE(review): presumably needed because the sentinel is stored into the reserved buffer - confirm against sfio */
	sfset(fd,SF_WRITE,1);
	nlines = nwords = nchars = nbytes = 0;
	wp->longest = 0;
	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
	{
		/* generic multibyte locale: decode one character at a time */
		cp = buff = endbuff = 0;
		for (;;)
		{
			if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
			{
				/*
				 * fewer than sizeof(side) bytes left: the
				 * character may continue in the next buffer,
				 * so join the tail and the new data in side[]
				 */
				if ((o = endbuff-cp) < sizeof(side))
				{
					if (buff)
					{
						if (o)
							memcpy(side, cp, o);
						mbinit();
					}
					else
						o = 0;
					cp = side + o;
					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
					{
						/* end of input: account for the final line */
						if ((nchars - longest) > wp->longest)
							wp->longest = nchars - longest;
						break;
					}
					nbytes += n;
					if ((c = sizeof(side) - o) > n)
						c = n;
					if (c)
						memcpy(cp, buff, c);
					endbuff = buff + n;
					cp = side;
					x = mbchar(cp);
					if ((cp-side) < o)
					{
						/*
						 * NOTE(review): cp has just been set to
						 * buff, so (cp-side) subtracts pointers
						 * into different arrays; the two
						 * statements look swapped - confirm
						 * intent before changing.
						 */
						cp = buff;
						nchars += (cp-side) - 1;
					}
					else
						/* resume in buff after the bytes taken from it */
						cp = buff + (cp-side) - o;
				}
				else
				{
					/* undecodable byte in mid buffer: skip it */
					cp++;
					x = -1;
				}
				/* report at most once per line */
				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
					eline = invalid(file, nlines);
			}
			else
				cp += n ? n : 1;
			if (x == '\n')
			{
				if ((nchars - longest) > wp->longest)
					wp->longest = nchars - longest;
				/* longest now marks the start of the next line */
				longest = nchars + 1;
				nlines++;
				lasttype = 1;
			}
			else if (iswspace(x))
				lasttype = 1;
			else if (lasttype)
			{
				/* first non-space after space starts a word */
				lasttype = 0;
				nwords++;
			}
			nchars++;
		}
		if (!(wp->mode & WC_MBYTE))
			nchars = nbytes;
	}
	else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
	{
		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
		{
			/* only lines and bytes wanted */
			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				endbuff = cp + c;
				/* count a trailing new-line here, else plant the sentinel */
				if (*--endbuff == '\n')
					nlines++;
				else
					*endbuff = '\n';
				for (;;)
					if (*cp++ == '\n')
					{
						/* the last byte was handled above */
						if (cp > endbuff)
							break;
						nlines++;
					}
			}
		}
		else
		{
			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				/* check to see whether first character terminates word */
				if (c==1)
				{
					if (eol(lasttype))
						nlines++;
					if ((c = type[*cp]) && !lasttype)
						nwords++;
					lasttype = c;
					continue;
				}
				if (!lasttype && type[*cp])
					nwords++;
				/* save the last byte and replace it by the sentinel */
				lastchar = cp[--c];
				*(endbuff = cp+c) = '\n';
				c = lasttype;
				/* process each buffer */
				for (;;)
				{
					/* process spaces and new-lines */
					do
					{
						if (eol(c))
							for (;;)
							{
								/* check for end of buffer */
								if (cp > endbuff)
									goto beob;
								nlines++;
								if (*cp != '\n')
									break;
								cp++;
							}
					} while (c = type[*cp++]);
					/* skip over word characters */
					while (!(c = type[*cp++]));
					nwords++;
				}
			beob:
				/* c becomes the type of the byte before the sentinel */
				if ((cp -= 2) >= buff)
					c = type[*cp];
				else
					c = lasttype;
				lasttype = type[lastchar];
				/* see if was in word */
				if (!c && !lasttype)
					nwords--;
			}
			/* account for the saved last byte of the final buffer */
			if (eol(lasttype))
				nlines++;
			else if (!lasttype)
				nwords++;
		}
	}
	else
	{
		int		lineoff=0;	/* bytes of the current line seen in earlier buffers */
		int		skip=0;		/* continuation bytes still expected */
		int		adjust=0;	/* bytes that are not characters of their own */
		int		state=0;	/* chkstate() state for UTF-8 white space */
		int		oldc;
		int		xspace;
		int		wasspace = 1;
		unsigned char*	start;

		lastchar = 0;
		start = (endbuff = side) + 1;
		/* does the locale treat U+00A0 or U+0085 as white space? */
		xspace = iswspace(0xa0) || iswspace(0x85);
		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
		{
			nbytes += c;
			nchars += c;
			start = cp-lineoff;
			/* check to see whether first character terminates word */
			if(c==1)
			{
				if(eol(lasttype))
					nlines++;
				if((c = type[*cp]) && !lasttype)
					nwords++;
				lasttype = c;
				endbuff = start;
				continue;
			}
			/* save the last byte and replace it by the sentinel */
			lastchar = cp[--c];
			endbuff = cp+c;
			cp[c] = '\n';
			/* previous buffer ended inside a multibyte sequence */
			if(mbc(lasttype))
			{
				c = lasttype;
				goto mbyte;
			}
			if(!lasttype && spc(type[*cp]))
				nwords++;
			c = lasttype;
			/* process each buffer */
			for (;;)
			{
				/* process spaces and new-lines */
			spaces:
				do
				{
					if (eol(c))
					{
						/* check for end of buffer */
						if (cp > endbuff)
							goto eob;
						if(wp->mode&WC_LONGEST)
						{
							if((cp-start)-adjust > longest)
								longest = (cp-start)-adjust-1;
							start = cp;
						}
						nlines++;
						nchars -= adjust;
						adjust = 0;
					}
				} while (spc(c = type[*cp++]));
				wasspace=1;
				if(mbc(c))
				{
				mbyte:
					do
					{
						if(c&WC_ERR)
							goto err;
						/* lead byte while continuation bytes are expected */
						if(skip && (c&7))
							break;
						if(!skip)
						{
							/* continuation byte without a lead byte */
							if(!(c&7))
							{
								skip=1;
								break;
							}
							skip = (c&7);
							adjust += skip;
							state = 0;
							/* lead bytes E1, E2, E3 start states 1, 2, 3 */
							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
								oldc = *cp;
							else if(xspace && cp[-1]==0xc2)
							{
								state = 8;
								oldc = *cp;
							}
						}
						else
						{
							skip--;
							if(state && (state=chkstate(state,oldc)))
							{
								/* complete multibyte white-space character */
								if(state==10)
								{
									if(!wasspace)
										nwords++;
									wasspace = 1;
									state=0;
									goto spaces;
								}
								oldc = *cp;
							}
						}
					} while (mbc(c = type[*cp++]));
					wasspace = 0;
					if(skip)
					{
						/* sequence continues in the next buffer */
						if(eol(c) && (cp > endbuff))
							goto eob;
				err:
						skip = 0;
						state = 0;
						if(eline!=nlines && !(wp->mode & WC_QUIET))
							eline = invalid(file, nlines);
						/*
						 * resynchronize: skip invalid bytes and
						 * stray continuation bytes, but stop at a
						 * valid lead byte so the next character
						 * is decoded normally
						 */
						while(mbc(c) && ((c&WC_ERR) || (c&7)==0))
							c=type[*cp++];
						if(eol(c) && (cp > endbuff))
						{
							c = WC_MB|WC_ERR;
							goto eob;
						}
						if(mbc(c))
							goto mbyte;
						else if(c&WC_SP)
							goto spaces;
					}
					if(spc(c))
					{
						nwords++;
						continue;
					}
				}
				/* skip over word characters */
				while(!(c = type[*cp++]));
				if(mbc(c))
					goto mbyte;
				nwords++;
			}
		eob:
			/* remember how much of the current line is already consumed */
			lineoff = cp-start;
			if((cp -= 2) >= buff)
				c = type[*cp];
			else
				c = lasttype;
			lasttype = type[lastchar];
			/* see if was in word */
			if(!c && !lasttype)
				nwords--;
		}
		/* the last line may be the longest and may lack a new-line */
		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
		wp->longest = longest;
		/* account for the saved last byte of the final buffer */
		if (eol(lasttype))
			nlines++;
		else if (!lasttype)
			nwords++;
		if (wp->mode & WC_MBYTE)
			nchars -= adjust;
		else
			nchars = nbytes;
	}
	wp->chars = nchars;
	wp->words = nwords;
	wp->lines = nlines;
	return 0;
}
50534f9b3eeSRoland Mainz 
506