xref: /titanic_51/usr/src/lib/libcmd/common/wclib.c (revision 34f9b3eef6fdadbda0a846aa4d68691ac40eace5)
1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*34f9b3eeSRoland Mainz *          Copyright (c) 1992-2009 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6da2e3ebdSchin *                  Common Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10da2e3ebdSchin *            http://www.opensource.org/licenses/cpl1.0.txt             *
11da2e3ebdSchin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                                                                      *
20da2e3ebdSchin ***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin  * David Korn
24da2e3ebdSchin  * AT&T Bell Laboratories
25da2e3ebdSchin  *
26da2e3ebdSchin  * library interface for word count
27da2e3ebdSchin  */
28da2e3ebdSchin 
29da2e3ebdSchin #include <cmd.h>
30da2e3ebdSchin #include <wc.h>
31da2e3ebdSchin #include <ctype.h>
32da2e3ebdSchin 
337c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype
34da2e3ebdSchin 
35da2e3ebdSchin #include <wchar.h>
36da2e3ebdSchin #include <wctype.h>
37*34f9b3eeSRoland Mainz #include <lc.h>
38da2e3ebdSchin 
39da2e3ebdSchin #else
40da2e3ebdSchin 
41da2e3ebdSchin #ifndef iswspace
42da2e3ebdSchin #define iswspace(x)	isspace(x)
43da2e3ebdSchin #endif
44da2e3ebdSchin 
45da2e3ebdSchin #endif
46da2e3ebdSchin 
/*
 * Per-byte classification flags stored in Wc_t.type[].
 * For bytes flagged WC_MB, wc_init() also stores a small count in the
 * low three bits (tested with `c&7` by wc_count()); a zero count marks
 * a byte that does not start a sequence.
 */
#define	WC_SP		0x08	/* byte is white space */
#define	WC_NL		0x10	/* byte is a new-line */
#define	WC_MB		0x20	/* byte belongs to a multibyte sequence */
#define	WC_ERR		0x40	/* byte can never appear in a valid sequence */

/* flag tests: each yields the masked flag value (non-zero when set) */
#define eol(c)		((c)&WC_NL)
#define mbc(c)		((c)&WC_MB)
#define spc(c)		((c)&WC_SP)
/*
 * convert the multibyte sequence of at most n bytes at p into wide
 * character w via the ast conversion hook; arguments are parenthesized
 * so expression arguments expand safely
 */
#define mbwc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))
56da2e3ebdSchin 
57da2e3ebdSchin Wc_t* wc_init(int mode)
58da2e3ebdSchin {
59da2e3ebdSchin 	register int	n;
60da2e3ebdSchin 	register int	w;
61da2e3ebdSchin 	Wc_t*		wp;
62da2e3ebdSchin 
63da2e3ebdSchin 	if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
64*34f9b3eeSRoland Mainz 		return 0;
65*34f9b3eeSRoland Mainz 	if (!mbwide())
66*34f9b3eeSRoland Mainz 		wp->mb = 0;
67*34f9b3eeSRoland Mainz #if _hdr_wchar && _hdr_wctype && _lib_iswctype
68*34f9b3eeSRoland Mainz 	else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
69*34f9b3eeSRoland Mainz 		wp->mb = 1;
70*34f9b3eeSRoland Mainz #endif
71*34f9b3eeSRoland Mainz 	else
72*34f9b3eeSRoland Mainz 		wp->mb = -1;
73da2e3ebdSchin 	w = mode & WC_WORDS;
74da2e3ebdSchin 	for (n = (1<<CHAR_BIT); --n >= 0;)
75*34f9b3eeSRoland Mainz 		wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
76*34f9b3eeSRoland Mainz 	wp->type['\n'] = WC_SP|WC_NL;
77*34f9b3eeSRoland Mainz 	if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
78*34f9b3eeSRoland Mainz 	{
79*34f9b3eeSRoland Mainz 		for (n = 0; n < 64; n++)
80*34f9b3eeSRoland Mainz 		{
81*34f9b3eeSRoland Mainz 			wp->type[0x80+n] |= WC_MB;
82*34f9b3eeSRoland Mainz 			if (n<32)
83*34f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+1;
84*34f9b3eeSRoland Mainz 			else if (n<48)
85*34f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+2;
86*34f9b3eeSRoland Mainz 			else if (n<56)
87*34f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+3;
88*34f9b3eeSRoland Mainz 			else if (n<60)
89*34f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+4;
90*34f9b3eeSRoland Mainz 			else if (n<62)
91*34f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+5;
92*34f9b3eeSRoland Mainz 		}
93*34f9b3eeSRoland Mainz 		wp->type[0xc0] = WC_MB|WC_ERR;
94*34f9b3eeSRoland Mainz 		wp->type[0xc1] = WC_MB|WC_ERR;
95*34f9b3eeSRoland Mainz 		wp->type[0xfe] = WC_MB|WC_ERR;
96*34f9b3eeSRoland Mainz 		wp->type[0xff] = WC_MB|WC_ERR;
97*34f9b3eeSRoland Mainz 	}
98*34f9b3eeSRoland Mainz 	wp->mode = mode;
99*34f9b3eeSRoland Mainz 	return wp;
100*34f9b3eeSRoland Mainz }
101*34f9b3eeSRoland Mainz 
102*34f9b3eeSRoland Mainz static int invalid(const char *file, int nlines)
103*34f9b3eeSRoland Mainz {
104*34f9b3eeSRoland Mainz 	error_info.file = (char*)file;
105*34f9b3eeSRoland Mainz 	error_info.line = nlines;
106*34f9b3eeSRoland Mainz 	error(ERROR_SYSTEM|1, "invalid multibyte character");
107*34f9b3eeSRoland Mainz 	error_info.file = 0;
108*34f9b3eeSRoland Mainz 	error_info.line = 0;
109*34f9b3eeSRoland Mainz 	return nlines;
110*34f9b3eeSRoland Mainz }
111*34f9b3eeSRoland Mainz 
/*
 * handle utf space characters
 *
 * Advance the small state machine that wc_count() uses to recognize
 * multibyte UTF-8 white space.  <state> is the current state and <c>
 * the next byte of the sequence (for state 8, the byte following 0xc2,
 * which is passed straight to iswspace()).
 *
 * wc_count() enters state 1, 2 or 3 after a lead byte 0xe1, 0xe2 or
 * 0xe3 respectively, and state 8 after lead byte 0xc2.
 *
 * Returns 10 when the sequence is a space character, 0 when it cannot
 * be one, and otherwise the next intermediate state.  States not
 * listed below are returned unchanged.
 */

static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		/* e1 9a ..: heading for U+1680 */
		state = (c==0x9a?4:0);
		break;
	case 2:
		/* e2 80 .. -> state 6, e2 81 .. -> state 7 */
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	case 3:
		/* e3 80 ..: heading for U+3000 */
		state = (c==0x80?5:0);
		break;
	case 4:
		/* e1 9a 80 == U+1680 */
		state = (c==0x80?10:0);
		break;
	case 5:
		/* e3 80 80 == U+3000 */
		state = (c==0x80?10:0);
		break;
	case 6:
		/* third byte of e2 80 xx, i.e. U+2000 + (xx - 0x80) */
		state = 0;
		/*
		 * U+2028 (line separator) and U+2029 (paragraph separator)
		 * encode as e2 80 a8 and e2 80 a9; a0/a1 would be the
		 * dagger characters U+2020/U+2021, which are not spaces
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			/* U+2007 is a space only if the locale says so */
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			/* U+2000 .. U+200b */
			if(c<=0xb)
				return(10);
		}
		/* U+202f is a space only if the locale says so */
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	case 7:
		/* e2 81 9f == U+205f */
		state = (c==0x9f?10:0);
		break;
	case 8:
		/* c2 xx == U+00xx: let the locale decide */
		return (iswspace(c)?10:0);
	}
	return state;
}
157da2e3ebdSchin 
158da2e3ebdSchin /*
159da2e3ebdSchin  * compute the line, word, and character count for file <fd>
160da2e3ebdSchin  */
161*34f9b3eeSRoland Mainz 
162da2e3ebdSchin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
163da2e3ebdSchin {
164*34f9b3eeSRoland Mainz 	register char*		type = wp->type;
165da2e3ebdSchin 	register unsigned char*	cp;
166da2e3ebdSchin 	register Sfoff_t	nchars;
167da2e3ebdSchin 	register Sfoff_t	nwords;
168da2e3ebdSchin 	register Sfoff_t	nlines;
169*34f9b3eeSRoland Mainz 	register Sfoff_t	eline = -1;
170*34f9b3eeSRoland Mainz 	register Sfoff_t	longest = 0;
171da2e3ebdSchin 	register ssize_t	c;
172da2e3ebdSchin 	register unsigned char*	endbuff;
173*34f9b3eeSRoland Mainz 	register int		lasttype = WC_SP;
174da2e3ebdSchin 	unsigned int		lastchar;
175*34f9b3eeSRoland Mainz 	ssize_t			n;
176*34f9b3eeSRoland Mainz 	ssize_t			o;
177da2e3ebdSchin 	unsigned char*		buff;
178da2e3ebdSchin 	wchar_t			x;
179*34f9b3eeSRoland Mainz 	unsigned char		side[32];
180da2e3ebdSchin 
181da2e3ebdSchin 	sfset(fd,SF_WRITE,1);
182da2e3ebdSchin 	nlines = nwords = nchars = 0;
183da2e3ebdSchin 	wp->longest = 0;
184*34f9b3eeSRoland Mainz 	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
185da2e3ebdSchin 	{
186da2e3ebdSchin 		cp = buff = endbuff = 0;
187da2e3ebdSchin 		for (;;)
188da2e3ebdSchin 		{
189*34f9b3eeSRoland Mainz 			if (cp >= endbuff || (n = mbwc(x, cp, endbuff-cp)) < 0)
190*34f9b3eeSRoland Mainz 			{
191*34f9b3eeSRoland Mainz 				if ((o = endbuff-cp) < sizeof(side))
192da2e3ebdSchin 				{
193da2e3ebdSchin 					if (buff)
194da2e3ebdSchin 					{
195*34f9b3eeSRoland Mainz 						if (o)
196*34f9b3eeSRoland Mainz 							memcpy(side, cp, o);
197*34f9b3eeSRoland Mainz 						mbinit();
198da2e3ebdSchin 					}
199*34f9b3eeSRoland Mainz 					else
200*34f9b3eeSRoland Mainz 						o = 0;
201*34f9b3eeSRoland Mainz 					cp = side + o;
202*34f9b3eeSRoland Mainz 					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
203da2e3ebdSchin 					{
204da2e3ebdSchin 						if ((nchars - longest) > wp->longest)
205da2e3ebdSchin 							wp->longest = nchars - longest;
206*34f9b3eeSRoland Mainz 						break;
207*34f9b3eeSRoland Mainz 					}
208*34f9b3eeSRoland Mainz 					if ((c = sizeof(side) - o) > n)
209*34f9b3eeSRoland Mainz 						c = n;
210*34f9b3eeSRoland Mainz 					if (c)
211*34f9b3eeSRoland Mainz 						memcpy(cp, buff, c);
212*34f9b3eeSRoland Mainz 					endbuff = buff + n;
213*34f9b3eeSRoland Mainz 					cp = side;
214*34f9b3eeSRoland Mainz 					x = mbchar(cp);
215*34f9b3eeSRoland Mainz 					if ((cp-side) < o)
216*34f9b3eeSRoland Mainz 					{
217*34f9b3eeSRoland Mainz 						cp = buff;
218*34f9b3eeSRoland Mainz 						nchars += (cp-side) - 1;
219*34f9b3eeSRoland Mainz 					}
220*34f9b3eeSRoland Mainz 					else
221*34f9b3eeSRoland Mainz 						cp = buff + (cp-side) - o;
222*34f9b3eeSRoland Mainz 				}
223*34f9b3eeSRoland Mainz 				else
224*34f9b3eeSRoland Mainz 				{
225*34f9b3eeSRoland Mainz 					cp++;
226*34f9b3eeSRoland Mainz 					x = -1;
227*34f9b3eeSRoland Mainz 				}
228*34f9b3eeSRoland Mainz 				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
229*34f9b3eeSRoland Mainz 					eline = invalid(file, nlines);
230*34f9b3eeSRoland Mainz 			}
231*34f9b3eeSRoland Mainz 			else
232*34f9b3eeSRoland Mainz 				cp += n ? n : 1;
233*34f9b3eeSRoland Mainz 			if (x == '\n')
234*34f9b3eeSRoland Mainz 			{
235*34f9b3eeSRoland Mainz 				if ((nchars - longest) > wp->longest)
236*34f9b3eeSRoland Mainz 					wp->longest = nchars - longest;
237*34f9b3eeSRoland Mainz 				longest = nchars + 1;
238da2e3ebdSchin 				nlines++;
239da2e3ebdSchin 				lasttype = 1;
240da2e3ebdSchin 			}
241da2e3ebdSchin 			else if (iswspace(x))
242da2e3ebdSchin 				lasttype = 1;
243da2e3ebdSchin 			else if (lasttype)
244da2e3ebdSchin 			{
245da2e3ebdSchin 				lasttype = 0;
246da2e3ebdSchin 				nwords++;
247da2e3ebdSchin 			}
248*34f9b3eeSRoland Mainz 			nchars++;
249*34f9b3eeSRoland Mainz 		}
250*34f9b3eeSRoland Mainz 	}
251*34f9b3eeSRoland Mainz 	else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
252*34f9b3eeSRoland Mainz 	{
253*34f9b3eeSRoland Mainz 		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
254*34f9b3eeSRoland Mainz 		{
255*34f9b3eeSRoland Mainz 			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
256*34f9b3eeSRoland Mainz 			{
257*34f9b3eeSRoland Mainz 				nchars += c;
258*34f9b3eeSRoland Mainz 				endbuff = cp + c;
259*34f9b3eeSRoland Mainz 				if (*--endbuff == '\n')
260*34f9b3eeSRoland Mainz 					nlines++;
261*34f9b3eeSRoland Mainz 				else
262*34f9b3eeSRoland Mainz 					*endbuff = '\n';
263*34f9b3eeSRoland Mainz 				for (;;)
264*34f9b3eeSRoland Mainz 					if (*cp++ == '\n')
265*34f9b3eeSRoland Mainz 					{
266*34f9b3eeSRoland Mainz 						if (cp > endbuff)
267*34f9b3eeSRoland Mainz 							break;
268*34f9b3eeSRoland Mainz 						nlines++;
269*34f9b3eeSRoland Mainz 					}
270da2e3ebdSchin 			}
271da2e3ebdSchin 		}
272da2e3ebdSchin 		else
273da2e3ebdSchin 		{
274*34f9b3eeSRoland Mainz 			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
275da2e3ebdSchin 			{
276da2e3ebdSchin 				nchars += c;
277da2e3ebdSchin 				/* check to see whether first character terminates word */
278da2e3ebdSchin 				if (c==1)
279da2e3ebdSchin 				{
280*34f9b3eeSRoland Mainz 					if (eol(lasttype))
281da2e3ebdSchin 						nlines++;
282*34f9b3eeSRoland Mainz 					if ((c = type[*cp]) && !lasttype)
283da2e3ebdSchin 						nwords++;
284da2e3ebdSchin 					lasttype = c;
285da2e3ebdSchin 					continue;
286da2e3ebdSchin 				}
287*34f9b3eeSRoland Mainz 				if (!lasttype && type[*cp])
288da2e3ebdSchin 					nwords++;
289da2e3ebdSchin 				lastchar = cp[--c];
290*34f9b3eeSRoland Mainz 				*(endbuff = cp+c) = '\n';
291da2e3ebdSchin 				c = lasttype;
292da2e3ebdSchin 				/* process each buffer */
293da2e3ebdSchin 				for (;;)
294da2e3ebdSchin 				{
295da2e3ebdSchin 					/* process spaces and new-lines */
296*34f9b3eeSRoland Mainz 					do
297da2e3ebdSchin 					{
298*34f9b3eeSRoland Mainz 						if (eol(c))
299da2e3ebdSchin 							for (;;)
300da2e3ebdSchin 							{
301da2e3ebdSchin 								/* check for end of buffer */
302da2e3ebdSchin 								if (cp > endbuff)
303*34f9b3eeSRoland Mainz 									goto beob;
304da2e3ebdSchin 								nlines++;
305da2e3ebdSchin 								if (*cp != '\n')
306da2e3ebdSchin 									break;
307da2e3ebdSchin 								cp++;
308da2e3ebdSchin 							}
309*34f9b3eeSRoland Mainz 					} while (c = type[*cp++]);
310da2e3ebdSchin 					/* skip over word characters */
311*34f9b3eeSRoland Mainz 					while (!(c = type[*cp++]));
312da2e3ebdSchin 					nwords++;
313da2e3ebdSchin 				}
314*34f9b3eeSRoland Mainz 			beob:
315da2e3ebdSchin 				if ((cp -= 2) >= buff)
316*34f9b3eeSRoland Mainz 					c = type[*cp];
317da2e3ebdSchin 				else
318da2e3ebdSchin 					c = lasttype;
319*34f9b3eeSRoland Mainz 				lasttype = type[lastchar];
320da2e3ebdSchin 				/* see if was in word */
321da2e3ebdSchin 				if (!c && !lasttype)
322da2e3ebdSchin 					nwords--;
323da2e3ebdSchin 			}
324*34f9b3eeSRoland Mainz 			if (eol(lasttype))
325da2e3ebdSchin 				nlines++;
326da2e3ebdSchin 			else if (!lasttype)
327da2e3ebdSchin 				nwords++;
328da2e3ebdSchin 		}
329*34f9b3eeSRoland Mainz 	}
330*34f9b3eeSRoland Mainz 	else
331*34f9b3eeSRoland Mainz 	{
332*34f9b3eeSRoland Mainz 		int		lineoff=0;
333*34f9b3eeSRoland Mainz 		int		skip=0;
334*34f9b3eeSRoland Mainz 		int		adjust=0;
335*34f9b3eeSRoland Mainz 		int		state=0;
336*34f9b3eeSRoland Mainz 		int		oldc;
337*34f9b3eeSRoland Mainz 		int		xspace;
338*34f9b3eeSRoland Mainz 		int		wasspace = 1;
339*34f9b3eeSRoland Mainz 		unsigned char*	start;
340*34f9b3eeSRoland Mainz 
341*34f9b3eeSRoland Mainz 		lastchar = 0;
342*34f9b3eeSRoland Mainz 		start = (endbuff = side) + 1;
343*34f9b3eeSRoland Mainz 		xspace = iswspace(0xa0) || iswspace(0x85);
344*34f9b3eeSRoland Mainz 		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
345*34f9b3eeSRoland Mainz 		{
346*34f9b3eeSRoland Mainz 			nchars += c;
347*34f9b3eeSRoland Mainz 			start = cp-lineoff;
348*34f9b3eeSRoland Mainz 			/* check to see whether first character terminates word */
349*34f9b3eeSRoland Mainz 			if(c==1)
350*34f9b3eeSRoland Mainz 			{
351*34f9b3eeSRoland Mainz 				if(eol(lasttype))
352*34f9b3eeSRoland Mainz 					nlines++;
353*34f9b3eeSRoland Mainz 				if((c = type[*cp]) && !lasttype)
354*34f9b3eeSRoland Mainz 					nwords++;
355*34f9b3eeSRoland Mainz 				lasttype = c;
356*34f9b3eeSRoland Mainz 				endbuff = start;
357*34f9b3eeSRoland Mainz 				continue;
358*34f9b3eeSRoland Mainz 			}
359*34f9b3eeSRoland Mainz 			lastchar = cp[--c];
360*34f9b3eeSRoland Mainz 			endbuff = cp+c;
361*34f9b3eeSRoland Mainz 			cp[c] = '\n';
362*34f9b3eeSRoland Mainz 			if(mbc(lasttype))
363*34f9b3eeSRoland Mainz 			{
364*34f9b3eeSRoland Mainz 				c = lasttype;
365*34f9b3eeSRoland Mainz 				goto mbyte;
366*34f9b3eeSRoland Mainz 			}
367*34f9b3eeSRoland Mainz 			if(!lasttype && spc(type[*cp]))
368*34f9b3eeSRoland Mainz 				nwords++;
369*34f9b3eeSRoland Mainz 			c = lasttype;
370*34f9b3eeSRoland Mainz 			/* process each buffer */
371*34f9b3eeSRoland Mainz 			for (;;)
372*34f9b3eeSRoland Mainz 			{
373*34f9b3eeSRoland Mainz 				/* process spaces and new-lines */
374*34f9b3eeSRoland Mainz 			spaces:
375*34f9b3eeSRoland Mainz 				do
376*34f9b3eeSRoland Mainz 				{
377*34f9b3eeSRoland Mainz 					if (eol(c))
378*34f9b3eeSRoland Mainz 					{
379*34f9b3eeSRoland Mainz 						/* check for end of buffer */
380*34f9b3eeSRoland Mainz 						if (cp > endbuff)
381*34f9b3eeSRoland Mainz 							goto eob;
382*34f9b3eeSRoland Mainz 						if(wp->mode&WC_LONGEST)
383*34f9b3eeSRoland Mainz 						{
384*34f9b3eeSRoland Mainz 							if((cp-start)-adjust > longest)
385*34f9b3eeSRoland Mainz 								longest = (cp-start)-adjust-1;
386*34f9b3eeSRoland Mainz 							start = cp;
387*34f9b3eeSRoland Mainz 						}
388*34f9b3eeSRoland Mainz 						nlines++;
389*34f9b3eeSRoland Mainz 						nchars -= adjust;
390*34f9b3eeSRoland Mainz 						adjust = 0;
391*34f9b3eeSRoland Mainz 					}
392*34f9b3eeSRoland Mainz 				} while (spc(c = type[*cp++]));
393*34f9b3eeSRoland Mainz 				wasspace=1;
394*34f9b3eeSRoland Mainz 				if(mbc(c))
395*34f9b3eeSRoland Mainz 				{
396*34f9b3eeSRoland Mainz 				mbyte:
397*34f9b3eeSRoland Mainz 					do
398*34f9b3eeSRoland Mainz 					{
399*34f9b3eeSRoland Mainz 						if(c&WC_ERR)
400*34f9b3eeSRoland Mainz 							goto err;
401*34f9b3eeSRoland Mainz 						if(skip && (c&7))
402*34f9b3eeSRoland Mainz 							break;
403*34f9b3eeSRoland Mainz 						if(!skip)
404*34f9b3eeSRoland Mainz 						{
405*34f9b3eeSRoland Mainz 							if(!(c&7))
406*34f9b3eeSRoland Mainz 							{
407*34f9b3eeSRoland Mainz 								skip=1;
408*34f9b3eeSRoland Mainz 								break;
409*34f9b3eeSRoland Mainz 							}
410*34f9b3eeSRoland Mainz 							skip = (c&7);
411*34f9b3eeSRoland Mainz 							adjust += skip;
412*34f9b3eeSRoland Mainz 							state = 0;
413*34f9b3eeSRoland Mainz 							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
414*34f9b3eeSRoland Mainz 								oldc = *cp;
415*34f9b3eeSRoland Mainz 							else if(xspace && cp[-1]==0xc2)
416*34f9b3eeSRoland Mainz 							{
417*34f9b3eeSRoland Mainz 								state = 8;
418*34f9b3eeSRoland Mainz 								oldc = *cp;
419*34f9b3eeSRoland Mainz 							}
420*34f9b3eeSRoland Mainz 						}
421*34f9b3eeSRoland Mainz 						else
422*34f9b3eeSRoland Mainz 						{
423*34f9b3eeSRoland Mainz 							skip--;
424*34f9b3eeSRoland Mainz 							if(state && (state=chkstate(state,oldc)))
425*34f9b3eeSRoland Mainz 							{
426*34f9b3eeSRoland Mainz 								if(state==10)
427*34f9b3eeSRoland Mainz 								{
428*34f9b3eeSRoland Mainz 									if(!wasspace)
429*34f9b3eeSRoland Mainz 										nwords++;
430*34f9b3eeSRoland Mainz 									wasspace = 1;
431*34f9b3eeSRoland Mainz 									state=0;
432*34f9b3eeSRoland Mainz 									goto spaces;
433*34f9b3eeSRoland Mainz 								}
434*34f9b3eeSRoland Mainz 								oldc = *cp;
435*34f9b3eeSRoland Mainz 							}
436*34f9b3eeSRoland Mainz 						}
437*34f9b3eeSRoland Mainz 					} while (mbc(c = type[*cp++]));
438*34f9b3eeSRoland Mainz 					wasspace = 0;
439*34f9b3eeSRoland Mainz 					if(skip)
440*34f9b3eeSRoland Mainz 					{
441*34f9b3eeSRoland Mainz 						if(eol(c) && (cp > endbuff))
442*34f9b3eeSRoland Mainz 							goto eob;
443*34f9b3eeSRoland Mainz 				err:
444*34f9b3eeSRoland Mainz 						skip = 0;
445*34f9b3eeSRoland Mainz 						state = 0;
446*34f9b3eeSRoland Mainz 						if(eline!=nlines && !(wp->mode & WC_QUIET))
447*34f9b3eeSRoland Mainz 							eline = invalid(file, nlines);
448*34f9b3eeSRoland Mainz 						while(mbc(c) && ((c|WC_ERR) || (c&7)==0))
449*34f9b3eeSRoland Mainz 							c=type[*cp++];
450*34f9b3eeSRoland Mainz 						if(eol(c) && (cp > endbuff))
451*34f9b3eeSRoland Mainz 						{
452*34f9b3eeSRoland Mainz 							c = WC_MB|WC_ERR;
453*34f9b3eeSRoland Mainz 							goto eob;
454*34f9b3eeSRoland Mainz 						}
455*34f9b3eeSRoland Mainz 						if(mbc(c))
456*34f9b3eeSRoland Mainz 							goto mbyte;
457*34f9b3eeSRoland Mainz 						else if(c&WC_SP)
458*34f9b3eeSRoland Mainz 							goto spaces;
459*34f9b3eeSRoland Mainz 					}
460*34f9b3eeSRoland Mainz 					if(spc(c))
461*34f9b3eeSRoland Mainz 					{
462*34f9b3eeSRoland Mainz 						nwords++;
463*34f9b3eeSRoland Mainz 						continue;
464*34f9b3eeSRoland Mainz 					}
465*34f9b3eeSRoland Mainz 				}
466*34f9b3eeSRoland Mainz 				/* skip over word characters */
467*34f9b3eeSRoland Mainz 				while(!(c = type[*cp++]));
468*34f9b3eeSRoland Mainz 				if(mbc(c))
469*34f9b3eeSRoland Mainz 					goto mbyte;
470*34f9b3eeSRoland Mainz 				nwords++;
471*34f9b3eeSRoland Mainz 			}
472*34f9b3eeSRoland Mainz 		eob:
473*34f9b3eeSRoland Mainz 			lineoff = cp-start;
474*34f9b3eeSRoland Mainz 			if((cp -= 2) >= buff)
475*34f9b3eeSRoland Mainz 				c = type[*cp];
476*34f9b3eeSRoland Mainz 			else
477*34f9b3eeSRoland Mainz 				c = lasttype;
478*34f9b3eeSRoland Mainz 			lasttype = type[lastchar];
479*34f9b3eeSRoland Mainz 			/* see if was in word */
480*34f9b3eeSRoland Mainz 			if(!c && !lasttype)
481*34f9b3eeSRoland Mainz 				nwords--;
482*34f9b3eeSRoland Mainz 		}
483*34f9b3eeSRoland Mainz 		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
484*34f9b3eeSRoland Mainz 			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
485*34f9b3eeSRoland Mainz 		wp->longest = longest;
486*34f9b3eeSRoland Mainz 		if (eol(lasttype))
487*34f9b3eeSRoland Mainz 			nlines++;
488*34f9b3eeSRoland Mainz 		else if (!lasttype)
489*34f9b3eeSRoland Mainz 			nwords++;
490*34f9b3eeSRoland Mainz 		nchars -= adjust;
491*34f9b3eeSRoland Mainz 	}
492da2e3ebdSchin 	wp->chars = nchars;
493da2e3ebdSchin 	wp->words = nwords;
494da2e3ebdSchin 	wp->lines = nlines;
495*34f9b3eeSRoland Mainz 	return 0;
496da2e3ebdSchin }
497*34f9b3eeSRoland Mainz 
498