xref: /titanic_51/usr/src/lib/libcmd/common/wclib.c (revision ef049e29cf099834a7d2659dff30b0c6a6871c5b)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1992-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * AT&T Bell Laboratories
25  *
26  * library interface for word count
27  */
28 
29 #include <cmd.h>
30 #include <wc.h>
31 #include <ctype.h>
32 
33 #if _hdr_wchar && _hdr_wctype
34 
35 #include <wchar.h>
36 #include <wctype.h>
37 
38 #else
39 
40 #ifndef iswspace
41 #define iswspace(x)	isspace(x)
42 #endif
43 
44 #endif
45 
/*
 * endline(c): true when the character class <c> is the new-line marker.
 * When (signed char)-1 is negative the test is simply <c> < 0; otherwise
 * <c> is compared with (char)-1.  wc_init() stores -1 in the table slot
 * for '\n', which is the only negative class it produces.
 */
46 #define endline(c)	(((signed char)-1)<0?(c)<0:(c)==((char)-1))
/*
 * mbok(p,n): 0 when fewer than one byte remains (<n> < 1).  Otherwise,
 * when mbwide() is true, the result is whether (*ast.mb_towc)() returns
 * a non-negative value for the <n> bytes at <p>; when mbwide() is false
 * the result is 1.
 */
47 #define mbok(p,n)	(((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1)
48 
49 Wc_t *wc_init(int mode)
50 {
51 	register int	n;
52 	register int	w;
53 	Wc_t*		wp;
54 
55 	if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
56 		return(0);
57 	wp->mode = mode;
58 	w = mode & WC_WORDS;
59 	for(n=(1<<CHAR_BIT);--n >=0;)
60 		wp->space[n] = w ? !!isspace(n) : 0;
61 	wp->space['\n'] = -1;
62 	return(wp);
63 }
64 
65 /*
66  * compute the line, word, and character count for file <fd>
    *
    * The totals are stored in wp->chars, wp->words and wp->lines.
    * wp->longest is reset to 0 and is only updated on the
    * WC_LONGEST|WC_MBYTE path.  <file> is used only as error_info.file
    * when an invalid multibyte character is reported.  The return value
    * is always 0.
67  */
68 int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
69 {
70 	register signed char	*space = wp->space;	/* per-byte class table: wc_init() stores 0, 1, or -1 for '\n' */
71 	register unsigned char	*cp;			/* scan position in the current buffer */
72 	register Sfoff_t	nchars;			/* running character total */
73 	register Sfoff_t	nwords;			/* running word total */
74 	register Sfoff_t	nlines;			/* running line total */
75 	register Sfoff_t	eline;			/* value of nlines when the last multibyte error was reported */
76 	register Sfoff_t	longest;		/* value of nchars at the most recent new-line */
77 	register ssize_t	c;			/* byte path: buffer length, then class of the byte just scanned */
78 	register unsigned char	*endbuff;		/* multibyte path: end of data; byte path: address of the sentinel */
79 	register int		lasttype = 1;		/* class of the last character seen; starts out as "space" */
80 	unsigned int		lastchar;		/* real last byte of the buffer, saved before the sentinel overwrites it */
81 	unsigned char		*buff;			/* start of the current buffer */
82 	wchar_t			x;			/* character produced by mbchar() */
83 
	/* NOTE(review): presumably this makes the stream buffer writable so that the */
	/* sentinel store in the byte path is permitted -- confirm against the sfio manual */
84 	sfset(fd,SF_WRITE,1);
85 	nlines = nwords = nchars = 0;
86 	wp->longest = 0;
87 	if (wp->mode & (WC_LONGEST|WC_MBYTE))
88 	{
		/*
		 * character-at-a-time path: every character is fetched with mbchar();
		 * a word is counted at its first character, i.e. when a non-space
		 * character is seen while lasttype is non-zero
		 */
89 		longest = 0;
90 		eline = -1;
91 		cp = buff = endbuff = 0;
92 		for (;;)
93 		{
			/* refill when nothing is left or the bytes at cp are rejected by mbok() */
94 			if (!mbok(cp, endbuff-cp))
95 			{
				/* NOTE(review): presumably this consumes the cp-buff bytes already */
				/* scanned and releases the SF_LOCKR reservation -- confirm */
96 				if (buff)
97 					sfread(fd, buff, cp-buff);
98 				if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR)))
99 					break;
100 				endbuff = (cp = buff) + sfvalue(fd);
101 			}
			/* counted before the validity check, so invalid characters are included */
102 			nchars++;
103 			x = mbchar(cp);
104 			if (x == -1)
105 			{
				/* eline remembers the line of the last report: at most one message per line */
106 				if (eline != nlines && !(wp->mode & WC_QUIET))
107 				{
108 					error_info.file = (char*)file;
109 					error_info.line = eline = nlines;
110 					error(ERROR_SYSTEM|1, "invalid multibyte character");
111 					error_info.file = 0;
112 					error_info.line = 0;
113 				}
114 			}
115 			else if (x == '\n')
116 			{
				/*
				 * nchars - longest is the number of characters since the previous
				 * new-line.  NOTE(review): nchars was already incremented for this
				 * new-line, so the difference includes it, and a final line with no
				 * new-line never reaches this test -- confirm that both are intended
				 */
117 				if ((nchars - longest) > wp->longest)
118 					wp->longest = nchars - longest;
119 				longest = nchars;
120 				nlines++;
121 				lasttype = 1;
122 			}
123 			else if (iswspace(x))
124 				lasttype = 1;
125 			else if (lasttype)
126 			{
				/* first character of a new word */
127 				lasttype = 0;
128 				nwords++;
129 			}
130 		}
131 	}
132 	else
133 	{
		/*
		 * byte path: each buffer is scanned through the space[] table.
		 * lasttype carries the class of the last byte of the previous buffer
		 * (0 word, non-zero space, new-line marker per endline()).  A new-line
		 * is counted when the byte after it is examined, or after the loop
		 * for the last one; a word is counted when it ends.
		 */
134 		for (;;)
135 		{
136 			/* fill next buffer and check for end-of-file */
137 			if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0)
138 				break;
			/* NOTE(review): the result of sfread() is not checked */
139 			sfread(fd,(char*)(cp=buff),c);
140 			nchars += c;
141 			/* check to see whether first character terminates word */
			/* a one-byte buffer is handled here, without the sentinel scan below */
142 			if(c==1)
143 			{
				/* the previous buffer ended with a new-line that is still uncounted */
144 				if(endline(lasttype))
145 					nlines++;
				/* this byte is space and the previous one was in a word: the word ended */
146 				if((c = space[*cp]) && !lasttype)
147 					nwords++;
148 				lasttype = c;
149 				continue;
150 			}
			/* a word left open by the previous buffer ends at the first byte of this one */
151 			if(!lasttype && space[*cp])
152 				nwords++;
			/*
			 * save the last byte and overwrite it with a new-line sentinel so the
			 * scanning loops below stop without testing the position on every byte
			 */
153 			lastchar = cp[--c];
154 			cp[c] = '\n';
155 			endbuff = cp+c;
			/* start the scan as if the previous buffer's last byte had just been read */
156 			c = lasttype;
157 			/* process each buffer */
158 			for (;;)
159 			{
160 				/* process spaces and new-lines */
161 				do if (endline(c))
162 				{
163 					for (;;)
164 					{
165 						/* check for end of buffer */
						/* cp is past the sentinel: the new-line just seen was not real data */
166 						if (cp > endbuff)
167 							goto eob;
						/* count the new-line whose class is in c, or the '\n' just stepped over */
168 						nlines++;
169 						if (*cp != '\n')
170 							break;
171 						cp++;
172 					}
173 				} while (c = space[*cp++]);
174 				/* skip over word characters */
				/* the sentinel has a non-zero class, so this loop stops at endbuff at the latest */
175 				while(!(c = space[*cp++]));
176 				nwords++;
177 			}
178 		eob:
			/* step back from one past the sentinel to the byte in front of it */
179 			if((cp -= 2) >= buff)
180 				c = space[*cp];
181 			else
182 				c  = lasttype;
			/* from here on lasttype describes the real last byte, not the sentinel */
183 			lasttype = space[lastchar];
184 			/* see if was in word */
			/* the sentinel closed a word that in fact continues through the real last byte: */
			/* undo that count, the word is counted once it really ends */
185 			if(!c && !lasttype)
186 				nwords--;
187 		}
		/* account for the last byte of the input: a pending new-line, or a word still open */
188 		if(endline(lasttype))
189 			nlines++;
190 		else if(!lasttype)
191 			nwords++;
192 	}
193 	wp->chars = nchars;
194 	wp->words = nwords;
195 	wp->lines = nlines;
196 	return(0);
197 }
198