xref: /freebsd/usr.bin/wc/wc.c (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1 /*
2  * Copyright (c) 1980, 1987, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifndef lint
31 static const char copyright[] =
32 "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
33 	The Regents of the University of California.  All rights reserved.\n";
34 #endif /* not lint */
35 
36 #if 0
37 #ifndef lint
38 static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
39 #endif /* not lint */
40 #endif
41 
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44 
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <locale.h>
53 #include <stdint.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <wchar.h>
59 #include <wctype.h>
60 
/*
 * Running totals over every input processed so far.  tlinect, twordct and
 * tcharct are sums; tlongline is the largest per-input longest-line value.
 */
static uintmax_t tlinect;
static uintmax_t twordct;
static uintmax_t tcharct;
static uintmax_t tlongline;

/* Output selection flags, set from the command-line options in main(). */
static int doline;
static int doword;
static int dochar;
static int domulti;
static int dolongline;

/* Raised by siginfo_handler(); examined and cleared by show_cnt(). */
static volatile sig_atomic_t siginfo;

static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
static int	cnt(const char *file);
static void	usage(void);
69 
70 static void
71 siginfo_handler(int sig __unused)
72 {
73 
74 	siginfo = 1;
75 }
76 
77 int
78 main(int argc, char *argv[])
79 {
80 	int ch, errors, total;
81 
82 	(void) setlocale(LC_CTYPE, "");
83 
84 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
85 		switch((char)ch) {
86 		case 'l':
87 			doline = 1;
88 			break;
89 		case 'w':
90 			doword = 1;
91 			break;
92 		case 'c':
93 			dochar = 1;
94 			domulti = 0;
95 			break;
96 		case 'L':
97 			dolongline = 1;
98 			break;
99 		case 'm':
100 			domulti = 1;
101 			dochar = 0;
102 			break;
103 		case '?':
104 		default:
105 			usage();
106 		}
107 	argv += optind;
108 	argc -= optind;
109 
110 	(void)signal(SIGINFO, siginfo_handler);
111 
112 	/* Wc's flags are on by default. */
113 	if (doline + doword + dochar + domulti + dolongline == 0)
114 		doline = doword = dochar = 1;
115 
116 	errors = 0;
117 	total = 0;
118 	if (!*argv) {
119 		if (cnt((char *)NULL) != 0)
120 			++errors;
121 	} else {
122 		do {
123 			if (cnt(*argv) != 0)
124 				++errors;
125 			++total;
126 		} while(*++argv);
127 	}
128 
129 	if (total > 1)
130 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
131 	exit(errors == 0 ? 0 : 1);
132 }
133 
134 static void
135 show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
136     uintmax_t charct, uintmax_t llct)
137 {
138 	FILE *out;
139 
140 	if (!siginfo)
141 		out = stdout;
142 	else {
143 		out = stderr;
144 		siginfo = 0;
145 	}
146 
147 	if (doline)
148 		(void)fprintf(out, " %7ju", linect);
149 	if (doword)
150 		(void)fprintf(out, " %7ju", wordct);
151 	if (dochar || domulti)
152 		(void)fprintf(out, " %7ju", charct);
153 	if (dolongline)
154 		(void)fprintf(out, " %7ju", llct);
155 	if (file != NULL)
156 		(void)fprintf(out, " %s\n", file);
157 	else
158 		(void)fprintf(out, "\n");
159 }
160 
161 static int
162 cnt(const char *file)
163 {
164 	struct stat sb;
165 	uintmax_t linect, wordct, charct, llct, tmpll;
166 	int fd, len, warned;
167 	size_t clen;
168 	short gotsp;
169 	u_char *p;
170 	u_char buf[MAXBSIZE];
171 	wchar_t wch;
172 	mbstate_t mbs;
173 
174 	linect = wordct = charct = llct = tmpll = 0;
175 	if (file == NULL)
176 		fd = STDIN_FILENO;
177 	else {
178 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
179 			warn("%s: open", file);
180 			return (1);
181 		}
182 		if (doword || (domulti && MB_CUR_MAX != 1))
183 			goto word;
184 		/*
185 		 * Line counting is split out because it's a lot faster to get
186 		 * lines than to get words, since the word count requires some
187 		 * logic.
188 		 */
189 		if (doline) {
190 			while ((len = read(fd, buf, MAXBSIZE))) {
191 				if (len == -1) {
192 					warn("%s: read", file);
193 					(void)close(fd);
194 					return (1);
195 				}
196 				if (siginfo) {
197 					show_cnt(file, linect, wordct, charct,
198 					    llct);
199 				}
200 				charct += len;
201 				for (p = buf; len--; ++p)
202 					if (*p == '\n') {
203 						if (tmpll > llct)
204 							llct = tmpll;
205 						tmpll = 0;
206 						++linect;
207 					} else
208 						tmpll++;
209 			}
210 			tlinect += linect;
211 			if (dochar)
212 				tcharct += charct;
213 			if (dolongline) {
214 				if (llct > tlongline)
215 					tlongline = llct;
216 			}
217 			show_cnt(file, linect, wordct, charct, llct);
218 			(void)close(fd);
219 			return (0);
220 		}
221 		/*
222 		 * If all we need is the number of characters and it's a
223 		 * regular file, just stat the puppy.
224 		 */
225 		if (dochar || domulti) {
226 			if (fstat(fd, &sb)) {
227 				warn("%s: fstat", file);
228 				(void)close(fd);
229 				return (1);
230 			}
231 			if (S_ISREG(sb.st_mode)) {
232 				charct = sb.st_size;
233 				show_cnt(file, linect, wordct, charct, llct);
234 				tcharct += charct;
235 				(void)close(fd);
236 				return (0);
237 			}
238 		}
239 	}
240 
241 	/* Do it the hard way... */
242 word:	gotsp = 1;
243 	warned = 0;
244 	memset(&mbs, 0, sizeof(mbs));
245 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
246 		if (len == -1) {
247 			warn("%s: read", file != NULL ? file : "stdin");
248 			(void)close(fd);
249 			return (1);
250 		}
251 		p = buf;
252 		while (len > 0) {
253 			if (siginfo)
254 				show_cnt(file, linect, wordct, charct, llct);
255 			if (!domulti || MB_CUR_MAX == 1) {
256 				clen = 1;
257 				wch = (unsigned char)*p;
258 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
259 			    (size_t)-1) {
260 				if (!warned) {
261 					errno = EILSEQ;
262 					warn("%s",
263 					    file != NULL ? file : "stdin");
264 					warned = 1;
265 				}
266 				memset(&mbs, 0, sizeof(mbs));
267 				clen = 1;
268 				wch = (unsigned char)*p;
269 			} else if (clen == (size_t)-2)
270 				break;
271 			else if (clen == 0)
272 				clen = 1;
273 			charct++;
274 			if (wch != L'\n')
275 				tmpll++;
276 			len -= clen;
277 			p += clen;
278 			if (wch == L'\n') {
279 				if (tmpll > llct)
280 					llct = tmpll;
281 				tmpll = 0;
282 				++linect;
283 			}
284 			if (iswspace(wch))
285 				gotsp = 1;
286 			else if (gotsp) {
287 				gotsp = 0;
288 				++wordct;
289 			}
290 		}
291 	}
292 	if (domulti && MB_CUR_MAX > 1)
293 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
294 			warn("%s", file != NULL ? file : "stdin");
295 	if (doline)
296 		tlinect += linect;
297 	if (doword)
298 		twordct += wordct;
299 	if (dochar || domulti)
300 		tcharct += charct;
301 	if (dolongline) {
302 		if (llct > tlongline)
303 			tlongline = llct;
304 	}
305 	show_cnt(file, linect, wordct, charct, llct);
306 	(void)close(fd);
307 	return (0);
308 }
309 
/*
 * Write the command synopsis to stderr and terminate the process with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
316