xref: /freebsd/usr.bin/wc/wc.c (revision f09bc09332776cba0167c9fe018d7eb495a2a471)
19b50d902SRodney W. Grimes /*
29b50d902SRodney W. Grimes  * Copyright (c) 1980, 1987, 1991, 1993
39b50d902SRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
49b50d902SRodney W. Grimes  *
59b50d902SRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
69b50d902SRodney W. Grimes  * modification, are permitted provided that the following conditions
79b50d902SRodney W. Grimes  * are met:
89b50d902SRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
99b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
109b50d902SRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
119b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
129b50d902SRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
139b50d902SRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
149b50d902SRodney W. Grimes  *    must display the following acknowledgement:
159b50d902SRodney W. Grimes  *	This product includes software developed by the University of
169b50d902SRodney W. Grimes  *	California, Berkeley and its contributors.
179b50d902SRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
189b50d902SRodney W. Grimes  *    may be used to endorse or promote products derived from this software
199b50d902SRodney W. Grimes  *    without specific prior written permission.
209b50d902SRodney W. Grimes  *
219b50d902SRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
229b50d902SRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
239b50d902SRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
249b50d902SRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
259b50d902SRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
269b50d902SRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
279b50d902SRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
289b50d902SRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
299b50d902SRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
309b50d902SRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
319b50d902SRodney W. Grimes  * SUCH DAMAGE.
329b50d902SRodney W. Grimes  */
339b50d902SRodney W. Grimes 
#ifndef lint
/* Copyright notice embedded in the binary so what(1)-style tools can find it. */
static const char copyright[] =
    "@(#) Copyright (c) 1980, 1987, 1991, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */
399b50d902SRodney W. Grimes 
4006469209SMike Barcroft #if 0
419b50d902SRodney W. Grimes #ifndef lint
42a821e36eSMike Barcroft static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
43a821e36eSMike Barcroft #endif /* not lint */
442c51e5edSBruce Evans #endif
459b50d902SRodney W. Grimes 
46a821e36eSMike Barcroft #include <sys/cdefs.h>
47a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
48a821e36eSMike Barcroft 
#include <sys/param.h>
#include <sys/stat.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <locale.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
649b50d902SRodney W. Grimes 
/* Running totals over all operands; printed on the "total" line. */
uintmax_t tlinect, twordct, tcharct, tlongline;
/* Non-zero when the matching count was requested (-l, -w, -c, -m, -L). */
int doline, doword, dochar, domulti, dolongline;
/*
 * Set by siginfo_handler() and polled by the counting code.  The object is
 * written from a signal handler, so it has to be volatile sig_atomic_t:
 * with a plain int the compiler is free to cache the value and the store
 * made by the handler is not guaranteed to be observed.
 */
volatile sig_atomic_t siginfo;

static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
static int	cnt(const char *file);
static void	usage(void);
729b50d902SRodney W. Grimes 
73*f09bc093SPawel Jakub Dawidek static void
74*f09bc093SPawel Jakub Dawidek siginfo_handler(int sig __unused)
75*f09bc093SPawel Jakub Dawidek {
76*f09bc093SPawel Jakub Dawidek 
77*f09bc093SPawel Jakub Dawidek 	siginfo = 1;
78*f09bc093SPawel Jakub Dawidek }
79*f09bc093SPawel Jakub Dawidek 
809b50d902SRodney W. Grimes int
81806abfccSJosef El-Rayes main(int argc, char *argv[])
829b50d902SRodney W. Grimes {
83a0cf59e6SSheldon Hearn 	int ch, errors, total;
849b50d902SRodney W. Grimes 
85ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
86ae6fa8aeSAndrey A. Chernov 
87f45dd010SGiorgos Keramidas 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
889b50d902SRodney W. Grimes 		switch((char)ch) {
899b50d902SRodney W. Grimes 		case 'l':
909b50d902SRodney W. Grimes 			doline = 1;
919b50d902SRodney W. Grimes 			break;
929b50d902SRodney W. Grimes 		case 'w':
939b50d902SRodney W. Grimes 			doword = 1;
949b50d902SRodney W. Grimes 			break;
959b50d902SRodney W. Grimes 		case 'c':
969b50d902SRodney W. Grimes 			dochar = 1;
97ebb42aeeSTim J. Robbins 			domulti = 0;
98ebb42aeeSTim J. Robbins 			break;
99f45dd010SGiorgos Keramidas 		case 'L':
100f45dd010SGiorgos Keramidas 			dolongline = 1;
101f45dd010SGiorgos Keramidas 			break;
102ebb42aeeSTim J. Robbins 		case 'm':
103ebb42aeeSTim J. Robbins 			domulti = 1;
104ebb42aeeSTim J. Robbins 			dochar = 0;
1059b50d902SRodney W. Grimes 			break;
1069b50d902SRodney W. Grimes 		case '?':
1079b50d902SRodney W. Grimes 		default:
1089b50d902SRodney W. Grimes 			usage();
1099b50d902SRodney W. Grimes 		}
1109b50d902SRodney W. Grimes 	argv += optind;
1119b50d902SRodney W. Grimes 	argc -= optind;
1129b50d902SRodney W. Grimes 
113*f09bc093SPawel Jakub Dawidek 	(void)signal(SIGINFO, siginfo_handler);
114*f09bc093SPawel Jakub Dawidek 
1159b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
116f45dd010SGiorgos Keramidas 	if (doline + doword + dochar + domulti + dolongline == 0)
1179b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1189b50d902SRodney W. Grimes 
1192c51e5edSBruce Evans 	errors = 0;
1209b50d902SRodney W. Grimes 	total = 0;
1219b50d902SRodney W. Grimes 	if (!*argv) {
1222c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1232c51e5edSBruce Evans 			++errors;
124*f09bc093SPawel Jakub Dawidek 	} else {
125*f09bc093SPawel Jakub Dawidek 		do {
1262c51e5edSBruce Evans 			if (cnt(*argv) != 0)
1272c51e5edSBruce Evans 				++errors;
1289b50d902SRodney W. Grimes 			++total;
1299b50d902SRodney W. Grimes 		} while(*++argv);
1309b50d902SRodney W. Grimes 	}
131*f09bc093SPawel Jakub Dawidek 
132*f09bc093SPawel Jakub Dawidek 	if (total > 1)
133*f09bc093SPawel Jakub Dawidek 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
1342c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1359b50d902SRodney W. Grimes }
1369b50d902SRodney W. Grimes 
137*f09bc093SPawel Jakub Dawidek static void
138*f09bc093SPawel Jakub Dawidek show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
139*f09bc093SPawel Jakub Dawidek     uintmax_t charct, uintmax_t llct)
140*f09bc093SPawel Jakub Dawidek {
141*f09bc093SPawel Jakub Dawidek 	FILE *out;
142*f09bc093SPawel Jakub Dawidek 
143*f09bc093SPawel Jakub Dawidek 	if (!siginfo)
144*f09bc093SPawel Jakub Dawidek 		out = stdout;
145*f09bc093SPawel Jakub Dawidek 	else {
146*f09bc093SPawel Jakub Dawidek 		out = stderr;
147*f09bc093SPawel Jakub Dawidek 		siginfo = 0;
148*f09bc093SPawel Jakub Dawidek 	}
149*f09bc093SPawel Jakub Dawidek 
150*f09bc093SPawel Jakub Dawidek 	if (doline)
151*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, " %7ju", linect);
152*f09bc093SPawel Jakub Dawidek 	if (doword)
153*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, " %7ju", wordct);
154*f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
155*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, " %7ju", charct);
156*f09bc093SPawel Jakub Dawidek 	if (dolongline)
157*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, " %7ju", llct);
158*f09bc093SPawel Jakub Dawidek 	if (file != NULL)
159*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, " %s\n", file);
160*f09bc093SPawel Jakub Dawidek 	else
161*f09bc093SPawel Jakub Dawidek 		(void)fprintf(out, "\n");
162*f09bc093SPawel Jakub Dawidek }
163*f09bc093SPawel Jakub Dawidek 
164a821e36eSMike Barcroft static int
165806abfccSJosef El-Rayes cnt(const char *file)
1669b50d902SRodney W. Grimes {
1679b50d902SRodney W. Grimes 	struct stat sb;
168f45dd010SGiorgos Keramidas 	uintmax_t linect, wordct, charct, llct, tmpll;
169149a123bSTim J. Robbins 	int fd, len, warned;
170149a123bSTim J. Robbins 	size_t clen;
171a0cf59e6SSheldon Hearn 	short gotsp;
172a0cf59e6SSheldon Hearn 	u_char *p;
173abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
174ebb42aeeSTim J. Robbins 	wchar_t wch;
175149a123bSTim J. Robbins 	mbstate_t mbs;
1769b50d902SRodney W. Grimes 
177f45dd010SGiorgos Keramidas 	linect = wordct = charct = llct = tmpll = 0;
178*f09bc093SPawel Jakub Dawidek 	if (file == NULL)
1792c51e5edSBruce Evans 		fd = STDIN_FILENO;
180*f09bc093SPawel Jakub Dawidek 	else {
181a0d038a4SWolfram Schneider 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
1822c51e5edSBruce Evans 			warn("%s: open", file);
1832c51e5edSBruce Evans 			return (1);
184a0d038a4SWolfram Schneider 		}
185ebb42aeeSTim J. Robbins 		if (doword || (domulti && MB_CUR_MAX != 1))
1869b50d902SRodney W. Grimes 			goto word;
1879b50d902SRodney W. Grimes 		/*
1889b50d902SRodney W. Grimes 		 * Line counting is split out because it's a lot faster to get
1899b50d902SRodney W. Grimes 		 * lines than to get words, since the word count requires some
1909b50d902SRodney W. Grimes 		 * logic.
1919b50d902SRodney W. Grimes 		 */
1929b50d902SRodney W. Grimes 		if (doline) {
1938c85cce7SPhilippe Charnier 			while ((len = read(fd, buf, MAXBSIZE))) {
1942c51e5edSBruce Evans 				if (len == -1) {
1952c51e5edSBruce Evans 					warn("%s: read", file);
1962c51e5edSBruce Evans 					(void)close(fd);
1972c51e5edSBruce Evans 					return (1);
1982c51e5edSBruce Evans 				}
199*f09bc093SPawel Jakub Dawidek 				if (siginfo) {
200*f09bc093SPawel Jakub Dawidek 					show_cnt(file, linect, wordct, charct,
201*f09bc093SPawel Jakub Dawidek 					    llct);
202*f09bc093SPawel Jakub Dawidek 				}
2039b50d902SRodney W. Grimes 				charct += len;
2049b50d902SRodney W. Grimes 				for (p = buf; len--; ++p)
205f45dd010SGiorgos Keramidas 					if (*p == '\n') {
206f45dd010SGiorgos Keramidas 						if (tmpll > llct)
207f45dd010SGiorgos Keramidas 							llct = tmpll;
208f45dd010SGiorgos Keramidas 						tmpll = 0;
2099b50d902SRodney W. Grimes 						++linect;
210f45dd010SGiorgos Keramidas 					} else
211f45dd010SGiorgos Keramidas 						tmpll++;
2129b50d902SRodney W. Grimes 			}
2139b50d902SRodney W. Grimes 			tlinect += linect;
214*f09bc093SPawel Jakub Dawidek 			if (dochar)
2159b50d902SRodney W. Grimes 				tcharct += charct;
216f45dd010SGiorgos Keramidas 			if (dolongline) {
217f45dd010SGiorgos Keramidas 				if (llct > tlongline)
218f45dd010SGiorgos Keramidas 					tlongline = llct;
219f45dd010SGiorgos Keramidas 			}
220*f09bc093SPawel Jakub Dawidek 			show_cnt(file, linect, wordct, charct, llct);
2219b50d902SRodney W. Grimes 			(void)close(fd);
2222c51e5edSBruce Evans 			return (0);
2239b50d902SRodney W. Grimes 		}
2249b50d902SRodney W. Grimes 		/*
2259b50d902SRodney W. Grimes 		 * If all we need is the number of characters and it's a
226a1a27143STim J. Robbins 		 * regular file, just stat the puppy.
2279b50d902SRodney W. Grimes 		 */
228ebb42aeeSTim J. Robbins 		if (dochar || domulti) {
2292c51e5edSBruce Evans 			if (fstat(fd, &sb)) {
2302c51e5edSBruce Evans 				warn("%s: fstat", file);
2312c51e5edSBruce Evans 				(void)close(fd);
2322c51e5edSBruce Evans 				return (1);
2332c51e5edSBruce Evans 			}
234a1a27143STim J. Robbins 			if (S_ISREG(sb.st_mode)) {
235*f09bc093SPawel Jakub Dawidek 				charct = sb.st_size;
236*f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
237*f09bc093SPawel Jakub Dawidek 				tcharct += charct;
2389b50d902SRodney W. Grimes 				(void)close(fd);
2392c51e5edSBruce Evans 				return (0);
2409b50d902SRodney W. Grimes 			}
2419b50d902SRodney W. Grimes 		}
2429b50d902SRodney W. Grimes 	}
2439b50d902SRodney W. Grimes 
2449b50d902SRodney W. Grimes 	/* Do it the hard way... */
245ebb42aeeSTim J. Robbins word:	gotsp = 1;
246ebb42aeeSTim J. Robbins 	warned = 0;
247149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
248149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
249149a123bSTim J. Robbins 		if (len == -1) {
250*f09bc093SPawel Jakub Dawidek 			warn("%s: read", file != NULL ? file : "stdin");
2512c51e5edSBruce Evans 			(void)close(fd);
2522c51e5edSBruce Evans 			return (1);
2532c51e5edSBruce Evans 		}
254ebb42aeeSTim J. Robbins 		p = buf;
255ebb42aeeSTim J. Robbins 		while (len > 0) {
256*f09bc093SPawel Jakub Dawidek 			if (siginfo)
257*f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
258ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
259ebb42aeeSTim J. Robbins 				clen = 1;
260ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
261149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
262149a123bSTim J. Robbins 			    (size_t)-1) {
263ebb42aeeSTim J. Robbins 				if (!warned) {
264ebb42aeeSTim J. Robbins 					errno = EILSEQ;
265*f09bc093SPawel Jakub Dawidek 					warn("%s",
266*f09bc093SPawel Jakub Dawidek 					    file != NULL ? file : "stdin");
267ebb42aeeSTim J. Robbins 					warned = 1;
268ebb42aeeSTim J. Robbins 				}
269149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
270149a123bSTim J. Robbins 				clen = 1;
271149a123bSTim J. Robbins 				wch = (unsigned char)*p;
272149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
273ebb42aeeSTim J. Robbins 				break;
274149a123bSTim J. Robbins 			else if (clen == 0)
275149a123bSTim J. Robbins 				clen = 1;
276ebb42aeeSTim J. Robbins 			charct++;
277f45dd010SGiorgos Keramidas 			if (wch != L'\n')
278f45dd010SGiorgos Keramidas 				tmpll++;
279ebb42aeeSTim J. Robbins 			len -= clen;
280ebb42aeeSTim J. Robbins 			p += clen;
281f45dd010SGiorgos Keramidas 			if (wch == L'\n') {
282f45dd010SGiorgos Keramidas 				if (tmpll > llct)
283f45dd010SGiorgos Keramidas 					llct = tmpll;
284f45dd010SGiorgos Keramidas 				tmpll = 0;
2859b50d902SRodney W. Grimes 				++linect;
286f45dd010SGiorgos Keramidas 			}
287e58245f7STim J. Robbins 			if (iswspace(wch))
2889b50d902SRodney W. Grimes 				gotsp = 1;
2899b50d902SRodney W. Grimes 			else if (gotsp) {
2909b50d902SRodney W. Grimes 				gotsp = 0;
2919b50d902SRodney W. Grimes 				++wordct;
2929b50d902SRodney W. Grimes 			}
2939b50d902SRodney W. Grimes 		}
2949b50d902SRodney W. Grimes 	}
295149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
296149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
297*f09bc093SPawel Jakub Dawidek 			warn("%s", file != NULL ? file : "stdin");
298*f09bc093SPawel Jakub Dawidek 	if (doline)
2999b50d902SRodney W. Grimes 		tlinect += linect;
300*f09bc093SPawel Jakub Dawidek 	if (doword)
3019b50d902SRodney W. Grimes 		twordct += wordct;
302*f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
3039b50d902SRodney W. Grimes 		tcharct += charct;
304f45dd010SGiorgos Keramidas 	if (dolongline) {
305f45dd010SGiorgos Keramidas 		if (llct > tlongline)
306f45dd010SGiorgos Keramidas 			tlongline = llct;
307f45dd010SGiorgos Keramidas 	}
308*f09bc093SPawel Jakub Dawidek 	show_cnt(file, linect, wordct, charct, llct);
3099b50d902SRodney W. Grimes 	(void)close(fd);
3102c51e5edSBruce Evans 	return (0);
3119b50d902SRodney W. Grimes }
3129b50d902SRodney W. Grimes 
/* Print the synopsis to stderr and terminate with exit status 1. */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
319