xref: /freebsd/usr.bin/wc/wc.c (revision 149a123b349b2fc13de147b534cdfbdd3897d84c)
19b50d902SRodney W. Grimes /*
29b50d902SRodney W. Grimes  * Copyright (c) 1980, 1987, 1991, 1993
39b50d902SRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
49b50d902SRodney W. Grimes  *
59b50d902SRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
69b50d902SRodney W. Grimes  * modification, are permitted provided that the following conditions
79b50d902SRodney W. Grimes  * are met:
89b50d902SRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
99b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
109b50d902SRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
119b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
129b50d902SRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
139b50d902SRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
149b50d902SRodney W. Grimes  *    must display the following acknowledgement:
159b50d902SRodney W. Grimes  *	This product includes software developed by the University of
169b50d902SRodney W. Grimes  *	California, Berkeley and its contributors.
179b50d902SRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
189b50d902SRodney W. Grimes  *    may be used to endorse or promote products derived from this software
199b50d902SRodney W. Grimes  *    without specific prior written permission.
209b50d902SRodney W. Grimes  *
219b50d902SRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
229b50d902SRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
239b50d902SRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
249b50d902SRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
259b50d902SRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
269b50d902SRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
279b50d902SRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
289b50d902SRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
299b50d902SRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
309b50d902SRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
319b50d902SRodney W. Grimes  * SUCH DAMAGE.
329b50d902SRodney W. Grimes  */
339b50d902SRodney W. Grimes 
#if !defined(lint)
/* Copyright notice compiled into the object unless `lint` is defined. */
static const char copyright[] =
    "@(#) Copyright (c) 1980, 1987, 1991, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* !lint */

/* The SCCS identification string below is deliberately compiled out. */
#if 0
#if !defined(lint)
static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
#endif /* !lint */
#endif
459b50d902SRodney W. Grimes 
46a821e36eSMike Barcroft #include <sys/cdefs.h>
47a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
48a821e36eSMike Barcroft 
499b50d902SRodney W. Grimes #include <sys/param.h>
509b50d902SRodney W. Grimes #include <sys/stat.h>
512c51e5edSBruce Evans 
522c51e5edSBruce Evans #include <ctype.h>
532c51e5edSBruce Evans #include <err.h>
54ebb42aeeSTim J. Robbins #include <errno.h>
559b50d902SRodney W. Grimes #include <fcntl.h>
56ae6fa8aeSAndrey A. Chernov #include <locale.h>
57a821e36eSMike Barcroft #include <stdint.h>
589b50d902SRodney W. Grimes #include <stdio.h>
599b50d902SRodney W. Grimes #include <stdlib.h>
609b50d902SRodney W. Grimes #include <string.h>
612c51e5edSBruce Evans #include <unistd.h>
62149a123bSTim J. Robbins #include <wchar.h>
63e58245f7STim J. Robbins #include <wctype.h>
649b50d902SRodney W. Grimes 
/*
 * Running totals over all inputs; cnt() adds to them and main() prints
 * them on the "total" line.
 */
static uintmax_t tlinect, twordct, tcharct;

/*
 * Option flags set by main(): -l, -w, -c and -m respectively.  -c and -m
 * clear each other, so at most one of dochar/domulti is set by the
 * option parser.
 */
static int doline, doword, dochar, domulti;

static int	cnt(const char *file);
static void	usage(void);
709b50d902SRodney W. Grimes 
719b50d902SRodney W. Grimes int
729b50d902SRodney W. Grimes main(argc, argv)
739b50d902SRodney W. Grimes 	int argc;
749b50d902SRodney W. Grimes 	char *argv[];
759b50d902SRodney W. Grimes {
76a0cf59e6SSheldon Hearn 	int ch, errors, total;
779b50d902SRodney W. Grimes 
78ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
79ae6fa8aeSAndrey A. Chernov 
80ebb42aeeSTim J. Robbins 	while ((ch = getopt(argc, argv, "clmw")) != -1)
819b50d902SRodney W. Grimes 		switch((char)ch) {
829b50d902SRodney W. Grimes 		case 'l':
839b50d902SRodney W. Grimes 			doline = 1;
849b50d902SRodney W. Grimes 			break;
859b50d902SRodney W. Grimes 		case 'w':
869b50d902SRodney W. Grimes 			doword = 1;
879b50d902SRodney W. Grimes 			break;
889b50d902SRodney W. Grimes 		case 'c':
899b50d902SRodney W. Grimes 			dochar = 1;
90ebb42aeeSTim J. Robbins 			domulti = 0;
91ebb42aeeSTim J. Robbins 			break;
92ebb42aeeSTim J. Robbins 		case 'm':
93ebb42aeeSTim J. Robbins 			domulti = 1;
94ebb42aeeSTim J. Robbins 			dochar = 0;
959b50d902SRodney W. Grimes 			break;
969b50d902SRodney W. Grimes 		case '?':
979b50d902SRodney W. Grimes 		default:
989b50d902SRodney W. Grimes 			usage();
999b50d902SRodney W. Grimes 		}
1009b50d902SRodney W. Grimes 	argv += optind;
1019b50d902SRodney W. Grimes 	argc -= optind;
1029b50d902SRodney W. Grimes 
1039b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
104ebb42aeeSTim J. Robbins 	if (doline + doword + dochar + domulti == 0)
1059b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1069b50d902SRodney W. Grimes 
1072c51e5edSBruce Evans 	errors = 0;
1089b50d902SRodney W. Grimes 	total = 0;
1099b50d902SRodney W. Grimes 	if (!*argv) {
1102c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1112c51e5edSBruce Evans 			++errors;
1122c51e5edSBruce Evans 		else
1139b50d902SRodney W. Grimes 			(void)printf("\n");
1149b50d902SRodney W. Grimes 	}
1159b50d902SRodney W. Grimes 	else do {
1162c51e5edSBruce Evans 		if (cnt(*argv) != 0)
1172c51e5edSBruce Evans 			++errors;
1182c51e5edSBruce Evans 		else
1199b50d902SRodney W. Grimes 			(void)printf(" %s\n", *argv);
1209b50d902SRodney W. Grimes 		++total;
1219b50d902SRodney W. Grimes 	} while(*++argv);
1229b50d902SRodney W. Grimes 
1239b50d902SRodney W. Grimes 	if (total > 1) {
1249b50d902SRodney W. Grimes 		if (doline)
125a821e36eSMike Barcroft 			(void)printf(" %7ju", tlinect);
1269b50d902SRodney W. Grimes 		if (doword)
127a821e36eSMike Barcroft 			(void)printf(" %7ju", twordct);
128ebb42aeeSTim J. Robbins 		if (dochar || domulti)
129a821e36eSMike Barcroft 			(void)printf(" %7ju", tcharct);
1309b50d902SRodney W. Grimes 		(void)printf(" total\n");
1319b50d902SRodney W. Grimes 	}
1322c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1339b50d902SRodney W. Grimes }
1349b50d902SRodney W. Grimes 
135a821e36eSMike Barcroft static int
1369b50d902SRodney W. Grimes cnt(file)
1377a8fb588SMark Murray 	const char *file;
1389b50d902SRodney W. Grimes {
1399b50d902SRodney W. Grimes 	struct stat sb;
140a821e36eSMike Barcroft 	uintmax_t linect, wordct, charct;
141149a123bSTim J. Robbins 	int fd, len, warned;
142149a123bSTim J. Robbins 	size_t clen;
143a0cf59e6SSheldon Hearn 	short gotsp;
144a0cf59e6SSheldon Hearn 	u_char *p;
145abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
146ebb42aeeSTim J. Robbins 	wchar_t wch;
147149a123bSTim J. Robbins 	mbstate_t mbs;
1489b50d902SRodney W. Grimes 
1499b50d902SRodney W. Grimes 	linect = wordct = charct = 0;
1502c51e5edSBruce Evans 	if (file == NULL) {
1512c51e5edSBruce Evans 		file = "stdin";
1522c51e5edSBruce Evans 		fd = STDIN_FILENO;
1532c51e5edSBruce Evans 	} else {
154a0d038a4SWolfram Schneider 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
1552c51e5edSBruce Evans 			warn("%s: open", file);
1562c51e5edSBruce Evans 			return (1);
157a0d038a4SWolfram Schneider 		}
158ebb42aeeSTim J. Robbins 		if (doword || (domulti && MB_CUR_MAX != 1))
1599b50d902SRodney W. Grimes 			goto word;
1609b50d902SRodney W. Grimes 		/*
1619b50d902SRodney W. Grimes 		 * Line counting is split out because it's a lot faster to get
1629b50d902SRodney W. Grimes 		 * lines than to get words, since the word count requires some
1639b50d902SRodney W. Grimes 		 * logic.
1649b50d902SRodney W. Grimes 		 */
1659b50d902SRodney W. Grimes 		if (doline) {
1668c85cce7SPhilippe Charnier 			while ((len = read(fd, buf, MAXBSIZE))) {
1672c51e5edSBruce Evans 				if (len == -1) {
1682c51e5edSBruce Evans 					warn("%s: read", file);
1692c51e5edSBruce Evans 					(void)close(fd);
1702c51e5edSBruce Evans 					return (1);
1712c51e5edSBruce Evans 				}
1729b50d902SRodney W. Grimes 				charct += len;
1739b50d902SRodney W. Grimes 				for (p = buf; len--; ++p)
1749b50d902SRodney W. Grimes 					if (*p == '\n')
1759b50d902SRodney W. Grimes 						++linect;
1769b50d902SRodney W. Grimes 			}
1779b50d902SRodney W. Grimes 			tlinect += linect;
178a821e36eSMike Barcroft 			(void)printf(" %7ju", linect);
1799b50d902SRodney W. Grimes 			if (dochar) {
1809b50d902SRodney W. Grimes 				tcharct += charct;
181a821e36eSMike Barcroft 				(void)printf(" %7ju", charct);
1829b50d902SRodney W. Grimes 			}
1839b50d902SRodney W. Grimes 			(void)close(fd);
1842c51e5edSBruce Evans 			return (0);
1859b50d902SRodney W. Grimes 		}
1869b50d902SRodney W. Grimes 		/*
1879b50d902SRodney W. Grimes 		 * If all we need is the number of characters and it's a
188a1a27143STim J. Robbins 		 * regular file, just stat the puppy.
1899b50d902SRodney W. Grimes 		 */
190ebb42aeeSTim J. Robbins 		if (dochar || domulti) {
1912c51e5edSBruce Evans 			if (fstat(fd, &sb)) {
1922c51e5edSBruce Evans 				warn("%s: fstat", file);
1932c51e5edSBruce Evans 				(void)close(fd);
1942c51e5edSBruce Evans 				return (1);
1952c51e5edSBruce Evans 			}
196a1a27143STim J. Robbins 			if (S_ISREG(sb.st_mode)) {
197a0cf59e6SSheldon Hearn 				(void)printf(" %7lld", (long long)sb.st_size);
1989b50d902SRodney W. Grimes 				tcharct += sb.st_size;
1999b50d902SRodney W. Grimes 				(void)close(fd);
2002c51e5edSBruce Evans 				return (0);
2019b50d902SRodney W. Grimes 			}
2029b50d902SRodney W. Grimes 		}
2039b50d902SRodney W. Grimes 	}
2049b50d902SRodney W. Grimes 
2059b50d902SRodney W. Grimes 	/* Do it the hard way... */
206ebb42aeeSTim J. Robbins word:	gotsp = 1;
207ebb42aeeSTim J. Robbins 	warned = 0;
208149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
209149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
210149a123bSTim J. Robbins 		if (len == -1) {
2112c51e5edSBruce Evans 			warn("%s: read", file);
2122c51e5edSBruce Evans 			(void)close(fd);
2132c51e5edSBruce Evans 			return (1);
2142c51e5edSBruce Evans 		}
215ebb42aeeSTim J. Robbins 		p = buf;
216ebb42aeeSTim J. Robbins 		while (len > 0) {
217ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
218ebb42aeeSTim J. Robbins 				clen = 1;
219ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
220149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
221149a123bSTim J. Robbins 			    (size_t)-1) {
222ebb42aeeSTim J. Robbins 				if (!warned) {
223ebb42aeeSTim J. Robbins 					errno = EILSEQ;
224ebb42aeeSTim J. Robbins 					warn("%s", file);
225ebb42aeeSTim J. Robbins 					warned = 1;
226ebb42aeeSTim J. Robbins 				}
227149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
228149a123bSTim J. Robbins 				clen = 1;
229149a123bSTim J. Robbins 				wch = (unsigned char)*p;
230149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
231ebb42aeeSTim J. Robbins 				break;
232149a123bSTim J. Robbins 			else if (clen == 0)
233149a123bSTim J. Robbins 				clen = 1;
234ebb42aeeSTim J. Robbins 			charct++;
235ebb42aeeSTim J. Robbins 			len -= clen;
236ebb42aeeSTim J. Robbins 			p += clen;
237ebb42aeeSTim J. Robbins 			if (wch == L'\n')
2389b50d902SRodney W. Grimes 				++linect;
239e58245f7STim J. Robbins 			if (iswspace(wch))
2409b50d902SRodney W. Grimes 				gotsp = 1;
2419b50d902SRodney W. Grimes 			else if (gotsp) {
2429b50d902SRodney W. Grimes 				gotsp = 0;
2439b50d902SRodney W. Grimes 				++wordct;
2449b50d902SRodney W. Grimes 			}
2459b50d902SRodney W. Grimes 		}
2469b50d902SRodney W. Grimes 	}
247149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
248149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
249149a123bSTim J. Robbins 			warn("%s", file);
2509b50d902SRodney W. Grimes 	if (doline) {
2519b50d902SRodney W. Grimes 		tlinect += linect;
252a821e36eSMike Barcroft 		(void)printf(" %7ju", linect);
2539b50d902SRodney W. Grimes 	}
2549b50d902SRodney W. Grimes 	if (doword) {
2559b50d902SRodney W. Grimes 		twordct += wordct;
256a821e36eSMike Barcroft 		(void)printf(" %7ju", wordct);
2579b50d902SRodney W. Grimes 	}
258ebb42aeeSTim J. Robbins 	if (dochar || domulti) {
2599b50d902SRodney W. Grimes 		tcharct += charct;
260a821e36eSMike Barcroft 		(void)printf(" %7ju", charct);
2619b50d902SRodney W. Grimes 	}
2629b50d902SRodney W. Grimes 	(void)close(fd);
2632c51e5edSBruce Evans 	return (0);
2649b50d902SRodney W. Grimes }
2659b50d902SRodney W. Grimes 
/*
 * Write the command synopsis to standard error and terminate the
 * program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-clmw] [file ...]\n", stderr);
	exit(1);
}
272