xref: /freebsd/usr.bin/wc/wc.c (revision 0970727f4f1fe4369574849bd6c1d4a4d238583d)
19b50d902SRodney W. Grimes /*
29b50d902SRodney W. Grimes  * Copyright (c) 1980, 1987, 1991, 1993
39b50d902SRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
49b50d902SRodney W. Grimes  *
59b50d902SRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
69b50d902SRodney W. Grimes  * modification, are permitted provided that the following conditions
79b50d902SRodney W. Grimes  * are met:
89b50d902SRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
99b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
109b50d902SRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
119b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
129b50d902SRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
139b50d902SRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
149b50d902SRodney W. Grimes  *    must display the following acknowledgement:
159b50d902SRodney W. Grimes  *	This product includes software developed by the University of
169b50d902SRodney W. Grimes  *	California, Berkeley and its contributors.
179b50d902SRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
189b50d902SRodney W. Grimes  *    may be used to endorse or promote products derived from this software
199b50d902SRodney W. Grimes  *    without specific prior written permission.
209b50d902SRodney W. Grimes  *
219b50d902SRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
229b50d902SRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
239b50d902SRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
249b50d902SRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
259b50d902SRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
269b50d902SRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
279b50d902SRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
289b50d902SRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
299b50d902SRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
309b50d902SRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
319b50d902SRodney W. Grimes  * SUCH DAMAGE.
329b50d902SRodney W. Grimes  */
339b50d902SRodney W. Grimes 
349b50d902SRodney W. Grimes #ifndef lint
3506469209SMike Barcroft static const char copyright[] =
369b50d902SRodney W. Grimes "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
379b50d902SRodney W. Grimes 	The Regents of the University of California.  All rights reserved.\n";
38a821e36eSMike Barcroft #endif /* not lint */
399b50d902SRodney W. Grimes 
4006469209SMike Barcroft #if 0
419b50d902SRodney W. Grimes #ifndef lint
42a821e36eSMike Barcroft static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
43a821e36eSMike Barcroft #endif /* not lint */
442c51e5edSBruce Evans #endif
459b50d902SRodney W. Grimes 
46a821e36eSMike Barcroft #include <sys/cdefs.h>
47a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
48a821e36eSMike Barcroft 
499b50d902SRodney W. Grimes #include <sys/param.h>
509b50d902SRodney W. Grimes #include <sys/stat.h>
512c51e5edSBruce Evans 
522c51e5edSBruce Evans #include <ctype.h>
532c51e5edSBruce Evans #include <err.h>
54ebb42aeeSTim J. Robbins #include <errno.h>
559b50d902SRodney W. Grimes #include <fcntl.h>
56ae6fa8aeSAndrey A. Chernov #include <locale.h>
57a821e36eSMike Barcroft #include <stdint.h>
589b50d902SRodney W. Grimes #include <stdio.h>
599b50d902SRodney W. Grimes #include <stdlib.h>
609b50d902SRodney W. Grimes #include <string.h>
612c51e5edSBruce Evans #include <unistd.h>
62149a123bSTim J. Robbins #include <wchar.h>
63e58245f7STim J. Robbins #include <wctype.h>
649b50d902SRodney W. Grimes 
/* Running totals over every input that cnt() has counted successfully. */
uintmax_t tlinect;		/* newlines */
uintmax_t twordct;		/* words */
uintmax_t tcharct;		/* bytes (-c) or characters (-m) */
uintmax_t tlongline;		/* longest line length seen in any input */

/* Column selectors; main() sets them from the command-line options. */
int doline;			/* -l */
int doword;			/* -w */
int dochar;			/* -c */
int domulti;			/* -m */
int dolongline;			/* -L */

/* File-local helpers defined after main(). */
static int	cnt(const char *file);
static void	usage(void);
709b50d902SRodney W. Grimes 
719b50d902SRodney W. Grimes int
72806abfccSJosef El-Rayes main(int argc, char *argv[])
739b50d902SRodney W. Grimes {
74a0cf59e6SSheldon Hearn 	int ch, errors, total;
759b50d902SRodney W. Grimes 
76ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
77ae6fa8aeSAndrey A. Chernov 
78f45dd010SGiorgos Keramidas 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
799b50d902SRodney W. Grimes 		switch((char)ch) {
809b50d902SRodney W. Grimes 		case 'l':
819b50d902SRodney W. Grimes 			doline = 1;
829b50d902SRodney W. Grimes 			break;
839b50d902SRodney W. Grimes 		case 'w':
849b50d902SRodney W. Grimes 			doword = 1;
859b50d902SRodney W. Grimes 			break;
869b50d902SRodney W. Grimes 		case 'c':
879b50d902SRodney W. Grimes 			dochar = 1;
88ebb42aeeSTim J. Robbins 			domulti = 0;
89ebb42aeeSTim J. Robbins 			break;
90f45dd010SGiorgos Keramidas 		case 'L':
91f45dd010SGiorgos Keramidas 			dolongline = 1;
92f45dd010SGiorgos Keramidas 			break;
93ebb42aeeSTim J. Robbins 		case 'm':
94ebb42aeeSTim J. Robbins 			domulti = 1;
95ebb42aeeSTim J. Robbins 			dochar = 0;
969b50d902SRodney W. Grimes 			break;
979b50d902SRodney W. Grimes 		case '?':
989b50d902SRodney W. Grimes 		default:
999b50d902SRodney W. Grimes 			usage();
1009b50d902SRodney W. Grimes 		}
1019b50d902SRodney W. Grimes 	argv += optind;
1029b50d902SRodney W. Grimes 	argc -= optind;
1039b50d902SRodney W. Grimes 
1049b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
105f45dd010SGiorgos Keramidas 	if (doline + doword + dochar + domulti + dolongline == 0)
1069b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1079b50d902SRodney W. Grimes 
1082c51e5edSBruce Evans 	errors = 0;
1099b50d902SRodney W. Grimes 	total = 0;
1109b50d902SRodney W. Grimes 	if (!*argv) {
1112c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1122c51e5edSBruce Evans 			++errors;
1132c51e5edSBruce Evans 		else
1149b50d902SRodney W. Grimes 			(void)printf("\n");
1159b50d902SRodney W. Grimes 	}
1169b50d902SRodney W. Grimes 	else do {
1172c51e5edSBruce Evans 		if (cnt(*argv) != 0)
1182c51e5edSBruce Evans 			++errors;
1192c51e5edSBruce Evans 		else
1209b50d902SRodney W. Grimes 			(void)printf(" %s\n", *argv);
1219b50d902SRodney W. Grimes 		++total;
1229b50d902SRodney W. Grimes 	} while(*++argv);
1239b50d902SRodney W. Grimes 
1249b50d902SRodney W. Grimes 	if (total > 1) {
1259b50d902SRodney W. Grimes 		if (doline)
126a821e36eSMike Barcroft 			(void)printf(" %7ju", tlinect);
1279b50d902SRodney W. Grimes 		if (doword)
128a821e36eSMike Barcroft 			(void)printf(" %7ju", twordct);
129ebb42aeeSTim J. Robbins 		if (dochar || domulti)
130a821e36eSMike Barcroft 			(void)printf(" %7ju", tcharct);
131f45dd010SGiorgos Keramidas 		if (dolongline)
132f45dd010SGiorgos Keramidas 			(void)printf(" %7ju", tlongline);
1339b50d902SRodney W. Grimes 		(void)printf(" total\n");
1349b50d902SRodney W. Grimes 	}
1352c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1369b50d902SRodney W. Grimes }
1379b50d902SRodney W. Grimes 
138a821e36eSMike Barcroft static int
139806abfccSJosef El-Rayes cnt(const char *file)
1409b50d902SRodney W. Grimes {
1419b50d902SRodney W. Grimes 	struct stat sb;
142f45dd010SGiorgos Keramidas 	uintmax_t linect, wordct, charct, llct, tmpll;
143149a123bSTim J. Robbins 	int fd, len, warned;
144149a123bSTim J. Robbins 	size_t clen;
145a0cf59e6SSheldon Hearn 	short gotsp;
146a0cf59e6SSheldon Hearn 	u_char *p;
147abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
148ebb42aeeSTim J. Robbins 	wchar_t wch;
149149a123bSTim J. Robbins 	mbstate_t mbs;
1509b50d902SRodney W. Grimes 
151f45dd010SGiorgos Keramidas 	linect = wordct = charct = llct = tmpll = 0;
1522c51e5edSBruce Evans 	if (file == NULL) {
1532c51e5edSBruce Evans 		file = "stdin";
1542c51e5edSBruce Evans 		fd = STDIN_FILENO;
1552c51e5edSBruce Evans 	} else {
156a0d038a4SWolfram Schneider 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
1572c51e5edSBruce Evans 			warn("%s: open", file);
1582c51e5edSBruce Evans 			return (1);
159a0d038a4SWolfram Schneider 		}
160ebb42aeeSTim J. Robbins 		if (doword || (domulti && MB_CUR_MAX != 1))
1619b50d902SRodney W. Grimes 			goto word;
1629b50d902SRodney W. Grimes 		/*
1639b50d902SRodney W. Grimes 		 * Line counting is split out because it's a lot faster to get
1649b50d902SRodney W. Grimes 		 * lines than to get words, since the word count requires some
1659b50d902SRodney W. Grimes 		 * logic.
1669b50d902SRodney W. Grimes 		 */
1679b50d902SRodney W. Grimes 		if (doline) {
1688c85cce7SPhilippe Charnier 			while ((len = read(fd, buf, MAXBSIZE))) {
1692c51e5edSBruce Evans 				if (len == -1) {
1702c51e5edSBruce Evans 					warn("%s: read", file);
1712c51e5edSBruce Evans 					(void)close(fd);
1722c51e5edSBruce Evans 					return (1);
1732c51e5edSBruce Evans 				}
1749b50d902SRodney W. Grimes 				charct += len;
1759b50d902SRodney W. Grimes 				for (p = buf; len--; ++p)
176f45dd010SGiorgos Keramidas 					if (*p == '\n') {
177f45dd010SGiorgos Keramidas 						if (tmpll > llct)
178f45dd010SGiorgos Keramidas 							llct = tmpll;
179f45dd010SGiorgos Keramidas 						tmpll = 0;
1809b50d902SRodney W. Grimes 						++linect;
181f45dd010SGiorgos Keramidas 					} else
182f45dd010SGiorgos Keramidas 						tmpll++;
1839b50d902SRodney W. Grimes 			}
1849b50d902SRodney W. Grimes 			tlinect += linect;
185a821e36eSMike Barcroft 			(void)printf(" %7ju", linect);
1869b50d902SRodney W. Grimes 			if (dochar) {
1879b50d902SRodney W. Grimes 				tcharct += charct;
188a821e36eSMike Barcroft 				(void)printf(" %7ju", charct);
1899b50d902SRodney W. Grimes 			}
190f45dd010SGiorgos Keramidas 			if (dolongline) {
191f45dd010SGiorgos Keramidas 				if (llct > tlongline)
192f45dd010SGiorgos Keramidas 					tlongline = llct;
193f45dd010SGiorgos Keramidas 				(void)printf(" %7ju", tlongline);
194f45dd010SGiorgos Keramidas 			}
1959b50d902SRodney W. Grimes 			(void)close(fd);
1962c51e5edSBruce Evans 			return (0);
1979b50d902SRodney W. Grimes 		}
1989b50d902SRodney W. Grimes 		/*
1999b50d902SRodney W. Grimes 		 * If all we need is the number of characters and it's a
200a1a27143STim J. Robbins 		 * regular file, just stat the puppy.
2019b50d902SRodney W. Grimes 		 */
202ebb42aeeSTim J. Robbins 		if (dochar || domulti) {
2032c51e5edSBruce Evans 			if (fstat(fd, &sb)) {
2042c51e5edSBruce Evans 				warn("%s: fstat", file);
2052c51e5edSBruce Evans 				(void)close(fd);
2062c51e5edSBruce Evans 				return (1);
2072c51e5edSBruce Evans 			}
208a1a27143STim J. Robbins 			if (S_ISREG(sb.st_mode)) {
209a0cf59e6SSheldon Hearn 				(void)printf(" %7lld", (long long)sb.st_size);
2109b50d902SRodney W. Grimes 				tcharct += sb.st_size;
2119b50d902SRodney W. Grimes 				(void)close(fd);
2122c51e5edSBruce Evans 				return (0);
2139b50d902SRodney W. Grimes 			}
2149b50d902SRodney W. Grimes 		}
2159b50d902SRodney W. Grimes 	}
2169b50d902SRodney W. Grimes 
2179b50d902SRodney W. Grimes 	/* Do it the hard way... */
218ebb42aeeSTim J. Robbins word:	gotsp = 1;
219ebb42aeeSTim J. Robbins 	warned = 0;
220149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
221149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
222149a123bSTim J. Robbins 		if (len == -1) {
2232c51e5edSBruce Evans 			warn("%s: read", file);
2242c51e5edSBruce Evans 			(void)close(fd);
2252c51e5edSBruce Evans 			return (1);
2262c51e5edSBruce Evans 		}
227ebb42aeeSTim J. Robbins 		p = buf;
228ebb42aeeSTim J. Robbins 		while (len > 0) {
229ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
230ebb42aeeSTim J. Robbins 				clen = 1;
231ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
232149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
233149a123bSTim J. Robbins 			    (size_t)-1) {
234ebb42aeeSTim J. Robbins 				if (!warned) {
235ebb42aeeSTim J. Robbins 					errno = EILSEQ;
236ebb42aeeSTim J. Robbins 					warn("%s", file);
237ebb42aeeSTim J. Robbins 					warned = 1;
238ebb42aeeSTim J. Robbins 				}
239149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
240149a123bSTim J. Robbins 				clen = 1;
241149a123bSTim J. Robbins 				wch = (unsigned char)*p;
242149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
243ebb42aeeSTim J. Robbins 				break;
244149a123bSTim J. Robbins 			else if (clen == 0)
245149a123bSTim J. Robbins 				clen = 1;
246ebb42aeeSTim J. Robbins 			charct++;
247f45dd010SGiorgos Keramidas 			if (wch != L'\n')
248f45dd010SGiorgos Keramidas 				tmpll++;
249ebb42aeeSTim J. Robbins 			len -= clen;
250ebb42aeeSTim J. Robbins 			p += clen;
251f45dd010SGiorgos Keramidas 			if (wch == L'\n') {
252f45dd010SGiorgos Keramidas 				if (tmpll > llct)
253f45dd010SGiorgos Keramidas 					llct = tmpll;
254f45dd010SGiorgos Keramidas 				tmpll = 0;
2559b50d902SRodney W. Grimes 				++linect;
256f45dd010SGiorgos Keramidas 			}
257e58245f7STim J. Robbins 			if (iswspace(wch))
2589b50d902SRodney W. Grimes 				gotsp = 1;
2599b50d902SRodney W. Grimes 			else if (gotsp) {
2609b50d902SRodney W. Grimes 				gotsp = 0;
2619b50d902SRodney W. Grimes 				++wordct;
2629b50d902SRodney W. Grimes 			}
2639b50d902SRodney W. Grimes 		}
2649b50d902SRodney W. Grimes 	}
265149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
266149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
267149a123bSTim J. Robbins 			warn("%s", file);
2689b50d902SRodney W. Grimes 	if (doline) {
2699b50d902SRodney W. Grimes 		tlinect += linect;
270a821e36eSMike Barcroft 		(void)printf(" %7ju", linect);
2719b50d902SRodney W. Grimes 	}
2729b50d902SRodney W. Grimes 	if (doword) {
2739b50d902SRodney W. Grimes 		twordct += wordct;
274a821e36eSMike Barcroft 		(void)printf(" %7ju", wordct);
2759b50d902SRodney W. Grimes 	}
276ebb42aeeSTim J. Robbins 	if (dochar || domulti) {
2779b50d902SRodney W. Grimes 		tcharct += charct;
278a821e36eSMike Barcroft 		(void)printf(" %7ju", charct);
2799b50d902SRodney W. Grimes 	}
280f45dd010SGiorgos Keramidas 	if (dolongline) {
281f45dd010SGiorgos Keramidas 		if (llct > tlongline)
282f45dd010SGiorgos Keramidas 			tlongline = llct;
283f45dd010SGiorgos Keramidas 		(void)printf(" %7ju", llct);
284f45dd010SGiorgos Keramidas 	}
2859b50d902SRodney W. Grimes 	(void)close(fd);
2862c51e5edSBruce Evans 	return (0);
2879b50d902SRodney W. Grimes }
2889b50d902SRodney W. Grimes 
/*
 * Write the command synopsis to the standard error stream and terminate
 * the program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
295