xref: /freebsd/usr.bin/wc/wc.c (revision 399d34850fe953934b6516ac78dfc3c7aed1e10f)
19b50d902SRodney W. Grimes /*
29b50d902SRodney W. Grimes  * Copyright (c) 1980, 1987, 1991, 1993
39b50d902SRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
49b50d902SRodney W. Grimes  *
59b50d902SRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
69b50d902SRodney W. Grimes  * modification, are permitted provided that the following conditions
79b50d902SRodney W. Grimes  * are met:
89b50d902SRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
99b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
109b50d902SRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
119b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
129b50d902SRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
139b50d902SRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
149b50d902SRodney W. Grimes  *    may be used to endorse or promote products derived from this software
159b50d902SRodney W. Grimes  *    without specific prior written permission.
169b50d902SRodney W. Grimes  *
179b50d902SRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
189b50d902SRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
199b50d902SRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
209b50d902SRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
219b50d902SRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
229b50d902SRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
239b50d902SRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
249b50d902SRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
259b50d902SRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
269b50d902SRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
279b50d902SRodney W. Grimes  * SUCH DAMAGE.
289b50d902SRodney W. Grimes  */
299b50d902SRodney W. Grimes 
309b50d902SRodney W. Grimes #ifndef lint
3106469209SMike Barcroft static const char copyright[] =
329b50d902SRodney W. Grimes "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
339b50d902SRodney W. Grimes 	The Regents of the University of California.  All rights reserved.\n";
34a821e36eSMike Barcroft #endif /* not lint */
359b50d902SRodney W. Grimes 
3606469209SMike Barcroft #if 0
379b50d902SRodney W. Grimes #ifndef lint
38a821e36eSMike Barcroft static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
39a821e36eSMike Barcroft #endif /* not lint */
402c51e5edSBruce Evans #endif
419b50d902SRodney W. Grimes 
42a821e36eSMike Barcroft #include <sys/cdefs.h>
43a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
44a821e36eSMike Barcroft 
459b50d902SRodney W. Grimes #include <sys/param.h>
469b50d902SRodney W. Grimes #include <sys/stat.h>
472c51e5edSBruce Evans 
482c51e5edSBruce Evans #include <ctype.h>
492c51e5edSBruce Evans #include <err.h>
50ebb42aeeSTim J. Robbins #include <errno.h>
519b50d902SRodney W. Grimes #include <fcntl.h>
52ae6fa8aeSAndrey A. Chernov #include <locale.h>
53a821e36eSMike Barcroft #include <stdint.h>
549b50d902SRodney W. Grimes #include <stdio.h>
559b50d902SRodney W. Grimes #include <stdlib.h>
569b50d902SRodney W. Grimes #include <string.h>
572c51e5edSBruce Evans #include <unistd.h>
58149a123bSTim J. Robbins #include <wchar.h>
59e58245f7STim J. Robbins #include <wctype.h>
606711c482SMarcel Moolenaar #include <libxo/xo.h>
619b50d902SRodney W. Grimes 
628df975a2SEd Schouten static uintmax_t tlinect, twordct, tcharct, tlongline;
638df975a2SEd Schouten static int doline, doword, dochar, domulti, dolongline;
64c9a96406SPawel Jakub Dawidek static volatile sig_atomic_t siginfo;
656711c482SMarcel Moolenaar static xo_handle_t *stderr_handle;
669b50d902SRodney W. Grimes 
67f09bc093SPawel Jakub Dawidek static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
68f09bc093SPawel Jakub Dawidek 		    uintmax_t charct, uintmax_t llct);
693f330d7dSWarner Losh static int	cnt(const char *);
703f330d7dSWarner Losh static void	usage(void);
719b50d902SRodney W. Grimes 
72f09bc093SPawel Jakub Dawidek static void
73f09bc093SPawel Jakub Dawidek siginfo_handler(int sig __unused)
74f09bc093SPawel Jakub Dawidek {
75f09bc093SPawel Jakub Dawidek 
76f09bc093SPawel Jakub Dawidek 	siginfo = 1;
77f09bc093SPawel Jakub Dawidek }
78f09bc093SPawel Jakub Dawidek 
799b50d902SRodney W. Grimes int
80806abfccSJosef El-Rayes main(int argc, char *argv[])
819b50d902SRodney W. Grimes {
82a0cf59e6SSheldon Hearn 	int ch, errors, total;
839b50d902SRodney W. Grimes 
84ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
85ae6fa8aeSAndrey A. Chernov 
866711c482SMarcel Moolenaar 	argc = xo_parse_args(argc, argv);
876711c482SMarcel Moolenaar 	if (argc < 0)
886711c482SMarcel Moolenaar 		return (argc);
896711c482SMarcel Moolenaar 
90f45dd010SGiorgos Keramidas 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
919b50d902SRodney W. Grimes 		switch((char)ch) {
929b50d902SRodney W. Grimes 		case 'l':
939b50d902SRodney W. Grimes 			doline = 1;
949b50d902SRodney W. Grimes 			break;
959b50d902SRodney W. Grimes 		case 'w':
969b50d902SRodney W. Grimes 			doword = 1;
979b50d902SRodney W. Grimes 			break;
989b50d902SRodney W. Grimes 		case 'c':
999b50d902SRodney W. Grimes 			dochar = 1;
100ebb42aeeSTim J. Robbins 			domulti = 0;
101ebb42aeeSTim J. Robbins 			break;
102f45dd010SGiorgos Keramidas 		case 'L':
103f45dd010SGiorgos Keramidas 			dolongline = 1;
104f45dd010SGiorgos Keramidas 			break;
105ebb42aeeSTim J. Robbins 		case 'm':
106ebb42aeeSTim J. Robbins 			domulti = 1;
107ebb42aeeSTim J. Robbins 			dochar = 0;
1089b50d902SRodney W. Grimes 			break;
1099b50d902SRodney W. Grimes 		case '?':
1109b50d902SRodney W. Grimes 		default:
1119b50d902SRodney W. Grimes 			usage();
1129b50d902SRodney W. Grimes 		}
1139b50d902SRodney W. Grimes 	argv += optind;
1149b50d902SRodney W. Grimes 	argc -= optind;
1159b50d902SRodney W. Grimes 
116f09bc093SPawel Jakub Dawidek 	(void)signal(SIGINFO, siginfo_handler);
117f09bc093SPawel Jakub Dawidek 
1189b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
119f45dd010SGiorgos Keramidas 	if (doline + doword + dochar + domulti + dolongline == 0)
1209b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1219b50d902SRodney W. Grimes 
1226711c482SMarcel Moolenaar 	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
1236711c482SMarcel Moolenaar 	xo_open_container("wc");
1246711c482SMarcel Moolenaar 	xo_open_list("file");
1256711c482SMarcel Moolenaar 
1262c51e5edSBruce Evans 	errors = 0;
1279b50d902SRodney W. Grimes 	total = 0;
1289b50d902SRodney W. Grimes 	if (!*argv) {
1296711c482SMarcel Moolenaar 	 	xo_open_instance("file");
1302c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1312c51e5edSBruce Evans 			++errors;
1326711c482SMarcel Moolenaar 	 	xo_close_instance("file");
133f09bc093SPawel Jakub Dawidek 	} else {
134f09bc093SPawel Jakub Dawidek 		do {
1356711c482SMarcel Moolenaar 	 		xo_open_instance("file");
1362c51e5edSBruce Evans 			if (cnt(*argv) != 0)
1372c51e5edSBruce Evans 				++errors;
1386711c482SMarcel Moolenaar 	 		xo_close_instance("file");
1399b50d902SRodney W. Grimes 			++total;
1409b50d902SRodney W. Grimes 		} while(*++argv);
1419b50d902SRodney W. Grimes 	}
142f09bc093SPawel Jakub Dawidek 
143*399d3485SMarcel Moolenaar 	xo_close_list("file");
144*399d3485SMarcel Moolenaar 
1456711c482SMarcel Moolenaar 	if (total > 1) {
1466711c482SMarcel Moolenaar 		xo_open_container("total");
147f09bc093SPawel Jakub Dawidek 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
1486711c482SMarcel Moolenaar 		xo_close_container("total");
1496711c482SMarcel Moolenaar 	}
150*399d3485SMarcel Moolenaar 
1516711c482SMarcel Moolenaar 	xo_close_container("wc");
1526711c482SMarcel Moolenaar 	xo_finish();
1532c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1549b50d902SRodney W. Grimes }
1559b50d902SRodney W. Grimes 
156f09bc093SPawel Jakub Dawidek static void
157f09bc093SPawel Jakub Dawidek show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
158f09bc093SPawel Jakub Dawidek     uintmax_t charct, uintmax_t llct)
159f09bc093SPawel Jakub Dawidek {
1606711c482SMarcel Moolenaar 	xo_handle_t *xop;
161f09bc093SPawel Jakub Dawidek 
162f09bc093SPawel Jakub Dawidek 	if (!siginfo)
1636711c482SMarcel Moolenaar 		xop = NULL;
164f09bc093SPawel Jakub Dawidek 	else {
1656711c482SMarcel Moolenaar 		xop = stderr_handle;
166f09bc093SPawel Jakub Dawidek 		siginfo = 0;
167f09bc093SPawel Jakub Dawidek 	}
168f09bc093SPawel Jakub Dawidek 
169f09bc093SPawel Jakub Dawidek 	if (doline)
1706711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
171f09bc093SPawel Jakub Dawidek 	if (doword)
1726711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
173f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
1746711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
175f09bc093SPawel Jakub Dawidek 	if (dolongline)
1766711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
177f09bc093SPawel Jakub Dawidek 	if (file != NULL)
178985c93f0SMarcel Moolenaar 		xo_emit_h(xop, " {:filename/%s}\n", file);
179f09bc093SPawel Jakub Dawidek 	else
1806711c482SMarcel Moolenaar 		xo_emit_h(xop, "\n");
181f09bc093SPawel Jakub Dawidek }
182f09bc093SPawel Jakub Dawidek 
183a821e36eSMike Barcroft static int
184806abfccSJosef El-Rayes cnt(const char *file)
1859b50d902SRodney W. Grimes {
1869b50d902SRodney W. Grimes 	struct stat sb;
187f45dd010SGiorgos Keramidas 	uintmax_t linect, wordct, charct, llct, tmpll;
188149a123bSTim J. Robbins 	int fd, len, warned;
189149a123bSTim J. Robbins 	size_t clen;
190a0cf59e6SSheldon Hearn 	short gotsp;
191a0cf59e6SSheldon Hearn 	u_char *p;
192abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
193ebb42aeeSTim J. Robbins 	wchar_t wch;
194149a123bSTim J. Robbins 	mbstate_t mbs;
1959b50d902SRodney W. Grimes 
196f45dd010SGiorgos Keramidas 	linect = wordct = charct = llct = tmpll = 0;
197f09bc093SPawel Jakub Dawidek 	if (file == NULL)
1982c51e5edSBruce Evans 		fd = STDIN_FILENO;
199f09bc093SPawel Jakub Dawidek 	else {
200a0d038a4SWolfram Schneider 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
2016711c482SMarcel Moolenaar 			xo_warn("%s: open", file);
2022c51e5edSBruce Evans 			return (1);
203a0d038a4SWolfram Schneider 		}
204ebb42aeeSTim J. Robbins 		if (doword || (domulti && MB_CUR_MAX != 1))
2059b50d902SRodney W. Grimes 			goto word;
2069b50d902SRodney W. Grimes 		/*
2079b50d902SRodney W. Grimes 		 * Line counting is split out because it's a lot faster to get
2089b50d902SRodney W. Grimes 		 * lines than to get words, since the word count requires some
2099b50d902SRodney W. Grimes 		 * logic.
2109b50d902SRodney W. Grimes 		 */
2119b50d902SRodney W. Grimes 		if (doline) {
2128c85cce7SPhilippe Charnier 			while ((len = read(fd, buf, MAXBSIZE))) {
2132c51e5edSBruce Evans 				if (len == -1) {
2146711c482SMarcel Moolenaar 					xo_warn("%s: read", file);
2152c51e5edSBruce Evans 					(void)close(fd);
2162c51e5edSBruce Evans 					return (1);
2172c51e5edSBruce Evans 				}
218f09bc093SPawel Jakub Dawidek 				if (siginfo) {
219f09bc093SPawel Jakub Dawidek 					show_cnt(file, linect, wordct, charct,
220f09bc093SPawel Jakub Dawidek 					    llct);
221f09bc093SPawel Jakub Dawidek 				}
2229b50d902SRodney W. Grimes 				charct += len;
2239b50d902SRodney W. Grimes 				for (p = buf; len--; ++p)
224f45dd010SGiorgos Keramidas 					if (*p == '\n') {
225f45dd010SGiorgos Keramidas 						if (tmpll > llct)
226f45dd010SGiorgos Keramidas 							llct = tmpll;
227f45dd010SGiorgos Keramidas 						tmpll = 0;
2289b50d902SRodney W. Grimes 						++linect;
229f45dd010SGiorgos Keramidas 					} else
230f45dd010SGiorgos Keramidas 						tmpll++;
2319b50d902SRodney W. Grimes 			}
2329b50d902SRodney W. Grimes 			tlinect += linect;
233f09bc093SPawel Jakub Dawidek 			if (dochar)
2349b50d902SRodney W. Grimes 				tcharct += charct;
235f45dd010SGiorgos Keramidas 			if (dolongline) {
236f45dd010SGiorgos Keramidas 				if (llct > tlongline)
237f45dd010SGiorgos Keramidas 					tlongline = llct;
238f45dd010SGiorgos Keramidas 			}
239f09bc093SPawel Jakub Dawidek 			show_cnt(file, linect, wordct, charct, llct);
2409b50d902SRodney W. Grimes 			(void)close(fd);
2412c51e5edSBruce Evans 			return (0);
2429b50d902SRodney W. Grimes 		}
2439b50d902SRodney W. Grimes 		/*
2449b50d902SRodney W. Grimes 		 * If all we need is the number of characters and it's a
245a1a27143STim J. Robbins 		 * regular file, just stat the puppy.
2469b50d902SRodney W. Grimes 		 */
247ebb42aeeSTim J. Robbins 		if (dochar || domulti) {
2482c51e5edSBruce Evans 			if (fstat(fd, &sb)) {
2496711c482SMarcel Moolenaar 				xo_warn("%s: fstat", file);
2502c51e5edSBruce Evans 				(void)close(fd);
2512c51e5edSBruce Evans 				return (1);
2522c51e5edSBruce Evans 			}
253a1a27143STim J. Robbins 			if (S_ISREG(sb.st_mode)) {
254f09bc093SPawel Jakub Dawidek 				charct = sb.st_size;
255f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
256f09bc093SPawel Jakub Dawidek 				tcharct += charct;
2579b50d902SRodney W. Grimes 				(void)close(fd);
2582c51e5edSBruce Evans 				return (0);
2599b50d902SRodney W. Grimes 			}
2609b50d902SRodney W. Grimes 		}
2619b50d902SRodney W. Grimes 	}
2629b50d902SRodney W. Grimes 
2639b50d902SRodney W. Grimes 	/* Do it the hard way... */
264ebb42aeeSTim J. Robbins word:	gotsp = 1;
265ebb42aeeSTim J. Robbins 	warned = 0;
266149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
267149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
268149a123bSTim J. Robbins 		if (len == -1) {
2696711c482SMarcel Moolenaar 			xo_warn("%s: read", file != NULL ? file : "stdin");
2702c51e5edSBruce Evans 			(void)close(fd);
2712c51e5edSBruce Evans 			return (1);
2722c51e5edSBruce Evans 		}
273ebb42aeeSTim J. Robbins 		p = buf;
274ebb42aeeSTim J. Robbins 		while (len > 0) {
275f09bc093SPawel Jakub Dawidek 			if (siginfo)
276f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
277ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
278ebb42aeeSTim J. Robbins 				clen = 1;
279ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
280149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
281149a123bSTim J. Robbins 			    (size_t)-1) {
282ebb42aeeSTim J. Robbins 				if (!warned) {
283ebb42aeeSTim J. Robbins 					errno = EILSEQ;
2846711c482SMarcel Moolenaar 					xo_warn("%s",
285f09bc093SPawel Jakub Dawidek 					    file != NULL ? file : "stdin");
286ebb42aeeSTim J. Robbins 					warned = 1;
287ebb42aeeSTim J. Robbins 				}
288149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
289149a123bSTim J. Robbins 				clen = 1;
290149a123bSTim J. Robbins 				wch = (unsigned char)*p;
291149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
292ebb42aeeSTim J. Robbins 				break;
293149a123bSTim J. Robbins 			else if (clen == 0)
294149a123bSTim J. Robbins 				clen = 1;
295ebb42aeeSTim J. Robbins 			charct++;
296f45dd010SGiorgos Keramidas 			if (wch != L'\n')
297f45dd010SGiorgos Keramidas 				tmpll++;
298ebb42aeeSTim J. Robbins 			len -= clen;
299ebb42aeeSTim J. Robbins 			p += clen;
300f45dd010SGiorgos Keramidas 			if (wch == L'\n') {
301f45dd010SGiorgos Keramidas 				if (tmpll > llct)
302f45dd010SGiorgos Keramidas 					llct = tmpll;
303f45dd010SGiorgos Keramidas 				tmpll = 0;
3049b50d902SRodney W. Grimes 				++linect;
305f45dd010SGiorgos Keramidas 			}
306e58245f7STim J. Robbins 			if (iswspace(wch))
3079b50d902SRodney W. Grimes 				gotsp = 1;
3089b50d902SRodney W. Grimes 			else if (gotsp) {
3099b50d902SRodney W. Grimes 				gotsp = 0;
3109b50d902SRodney W. Grimes 				++wordct;
3119b50d902SRodney W. Grimes 			}
3129b50d902SRodney W. Grimes 		}
3139b50d902SRodney W. Grimes 	}
314149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
315149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
3166711c482SMarcel Moolenaar 			xo_warn("%s", file != NULL ? file : "stdin");
317f09bc093SPawel Jakub Dawidek 	if (doline)
3189b50d902SRodney W. Grimes 		tlinect += linect;
319f09bc093SPawel Jakub Dawidek 	if (doword)
3209b50d902SRodney W. Grimes 		twordct += wordct;
321f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
3229b50d902SRodney W. Grimes 		tcharct += charct;
323f45dd010SGiorgos Keramidas 	if (dolongline) {
324f45dd010SGiorgos Keramidas 		if (llct > tlongline)
325f45dd010SGiorgos Keramidas 			tlongline = llct;
326f45dd010SGiorgos Keramidas 	}
327f09bc093SPawel Jakub Dawidek 	show_cnt(file, linect, wordct, charct, llct);
3289b50d902SRodney W. Grimes 	(void)close(fd);
3292c51e5edSBruce Evans 	return (0);
3309b50d902SRodney W. Grimes }
3319b50d902SRodney W. Grimes 
/*
 * Report the command synopsis through xo_error() and terminate the
 * process with exit status 1.  Does not return.
 */
static void
usage(void)
{

	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(1);
}
338