xref: /freebsd/usr.bin/wc/wc.c (revision 985c93f0b4c024d32da582b93d06602d3223cf39)
19b50d902SRodney W. Grimes /*
29b50d902SRodney W. Grimes  * Copyright (c) 1980, 1987, 1991, 1993
39b50d902SRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
49b50d902SRodney W. Grimes  *
59b50d902SRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
69b50d902SRodney W. Grimes  * modification, are permitted provided that the following conditions
79b50d902SRodney W. Grimes  * are met:
89b50d902SRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
99b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
109b50d902SRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
119b50d902SRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
129b50d902SRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
139b50d902SRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
149b50d902SRodney W. Grimes  *    may be used to endorse or promote products derived from this software
159b50d902SRodney W. Grimes  *    without specific prior written permission.
169b50d902SRodney W. Grimes  *
179b50d902SRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
189b50d902SRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
199b50d902SRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
209b50d902SRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
219b50d902SRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
229b50d902SRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
239b50d902SRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
249b50d902SRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
259b50d902SRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
269b50d902SRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
279b50d902SRodney W. Grimes  * SUCH DAMAGE.
289b50d902SRodney W. Grimes  */
299b50d902SRodney W. Grimes 
309b50d902SRodney W. Grimes #ifndef lint
3106469209SMike Barcroft static const char copyright[] =
329b50d902SRodney W. Grimes "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
339b50d902SRodney W. Grimes 	The Regents of the University of California.  All rights reserved.\n";
34a821e36eSMike Barcroft #endif /* not lint */
359b50d902SRodney W. Grimes 
3606469209SMike Barcroft #if 0
379b50d902SRodney W. Grimes #ifndef lint
38a821e36eSMike Barcroft static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
39a821e36eSMike Barcroft #endif /* not lint */
402c51e5edSBruce Evans #endif
419b50d902SRodney W. Grimes 
42a821e36eSMike Barcroft #include <sys/cdefs.h>
43a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
44a821e36eSMike Barcroft 
459b50d902SRodney W. Grimes #include <sys/param.h>
469b50d902SRodney W. Grimes #include <sys/stat.h>
472c51e5edSBruce Evans 
482c51e5edSBruce Evans #include <ctype.h>
492c51e5edSBruce Evans #include <err.h>
50ebb42aeeSTim J. Robbins #include <errno.h>
519b50d902SRodney W. Grimes #include <fcntl.h>
52ae6fa8aeSAndrey A. Chernov #include <locale.h>
53a821e36eSMike Barcroft #include <stdint.h>
549b50d902SRodney W. Grimes #include <stdio.h>
559b50d902SRodney W. Grimes #include <stdlib.h>
569b50d902SRodney W. Grimes #include <string.h>
572c51e5edSBruce Evans #include <unistd.h>
58149a123bSTim J. Robbins #include <wchar.h>
59e58245f7STim J. Robbins #include <wctype.h>
606711c482SMarcel Moolenaar #include <libxo/xo.h>
619b50d902SRodney W. Grimes 
628df975a2SEd Schouten static uintmax_t tlinect, twordct, tcharct, tlongline;
638df975a2SEd Schouten static int doline, doword, dochar, domulti, dolongline;
64c9a96406SPawel Jakub Dawidek static volatile sig_atomic_t siginfo;
656711c482SMarcel Moolenaar static xo_handle_t *stderr_handle;
669b50d902SRodney W. Grimes 
/* Forward declarations for the helpers defined after main(). */
static int	cnt(const char *);
static void	show_cnt(const char *file, uintmax_t linect,
		    uintmax_t wordct, uintmax_t charct, uintmax_t llct);
static void	usage(void);
719b50d902SRodney W. Grimes 
72f09bc093SPawel Jakub Dawidek static void
73f09bc093SPawel Jakub Dawidek siginfo_handler(int sig __unused)
74f09bc093SPawel Jakub Dawidek {
75f09bc093SPawel Jakub Dawidek 
76f09bc093SPawel Jakub Dawidek 	siginfo = 1;
77f09bc093SPawel Jakub Dawidek }
78f09bc093SPawel Jakub Dawidek 
799b50d902SRodney W. Grimes int
80806abfccSJosef El-Rayes main(int argc, char *argv[])
819b50d902SRodney W. Grimes {
82a0cf59e6SSheldon Hearn 	int ch, errors, total;
839b50d902SRodney W. Grimes 
84ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
85ae6fa8aeSAndrey A. Chernov 
866711c482SMarcel Moolenaar 	argc = xo_parse_args(argc, argv);
876711c482SMarcel Moolenaar 	if (argc < 0)
886711c482SMarcel Moolenaar 		return (argc);
896711c482SMarcel Moolenaar 
90f45dd010SGiorgos Keramidas 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
919b50d902SRodney W. Grimes 		switch((char)ch) {
929b50d902SRodney W. Grimes 		case 'l':
939b50d902SRodney W. Grimes 			doline = 1;
949b50d902SRodney W. Grimes 			break;
959b50d902SRodney W. Grimes 		case 'w':
969b50d902SRodney W. Grimes 			doword = 1;
979b50d902SRodney W. Grimes 			break;
989b50d902SRodney W. Grimes 		case 'c':
999b50d902SRodney W. Grimes 			dochar = 1;
100ebb42aeeSTim J. Robbins 			domulti = 0;
101ebb42aeeSTim J. Robbins 			break;
102f45dd010SGiorgos Keramidas 		case 'L':
103f45dd010SGiorgos Keramidas 			dolongline = 1;
104f45dd010SGiorgos Keramidas 			break;
105ebb42aeeSTim J. Robbins 		case 'm':
106ebb42aeeSTim J. Robbins 			domulti = 1;
107ebb42aeeSTim J. Robbins 			dochar = 0;
1089b50d902SRodney W. Grimes 			break;
1099b50d902SRodney W. Grimes 		case '?':
1109b50d902SRodney W. Grimes 		default:
1119b50d902SRodney W. Grimes 			usage();
1129b50d902SRodney W. Grimes 		}
1139b50d902SRodney W. Grimes 	argv += optind;
1149b50d902SRodney W. Grimes 	argc -= optind;
1159b50d902SRodney W. Grimes 
116f09bc093SPawel Jakub Dawidek 	(void)signal(SIGINFO, siginfo_handler);
117f09bc093SPawel Jakub Dawidek 
1189b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
119f45dd010SGiorgos Keramidas 	if (doline + doword + dochar + domulti + dolongline == 0)
1209b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1219b50d902SRodney W. Grimes 
1226711c482SMarcel Moolenaar 	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
1236711c482SMarcel Moolenaar 	xo_open_container("wc");
1246711c482SMarcel Moolenaar 	xo_open_list("file");
1256711c482SMarcel Moolenaar 
1262c51e5edSBruce Evans 	errors = 0;
1279b50d902SRodney W. Grimes 	total = 0;
1289b50d902SRodney W. Grimes 	if (!*argv) {
1296711c482SMarcel Moolenaar 	 	xo_open_instance("file");
1302c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1312c51e5edSBruce Evans 			++errors;
1326711c482SMarcel Moolenaar 	 	xo_close_instance("file");
133f09bc093SPawel Jakub Dawidek 	} else {
134f09bc093SPawel Jakub Dawidek 		do {
1356711c482SMarcel Moolenaar 	 		xo_open_instance("file");
1362c51e5edSBruce Evans 			if (cnt(*argv) != 0)
1372c51e5edSBruce Evans 				++errors;
1386711c482SMarcel Moolenaar 	 		xo_close_instance("file");
1399b50d902SRodney W. Grimes 			++total;
1409b50d902SRodney W. Grimes 		} while(*++argv);
1419b50d902SRodney W. Grimes 	}
142f09bc093SPawel Jakub Dawidek 
1436711c482SMarcel Moolenaar 	if (total > 1) {
1446711c482SMarcel Moolenaar 		xo_open_container("total");
145f09bc093SPawel Jakub Dawidek 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
1466711c482SMarcel Moolenaar 		xo_close_container("total");
1476711c482SMarcel Moolenaar 	}
1486711c482SMarcel Moolenaar 	xo_close_list("file");
1496711c482SMarcel Moolenaar 	xo_close_container("wc");
1506711c482SMarcel Moolenaar 	xo_finish();
1512c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1529b50d902SRodney W. Grimes }
1539b50d902SRodney W. Grimes 
154f09bc093SPawel Jakub Dawidek static void
155f09bc093SPawel Jakub Dawidek show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
156f09bc093SPawel Jakub Dawidek     uintmax_t charct, uintmax_t llct)
157f09bc093SPawel Jakub Dawidek {
1586711c482SMarcel Moolenaar 	xo_handle_t *xop;
159f09bc093SPawel Jakub Dawidek 
160f09bc093SPawel Jakub Dawidek 	if (!siginfo)
1616711c482SMarcel Moolenaar 		xop = NULL;
162f09bc093SPawel Jakub Dawidek 	else {
1636711c482SMarcel Moolenaar 		xop = stderr_handle;
164f09bc093SPawel Jakub Dawidek 		siginfo = 0;
165f09bc093SPawel Jakub Dawidek 	}
166f09bc093SPawel Jakub Dawidek 
167f09bc093SPawel Jakub Dawidek 	if (doline)
1686711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
169f09bc093SPawel Jakub Dawidek 	if (doword)
1706711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
171f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
1726711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
173f09bc093SPawel Jakub Dawidek 	if (dolongline)
1746711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
175f09bc093SPawel Jakub Dawidek 	if (file != NULL)
176*985c93f0SMarcel Moolenaar 		xo_emit_h(xop, " {:filename/%s}\n", file);
177f09bc093SPawel Jakub Dawidek 	else
1786711c482SMarcel Moolenaar 		xo_emit_h(xop, "\n");
179f09bc093SPawel Jakub Dawidek }
180f09bc093SPawel Jakub Dawidek 
181a821e36eSMike Barcroft static int
182806abfccSJosef El-Rayes cnt(const char *file)
1839b50d902SRodney W. Grimes {
1849b50d902SRodney W. Grimes 	struct stat sb;
185f45dd010SGiorgos Keramidas 	uintmax_t linect, wordct, charct, llct, tmpll;
186149a123bSTim J. Robbins 	int fd, len, warned;
187149a123bSTim J. Robbins 	size_t clen;
188a0cf59e6SSheldon Hearn 	short gotsp;
189a0cf59e6SSheldon Hearn 	u_char *p;
190abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
191ebb42aeeSTim J. Robbins 	wchar_t wch;
192149a123bSTim J. Robbins 	mbstate_t mbs;
1939b50d902SRodney W. Grimes 
194f45dd010SGiorgos Keramidas 	linect = wordct = charct = llct = tmpll = 0;
195f09bc093SPawel Jakub Dawidek 	if (file == NULL)
1962c51e5edSBruce Evans 		fd = STDIN_FILENO;
197f09bc093SPawel Jakub Dawidek 	else {
198a0d038a4SWolfram Schneider 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
1996711c482SMarcel Moolenaar 			xo_warn("%s: open", file);
2002c51e5edSBruce Evans 			return (1);
201a0d038a4SWolfram Schneider 		}
202ebb42aeeSTim J. Robbins 		if (doword || (domulti && MB_CUR_MAX != 1))
2039b50d902SRodney W. Grimes 			goto word;
2049b50d902SRodney W. Grimes 		/*
2059b50d902SRodney W. Grimes 		 * Line counting is split out because it's a lot faster to get
2069b50d902SRodney W. Grimes 		 * lines than to get words, since the word count requires some
2079b50d902SRodney W. Grimes 		 * logic.
2089b50d902SRodney W. Grimes 		 */
2099b50d902SRodney W. Grimes 		if (doline) {
2108c85cce7SPhilippe Charnier 			while ((len = read(fd, buf, MAXBSIZE))) {
2112c51e5edSBruce Evans 				if (len == -1) {
2126711c482SMarcel Moolenaar 					xo_warn("%s: read", file);
2132c51e5edSBruce Evans 					(void)close(fd);
2142c51e5edSBruce Evans 					return (1);
2152c51e5edSBruce Evans 				}
216f09bc093SPawel Jakub Dawidek 				if (siginfo) {
217f09bc093SPawel Jakub Dawidek 					show_cnt(file, linect, wordct, charct,
218f09bc093SPawel Jakub Dawidek 					    llct);
219f09bc093SPawel Jakub Dawidek 				}
2209b50d902SRodney W. Grimes 				charct += len;
2219b50d902SRodney W. Grimes 				for (p = buf; len--; ++p)
222f45dd010SGiorgos Keramidas 					if (*p == '\n') {
223f45dd010SGiorgos Keramidas 						if (tmpll > llct)
224f45dd010SGiorgos Keramidas 							llct = tmpll;
225f45dd010SGiorgos Keramidas 						tmpll = 0;
2269b50d902SRodney W. Grimes 						++linect;
227f45dd010SGiorgos Keramidas 					} else
228f45dd010SGiorgos Keramidas 						tmpll++;
2299b50d902SRodney W. Grimes 			}
2309b50d902SRodney W. Grimes 			tlinect += linect;
231f09bc093SPawel Jakub Dawidek 			if (dochar)
2329b50d902SRodney W. Grimes 				tcharct += charct;
233f45dd010SGiorgos Keramidas 			if (dolongline) {
234f45dd010SGiorgos Keramidas 				if (llct > tlongline)
235f45dd010SGiorgos Keramidas 					tlongline = llct;
236f45dd010SGiorgos Keramidas 			}
237f09bc093SPawel Jakub Dawidek 			show_cnt(file, linect, wordct, charct, llct);
2389b50d902SRodney W. Grimes 			(void)close(fd);
2392c51e5edSBruce Evans 			return (0);
2409b50d902SRodney W. Grimes 		}
2419b50d902SRodney W. Grimes 		/*
2429b50d902SRodney W. Grimes 		 * If all we need is the number of characters and it's a
243a1a27143STim J. Robbins 		 * regular file, just stat the puppy.
2449b50d902SRodney W. Grimes 		 */
245ebb42aeeSTim J. Robbins 		if (dochar || domulti) {
2462c51e5edSBruce Evans 			if (fstat(fd, &sb)) {
2476711c482SMarcel Moolenaar 				xo_warn("%s: fstat", file);
2482c51e5edSBruce Evans 				(void)close(fd);
2492c51e5edSBruce Evans 				return (1);
2502c51e5edSBruce Evans 			}
251a1a27143STim J. Robbins 			if (S_ISREG(sb.st_mode)) {
252f09bc093SPawel Jakub Dawidek 				charct = sb.st_size;
253f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
254f09bc093SPawel Jakub Dawidek 				tcharct += charct;
2559b50d902SRodney W. Grimes 				(void)close(fd);
2562c51e5edSBruce Evans 				return (0);
2579b50d902SRodney W. Grimes 			}
2589b50d902SRodney W. Grimes 		}
2599b50d902SRodney W. Grimes 	}
2609b50d902SRodney W. Grimes 
2619b50d902SRodney W. Grimes 	/* Do it the hard way... */
262ebb42aeeSTim J. Robbins word:	gotsp = 1;
263ebb42aeeSTim J. Robbins 	warned = 0;
264149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
265149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
266149a123bSTim J. Robbins 		if (len == -1) {
2676711c482SMarcel Moolenaar 			xo_warn("%s: read", file != NULL ? file : "stdin");
2682c51e5edSBruce Evans 			(void)close(fd);
2692c51e5edSBruce Evans 			return (1);
2702c51e5edSBruce Evans 		}
271ebb42aeeSTim J. Robbins 		p = buf;
272ebb42aeeSTim J. Robbins 		while (len > 0) {
273f09bc093SPawel Jakub Dawidek 			if (siginfo)
274f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
275ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
276ebb42aeeSTim J. Robbins 				clen = 1;
277ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
278149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
279149a123bSTim J. Robbins 			    (size_t)-1) {
280ebb42aeeSTim J. Robbins 				if (!warned) {
281ebb42aeeSTim J. Robbins 					errno = EILSEQ;
2826711c482SMarcel Moolenaar 					xo_warn("%s",
283f09bc093SPawel Jakub Dawidek 					    file != NULL ? file : "stdin");
284ebb42aeeSTim J. Robbins 					warned = 1;
285ebb42aeeSTim J. Robbins 				}
286149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
287149a123bSTim J. Robbins 				clen = 1;
288149a123bSTim J. Robbins 				wch = (unsigned char)*p;
289149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
290ebb42aeeSTim J. Robbins 				break;
291149a123bSTim J. Robbins 			else if (clen == 0)
292149a123bSTim J. Robbins 				clen = 1;
293ebb42aeeSTim J. Robbins 			charct++;
294f45dd010SGiorgos Keramidas 			if (wch != L'\n')
295f45dd010SGiorgos Keramidas 				tmpll++;
296ebb42aeeSTim J. Robbins 			len -= clen;
297ebb42aeeSTim J. Robbins 			p += clen;
298f45dd010SGiorgos Keramidas 			if (wch == L'\n') {
299f45dd010SGiorgos Keramidas 				if (tmpll > llct)
300f45dd010SGiorgos Keramidas 					llct = tmpll;
301f45dd010SGiorgos Keramidas 				tmpll = 0;
3029b50d902SRodney W. Grimes 				++linect;
303f45dd010SGiorgos Keramidas 			}
304e58245f7STim J. Robbins 			if (iswspace(wch))
3059b50d902SRodney W. Grimes 				gotsp = 1;
3069b50d902SRodney W. Grimes 			else if (gotsp) {
3079b50d902SRodney W. Grimes 				gotsp = 0;
3089b50d902SRodney W. Grimes 				++wordct;
3099b50d902SRodney W. Grimes 			}
3109b50d902SRodney W. Grimes 		}
3119b50d902SRodney W. Grimes 	}
312149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
313149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
3146711c482SMarcel Moolenaar 			xo_warn("%s", file != NULL ? file : "stdin");
315f09bc093SPawel Jakub Dawidek 	if (doline)
3169b50d902SRodney W. Grimes 		tlinect += linect;
317f09bc093SPawel Jakub Dawidek 	if (doword)
3189b50d902SRodney W. Grimes 		twordct += wordct;
319f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
3209b50d902SRodney W. Grimes 		tcharct += charct;
321f45dd010SGiorgos Keramidas 	if (dolongline) {
322f45dd010SGiorgos Keramidas 		if (llct > tlongline)
323f45dd010SGiorgos Keramidas 			tlongline = llct;
324f45dd010SGiorgos Keramidas 	}
325f09bc093SPawel Jakub Dawidek 	show_cnt(file, linect, wordct, charct, llct);
3269b50d902SRodney W. Grimes 	(void)close(fd);
3272c51e5edSBruce Evans 	return (0);
3289b50d902SRodney W. Grimes }
3299b50d902SRodney W. Grimes 
/* Report the command synopsis through libxo and exit with status 1. */
static void
usage(void)
{
	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(1);
}
336