xref: /freebsd/usr.bin/wc/wc.c (revision 84b851c23fecf26c88f1407b8a06282785c6bffd)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1980, 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
319b50d902SRodney W. Grimes 
329b50d902SRodney W. Grimes #ifndef lint
3306469209SMike Barcroft static const char copyright[] =
349b50d902SRodney W. Grimes "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
359b50d902SRodney W. Grimes 	The Regents of the University of California.  All rights reserved.\n";
36a821e36eSMike Barcroft #endif /* not lint */
379b50d902SRodney W. Grimes 
3806469209SMike Barcroft #if 0
399b50d902SRodney W. Grimes #ifndef lint
40a821e36eSMike Barcroft static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
41a821e36eSMike Barcroft #endif /* not lint */
422c51e5edSBruce Evans #endif
439b50d902SRodney W. Grimes 
44a821e36eSMike Barcroft #include <sys/cdefs.h>
45a821e36eSMike Barcroft __FBSDID("$FreeBSD$");
46a821e36eSMike Barcroft 
479b50d902SRodney W. Grimes #include <sys/param.h>
489b50d902SRodney W. Grimes #include <sys/stat.h>
492c51e5edSBruce Evans 
502c51e5edSBruce Evans #include <ctype.h>
512c51e5edSBruce Evans #include <err.h>
52ebb42aeeSTim J. Robbins #include <errno.h>
539b50d902SRodney W. Grimes #include <fcntl.h>
54ae6fa8aeSAndrey A. Chernov #include <locale.h>
55a821e36eSMike Barcroft #include <stdint.h>
569b50d902SRodney W. Grimes #include <stdio.h>
579b50d902SRodney W. Grimes #include <stdlib.h>
589b50d902SRodney W. Grimes #include <string.h>
592c51e5edSBruce Evans #include <unistd.h>
60149a123bSTim J. Robbins #include <wchar.h>
61e58245f7STim J. Robbins #include <wctype.h>
626711c482SMarcel Moolenaar #include <libxo/xo.h>
639b50d902SRodney W. Grimes 
648df975a2SEd Schouten static uintmax_t tlinect, twordct, tcharct, tlongline;
658df975a2SEd Schouten static int doline, doword, dochar, domulti, dolongline;
66c9a96406SPawel Jakub Dawidek static volatile sig_atomic_t siginfo;
676711c482SMarcel Moolenaar static xo_handle_t *stderr_handle;
689b50d902SRodney W. Grimes 
69f09bc093SPawel Jakub Dawidek static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
70f09bc093SPawel Jakub Dawidek 		    uintmax_t charct, uintmax_t llct);
713f330d7dSWarner Losh static int	cnt(const char *);
723f330d7dSWarner Losh static void	usage(void);
739b50d902SRodney W. Grimes 
74f09bc093SPawel Jakub Dawidek static void
75f09bc093SPawel Jakub Dawidek siginfo_handler(int sig __unused)
76f09bc093SPawel Jakub Dawidek {
77f09bc093SPawel Jakub Dawidek 
78f09bc093SPawel Jakub Dawidek 	siginfo = 1;
79f09bc093SPawel Jakub Dawidek }
80f09bc093SPawel Jakub Dawidek 
818ded906dSBryan Drewery static void
828ded906dSBryan Drewery reset_siginfo(void)
838ded906dSBryan Drewery {
848ded906dSBryan Drewery 
858ded906dSBryan Drewery 	signal(SIGINFO, SIG_DFL);
868ded906dSBryan Drewery 	siginfo = 0;
878ded906dSBryan Drewery }
888ded906dSBryan Drewery 
899b50d902SRodney W. Grimes int
90806abfccSJosef El-Rayes main(int argc, char *argv[])
919b50d902SRodney W. Grimes {
92a0cf59e6SSheldon Hearn 	int ch, errors, total;
939b50d902SRodney W. Grimes 
94ae6fa8aeSAndrey A. Chernov 	(void) setlocale(LC_CTYPE, "");
95ae6fa8aeSAndrey A. Chernov 
966711c482SMarcel Moolenaar 	argc = xo_parse_args(argc, argv);
976711c482SMarcel Moolenaar 	if (argc < 0)
986711c482SMarcel Moolenaar 		return (argc);
996711c482SMarcel Moolenaar 
100f45dd010SGiorgos Keramidas 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
1019b50d902SRodney W. Grimes 		switch((char)ch) {
1029b50d902SRodney W. Grimes 		case 'l':
1039b50d902SRodney W. Grimes 			doline = 1;
1049b50d902SRodney W. Grimes 			break;
1059b50d902SRodney W. Grimes 		case 'w':
1069b50d902SRodney W. Grimes 			doword = 1;
1079b50d902SRodney W. Grimes 			break;
1089b50d902SRodney W. Grimes 		case 'c':
1099b50d902SRodney W. Grimes 			dochar = 1;
110ebb42aeeSTim J. Robbins 			domulti = 0;
111ebb42aeeSTim J. Robbins 			break;
112f45dd010SGiorgos Keramidas 		case 'L':
113f45dd010SGiorgos Keramidas 			dolongline = 1;
114f45dd010SGiorgos Keramidas 			break;
115ebb42aeeSTim J. Robbins 		case 'm':
116ebb42aeeSTim J. Robbins 			domulti = 1;
117ebb42aeeSTim J. Robbins 			dochar = 0;
1189b50d902SRodney W. Grimes 			break;
1199b50d902SRodney W. Grimes 		case '?':
1209b50d902SRodney W. Grimes 		default:
1219b50d902SRodney W. Grimes 			usage();
1229b50d902SRodney W. Grimes 		}
1239b50d902SRodney W. Grimes 	argv += optind;
1249b50d902SRodney W. Grimes 	argc -= optind;
1259b50d902SRodney W. Grimes 
126f09bc093SPawel Jakub Dawidek 	(void)signal(SIGINFO, siginfo_handler);
127f09bc093SPawel Jakub Dawidek 
1289b50d902SRodney W. Grimes 	/* Wc's flags are on by default. */
129f45dd010SGiorgos Keramidas 	if (doline + doword + dochar + domulti + dolongline == 0)
1309b50d902SRodney W. Grimes 		doline = doword = dochar = 1;
1319b50d902SRodney W. Grimes 
1326711c482SMarcel Moolenaar 	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
1336711c482SMarcel Moolenaar 	xo_open_container("wc");
1346711c482SMarcel Moolenaar 	xo_open_list("file");
1356711c482SMarcel Moolenaar 
1362c51e5edSBruce Evans 	errors = 0;
1379b50d902SRodney W. Grimes 	total = 0;
1389b50d902SRodney W. Grimes 	if (!*argv) {
1396711c482SMarcel Moolenaar 	 	xo_open_instance("file");
1402c51e5edSBruce Evans 		if (cnt((char *)NULL) != 0)
1412c51e5edSBruce Evans 			++errors;
1426711c482SMarcel Moolenaar 	 	xo_close_instance("file");
143f09bc093SPawel Jakub Dawidek 	} else {
144f09bc093SPawel Jakub Dawidek 		do {
1456711c482SMarcel Moolenaar 	 		xo_open_instance("file");
1462c51e5edSBruce Evans 			if (cnt(*argv) != 0)
1472c51e5edSBruce Evans 				++errors;
1486711c482SMarcel Moolenaar 	 		xo_close_instance("file");
1499b50d902SRodney W. Grimes 			++total;
1509b50d902SRodney W. Grimes 		} while(*++argv);
1519b50d902SRodney W. Grimes 	}
152f09bc093SPawel Jakub Dawidek 
153399d3485SMarcel Moolenaar 	xo_close_list("file");
154399d3485SMarcel Moolenaar 
1556711c482SMarcel Moolenaar 	if (total > 1) {
1566711c482SMarcel Moolenaar 		xo_open_container("total");
157f09bc093SPawel Jakub Dawidek 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
1586711c482SMarcel Moolenaar 		xo_close_container("total");
1596711c482SMarcel Moolenaar 	}
160399d3485SMarcel Moolenaar 
1616711c482SMarcel Moolenaar 	xo_close_container("wc");
1626711c482SMarcel Moolenaar 	xo_finish();
1632c51e5edSBruce Evans 	exit(errors == 0 ? 0 : 1);
1649b50d902SRodney W. Grimes }
1659b50d902SRodney W. Grimes 
166f09bc093SPawel Jakub Dawidek static void
167f09bc093SPawel Jakub Dawidek show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
168f09bc093SPawel Jakub Dawidek     uintmax_t charct, uintmax_t llct)
169f09bc093SPawel Jakub Dawidek {
1706711c482SMarcel Moolenaar 	xo_handle_t *xop;
171f09bc093SPawel Jakub Dawidek 
172f09bc093SPawel Jakub Dawidek 	if (!siginfo)
1736711c482SMarcel Moolenaar 		xop = NULL;
174f09bc093SPawel Jakub Dawidek 	else {
1756711c482SMarcel Moolenaar 		xop = stderr_handle;
176f09bc093SPawel Jakub Dawidek 		siginfo = 0;
177f09bc093SPawel Jakub Dawidek 	}
178f09bc093SPawel Jakub Dawidek 
179f09bc093SPawel Jakub Dawidek 	if (doline)
1806711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
181f09bc093SPawel Jakub Dawidek 	if (doword)
1826711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
183f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
1846711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
185f09bc093SPawel Jakub Dawidek 	if (dolongline)
1866711c482SMarcel Moolenaar 		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
187f09bc093SPawel Jakub Dawidek 	if (file != NULL)
188985c93f0SMarcel Moolenaar 		xo_emit_h(xop, " {:filename/%s}\n", file);
189f09bc093SPawel Jakub Dawidek 	else
1906711c482SMarcel Moolenaar 		xo_emit_h(xop, "\n");
191f09bc093SPawel Jakub Dawidek }
192f09bc093SPawel Jakub Dawidek 
193a821e36eSMike Barcroft static int
194806abfccSJosef El-Rayes cnt(const char *file)
1959b50d902SRodney W. Grimes {
1969b50d902SRodney W. Grimes 	struct stat sb;
197f45dd010SGiorgos Keramidas 	uintmax_t linect, wordct, charct, llct, tmpll;
198149a123bSTim J. Robbins 	int fd, len, warned;
199149a123bSTim J. Robbins 	size_t clen;
200a0cf59e6SSheldon Hearn 	short gotsp;
201a0cf59e6SSheldon Hearn 	u_char *p;
202abd0c85dSTim J. Robbins 	u_char buf[MAXBSIZE];
203ebb42aeeSTim J. Robbins 	wchar_t wch;
204149a123bSTim J. Robbins 	mbstate_t mbs;
2059b50d902SRodney W. Grimes 
206f45dd010SGiorgos Keramidas 	linect = wordct = charct = llct = tmpll = 0;
207f09bc093SPawel Jakub Dawidek 	if (file == NULL)
2082c51e5edSBruce Evans 		fd = STDIN_FILENO;
209de143041SConrad Meyer 	else if ((fd = open(file, O_RDONLY, 0)) < 0) {
2106711c482SMarcel Moolenaar 		xo_warn("%s: open", file);
2112c51e5edSBruce Evans 		return (1);
212a0d038a4SWolfram Schneider 	}
213ebb42aeeSTim J. Robbins 	if (doword || (domulti && MB_CUR_MAX != 1))
2149b50d902SRodney W. Grimes 		goto word;
2159b50d902SRodney W. Grimes 	/*
2160dc7c9e6SConrad Meyer 	 * If all we need is the number of characters and it's a regular file,
2170dc7c9e6SConrad Meyer 	 * just stat it.
2189b50d902SRodney W. Grimes 	 */
219*84b851c2SConrad Meyer 	if (doline == 0 && dolongline == 0) {
2200dc7c9e6SConrad Meyer 		if (fstat(fd, &sb)) {
2210dc7c9e6SConrad Meyer 			xo_warn("%s: fstat", file);
2220dc7c9e6SConrad Meyer 			(void)close(fd);
2230dc7c9e6SConrad Meyer 			return (1);
2240dc7c9e6SConrad Meyer 		}
2250dc7c9e6SConrad Meyer 		if (S_ISREG(sb.st_mode)) {
2260dc7c9e6SConrad Meyer 			reset_siginfo();
2270dc7c9e6SConrad Meyer 			charct = sb.st_size;
2280dc7c9e6SConrad Meyer 			show_cnt(file, linect, wordct, charct, llct);
2290dc7c9e6SConrad Meyer 			tcharct += charct;
2300dc7c9e6SConrad Meyer 			(void)close(fd);
2310dc7c9e6SConrad Meyer 			return (0);
2320dc7c9e6SConrad Meyer 		}
2330dc7c9e6SConrad Meyer 	}
2340dc7c9e6SConrad Meyer 	/*
2350dc7c9e6SConrad Meyer 	 * For files we can't stat, or if we need line counting, slurp the
2360dc7c9e6SConrad Meyer 	 * file.  Line counting is split out because it's a lot faster to get
2370dc7c9e6SConrad Meyer 	 * lines than to get words, since the word count requires locale
2380dc7c9e6SConrad Meyer 	 * handling.
2390dc7c9e6SConrad Meyer 	 */
2408c85cce7SPhilippe Charnier 	while ((len = read(fd, buf, MAXBSIZE))) {
2412c51e5edSBruce Evans 		if (len == -1) {
2426711c482SMarcel Moolenaar 			xo_warn("%s: read", file);
2432c51e5edSBruce Evans 			(void)close(fd);
2442c51e5edSBruce Evans 			return (1);
2452c51e5edSBruce Evans 		}
2460dc7c9e6SConrad Meyer 		if (siginfo)
2470dc7c9e6SConrad Meyer 			show_cnt(file, linect, wordct, charct, llct);
2489b50d902SRodney W. Grimes 		charct += len;
249*84b851c2SConrad Meyer 		if (doline || dolongline) {
2509b50d902SRodney W. Grimes 			for (p = buf; len--; ++p)
251f45dd010SGiorgos Keramidas 				if (*p == '\n') {
252f45dd010SGiorgos Keramidas 					if (tmpll > llct)
253f45dd010SGiorgos Keramidas 						llct = tmpll;
254f45dd010SGiorgos Keramidas 					tmpll = 0;
2559b50d902SRodney W. Grimes 					++linect;
256f45dd010SGiorgos Keramidas 				} else
257f45dd010SGiorgos Keramidas 					tmpll++;
2589b50d902SRodney W. Grimes 		}
259de143041SConrad Meyer 	}
2608ded906dSBryan Drewery 	reset_siginfo();
261de143041SConrad Meyer 	if (doline)
2629b50d902SRodney W. Grimes 		tlinect += linect;
263f09bc093SPawel Jakub Dawidek 	if (dochar)
2649b50d902SRodney W. Grimes 		tcharct += charct;
265*84b851c2SConrad Meyer 	if (dolongline && llct > tlongline)
266f45dd010SGiorgos Keramidas 		tlongline = llct;
267f09bc093SPawel Jakub Dawidek 	show_cnt(file, linect, wordct, charct, llct);
2689b50d902SRodney W. Grimes 	(void)close(fd);
2692c51e5edSBruce Evans 	return (0);
2709b50d902SRodney W. Grimes 
2719b50d902SRodney W. Grimes 	/* Do it the hard way... */
272ebb42aeeSTim J. Robbins word:	gotsp = 1;
273ebb42aeeSTim J. Robbins 	warned = 0;
274149a123bSTim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
275149a123bSTim J. Robbins 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
276149a123bSTim J. Robbins 		if (len == -1) {
2776711c482SMarcel Moolenaar 			xo_warn("%s: read", file != NULL ? file : "stdin");
2782c51e5edSBruce Evans 			(void)close(fd);
2792c51e5edSBruce Evans 			return (1);
2802c51e5edSBruce Evans 		}
281ebb42aeeSTim J. Robbins 		p = buf;
282ebb42aeeSTim J. Robbins 		while (len > 0) {
283f09bc093SPawel Jakub Dawidek 			if (siginfo)
284f09bc093SPawel Jakub Dawidek 				show_cnt(file, linect, wordct, charct, llct);
285ebb42aeeSTim J. Robbins 			if (!domulti || MB_CUR_MAX == 1) {
286ebb42aeeSTim J. Robbins 				clen = 1;
287ebb42aeeSTim J. Robbins 				wch = (unsigned char)*p;
288149a123bSTim J. Robbins 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
289149a123bSTim J. Robbins 			    (size_t)-1) {
290ebb42aeeSTim J. Robbins 				if (!warned) {
291ebb42aeeSTim J. Robbins 					errno = EILSEQ;
2926711c482SMarcel Moolenaar 					xo_warn("%s",
293f09bc093SPawel Jakub Dawidek 					    file != NULL ? file : "stdin");
294ebb42aeeSTim J. Robbins 					warned = 1;
295ebb42aeeSTim J. Robbins 				}
296149a123bSTim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
297149a123bSTim J. Robbins 				clen = 1;
298149a123bSTim J. Robbins 				wch = (unsigned char)*p;
299149a123bSTim J. Robbins 			} else if (clen == (size_t)-2)
300ebb42aeeSTim J. Robbins 				break;
301149a123bSTim J. Robbins 			else if (clen == 0)
302149a123bSTim J. Robbins 				clen = 1;
303ebb42aeeSTim J. Robbins 			charct++;
304f45dd010SGiorgos Keramidas 			if (wch != L'\n')
305f45dd010SGiorgos Keramidas 				tmpll++;
306ebb42aeeSTim J. Robbins 			len -= clen;
307ebb42aeeSTim J. Robbins 			p += clen;
308f45dd010SGiorgos Keramidas 			if (wch == L'\n') {
309f45dd010SGiorgos Keramidas 				if (tmpll > llct)
310f45dd010SGiorgos Keramidas 					llct = tmpll;
311f45dd010SGiorgos Keramidas 				tmpll = 0;
3129b50d902SRodney W. Grimes 				++linect;
313f45dd010SGiorgos Keramidas 			}
314e58245f7STim J. Robbins 			if (iswspace(wch))
3159b50d902SRodney W. Grimes 				gotsp = 1;
3169b50d902SRodney W. Grimes 			else if (gotsp) {
3179b50d902SRodney W. Grimes 				gotsp = 0;
3189b50d902SRodney W. Grimes 				++wordct;
3199b50d902SRodney W. Grimes 			}
3209b50d902SRodney W. Grimes 		}
3219b50d902SRodney W. Grimes 	}
3228ded906dSBryan Drewery 	reset_siginfo();
323149a123bSTim J. Robbins 	if (domulti && MB_CUR_MAX > 1)
324149a123bSTim J. Robbins 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
3256711c482SMarcel Moolenaar 			xo_warn("%s", file != NULL ? file : "stdin");
326f09bc093SPawel Jakub Dawidek 	if (doline)
3279b50d902SRodney W. Grimes 		tlinect += linect;
328f09bc093SPawel Jakub Dawidek 	if (doword)
3299b50d902SRodney W. Grimes 		twordct += wordct;
330f09bc093SPawel Jakub Dawidek 	if (dochar || domulti)
3319b50d902SRodney W. Grimes 		tcharct += charct;
332*84b851c2SConrad Meyer 	if (dolongline && llct > tlongline)
333f45dd010SGiorgos Keramidas 		tlongline = llct;
334f09bc093SPawel Jakub Dawidek 	show_cnt(file, linect, wordct, charct, llct);
3359b50d902SRodney W. Grimes 	(void)close(fd);
3362c51e5edSBruce Evans 	return (0);
3379b50d902SRodney W. Grimes }
3389b50d902SRodney W. Grimes 
/*
 * Report the command synopsis through libxo's error channel and terminate
 * with exit status 1.  Does not return.
 */
static void
usage(void)
{
	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(1);
}
345