xref: /freebsd/usr.bin/wc/wc.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/capsicum.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 
36 #include <capsicum_helpers.h>
37 #include <ctype.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <locale.h>
41 #include <stdbool.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 #include <libxo/xo.h>
50 
51 #include <libcasper.h>
52 #include <casper/cap_fileargs.h>
53 
54 static const char *stdin_filename = "stdin";
55 
56 static fileargs_t *fa;
57 static uintmax_t tlinect, twordct, tcharct, tlongline;
58 static bool doline, doword, dochar, domulti, dolongline;
59 static volatile sig_atomic_t siginfo;
60 static xo_handle_t *stderr_handle;
61 
62 static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
63 		    uintmax_t charct, uintmax_t llct);
64 static int	cnt(const char *);
65 static void	usage(void);
66 
67 static void
68 siginfo_handler(int sig __unused)
69 {
70 
71 	siginfo = 1;
72 }
73 
74 static void
75 reset_siginfo(void)
76 {
77 
78 	signal(SIGINFO, SIG_DFL);
79 	siginfo = 0;
80 }
81 
82 int
83 main(int argc, char *argv[])
84 {
85 	int ch, errors, total;
86 	cap_rights_t rights;
87 
88 	(void) setlocale(LC_CTYPE, "");
89 
90 	argc = xo_parse_args(argc, argv);
91 	if (argc < 0)
92 		exit(EXIT_FAILURE);
93 
94 	while ((ch = getopt(argc, argv, "clmwL")) != -1)
95 		switch((char)ch) {
96 		case 'l':
97 			doline = true;
98 			break;
99 		case 'w':
100 			doword = true;
101 			break;
102 		case 'c':
103 			dochar = true;
104 			domulti = false;
105 			break;
106 		case 'L':
107 			dolongline = true;
108 			break;
109 		case 'm':
110 			domulti = true;
111 			dochar = false;
112 			break;
113 		case '?':
114 		default:
115 			usage();
116 		}
117 	argv += optind;
118 	argc -= optind;
119 
120 	fa = fileargs_init(argc, argv, O_RDONLY, 0,
121 	    cap_rights_init(&rights, CAP_READ, CAP_FSTAT), FA_OPEN);
122 	if (fa == NULL)
123 		xo_err(EXIT_FAILURE, "Unable to initialize casper");
124 	caph_cache_catpages();
125 	if (caph_limit_stdio() < 0)
126 		xo_err(EXIT_FAILURE, "Unable to limit stdio");
127 	if (caph_enter_casper() < 0)
128 		xo_err(EXIT_FAILURE, "Unable to enter capability mode");
129 
130 	/* Wc's flags are on by default. */
131 	if (!(doline || doword || dochar || domulti || dolongline))
132 		doline = doword = dochar = true;
133 
134 	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
135 	xo_open_container("wc");
136 	xo_open_list("file");
137 
138 	(void)signal(SIGINFO, siginfo_handler);
139 	errors = 0;
140 	total = 0;
141 	if (argc == 0) {
142 		xo_open_instance("file");
143 		if (cnt(NULL) != 0)
144 			++errors;
145 		xo_close_instance("file");
146 	} else {
147 		while (argc--) {
148 			xo_open_instance("file");
149 			if (cnt(*argv++) != 0)
150 				++errors;
151 			xo_close_instance("file");
152 			++total;
153 		}
154 	}
155 
156 	xo_close_list("file");
157 
158 	if (total > 1) {
159 		xo_open_container("total");
160 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
161 		xo_close_container("total");
162 	}
163 
164 	fileargs_free(fa);
165 	xo_close_container("wc");
166 	if (xo_finish() < 0)
167 		xo_err(EXIT_FAILURE, "stdout");
168 	exit(errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
169 }
170 
171 static void
172 show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
173     uintmax_t charct, uintmax_t llct)
174 {
175 	xo_handle_t *xop;
176 
177 	if (!siginfo)
178 		xop = NULL;
179 	else {
180 		xop = stderr_handle;
181 		siginfo = 0;
182 	}
183 
184 	if (doline)
185 		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
186 	if (doword)
187 		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
188 	if (dochar || domulti)
189 		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
190 	if (dolongline)
191 		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
192 	if (file != stdin_filename)
193 		xo_emit_h(xop, " {:filename/%s}\n", file);
194 	else
195 		xo_emit_h(xop, "\n");
196 }
197 
198 static int
199 cnt(const char *file)
200 {
201 	static char buf[MAXBSIZE];
202 	struct stat sb;
203 	mbstate_t mbs;
204 	const char *p;
205 	uintmax_t linect, wordct, charct, llct, tmpll;
206 	ssize_t len;
207 	size_t clen;
208 	int fd;
209 	wchar_t wch;
210 	bool gotsp, warned;
211 
212 	linect = wordct = charct = llct = tmpll = 0;
213 	if (file == NULL) {
214 		fd = STDIN_FILENO;
215 		file = stdin_filename;
216 	} else if ((fd = fileargs_open(fa, file)) < 0) {
217 		xo_warn("%s: open", file);
218 		return (1);
219 	}
220 	if (doword || (domulti && MB_CUR_MAX != 1))
221 		goto word;
222 	/*
223 	 * If all we need is the number of characters and it's a regular file,
224 	 * just stat it.
225 	 */
226 	if (doline == 0 && dolongline == 0) {
227 		if (fstat(fd, &sb)) {
228 			xo_warn("%s: fstat", file);
229 			(void)close(fd);
230 			return (1);
231 		}
232 		/* pseudo-filesystems advertize a zero size */
233 		if (S_ISREG(sb.st_mode) && sb.st_size > 0) {
234 			reset_siginfo();
235 			charct = sb.st_size;
236 			show_cnt(file, linect, wordct, charct, llct);
237 			tcharct += charct;
238 			(void)close(fd);
239 			return (0);
240 		}
241 	}
242 	/*
243 	 * For files we can't stat, or if we need line counting, slurp the
244 	 * file.  Line counting is split out because it's a lot faster to get
245 	 * lines than to get words, since the word count requires locale
246 	 * handling.
247 	 */
248 	while ((len = read(fd, buf, sizeof(buf))) != 0) {
249 		if (len < 0) {
250 			xo_warn("%s: read", file);
251 			(void)close(fd);
252 			return (1);
253 		}
254 		if (siginfo)
255 			show_cnt(file, linect, wordct, charct, llct);
256 		charct += len;
257 		if (doline || dolongline) {
258 			for (p = buf; len > 0; --len, ++p) {
259 				if (*p == '\n') {
260 					if (tmpll > llct)
261 						llct = tmpll;
262 					tmpll = 0;
263 					++linect;
264 				} else {
265 					tmpll++;
266 				}
267 			}
268 		}
269 	}
270 	reset_siginfo();
271 	if (doline)
272 		tlinect += linect;
273 	if (dochar)
274 		tcharct += charct;
275 	if (dolongline && llct > tlongline)
276 		tlongline = llct;
277 	show_cnt(file, linect, wordct, charct, llct);
278 	(void)close(fd);
279 	return (0);
280 
281 	/* Do it the hard way... */
282 word:	gotsp = true;
283 	warned = false;
284 	memset(&mbs, 0, sizeof(mbs));
285 	while ((len = read(fd, buf, sizeof(buf))) != 0) {
286 		if (len < 0) {
287 			xo_warn("%s: read", file);
288 			(void)close(fd);
289 			return (1);
290 		}
291 		p = buf;
292 		while (len > 0) {
293 			if (siginfo)
294 				show_cnt(file, linect, wordct, charct, llct);
295 			if (!domulti || MB_CUR_MAX == 1) {
296 				clen = 1;
297 				wch = (unsigned char)*p;
298 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) {
299 				clen = 1;
300 			} else if (clen == (size_t)-1) {
301 				if (!warned) {
302 					errno = EILSEQ;
303 					xo_warn("%s", file);
304 					warned = true;
305 				}
306 				memset(&mbs, 0, sizeof(mbs));
307 				clen = 1;
308 				wch = (unsigned char)*p;
309 			} else if (clen == (size_t)-2) {
310 				break;
311 			}
312 			charct++;
313 			if (wch != L'\n')
314 				tmpll++;
315 			len -= clen;
316 			p += clen;
317 			if (wch == L'\n') {
318 				if (tmpll > llct)
319 					llct = tmpll;
320 				tmpll = 0;
321 				++linect;
322 			}
323 			if (iswspace(wch)) {
324 				gotsp = true;
325 			} else if (gotsp) {
326 				gotsp = false;
327 				++wordct;
328 			}
329 		}
330 	}
331 	reset_siginfo();
332 	if (domulti && MB_CUR_MAX > 1) {
333 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
334 			xo_warn("%s", file);
335 	}
336 	if (doline)
337 		tlinect += linect;
338 	if (doword)
339 		twordct += wordct;
340 	if (dochar || domulti)
341 		tcharct += charct;
342 	if (dolongline && llct > tlongline)
343 		tlongline = llct;
344 	show_cnt(file, linect, wordct, charct, llct);
345 	(void)close(fd);
346 	return (0);
347 }
348 
/*
 * Report the command synopsis through xo_error() and terminate the
 * program with EXIT_FAILURE.  Does not return.
 */
static void
usage(void)
{
	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(EXIT_FAILURE);
}
355