1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/capsicum.h> 33 #include <sys/param.h> 34 #include <sys/stat.h> 35 36 #include <capsicum_helpers.h> 37 #include <ctype.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <locale.h> 41 #include <stdbool.h> 42 #include <stdint.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 #include <wchar.h> 48 #include <wctype.h> 49 #include <libxo/xo.h> 50 51 #include <libcasper.h> 52 #include <casper/cap_fileargs.h> 53 54 static const char *stdin_filename = "stdin"; 55 56 static fileargs_t *fa; 57 static uintmax_t tlinect, twordct, tcharct, tlongline; 58 static bool doline, doword, dochar, domulti, dolongline; 59 static volatile sig_atomic_t siginfo; 60 static xo_handle_t *stderr_handle; 61 62 static void show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, 63 uintmax_t charct, uintmax_t llct); 64 static int cnt(const char *); 65 static void usage(void); 66 67 static void 68 siginfo_handler(int sig __unused) 69 { 70 71 siginfo = 1; 72 } 73 74 static void 75 reset_siginfo(void) 76 { 77 78 signal(SIGINFO, SIG_DFL); 79 siginfo = 0; 80 } 81 82 int 83 main(int argc, char *argv[]) 84 { 85 int ch, errors, total; 86 cap_rights_t rights; 87 88 (void) setlocale(LC_CTYPE, ""); 89 90 argc = xo_parse_args(argc, argv); 91 if (argc < 0) 92 exit(EXIT_FAILURE); 93 94 while ((ch = getopt(argc, argv, "clmwL")) != -1) 95 switch((char)ch) { 96 case 'l': 97 doline = true; 98 break; 99 case 'w': 100 doword = true; 101 break; 102 case 'c': 103 dochar = true; 104 domulti = false; 105 break; 106 case 'L': 107 dolongline = true; 108 break; 109 case 'm': 110 domulti = true; 111 dochar = false; 112 break; 113 case '?': 114 default: 115 usage(); 116 } 117 argv += optind; 118 argc -= optind; 119 120 (void)signal(SIGINFO, siginfo_handler); 121 122 fa = fileargs_init(argc, argv, O_RDONLY, 0, 123 cap_rights_init(&rights, CAP_READ, CAP_FSTAT), FA_OPEN); 124 if (fa == NULL) 125 xo_err(EXIT_FAILURE, "Unable to initialize casper"); 126 caph_cache_catpages(); 127 if (caph_limit_stdio() < 0) 128 xo_err(EXIT_FAILURE, "Unable to limit stdio"); 129 if (caph_enter_casper() < 0) 130 xo_err(EXIT_FAILURE, "Unable to enter capability mode"); 131 132 /* Wc's flags are on by default. */ 133 if (!(doline || doword || dochar || domulti || dolongline)) 134 doline = doword = dochar = true; 135 136 stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0); 137 xo_open_container("wc"); 138 xo_open_list("file"); 139 140 errors = 0; 141 total = 0; 142 if (argc == 0) { 143 xo_open_instance("file"); 144 if (cnt(NULL) != 0) 145 ++errors; 146 xo_close_instance("file"); 147 } else { 148 while (argc--) { 149 xo_open_instance("file"); 150 if (cnt(*argv++) != 0) 151 ++errors; 152 xo_close_instance("file"); 153 ++total; 154 } 155 } 156 157 xo_close_list("file"); 158 159 if (total > 1) { 160 xo_open_container("total"); 161 show_cnt("total", tlinect, twordct, tcharct, tlongline); 162 xo_close_container("total"); 163 } 164 165 fileargs_free(fa); 166 xo_close_container("wc"); 167 if (xo_finish() < 0) 168 xo_err(EXIT_FAILURE, "stdout"); 169 exit(errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); 170 } 171 172 static void 173 show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, 174 uintmax_t charct, uintmax_t llct) 175 { 176 xo_handle_t *xop; 177 178 if (!siginfo) 179 xop = NULL; 180 else { 181 xop = stderr_handle; 182 siginfo = 0; 183 } 184 185 if (doline) 186 xo_emit_h(xop, " {:lines/%7ju/%ju}", linect); 187 if (doword) 188 xo_emit_h(xop, " {:words/%7ju/%ju}", wordct); 189 if (dochar || domulti) 190 xo_emit_h(xop, " {:characters/%7ju/%ju}", charct); 191 if (dolongline) 192 xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct); 193 if (file != stdin_filename) 194 xo_emit_h(xop, " {:filename/%s}\n", file); 195 else 196 xo_emit_h(xop, "\n"); 197 } 198 199 static int 200 cnt(const char *file) 201 { 202 static char buf[MAXBSIZE]; 203 struct stat sb; 204 mbstate_t mbs; 205 const char *p; 206 uintmax_t linect, wordct, charct, llct, tmpll; 207 ssize_t len; 208 size_t clen; 209 int fd; 210 wchar_t wch; 211 bool gotsp, warned; 212 213 linect = wordct = charct = llct = tmpll = 0; 214 if (file == NULL) { 215 fd = STDIN_FILENO; 216 file = stdin_filename; 217 } else if ((fd = fileargs_open(fa, file)) < 0) { 218 xo_warn("%s: open", file); 219 return (1); 220 } 221 if (doword || (domulti && MB_CUR_MAX != 1)) 222 goto word; 223 /* 224 * If all we need is the number of characters and it's a regular file, 225 * just stat it. 226 */ 227 if (doline == 0 && dolongline == 0) { 228 if (fstat(fd, &sb)) { 229 xo_warn("%s: fstat", file); 230 (void)close(fd); 231 return (1); 232 } 233 /* pseudo-filesystems advertize a zero size */ 234 if (S_ISREG(sb.st_mode) && sb.st_size > 0) { 235 reset_siginfo(); 236 charct = sb.st_size; 237 show_cnt(file, linect, wordct, charct, llct); 238 tcharct += charct; 239 (void)close(fd); 240 return (0); 241 } 242 } 243 /* 244 * For files we can't stat, or if we need line counting, slurp the 245 * file. Line counting is split out because it's a lot faster to get 246 * lines than to get words, since the word count requires locale 247 * handling. 248 */ 249 while ((len = read(fd, buf, sizeof(buf))) != 0) { 250 if (len < 0) { 251 xo_warn("%s: read", file); 252 (void)close(fd); 253 return (1); 254 } 255 if (siginfo) 256 show_cnt(file, linect, wordct, charct, llct); 257 charct += len; 258 if (doline || dolongline) { 259 for (p = buf; len > 0; --len, ++p) { 260 if (*p == '\n') { 261 if (tmpll > llct) 262 llct = tmpll; 263 tmpll = 0; 264 ++linect; 265 } else { 266 tmpll++; 267 } 268 } 269 } 270 } 271 reset_siginfo(); 272 if (doline) 273 tlinect += linect; 274 if (dochar) 275 tcharct += charct; 276 if (dolongline && llct > tlongline) 277 tlongline = llct; 278 show_cnt(file, linect, wordct, charct, llct); 279 (void)close(fd); 280 return (0); 281 282 /* Do it the hard way... */ 283 word: gotsp = true; 284 warned = false; 285 memset(&mbs, 0, sizeof(mbs)); 286 while ((len = read(fd, buf, sizeof(buf))) != 0) { 287 if (len < 0) { 288 xo_warn("%s: read", file); 289 (void)close(fd); 290 return (1); 291 } 292 p = buf; 293 while (len > 0) { 294 if (siginfo) 295 show_cnt(file, linect, wordct, charct, llct); 296 if (!domulti || MB_CUR_MAX == 1) { 297 clen = 1; 298 wch = (unsigned char)*p; 299 } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) { 300 clen = 1; 301 } else if (clen == (size_t)-1) { 302 if (!warned) { 303 errno = EILSEQ; 304 xo_warn("%s", file); 305 warned = true; 306 } 307 memset(&mbs, 0, sizeof(mbs)); 308 clen = 1; 309 wch = (unsigned char)*p; 310 } else if (clen == (size_t)-2) { 311 break; 312 } 313 charct++; 314 if (wch != L'\n') 315 tmpll++; 316 len -= clen; 317 p += clen; 318 if (wch == L'\n') { 319 if (tmpll > llct) 320 llct = tmpll; 321 tmpll = 0; 322 ++linect; 323 } 324 if (iswspace(wch)) { 325 gotsp = true; 326 } else if (gotsp) { 327 gotsp = false; 328 ++wordct; 329 } 330 } 331 } 332 reset_siginfo(); 333 if (domulti && MB_CUR_MAX > 1) { 334 if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) 335 xo_warn("%s", file); 336 } 337 if (doline) 338 tlinect += linect; 339 if (doword) 340 twordct += wordct; 341 if (dochar || domulti) 342 tcharct += charct; 343 if (dolongline && llct > tlongline) 344 tlongline = llct; 345 show_cnt(file, linect, wordct, charct, llct); 346 (void)close(fd); 347 return (0); 348 } 349 350 static void 351 usage(void) 352 { 353 xo_error("usage: wc [-Lclmw] [file ...]\n"); 354 exit(EXIT_FAILURE); 355 } 356