1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42 #endif /* not lint */ 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 55 int bflag; 56 int cflag; 57 char dchar; 58 int dflag; 59 int fflag; 60 int nflag; 61 int sflag; 62 63 void b_n_cut(FILE *, const char *); 64 void c_cut(FILE *, const char *); 65 void f_cut(FILE *, const char *); 66 void get_list(char *); 67 void needpos(size_t); 68 static void usage(void); 69 70 int 71 main(argc, argv) 72 int argc; 73 char *argv[]; 74 { 75 FILE *fp; 76 void (*fcn)(FILE *, const char *); 77 int ch, rval; 78 79 setlocale(LC_ALL, ""); 80 81 fcn = NULL; 82 dchar = '\t'; /* default delimiter is \t */ 83 84 /* 85 * Since we don't support multi-byte characters, the -c and -b 86 * options are equivalent. 87 */ 88 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 89 switch(ch) { 90 case 'b': 91 fcn = c_cut; 92 get_list(optarg); 93 bflag = 1; 94 break; 95 case 'c': 96 fcn = c_cut; 97 get_list(optarg); 98 cflag = 1; 99 break; 100 case 'd': 101 dchar = *optarg; 102 dflag = 1; 103 break; 104 case 'f': 105 get_list(optarg); 106 fcn = f_cut; 107 fflag = 1; 108 break; 109 case 's': 110 sflag = 1; 111 break; 112 case 'n': 113 nflag = 1; 114 break; 115 case '?': 116 default: 117 usage(); 118 } 119 argc -= optind; 120 argv += optind; 121 122 if (fflag) { 123 if (bflag || cflag || nflag) 124 usage(); 125 } else if (!(bflag || cflag) || dflag || sflag) 126 usage(); 127 else if (!bflag && nflag) 128 usage(); 129 130 if (nflag) 131 fcn = b_n_cut; 132 133 rval = 0; 134 if (*argv) 135 for (; *argv; ++argv) { 136 if (strcmp(*argv, "-") == 0) 137 fcn(stdin, "stdin"); 138 else { 139 if (!(fp = fopen(*argv, "r"))) { 140 warn("%s", *argv); 141 rval = 1; 142 continue; 143 } 144 fcn(fp, *argv); 145 (void)fclose(fp); 146 } 147 } 148 else 149 fcn(stdin, "stdin"); 150 exit(rval); 151 } 152 153 size_t autostart, autostop, maxval; 154 155 char *positions; 156 157 void 158 get_list(list) 159 char *list; 160 { 161 size_t setautostart, start, stop; 162 char *pos; 163 char *p; 164 165 /* 166 * set a byte in the positions array to indicate if a field or 167 * column is to be selected; use +1, it's 1-based, not 0-based. 168 * This parser is less restrictive than the Draft 9 POSIX spec. 169 * POSIX doesn't allow lists that aren't in increasing order or 170 * overlapping lists. We also handle "-3-5" although there's no 171 * real reason too. 172 */ 173 for (; (p = strsep(&list, ", \t")) != NULL;) { 174 setautostart = start = stop = 0; 175 if (*p == '-') { 176 ++p; 177 setautostart = 1; 178 } 179 if (isdigit((unsigned char)*p)) { 180 start = stop = strtol(p, &p, 10); 181 if (setautostart && start > autostart) 182 autostart = start; 183 } 184 if (*p == '-') { 185 if (isdigit((unsigned char)p[1])) 186 stop = strtol(p + 1, &p, 10); 187 if (*p == '-') { 188 ++p; 189 if (!autostop || autostop > stop) 190 autostop = stop; 191 } 192 } 193 if (*p) 194 errx(1, "[-cf] list: illegal list value"); 195 if (!stop || !start) 196 errx(1, "[-cf] list: values may not include zero"); 197 if (maxval < stop) { 198 maxval = stop; 199 needpos(maxval + 1); 200 } 201 for (pos = positions + start; start++ <= stop; *pos++ = 1); 202 } 203 204 /* overlapping ranges */ 205 if (autostop && maxval > autostop) { 206 maxval = autostop; 207 needpos(maxval + 1); 208 } 209 210 /* set autostart */ 211 if (autostart) 212 memset(positions + 1, '1', autostart); 213 } 214 215 void 216 needpos(size_t n) 217 { 218 static size_t npos; 219 size_t oldnpos; 220 221 /* Grow the positions array to at least the specified size. */ 222 if (n > npos) { 223 oldnpos = npos; 224 if (npos == 0) 225 npos = n; 226 while (n > npos) 227 npos *= 2; 228 if ((positions = realloc(positions, npos)) == NULL) 229 err(1, "realloc"); 230 memset((char *)positions + oldnpos, 0, npos - oldnpos); 231 } 232 } 233 234 /* 235 * Cut based on byte positions, taking care not to split multibyte characters. 236 * Although this function also handles the case where -n is not specified, 237 * c_cut() ought to be much faster. 238 */ 239 void 240 b_n_cut(fp, fname) 241 FILE *fp; 242 const char *fname; 243 { 244 size_t col, i, lbuflen; 245 char *lbuf; 246 int canwrite, clen, warned; 247 248 warned = 0; 249 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 250 for (col = 0; lbuflen > 0; col += clen) { 251 if ((clen = mblen(lbuf, lbuflen)) < 0) { 252 if (!warned) { 253 warn("%s", fname); 254 warned = 1; 255 } 256 clen = 1; 257 } 258 if (clen == 0 || *lbuf == '\n') 259 break; 260 if (col < maxval && !positions[1 + col]) { 261 /* 262 * Print the character if (1) after an initial 263 * segment of un-selected bytes, the rest of 264 * it is selected, and (2) the last byte is 265 * selected. 266 */ 267 i = col; 268 while (i < col + clen && i < maxval && 269 !positions[1 + i]) 270 i++; 271 canwrite = i < col + clen; 272 for (; i < col + clen && i < maxval; i++) 273 canwrite &= positions[1 + i]; 274 if (canwrite) 275 fwrite(lbuf, 1, clen, stdout); 276 } else { 277 /* 278 * Print the character if all of it has 279 * been selected. 280 */ 281 canwrite = 1; 282 for (i = col; i < col + clen; i++) 283 if ((i >= maxval && !autostop) || 284 (i < maxval && !positions[1 + i])) { 285 canwrite = 0; 286 break; 287 } 288 if (canwrite) 289 fwrite(lbuf, 1, clen, stdout); 290 } 291 lbuf += clen; 292 lbuflen -= clen; 293 } 294 if (lbuflen > 0) 295 putchar('\n'); 296 } 297 } 298 299 void 300 c_cut(fp, fname) 301 FILE *fp; 302 const char *fname __unused; 303 { 304 int ch, col; 305 char *pos; 306 307 ch = 0; 308 for (;;) { 309 pos = positions + 1; 310 for (col = maxval; col; --col) { 311 if ((ch = getc(fp)) == EOF) 312 return; 313 if (ch == '\n') 314 break; 315 if (*pos++) 316 (void)putchar(ch); 317 } 318 if (ch != '\n') { 319 if (autostop) 320 while ((ch = getc(fp)) != EOF && ch != '\n') 321 (void)putchar(ch); 322 else 323 while ((ch = getc(fp)) != EOF && ch != '\n'); 324 } 325 (void)putchar('\n'); 326 } 327 } 328 329 void 330 f_cut(fp, fname) 331 FILE *fp; 332 const char *fname __unused; 333 { 334 int ch, field, isdelim; 335 char *pos, *p, sep; 336 int output; 337 char *lbuf, *mlbuf; 338 size_t lbuflen; 339 340 mlbuf = NULL; 341 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 342 /* Assert EOL has a newline. */ 343 if (*(lbuf + lbuflen - 1) != '\n') { 344 /* Can't have > 1 line with no trailing newline. */ 345 mlbuf = malloc(lbuflen + 1); 346 if (mlbuf == NULL) 347 err(1, "malloc"); 348 memcpy(mlbuf, lbuf, lbuflen); 349 *(mlbuf + lbuflen) = '\n'; 350 lbuf = mlbuf; 351 } 352 output = 0; 353 for (isdelim = 0, p = lbuf;; ++p) { 354 ch = *p; 355 /* this should work if newline is delimiter */ 356 if (ch == sep) 357 isdelim = 1; 358 if (ch == '\n') { 359 if (!isdelim && !sflag) 360 (void)fwrite(lbuf, lbuflen, 1, stdout); 361 break; 362 } 363 } 364 if (!isdelim) 365 continue; 366 367 pos = positions + 1; 368 for (field = maxval, p = lbuf; field; --field, ++pos) { 369 if (*pos) { 370 if (output++) 371 (void)putchar(sep); 372 while ((ch = *p++) != '\n' && ch != sep) 373 (void)putchar(ch); 374 } else { 375 while ((ch = *p++) != '\n' && ch != sep) 376 continue; 377 } 378 if (ch == '\n') 379 break; 380 } 381 if (ch != '\n') { 382 if (autostop) { 383 if (output) 384 (void)putchar(sep); 385 for (; (ch = *p) != '\n'; ++p) 386 (void)putchar(ch); 387 } else 388 for (; (ch = *p) != '\n'; ++p); 389 } 390 (void)putchar('\n'); 391 } 392 if (mlbuf != NULL) 393 free(mlbuf); 394 } 395 396 static void 397 usage() 398 { 399 (void)fprintf(stderr, "%s\n%s\n%s\n", 400 "usage: cut -b list [-n] [file ...]", 401 " cut -c list [file ...]", 402 " cut -f list [-s] [-d delim] [file ...]"); 403 exit(1); 404 } 405