1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42 static const char rcsid[] = 43 "$FreeBSD$"; 44 #endif /* not lint */ 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 55 int bflag; 56 int cflag; 57 char dchar; 58 int dflag; 59 int fflag; 60 int nflag; 61 int sflag; 62 63 void b_n_cut(FILE *, const char *); 64 void c_cut(FILE *, const char *); 65 void f_cut(FILE *, const char *); 66 void get_list(char *); 67 int main(int, char **); 68 void needpos(size_t); 69 static void usage(void); 70 71 int 72 main(argc, argv) 73 int argc; 74 char *argv[]; 75 { 76 FILE *fp; 77 void (*fcn)(FILE *, const char *); 78 int ch, rval; 79 80 setlocale(LC_ALL, ""); 81 82 fcn = NULL; 83 dchar = '\t'; /* default delimiter is \t */ 84 85 /* 86 * Since we don't support multi-byte characters, the -c and -b 87 * options are equivalent. 88 */ 89 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 90 switch(ch) { 91 case 'b': 92 fcn = c_cut; 93 get_list(optarg); 94 bflag = 1; 95 break; 96 case 'c': 97 fcn = c_cut; 98 get_list(optarg); 99 cflag = 1; 100 break; 101 case 'd': 102 dchar = *optarg; 103 dflag = 1; 104 break; 105 case 'f': 106 get_list(optarg); 107 fcn = f_cut; 108 fflag = 1; 109 break; 110 case 's': 111 sflag = 1; 112 break; 113 case 'n': 114 nflag = 1; 115 break; 116 case '?': 117 default: 118 usage(); 119 } 120 argc -= optind; 121 argv += optind; 122 123 if (fflag) { 124 if (bflag || cflag || nflag) 125 usage(); 126 } else if (!(bflag || cflag) || dflag || sflag) 127 usage(); 128 else if (!bflag && nflag) 129 usage(); 130 131 if (nflag) 132 fcn = b_n_cut; 133 134 rval = 0; 135 if (*argv) 136 for (; *argv; ++argv) { 137 if (strcmp(*argv, "-") == 0) 138 fcn(stdin, "stdin"); 139 else { 140 if (!(fp = fopen(*argv, "r"))) { 141 warn("%s", *argv); 142 rval = 1; 143 continue; 144 } 145 fcn(fp, *argv); 146 (void)fclose(fp); 147 } 148 } 149 else 150 fcn(stdin, "stdin"); 151 exit(rval); 152 } 153 154 size_t autostart, autostop, maxval; 155 156 char *positions; 157 158 void 159 get_list(list) 160 char *list; 161 { 162 size_t setautostart, start, stop; 163 char *pos; 164 char *p; 165 166 /* 167 * set a byte in the positions array to indicate if a field or 168 * column is to be selected; use +1, it's 1-based, not 0-based. 169 * This parser is less restrictive than the Draft 9 POSIX spec. 170 * POSIX doesn't allow lists that aren't in increasing order or 171 * overlapping lists. We also handle "-3-5" although there's no 172 * real reason too. 173 */ 174 for (; (p = strsep(&list, ", \t")) != NULL;) { 175 setautostart = start = stop = 0; 176 if (*p == '-') { 177 ++p; 178 setautostart = 1; 179 } 180 if (isdigit((unsigned char)*p)) { 181 start = stop = strtol(p, &p, 10); 182 if (setautostart && start > autostart) 183 autostart = start; 184 } 185 if (*p == '-') { 186 if (isdigit((unsigned char)p[1])) 187 stop = strtol(p + 1, &p, 10); 188 if (*p == '-') { 189 ++p; 190 if (!autostop || autostop > stop) 191 autostop = stop; 192 } 193 } 194 if (*p) 195 errx(1, "[-cf] list: illegal list value"); 196 if (!stop || !start) 197 errx(1, "[-cf] list: values may not include zero"); 198 if (maxval < stop) { 199 maxval = stop; 200 needpos(maxval + 1); 201 } 202 for (pos = positions + start; start++ <= stop; *pos++ = 1); 203 } 204 205 /* overlapping ranges */ 206 if (autostop && maxval > autostop) { 207 maxval = autostop; 208 needpos(maxval + 1); 209 } 210 211 /* set autostart */ 212 if (autostart) 213 memset(positions + 1, '1', autostart); 214 } 215 216 void 217 needpos(size_t n) 218 { 219 static size_t npos; 220 size_t oldnpos; 221 222 /* Grow the positions array to at least the specified size. */ 223 if (n > npos) { 224 oldnpos = npos; 225 if (npos == 0) 226 npos = n; 227 while (n > npos) 228 npos *= 2; 229 if ((positions = realloc(positions, npos)) == NULL) 230 err(1, "realloc"); 231 memset((char *)positions + oldnpos, 0, npos - oldnpos); 232 } 233 } 234 235 /* 236 * Cut based on byte positions, taking care not to split multibyte characters. 237 * Although this function also handles the case where -n is not specified, 238 * c_cut() ought to be much faster. 239 */ 240 void 241 b_n_cut(fp, fname) 242 FILE *fp; 243 const char *fname; 244 { 245 size_t col, i, lbuflen; 246 char *lbuf; 247 int canwrite, clen, warned; 248 249 warned = 0; 250 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 251 for (col = 0; lbuflen > 0; col += clen) { 252 if ((clen = mblen(lbuf, lbuflen)) < 0) { 253 if (!warned) { 254 warn("%s", fname); 255 warned = 1; 256 } 257 clen = 1; 258 } 259 if (clen == 0 || *lbuf == '\n') 260 break; 261 if (col < maxval && !positions[1 + col]) { 262 /* 263 * Print the character if (1) after an initial 264 * segment of un-selected bytes, the rest of 265 * it is selected, and (2) the last byte is 266 * selected. 267 */ 268 i = col; 269 while (i < col + clen && i < maxval && 270 !positions[1 + i]) 271 i++; 272 canwrite = i < col + clen; 273 for (; i < col + clen && i < maxval; i++) 274 canwrite &= positions[1 + i]; 275 if (canwrite) 276 fwrite(lbuf, 1, clen, stdout); 277 } else { 278 /* 279 * Print the character if all of it has 280 * been selected. 281 */ 282 canwrite = 1; 283 for (i = col; i < col + clen; i++) 284 if ((i >= maxval && !autostop) || 285 (i < maxval && !positions[1 + i])) { 286 canwrite = 0; 287 break; 288 } 289 if (canwrite) 290 fwrite(lbuf, 1, clen, stdout); 291 } 292 lbuf += clen; 293 lbuflen -= clen; 294 } 295 if (lbuflen > 0) 296 putchar('\n'); 297 } 298 } 299 300 void 301 c_cut(fp, fname) 302 FILE *fp; 303 const char *fname __unused; 304 { 305 int ch, col; 306 char *pos; 307 308 ch = 0; 309 for (;;) { 310 pos = positions + 1; 311 for (col = maxval; col; --col) { 312 if ((ch = getc(fp)) == EOF) 313 return; 314 if (ch == '\n') 315 break; 316 if (*pos++) 317 (void)putchar(ch); 318 } 319 if (ch != '\n') { 320 if (autostop) 321 while ((ch = getc(fp)) != EOF && ch != '\n') 322 (void)putchar(ch); 323 else 324 while ((ch = getc(fp)) != EOF && ch != '\n'); 325 } 326 (void)putchar('\n'); 327 } 328 } 329 330 void 331 f_cut(fp, fname) 332 FILE *fp; 333 const char *fname __unused; 334 { 335 int ch, field, isdelim; 336 char *pos, *p, sep; 337 int output; 338 char *lbuf, *mlbuf; 339 size_t lbuflen; 340 341 mlbuf = NULL; 342 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 343 /* Assert EOL has a newline. */ 344 if (*(lbuf + lbuflen - 1) != '\n') { 345 /* Can't have > 1 line with no trailing newline. */ 346 mlbuf = malloc(lbuflen + 1); 347 if (mlbuf == NULL) 348 err(1, "malloc"); 349 memcpy(mlbuf, lbuf, lbuflen); 350 *(mlbuf + lbuflen) = '\n'; 351 lbuf = mlbuf; 352 } 353 output = 0; 354 for (isdelim = 0, p = lbuf;; ++p) { 355 ch = *p; 356 /* this should work if newline is delimiter */ 357 if (ch == sep) 358 isdelim = 1; 359 if (ch == '\n') { 360 if (!isdelim && !sflag) 361 (void)fwrite(lbuf, lbuflen, 1, stdout); 362 break; 363 } 364 } 365 if (!isdelim) 366 continue; 367 368 pos = positions + 1; 369 for (field = maxval, p = lbuf; field; --field, ++pos) { 370 if (*pos) { 371 if (output++) 372 (void)putchar(sep); 373 while ((ch = *p++) != '\n' && ch != sep) 374 (void)putchar(ch); 375 } else { 376 while ((ch = *p++) != '\n' && ch != sep) 377 continue; 378 } 379 if (ch == '\n') 380 break; 381 } 382 if (ch != '\n') { 383 if (autostop) { 384 if (output) 385 (void)putchar(sep); 386 for (; (ch = *p) != '\n'; ++p) 387 (void)putchar(ch); 388 } else 389 for (; (ch = *p) != '\n'; ++p); 390 } 391 (void)putchar('\n'); 392 } 393 if (mlbuf != NULL) 394 free(mlbuf); 395 } 396 397 static void 398 usage() 399 { 400 (void)fprintf(stderr, "%s\n%s\n%s\n", 401 "usage: cut -b list [-n] [file ...]", 402 " cut -c list [file ...]", 403 " cut -f list [-s] [-d delim] [file ...]"); 404 exit(1); 405 } 406