1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42 static const char rcsid[] = 43 "$FreeBSD$"; 44 #endif /* not lint */ 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 55 int cflag; 56 char dchar; 57 int dflag; 58 int fflag; 59 int sflag; 60 61 void c_cut (FILE *, const char *); 62 void f_cut (FILE *, const char *); 63 void get_list (char *); 64 int main (int, char **); 65 static void usage (void); 66 67 int 68 main(argc, argv) 69 int argc; 70 char *argv[]; 71 { 72 FILE *fp; 73 void (*fcn) (FILE *, const char *) = NULL; 74 int ch; 75 76 fcn = NULL; 77 setlocale (LC_ALL, ""); 78 79 dchar = '\t'; /* default delimiter is \t */ 80 81 /* Since we don't support multi-byte characters, the -c and -b 82 options are equivalent, and the -n option is meaningless. */ 83 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 84 switch(ch) { 85 case 'b': 86 case 'c': 87 fcn = c_cut; 88 get_list(optarg); 89 cflag = 1; 90 break; 91 case 'd': 92 dchar = *optarg; 93 dflag = 1; 94 break; 95 case 'f': 96 get_list(optarg); 97 fcn = f_cut; 98 fflag = 1; 99 break; 100 case 's': 101 sflag = 1; 102 break; 103 case 'n': 104 break; 105 case '?': 106 default: 107 usage(); 108 } 109 argc -= optind; 110 argv += optind; 111 112 if (fflag) { 113 if (cflag) 114 usage(); 115 } else if (!cflag || dflag || sflag) 116 usage(); 117 118 if (*argv) 119 for (; *argv; ++argv) { 120 if (!(fp = fopen(*argv, "r"))) 121 err(1, "%s", *argv); 122 fcn(fp, *argv); 123 (void)fclose(fp); 124 } 125 else 126 fcn(stdin, "stdin"); 127 exit(0); 128 } 129 130 size_t autostart, autostop, maxval; 131 132 char positions[_POSIX2_LINE_MAX + 1]; 133 134 void 135 get_list(list) 136 char *list; 137 { 138 size_t setautostart, start, stop; 139 char *pos; 140 char *p; 141 142 /* 143 * set a byte in the positions array to indicate if a field or 144 * column is to be selected; use +1, it's 1-based, not 0-based. 145 * This parser is less restrictive than the Draft 9 POSIX spec. 146 * POSIX doesn't allow lists that aren't in increasing order or 147 * overlapping lists. We also handle "-3-5" although there's no 148 * real reason too. 149 */ 150 for (; (p = strsep(&list, ", \t")) != NULL;) { 151 setautostart = start = stop = 0; 152 if (*p == '-') { 153 ++p; 154 setautostart = 1; 155 } 156 if (isdigit((unsigned char)*p)) { 157 start = stop = strtol(p, &p, 10); 158 if (setautostart && start > autostart) 159 autostart = start; 160 } 161 if (*p == '-') { 162 if (isdigit((unsigned char)p[1])) 163 stop = strtol(p + 1, &p, 10); 164 if (*p == '-') { 165 ++p; 166 if (!autostop || autostop > stop) 167 autostop = stop; 168 } 169 } 170 if (*p) 171 errx(1, "[-cf] list: illegal list value"); 172 if (!stop || !start) 173 errx(1, "[-cf] list: values may not include zero"); 174 if (stop > _POSIX2_LINE_MAX) 175 errx(1, "[-cf] list: %ld too large (max %d)", 176 (long)stop, _POSIX2_LINE_MAX); 177 if (maxval < stop) 178 maxval = stop; 179 for (pos = positions + start; start++ <= stop; *pos++ = 1); 180 } 181 182 /* overlapping ranges */ 183 if (autostop && maxval > autostop) 184 maxval = autostop; 185 186 /* set autostart */ 187 if (autostart) 188 memset(positions + 1, '1', autostart); 189 } 190 191 /* ARGSUSED */ 192 void 193 c_cut(fp, fname) 194 FILE *fp; 195 const char *fname; 196 { 197 int ch, col; 198 char *pos; 199 fname = NULL; 200 201 ch = 0; 202 for (;;) { 203 pos = positions + 1; 204 for (col = maxval; col; --col) { 205 if ((ch = getc(fp)) == EOF) 206 return; 207 if (ch == '\n') 208 break; 209 if (*pos++) 210 (void)putchar(ch); 211 } 212 if (ch != '\n') { 213 if (autostop) 214 while ((ch = getc(fp)) != EOF && ch != '\n') 215 (void)putchar(ch); 216 else 217 while ((ch = getc(fp)) != EOF && ch != '\n'); 218 } 219 (void)putchar('\n'); 220 } 221 } 222 223 void 224 f_cut(fp, fname) 225 FILE *fp; 226 const char *fname __unused; 227 { 228 int ch, field, isdelim; 229 char *pos, *p, sep; 230 int output; 231 char *lbuf, *mlbuf = NULL; 232 size_t lbuflen; 233 234 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 235 /* Assert EOL has a newline. */ 236 if (*(lbuf + lbuflen - 1) != '\n') { 237 /* Can't have > 1 line with no trailing newline. */ 238 mlbuf = malloc(lbuflen + 1); 239 if (mlbuf == NULL) 240 err(1, "malloc"); 241 memcpy(mlbuf, lbuf, lbuflen); 242 *(mlbuf + lbuflen) = '\n'; 243 lbuf = mlbuf; 244 } 245 output = 0; 246 for (isdelim = 0, p = lbuf;; ++p) { 247 ch = *p; 248 /* this should work if newline is delimiter */ 249 if (ch == sep) 250 isdelim = 1; 251 if (ch == '\n') { 252 if (!isdelim && !sflag) 253 (void)fwrite(lbuf, lbuflen, 1, stdout); 254 break; 255 } 256 } 257 if (!isdelim) 258 continue; 259 260 pos = positions + 1; 261 for (field = maxval, p = lbuf; field; --field, ++pos) { 262 if (*pos) { 263 if (output++) 264 (void)putchar(sep); 265 while ((ch = *p++) != '\n' && ch != sep) 266 (void)putchar(ch); 267 } else { 268 while ((ch = *p++) != '\n' && ch != sep) 269 continue; 270 } 271 if (ch == '\n') 272 break; 273 } 274 if (ch != '\n') { 275 if (autostop) { 276 if (output) 277 (void)putchar(sep); 278 for (; (ch = *p) != '\n'; ++p) 279 (void)putchar(ch); 280 } else 281 for (; (ch = *p) != '\n'; ++p); 282 } 283 (void)putchar('\n'); 284 } 285 if (mlbuf != NULL) 286 free(mlbuf); 287 } 288 289 static void 290 usage() 291 { 292 (void)fprintf(stderr, "%s\n%s\n%s\n", 293 "usage: cut -b list [-n] [file ...]", 294 " cut -c list [file ...]", 295 " cut -f list [-s] [-d delim] [file ...]"); 296 exit(1); 297 } 298