1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif /* not lint */ 42 43 #ifndef lint 44 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 45 #endif /* not lint */ 46 47 #include <ctype.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <limits.h> 51 #include <locale.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <unistd.h> 56 57 int cflag; 58 char dchar; 59 int dflag; 60 int fflag; 61 int sflag; 62 63 void c_cut __P((FILE *, char *)); 64 void f_cut __P((FILE *, char *)); 65 void get_list __P((char *)); 66 int main __P((int, char **)); 67 static void usage __P((void)); 68 69 int 70 main(argc, argv) 71 int argc; 72 char *argv[]; 73 { 74 FILE *fp; 75 void (*fcn) __P((FILE *, char *)) = NULL; 76 int ch; 77 78 fcn = NULL; 79 setlocale (LC_ALL, ""); 80 81 dchar = '\t'; /* default delimiter is \t */ 82 83 /* Since we don't support multi-byte characters, the -c and -b 84 options are equivalent, and the -n option is meaningless. */ 85 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 86 switch(ch) { 87 case 'b': 88 case 'c': 89 fcn = c_cut; 90 get_list(optarg); 91 cflag = 1; 92 break; 93 case 'd': 94 dchar = *optarg; 95 dflag = 1; 96 break; 97 case 'f': 98 get_list(optarg); 99 fcn = f_cut; 100 fflag = 1; 101 break; 102 case 's': 103 sflag = 1; 104 break; 105 case 'n': 106 break; 107 case '?': 108 default: 109 usage(); 110 } 111 argc -= optind; 112 argv += optind; 113 114 if (fflag) { 115 if (cflag) 116 usage(); 117 } else if (!cflag || dflag || sflag) 118 usage(); 119 120 if (*argv) 121 for (; *argv; ++argv) { 122 if (!(fp = fopen(*argv, "r"))) 123 err(1, "%s", *argv); 124 fcn(fp, *argv); 125 (void)fclose(fp); 126 } 127 else 128 fcn(stdin, "stdin"); 129 exit(0); 130 } 131 132 int autostart, autostop, maxval; 133 134 char positions[_POSIX2_LINE_MAX + 1]; 135 136 void 137 get_list(list) 138 char *list; 139 { 140 int setautostart, start, stop; 141 char *pos; 142 char *p; 143 144 /* 145 * set a byte in the positions array to indicate if a field or 146 * column is to be selected; use +1, it's 1-based, not 0-based. 147 * This parser is less restrictive than the Draft 9 POSIX spec. 148 * POSIX doesn't allow lists that aren't in increasing order or 149 * overlapping lists. We also handle "-3-5" although there's no 150 * real reason too. 151 */ 152 for (; (p = strsep(&list, ", \t")) != NULL;) { 153 setautostart = start = stop = 0; 154 if (*p == '-') { 155 ++p; 156 setautostart = 1; 157 } 158 if (isdigit((unsigned char)*p)) { 159 start = stop = strtol(p, &p, 10); 160 if (setautostart && start > autostart) 161 autostart = start; 162 } 163 if (*p == '-') { 164 if (isdigit((unsigned char)p[1])) 165 stop = strtol(p + 1, &p, 10); 166 if (*p == '-') { 167 ++p; 168 if (!autostop || autostop > stop) 169 autostop = stop; 170 } 171 } 172 if (*p) 173 errx(1, "[-cf] list: illegal list value"); 174 if (!stop || !start) 175 errx(1, "[-cf] list: values may not include zero"); 176 if (stop > _POSIX2_LINE_MAX) 177 errx(1, "[-cf] list: %d too large (max %d)", 178 stop, _POSIX2_LINE_MAX); 179 if (maxval < stop) 180 maxval = stop; 181 for (pos = positions + start; start++ <= stop; *pos++ = 1); 182 } 183 184 /* overlapping ranges */ 185 if (autostop && maxval > autostop) 186 maxval = autostop; 187 188 /* set autostart */ 189 if (autostart) 190 memset(positions + 1, '1', autostart); 191 } 192 193 /* ARGSUSED */ 194 void 195 c_cut(fp, fname) 196 FILE *fp; 197 char *fname; 198 { 199 int ch, col; 200 char *pos; 201 202 ch = 0; 203 for (;;) { 204 pos = positions + 1; 205 for (col = maxval; col; --col) { 206 if ((ch = getc(fp)) == EOF) 207 return; 208 if (ch == '\n') 209 break; 210 if (*pos++) 211 (void)putchar(ch); 212 } 213 if (ch != '\n') { 214 if (autostop) 215 while ((ch = getc(fp)) != EOF && ch != '\n') 216 (void)putchar(ch); 217 else 218 while ((ch = getc(fp)) != EOF && ch != '\n'); 219 } 220 (void)putchar('\n'); 221 } 222 } 223 224 void 225 f_cut(fp, fname) 226 FILE *fp; 227 char *fname; 228 { 229 int ch, field, isdelim; 230 char *pos, *p, sep; 231 int output; 232 char lbuf[_POSIX2_LINE_MAX + 1]; 233 234 for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) { 235 output = 0; 236 for (isdelim = 0, p = lbuf;; ++p) { 237 if (!(ch = *p)) 238 errx(1, "%s: line too long.", fname); 239 /* this should work if newline is delimiter */ 240 if (ch == sep) 241 isdelim = 1; 242 if (ch == '\n') { 243 if (!isdelim && !sflag) 244 (void)printf("%s", lbuf); 245 break; 246 } 247 } 248 if (!isdelim) 249 continue; 250 251 pos = positions + 1; 252 for (field = maxval, p = lbuf; field; --field, ++pos) { 253 if (*pos) { 254 if (output++) 255 (void)putchar(sep); 256 while ((ch = *p++) != '\n' && ch != sep) 257 (void)putchar(ch); 258 } else { 259 while ((ch = *p++) != '\n' && ch != sep) 260 continue; 261 } 262 if (ch == '\n') 263 break; 264 } 265 if (ch != '\n') { 266 if (autostop) { 267 if (output) 268 (void)putchar(sep); 269 for (; (ch = *p) != '\n'; ++p) 270 (void)putchar(ch); 271 } else 272 for (; (ch = *p) != '\n'; ++p); 273 } 274 (void)putchar('\n'); 275 } 276 } 277 278 static void 279 usage() 280 { 281 (void)fprintf(stderr, "%s\n%s\n%s\n", 282 "usage: cut -b list [-n] [file ...]", 283 " cut -c list [file ...]", 284 " cut -f list [-s] [-d delim] [file ...]"); 285 exit(1); 286 } 287