1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1987, 1993, 1994\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 43 #else 44 static const char rcsid[] = 45 "$FreeBSD$"; 46 #endif 47 #endif /* not lint */ 48 49 #include <sys/param.h> 50 #include <sys/types.h> 51 52 #include <ctype.h> 53 #include <err.h> 54 #include <fcntl.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <regex.h> 60 #include <sysexits.h> 61 62 #define DEFLINE 1000 /* Default num lines per file. */ 63 64 size_t bytecnt; /* Byte count to split on. */ 65 long numlines; /* Line count to split on. */ 66 int file_open; /* If a file open. */ 67 int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 68 char bfr[MAXBSIZE]; /* I/O buffer. */ 69 char fname[MAXPATHLEN]; /* File name prefix. */ 70 regex_t rgx; 71 int pflag; 72 73 void newfile __P((void)); 74 void split1 __P((void)); 75 void split2 __P((void)); 76 static void usage __P((void)); 77 78 int 79 main(argc, argv) 80 int argc; 81 char *argv[]; 82 { 83 int ch; 84 char *ep, *p; 85 86 while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1) 87 switch (ch) { 88 case '0': case '1': case '2': case '3': case '4': 89 case '5': case '6': case '7': case '8': case '9': 90 /* 91 * Undocumented kludge: split was originally designed 92 * to take a number after a dash. 93 */ 94 if (numlines == 0) { 95 p = argv[optind - 1]; 96 if (p[0] == '-' && p[1] == ch && !p[2]) 97 numlines = strtol(++p, &ep, 10); 98 else 99 numlines = 100 strtol(argv[optind] + 1, &ep, 10); 101 if (numlines <= 0 || *ep) 102 errx(EX_USAGE, 103 "%s: illegal line count", optarg); 104 } 105 break; 106 case '-': /* Undocumented: historic stdin flag. */ 107 if (ifd != -1) 108 usage(); 109 ifd = 0; 110 break; 111 case 'b': /* Byte count. */ 112 if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 || 113 (*ep != '\0' && *ep != 'k' && *ep != 'm')) 114 errx(EX_USAGE, 115 "%s: illegal byte count", optarg); 116 if (*ep == 'k') 117 bytecnt *= 1024; 118 else if (*ep == 'm') 119 bytecnt *= 1048576; 120 break; 121 case 'p' : /* pattern matching. */ 122 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 123 errx(EX_USAGE, "%s: illegal regexp", optarg); 124 pflag = 1; 125 break; 126 case 'l': /* Line count. */ 127 if (numlines != 0) 128 usage(); 129 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 130 errx(EX_USAGE, 131 "%s: illegal line count", optarg); 132 break; 133 default: 134 usage(); 135 } 136 argv += optind; 137 argc -= optind; 138 139 if (*argv != NULL) 140 if (ifd == -1) { /* Input file. */ 141 if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 142 err(EX_NOINPUT, "%s", *argv); 143 ++argv; 144 } 145 if (*argv != NULL) /* File name prefix. */ 146 (void)strcpy(fname, *argv++); 147 if (*argv != NULL) 148 usage(); 149 150 if (pflag && (numlines != 0 || bytecnt != 0)) 151 usage(); 152 153 if (numlines == 0) 154 numlines = DEFLINE; 155 else if (bytecnt != 0) 156 usage(); 157 158 if (ifd == -1) /* Stdin by default. */ 159 ifd = 0; 160 161 if (bytecnt) { 162 split1(); 163 exit (0); 164 } 165 split2(); 166 if (pflag) 167 regfree(&rgx); 168 exit(0); 169 } 170 171 /* 172 * split1 -- 173 * Split the input by bytes. 174 */ 175 void 176 split1() 177 { 178 size_t bcnt, dist, len; 179 char *C; 180 181 for (bcnt = 0;;) 182 switch ((len = read(ifd, bfr, MAXBSIZE))) { 183 case 0: 184 exit(0); 185 case -1: 186 err(EX_IOERR, "read"); 187 /* NOTREACHED */ 188 default: 189 if (!file_open) 190 newfile(); 191 if (bcnt + len >= bytecnt) { 192 dist = bytecnt - bcnt; 193 if (write(ofd, bfr, dist) != dist) 194 err(EX_IOERR, "write"); 195 len -= dist; 196 for (C = bfr + dist; len >= bytecnt; 197 len -= bytecnt, C += bytecnt) { 198 newfile(); 199 if (write(ofd, 200 C, (int)bytecnt) != bytecnt) 201 err(EX_IOERR, "write"); 202 } 203 if (len != 0) { 204 newfile(); 205 if (write(ofd, C, len) != len) 206 err(EX_IOERR, "write"); 207 } else 208 file_open = 0; 209 bcnt = len; 210 } else { 211 bcnt += len; 212 if (write(ofd, bfr, len) != len) 213 err(EX_IOERR, "write"); 214 } 215 } 216 } 217 218 /* 219 * split2 -- 220 * Split the input by lines. 221 */ 222 void 223 split2() 224 { 225 long lcnt = 0; 226 FILE *infp; 227 228 /* Stick a stream on top of input file descriptor */ 229 if ((infp = fdopen(ifd, "r")) == NULL) 230 err(EX_NOINPUT, "fdopen"); 231 232 /* Process input one line at a time */ 233 while (fgets(bfr, sizeof(bfr), infp) != NULL) { 234 const int len = strlen(bfr); 235 236 /* If line is too long to deal with, just write it out */ 237 if (bfr[len - 1] != '\n') 238 goto writeit; 239 240 /* Check if we need to start a new file */ 241 if (pflag) { 242 regmatch_t pmatch; 243 244 pmatch.rm_so = 0; 245 pmatch.rm_eo = len - 1; 246 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 247 newfile(); 248 } else if (lcnt++ == numlines) { 249 newfile(); 250 lcnt = 1; 251 } 252 253 writeit: 254 /* Open output file if needed */ 255 if (!file_open) 256 newfile(); 257 258 /* Write out line */ 259 if (write(ofd, bfr, len) != len) 260 err(EX_IOERR, "write"); 261 } 262 263 /* EOF or error? */ 264 if (ferror(infp)) 265 err(EX_IOERR, "read"); 266 else 267 exit(0); 268 } 269 270 /* 271 * newfile -- 272 * Open a new output file. 273 */ 274 void 275 newfile() 276 { 277 static long fnum; 278 static int defname; 279 static char *fpnt; 280 281 if (ofd == -1) { 282 if (fname[0] == '\0') { 283 fname[0] = 'x'; 284 fpnt = fname + 1; 285 defname = 1; 286 } else { 287 fpnt = fname + strlen(fname); 288 defname = 0; 289 } 290 ofd = fileno(stdout); 291 } 292 /* 293 * Hack to increase max files; original code wandered through 294 * magic characters. Maximum files is 3 * 26 * 26 == 2028 295 */ 296 #define MAXFILES 676 297 if (fnum == MAXFILES) { 298 if (!defname || fname[0] == 'z') 299 errx(EX_DATAERR, "too many files"); 300 ++fname[0]; 301 fnum = 0; 302 } 303 fpnt[0] = fnum / 26 + 'a'; 304 fpnt[1] = fnum % 26 + 'a'; 305 ++fnum; 306 if (!freopen(fname, "w", stdout)) 307 err(EX_IOERR, "%s", fname); 308 file_open = 1; 309 } 310 311 static void 312 usage() 313 { 314 (void)fprintf(stderr, 315 "usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n"); 316 exit(EX_USAGE); 317 } 318