1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1987, 1993, 1994\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif 42 43 #ifndef lint 44 static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 45 #endif 46 47 #include <sys/param.h> 48 #include <sys/types.h> 49 #include <sys/stat.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <errno.h> 54 #include <fcntl.h> 55 #include <inttypes.h> 56 #include <limits.h> 57 #include <locale.h> 58 #include <stdint.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <unistd.h> 63 #include <regex.h> 64 #include <sysexits.h> 65 66 #define DEFLINE 1000 /* Default num lines per file. */ 67 68 off_t bytecnt; /* Byte count to split on. */ 69 off_t chunks = 0; /* Chunks count to split into. */ 70 long numlines; /* Line count to split on. */ 71 int file_open; /* If a file open. */ 72 int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 73 char bfr[MAXBSIZE]; /* I/O buffer. */ 74 char fname[MAXPATHLEN]; /* File name prefix. */ 75 regex_t rgx; 76 int pflag; 77 long sufflen = 2; /* File name suffix length. */ 78 79 static void newfile(void); 80 static void split1(void); 81 static void split2(void); 82 static void split3(void); 83 static void usage(void); 84 85 int 86 main(int argc, char **argv) 87 { 88 intmax_t bytecnti; 89 long scale; 90 int ch; 91 char *ep, *p; 92 93 setlocale(LC_ALL, ""); 94 95 while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1) 96 switch (ch) { 97 case '0': case '1': case '2': case '3': case '4': 98 case '5': case '6': case '7': case '8': case '9': 99 /* 100 * Undocumented kludge: split was originally designed 101 * to take a number after a dash. 102 */ 103 if (numlines == 0) { 104 p = argv[optind - 1]; 105 if (p[0] == '-' && p[1] == ch && !p[2]) 106 numlines = strtol(++p, &ep, 10); 107 else 108 numlines = 109 strtol(argv[optind] + 1, &ep, 10); 110 if (numlines <= 0 || *ep) 111 errx(EX_USAGE, 112 "%s: illegal line count", optarg); 113 } 114 break; 115 case 'a': /* Suffix length */ 116 if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 117 errx(EX_USAGE, 118 "%s: illegal suffix length", optarg); 119 break; 120 case 'b': /* Byte count. */ 121 errno = 0; 122 if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 || 123 strchr("kKmMgG", *ep) == NULL || errno != 0) 124 errx(EX_USAGE, 125 "%s: illegal byte count", optarg); 126 if (*ep == 'k' || *ep == 'K') 127 scale = 1024; 128 else if (*ep == 'm' || *ep == 'M') 129 scale = 1024 * 1024; 130 else if (*ep == 'g' || *ep == 'G') 131 scale = 1024 * 1024 * 1024; 132 else 133 scale = 1; 134 if (bytecnti > OFF_MAX / scale) 135 errx(EX_USAGE, "%s: offset too large", optarg); 136 bytecnt = (off_t)(bytecnti * scale); 137 break; 138 case 'l': /* Line count. */ 139 if (numlines != 0) 140 usage(); 141 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 142 errx(EX_USAGE, 143 "%s: illegal line count", optarg); 144 break; 145 case 'n': /* Chunks. */ 146 if (!isdigit((unsigned char)optarg[0]) || 147 (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 148 *ep != '\0') { 149 errx(EX_USAGE, "%s: illegal number of chunks", 150 optarg); 151 } 152 break; 153 154 case 'p': /* pattern matching. */ 155 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 156 errx(EX_USAGE, "%s: illegal regexp", optarg); 157 pflag = 1; 158 break; 159 default: 160 usage(); 161 } 162 argv += optind; 163 argc -= optind; 164 165 if (*argv != NULL) { /* Input file. */ 166 if (strcmp(*argv, "-") == 0) 167 ifd = STDIN_FILENO; 168 else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 169 err(EX_NOINPUT, "%s", *argv); 170 ++argv; 171 } 172 if (*argv != NULL) /* File name prefix. */ 173 if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 174 errx(EX_USAGE, "file name prefix is too long"); 175 if (*argv != NULL) 176 usage(); 177 178 if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 179 errx(EX_USAGE, "suffix is too long"); 180 if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) 181 usage(); 182 183 if (numlines == 0) 184 numlines = DEFLINE; 185 else if (bytecnt != 0 || chunks != 0) 186 usage(); 187 188 if (bytecnt && chunks) 189 usage(); 190 191 if (ifd == -1) /* Stdin by default. */ 192 ifd = 0; 193 194 if (bytecnt) { 195 split1(); 196 exit (0); 197 } else if (chunks) { 198 split3(); 199 exit (0); 200 } 201 split2(); 202 if (pflag) 203 regfree(&rgx); 204 exit(0); 205 } 206 207 /* 208 * split1 -- 209 * Split the input by bytes. 210 */ 211 static void 212 split1(void) 213 { 214 off_t bcnt; 215 char *C; 216 ssize_t dist, len; 217 int nfiles; 218 219 nfiles = 0; 220 221 for (bcnt = 0;;) 222 switch ((len = read(ifd, bfr, MAXBSIZE))) { 223 case 0: 224 exit(0); 225 case -1: 226 err(EX_IOERR, "read"); 227 /* NOTREACHED */ 228 default: 229 if (!file_open) { 230 if (!chunks || (nfiles < chunks)) { 231 newfile(); 232 nfiles++; 233 } 234 } 235 if (bcnt + len >= bytecnt) { 236 dist = bytecnt - bcnt; 237 if (write(ofd, bfr, dist) != dist) 238 err(EX_IOERR, "write"); 239 len -= dist; 240 for (C = bfr + dist; len >= bytecnt; 241 len -= bytecnt, C += bytecnt) { 242 if (!chunks || (nfiles < chunks)) { 243 newfile(); 244 nfiles++; 245 } 246 if (write(ofd, 247 C, bytecnt) != bytecnt) 248 err(EX_IOERR, "write"); 249 } 250 if (len != 0) { 251 if (!chunks || (nfiles < chunks)) { 252 newfile(); 253 nfiles++; 254 } 255 if (write(ofd, C, len) != len) 256 err(EX_IOERR, "write"); 257 } else 258 file_open = 0; 259 bcnt = len; 260 } else { 261 bcnt += len; 262 if (write(ofd, bfr, len) != len) 263 err(EX_IOERR, "write"); 264 } 265 } 266 } 267 268 /* 269 * split2 -- 270 * Split the input by lines. 271 */ 272 static void 273 split2(void) 274 { 275 long lcnt = 0; 276 FILE *infp; 277 278 /* Stick a stream on top of input file descriptor */ 279 if ((infp = fdopen(ifd, "r")) == NULL) 280 err(EX_NOINPUT, "fdopen"); 281 282 /* Process input one line at a time */ 283 while (fgets(bfr, sizeof(bfr), infp) != NULL) { 284 const int len = strlen(bfr); 285 286 /* If line is too long to deal with, just write it out */ 287 if (bfr[len - 1] != '\n') 288 goto writeit; 289 290 /* Check if we need to start a new file */ 291 if (pflag) { 292 regmatch_t pmatch; 293 294 pmatch.rm_so = 0; 295 pmatch.rm_eo = len - 1; 296 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 297 newfile(); 298 } else if (lcnt++ == numlines) { 299 newfile(); 300 lcnt = 1; 301 } 302 303 writeit: 304 /* Open output file if needed */ 305 if (!file_open) 306 newfile(); 307 308 /* Write out line */ 309 if (write(ofd, bfr, len) != len) 310 err(EX_IOERR, "write"); 311 } 312 313 /* EOF or error? */ 314 if (ferror(infp)) 315 err(EX_IOERR, "read"); 316 else 317 exit(0); 318 } 319 320 /* 321 * split3 -- 322 * Split the input into specified number of chunks 323 */ 324 static void 325 split3(void) 326 { 327 struct stat sb; 328 329 if (fstat(ifd, &sb) == -1) { 330 err(1, "stat"); 331 /* NOTREACHED */ 332 } 333 334 if (chunks > sb.st_size) { 335 errx(1, "can't split into more than %d files", 336 (int)sb.st_size); 337 /* NOTREACHED */ 338 } 339 340 bytecnt = sb.st_size / chunks; 341 split1(); 342 } 343 344 345 /* 346 * newfile -- 347 * Open a new output file. 348 */ 349 static void 350 newfile(void) 351 { 352 long i, maxfiles, tfnum; 353 static long fnum; 354 static int defname; 355 static char *fpnt; 356 357 if (ofd == -1) { 358 if (fname[0] == '\0') { 359 fname[0] = 'x'; 360 fpnt = fname + 1; 361 defname = 1; 362 } else { 363 fpnt = fname + strlen(fname); 364 defname = 0; 365 } 366 ofd = fileno(stdout); 367 } 368 369 /* maxfiles = 26^sufflen, but don't use libm. */ 370 for (maxfiles = 1, i = 0; i < sufflen; i++) 371 if ((maxfiles *= 26) <= 0) 372 errx(EX_USAGE, "suffix is too long (max %ld)", i); 373 374 if (fnum == maxfiles) 375 errx(EX_DATAERR, "too many files"); 376 377 /* Generate suffix of sufflen letters */ 378 tfnum = fnum; 379 i = sufflen - 1; 380 do { 381 fpnt[i] = tfnum % 26 + 'a'; 382 tfnum /= 26; 383 } while (i-- > 0); 384 fpnt[sufflen] = '\0'; 385 386 ++fnum; 387 if (!freopen(fname, "w", stdout)) 388 err(EX_IOERR, "%s", fname); 389 file_open = 1; 390 } 391 392 static void 393 usage(void) 394 { 395 (void)fprintf(stderr, 396 "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" 397 " split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" 398 " split -n chunk_count [-a suffix_length] [file [prefix]]\n" 399 " split -p pattern [-a suffix_length] [file [prefix]]\n"); 400 exit(EX_USAGE); 401 } 402