1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #ifndef lint 34 static const char copyright[] = 35 "@(#) Copyright (c) 1987, 1993, 1994\n\ 36 The Regents of the University of California. All rights reserved.\n"; 37 #endif 38 39 #ifndef lint 40 static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 41 #endif 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 47 #include <ctype.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <inttypes.h> 52 #include <limits.h> 53 #include <locale.h> 54 #include <stdint.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <regex.h> 60 #include <sysexits.h> 61 62 #define DEFLINE 1000 /* Default num lines per file. */ 63 64 off_t bytecnt; /* Byte count to split on. */ 65 off_t chunks = 0; /* Chunks count to split into. */ 66 long numlines; /* Line count to split on. */ 67 int file_open; /* If a file open. */ 68 int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 69 char bfr[MAXBSIZE]; /* I/O buffer. */ 70 char fname[MAXPATHLEN]; /* File name prefix. */ 71 regex_t rgx; 72 int pflag; 73 long sufflen = 2; /* File name suffix length. */ 74 75 static void newfile(void); 76 static void split1(void); 77 static void split2(void); 78 static void split3(void); 79 static void usage(void); 80 81 int 82 main(int argc, char **argv) 83 { 84 intmax_t bytecnti; 85 long scale; 86 int ch; 87 char *ep, *p; 88 89 setlocale(LC_ALL, ""); 90 91 while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1) 92 switch (ch) { 93 case '0': case '1': case '2': case '3': case '4': 94 case '5': case '6': case '7': case '8': case '9': 95 /* 96 * Undocumented kludge: split was originally designed 97 * to take a number after a dash. 98 */ 99 if (numlines == 0) { 100 p = argv[optind - 1]; 101 if (p[0] == '-' && p[1] == ch && !p[2]) 102 numlines = strtol(++p, &ep, 10); 103 else 104 numlines = 105 strtol(argv[optind] + 1, &ep, 10); 106 if (numlines <= 0 || *ep) 107 errx(EX_USAGE, 108 "%s: illegal line count", optarg); 109 } 110 break; 111 case 'a': /* Suffix length */ 112 if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 113 errx(EX_USAGE, 114 "%s: illegal suffix length", optarg); 115 break; 116 case 'b': /* Byte count. */ 117 errno = 0; 118 if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 || 119 strchr("kKmMgG", *ep) == NULL || errno != 0) 120 errx(EX_USAGE, 121 "%s: illegal byte count", optarg); 122 if (*ep == 'k' || *ep == 'K') 123 scale = 1024; 124 else if (*ep == 'm' || *ep == 'M') 125 scale = 1024 * 1024; 126 else if (*ep == 'g' || *ep == 'G') 127 scale = 1024 * 1024 * 1024; 128 else 129 scale = 1; 130 if (bytecnti > OFF_MAX / scale) 131 errx(EX_USAGE, "%s: offset too large", optarg); 132 bytecnt = (off_t)(bytecnti * scale); 133 break; 134 case 'l': /* Line count. */ 135 if (numlines != 0) 136 usage(); 137 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 138 errx(EX_USAGE, 139 "%s: illegal line count", optarg); 140 break; 141 case 'n': /* Chunks. */ 142 if (!isdigit((unsigned char)optarg[0]) || 143 (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 144 *ep != '\0') { 145 errx(EX_USAGE, "%s: illegal number of chunks", 146 optarg); 147 } 148 break; 149 150 case 'p': /* pattern matching. */ 151 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 152 errx(EX_USAGE, "%s: illegal regexp", optarg); 153 pflag = 1; 154 break; 155 default: 156 usage(); 157 } 158 argv += optind; 159 argc -= optind; 160 161 if (*argv != NULL) { /* Input file. */ 162 if (strcmp(*argv, "-") == 0) 163 ifd = STDIN_FILENO; 164 else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 165 err(EX_NOINPUT, "%s", *argv); 166 ++argv; 167 } 168 if (*argv != NULL) /* File name prefix. */ 169 if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 170 errx(EX_USAGE, "file name prefix is too long"); 171 if (*argv != NULL) 172 usage(); 173 174 if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 175 errx(EX_USAGE, "suffix is too long"); 176 if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) 177 usage(); 178 179 if (numlines == 0) 180 numlines = DEFLINE; 181 else if (bytecnt != 0 || chunks != 0) 182 usage(); 183 184 if (bytecnt && chunks) 185 usage(); 186 187 if (ifd == -1) /* Stdin by default. */ 188 ifd = 0; 189 190 if (bytecnt) { 191 split1(); 192 exit (0); 193 } else if (chunks) { 194 split3(); 195 exit (0); 196 } 197 split2(); 198 if (pflag) 199 regfree(&rgx); 200 exit(0); 201 } 202 203 /* 204 * split1 -- 205 * Split the input by bytes. 206 */ 207 static void 208 split1(void) 209 { 210 off_t bcnt; 211 char *C; 212 ssize_t dist, len; 213 int nfiles; 214 215 nfiles = 0; 216 217 for (bcnt = 0;;) 218 switch ((len = read(ifd, bfr, MAXBSIZE))) { 219 case 0: 220 exit(0); 221 case -1: 222 err(EX_IOERR, "read"); 223 /* NOTREACHED */ 224 default: 225 if (!file_open) { 226 if (!chunks || (nfiles < chunks)) { 227 newfile(); 228 nfiles++; 229 } 230 } 231 if (bcnt + len >= bytecnt) { 232 dist = bytecnt - bcnt; 233 if (write(ofd, bfr, dist) != dist) 234 err(EX_IOERR, "write"); 235 len -= dist; 236 for (C = bfr + dist; len >= bytecnt; 237 len -= bytecnt, C += bytecnt) { 238 if (!chunks || (nfiles < chunks)) { 239 newfile(); 240 nfiles++; 241 } 242 if (write(ofd, 243 C, bytecnt) != bytecnt) 244 err(EX_IOERR, "write"); 245 } 246 if (len != 0) { 247 if (!chunks || (nfiles < chunks)) { 248 newfile(); 249 nfiles++; 250 } 251 if (write(ofd, C, len) != len) 252 err(EX_IOERR, "write"); 253 } else 254 file_open = 0; 255 bcnt = len; 256 } else { 257 bcnt += len; 258 if (write(ofd, bfr, len) != len) 259 err(EX_IOERR, "write"); 260 } 261 } 262 } 263 264 /* 265 * split2 -- 266 * Split the input by lines. 267 */ 268 static void 269 split2(void) 270 { 271 long lcnt = 0; 272 FILE *infp; 273 274 /* Stick a stream on top of input file descriptor */ 275 if ((infp = fdopen(ifd, "r")) == NULL) 276 err(EX_NOINPUT, "fdopen"); 277 278 /* Process input one line at a time */ 279 while (fgets(bfr, sizeof(bfr), infp) != NULL) { 280 const int len = strlen(bfr); 281 282 /* If line is too long to deal with, just write it out */ 283 if (bfr[len - 1] != '\n') 284 goto writeit; 285 286 /* Check if we need to start a new file */ 287 if (pflag) { 288 regmatch_t pmatch; 289 290 pmatch.rm_so = 0; 291 pmatch.rm_eo = len - 1; 292 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 293 newfile(); 294 } else if (lcnt++ == numlines) { 295 newfile(); 296 lcnt = 1; 297 } 298 299 writeit: 300 /* Open output file if needed */ 301 if (!file_open) 302 newfile(); 303 304 /* Write out line */ 305 if (write(ofd, bfr, len) != len) 306 err(EX_IOERR, "write"); 307 } 308 309 /* EOF or error? */ 310 if (ferror(infp)) 311 err(EX_IOERR, "read"); 312 else 313 exit(0); 314 } 315 316 /* 317 * split3 -- 318 * Split the input into specified number of chunks 319 */ 320 static void 321 split3(void) 322 { 323 struct stat sb; 324 325 if (fstat(ifd, &sb) == -1) { 326 err(1, "stat"); 327 /* NOTREACHED */ 328 } 329 330 if (chunks > sb.st_size) { 331 errx(1, "can't split into more than %d files", 332 (int)sb.st_size); 333 /* NOTREACHED */ 334 } 335 336 bytecnt = sb.st_size / chunks; 337 split1(); 338 } 339 340 341 /* 342 * newfile -- 343 * Open a new output file. 344 */ 345 static void 346 newfile(void) 347 { 348 long i, maxfiles, tfnum; 349 static long fnum; 350 static int defname; 351 static char *fpnt; 352 353 if (ofd == -1) { 354 if (fname[0] == '\0') { 355 fname[0] = 'x'; 356 fpnt = fname + 1; 357 defname = 1; 358 } else { 359 fpnt = fname + strlen(fname); 360 defname = 0; 361 } 362 ofd = fileno(stdout); 363 } 364 365 /* maxfiles = 26^sufflen, but don't use libm. */ 366 for (maxfiles = 1, i = 0; i < sufflen; i++) 367 if ((maxfiles *= 26) <= 0) 368 errx(EX_USAGE, "suffix is too long (max %ld)", i); 369 370 if (fnum == maxfiles) 371 errx(EX_DATAERR, "too many files"); 372 373 /* Generate suffix of sufflen letters */ 374 tfnum = fnum; 375 i = sufflen - 1; 376 do { 377 fpnt[i] = tfnum % 26 + 'a'; 378 tfnum /= 26; 379 } while (i-- > 0); 380 fpnt[sufflen] = '\0'; 381 382 ++fnum; 383 if (!freopen(fname, "w", stdout)) 384 err(EX_IOERR, "%s", fname); 385 file_open = 1; 386 } 387 388 static void 389 usage(void) 390 { 391 (void)fprintf(stderr, 392 "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" 393 " split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" 394 " split -n chunk_count [-a suffix_length] [file [prefix]]\n" 395 " split -p pattern [-a suffix_length] [file [prefix]]\n"); 396 exit(EX_USAGE); 397 } 398