1 /* $NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* Sort sorts a file using an optional user-defined key. 40 * Sort uses radix sort for internal sorting, and allows 41 * a choice of merge sort and radix sort for external sorting. 42 */ 43 44 #include "sort.h" 45 #include "fsort.h" 46 #include "pathnames.h" 47 48 #ifndef lint 49 __COPYRIGHT("@(#) Copyright (c) 1993\nThe Regents of the University of California. All rights reserved.\n"); 50 #endif /* not lint */ 51 52 #ifndef lint 53 #if 0 54 __RCSID("$NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $"); 55 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 56 #endif 57 #endif /* not lint */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include <sys/types.h> 63 #include <sys/time.h> 64 #include <sys/resource.h> 65 66 #include <paths.h> 67 #include <signal.h> 68 #include <stdlib.h> 69 #include <string.h> 70 #include <unistd.h> 71 #include <locale.h> 72 73 int REC_D = '\n'; 74 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 75 /* 76 * weight tables. Gweights is one of ascii, Rascii.. 77 * modified to weight rec_d = 0 (or 255) 78 */ 79 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 80 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 81 struct coldesc clist[(ND+1)*2]; 82 int ncols = 0; 83 extern struct coldesc clist[(ND+1)*2]; 84 extern int ncols; 85 86 /* 87 * Default to stable sort. 88 */ 89 int stable_sort = 1; 90 91 char toutpath[MAXPATHLEN]; 92 93 const char *tmpdir; /* where temporary files should be put */ 94 95 static void cleanup __P((void)); 96 static void onsignal __P((int)); 97 static void usage __P((const char *)); 98 static void many_files __P((void)); 99 100 int main __P((int argc, char **argv)); 101 102 int 103 main(argc, argv) 104 int argc; 105 char *argv[]; 106 { 107 get_func_t get; 108 int ch, i, stdinflag = 0, tmp = 0; 109 char cflag = 0, mflag = 0; 110 char *outfile, *outpath = 0; 111 struct field fldtab[ND+2], *ftpos; 112 struct filelist filelist; 113 FILE *outfp = NULL; 114 115 setlocale(LC_ALL, ""); 116 117 memset(fldtab, 0, (ND+2)*sizeof(struct field)); 118 memset(d_mask, 0, NBINS); 119 d_mask[REC_D = '\n'] = REC_D_F; 120 SINGL_FLD = SEP_FLAG = 0; 121 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 122 ftpos = fldtab; 123 many_files(); 124 125 fixit(&argc, argv); 126 if (!(tmpdir = getenv("TMPDIR"))) 127 tmpdir = _PATH_TMP; 128 129 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 130 switch (ch) { 131 case 'b': 132 fldtab->flags |= BI | BT; 133 break; 134 case 'c': 135 cflag = 1; 136 break; 137 case 'd': case 'f': case 'i': case 'n': case 'r': 138 tmp |= optval(ch, 0); 139 if ((tmp & R) && (tmp & F)) 140 fldtab->weights = RFtable; 141 else if (tmp & F) 142 fldtab->weights = Ftable; 143 else if (tmp & R) 144 fldtab->weights = Rascii; 145 fldtab->flags |= tmp; 146 break; 147 case 'H': 148 PANIC = 0; 149 break; 150 case 'k': 151 setfield(optarg, ++ftpos, fldtab->flags); 152 break; 153 case 'm': 154 mflag = 1; 155 break; 156 case 'o': 157 outpath = optarg; 158 break; 159 case 's': 160 /* for GNU sort compatibility (this is our default) */ 161 stable_sort = 1; 162 break; 163 case 'S': 164 stable_sort = 0; 165 break; 166 case 't': 167 if (SEP_FLAG) 168 usage("multiple field delimiters"); 169 SEP_FLAG = 1; 170 d_mask[' '] &= ~FLD_D; 171 d_mask['\t'] &= ~FLD_D; 172 d_mask[(u_char)*optarg] |= FLD_D; 173 if (d_mask[(u_char)*optarg] & REC_D_F) 174 errx(2, "record/field delimiter clash"); 175 break; 176 case 'R': 177 if (REC_D != '\n') 178 usage("multiple record delimiters"); 179 if ('\n' == (REC_D = *optarg)) 180 break; 181 d_mask['\n'] = d_mask[' ']; 182 d_mask[REC_D] = REC_D_F; 183 break; 184 case 'T': 185 /* -T tmpdir */ 186 tmpdir = optarg; 187 break; 188 case 'u': 189 UNIQUE = 1; 190 break; 191 case '?': 192 default: 193 usage(NULL); 194 } 195 } 196 if (cflag && argc > optind+1) 197 errx(2, "too many input files for -c option"); 198 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 199 outpath = argv[argc-1]; 200 argc -= 2; 201 } 202 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 203 errx(2, "too many input files for -m option"); 204 for (i = optind; i < argc; i++) { 205 /* allow one occurrence of /dev/stdin */ 206 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 207 if (stdinflag) 208 warnx("ignoring extra \"%s\" in file list", 209 argv[i]); 210 else 211 stdinflag = 1; 212 213 /* change to /dev/stdin if '-' */ 214 if (argv[i][0] == '-') 215 argv[i] = strdup(_PATH_STDIN); 216 217 } else if ((ch = access(argv[i], R_OK))) 218 err(2, "%s", argv[i]); 219 } 220 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 221 SINGL_FLD = 1; 222 fldtab[0].icol.num = 1; 223 } else { 224 if (!fldtab[1].icol.num) { 225 fldtab[0].flags &= ~(BI|BT); 226 setfield("1", ++ftpos, fldtab->flags); 227 } 228 fldreset(fldtab); 229 fldtab[0].flags &= ~F; 230 } 231 settables(fldtab[0].flags); 232 num_init(); 233 fldtab->weights = gweights; 234 if (optind == argc) { 235 static const char * const names[] = { _PATH_STDIN, NULL }; 236 237 filelist.names = names; 238 optind--; 239 } else 240 filelist.names = (const char * const *) &argv[optind]; 241 242 if (SINGL_FLD) 243 get = makeline; 244 else 245 get = makekey; 246 247 if (cflag) { 248 order(&filelist, get, fldtab); 249 /* NOT REACHED */ 250 } 251 if (!outpath) { 252 (void)snprintf(toutpath, 253 sizeof(toutpath), "%sstdout", _PATH_DEV); 254 outfile = outpath = toutpath; 255 outfp = stdout; 256 } else if (!(ch = access(outpath, 0)) && 257 strncmp(_PATH_DEV, outpath, 5)) { 258 static const struct sigaction act = 259 { { onsignal }, SA_RESTART | SA_RESETHAND, { { 0 } } }; 260 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 261 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 262 int outfd; 263 errno = 0; 264 if (access(outpath, W_OK)) 265 err(2, "%s", outpath); 266 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 267 outpath); 268 if ((outfd = mkstemp(toutpath)) == -1) 269 err(2, "Cannot create temporary file `%s'", toutpath); 270 if ((outfp = fdopen(outfd, "w")) == NULL) 271 err(2, "Cannot open temporary file `%s'", toutpath); 272 outfile = toutpath; 273 (void)atexit(cleanup); 274 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 275 sigaction(sigtable[i], &act, 0); 276 } else 277 outfile = outpath; 278 279 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 280 err(2, "output file %s", outfile); 281 282 if (mflag) { 283 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 284 fldtab); 285 } else 286 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 287 288 if (outfile != outpath) { 289 if (access(outfile, 0)) 290 err(2, "%s", outfile); 291 (void)unlink(outpath); 292 if (link(outfile, outpath)) 293 err(2, "cannot link %s: output left in %s", 294 outpath, outfile); 295 (void)unlink(outfile); 296 } 297 exit(0); 298 } 299 300 static void 301 onsignal(sig) 302 int sig __unused; 303 { 304 cleanup(); 305 } 306 307 static void 308 cleanup() 309 { 310 if (toutpath[0]) 311 (void)unlink(toutpath); 312 } 313 314 static void 315 usage(msg) 316 const char *msg; 317 { 318 if (msg != NULL) 319 (void)fprintf(stderr, "sort: %s\n", msg); 320 (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] "); 321 (void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n"); 322 exit(2); 323 } 324 325 static void 326 many_files() 327 { 328 #if 0 329 struct rlimit rlp_many_files[1]; 330 331 if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) { 332 rlp_many_files->rlim_cur = rlp_many_files->rlim_max; 333 setrlimit(RLIMIT_NOFILE, rlp_many_files); 334 } 335 #endif 336 } 337