1 /* $NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* Sort sorts a file using an optional user-defined key. 40 * Sort uses radix sort for internal sorting, and allows 41 * a choice of merge sort and radix sort for external sorting. 42 */ 43 44 #include "sort.h" 45 #include "fsort.h" 46 #include "pathnames.h" 47 48 #ifndef lint 49 __COPYRIGHT("@(#) Copyright (c) 1993\n\ 50 The Regents of the University of California. All rights reserved.\n"); 51 #endif /* not lint */ 52 53 #ifndef lint 54 __RCSID("$NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $"); 55 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 56 #endif /* not lint */ 57 58 #include <sys/types.h> 59 #include <sys/time.h> 60 #include <sys/resource.h> 61 62 #include <paths.h> 63 #include <signal.h> 64 #include <stdlib.h> 65 #include <string.h> 66 #include <unistd.h> 67 #include <locale.h> 68 69 int REC_D = '\n'; 70 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 71 /* 72 * weight tables. Gweights is one of ascii, Rascii.. 73 * modified to weight rec_d = 0 (or 255) 74 */ 75 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 76 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 77 struct coldesc clist[(ND+1)*2]; 78 int ncols = 0; 79 extern struct coldesc clist[(ND+1)*2]; 80 extern int ncols; 81 82 /* 83 * Default to stable sort. 84 */ 85 int stable_sort = 1; 86 87 char toutpath[MAXPATHLEN]; 88 89 const char *tmpdir; /* where temporary files should be put */ 90 91 static void cleanup __P((void)); 92 static void onsignal __P((int)); 93 static void usage __P((const char *)); 94 static void many_files __P((void)); 95 96 int main __P((int argc, char **argv)); 97 98 int 99 main(argc, argv) 100 int argc; 101 char *argv[]; 102 { 103 get_func_t get; 104 int ch, i, stdinflag = 0, tmp = 0; 105 char cflag = 0, mflag = 0; 106 char *outfile, *outpath = 0; 107 struct field fldtab[ND+2], *ftpos; 108 struct filelist filelist; 109 FILE *outfp = NULL; 110 111 setlocale(LC_ALL, ""); 112 113 memset(fldtab, 0, (ND+2)*sizeof(struct field)); 114 memset(d_mask, 0, NBINS); 115 d_mask[REC_D = '\n'] = REC_D_F; 116 SINGL_FLD = SEP_FLAG = 0; 117 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 118 ftpos = fldtab; 119 many_files(); 120 121 fixit(&argc, argv); 122 if (!(tmpdir = getenv("TMPDIR"))) 123 tmpdir = _PATH_TMP; 124 125 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 126 switch (ch) { 127 case 'b': 128 fldtab->flags |= BI | BT; 129 break; 130 case 'c': 131 cflag = 1; 132 break; 133 case 'd': case 'f': case 'i': case 'n': case 'r': 134 tmp |= optval(ch, 0); 135 if ((tmp & R) && (tmp & F)) 136 fldtab->weights = RFtable; 137 else if (tmp & F) 138 fldtab->weights = Ftable; 139 else if (tmp & R) 140 fldtab->weights = Rascii; 141 fldtab->flags |= tmp; 142 break; 143 case 'H': 144 PANIC = 0; 145 break; 146 case 'k': 147 setfield(optarg, ++ftpos, fldtab->flags); 148 break; 149 case 'm': 150 mflag = 1; 151 break; 152 case 'o': 153 outpath = optarg; 154 break; 155 case 's': 156 /* for GNU sort compatibility (this is our default) */ 157 stable_sort = 1; 158 break; 159 case 'S': 160 stable_sort = 0; 161 break; 162 case 't': 163 if (SEP_FLAG) 164 usage("multiple field delimiters"); 165 SEP_FLAG = 1; 166 d_mask[' '] &= ~FLD_D; 167 d_mask['\t'] &= ~FLD_D; 168 d_mask[(u_char)*optarg] |= FLD_D; 169 if (d_mask[(u_char)*optarg] & REC_D_F) 170 errx(2, "record/field delimiter clash"); 171 break; 172 case 'R': 173 if (REC_D != '\n') 174 usage("multiple record delimiters"); 175 if ('\n' == (REC_D = *optarg)) 176 break; 177 d_mask['\n'] = d_mask[' ']; 178 d_mask[REC_D] = REC_D_F; 179 break; 180 case 'T': 181 /* -T tmpdir */ 182 tmpdir = optarg; 183 break; 184 case 'u': 185 UNIQUE = 1; 186 break; 187 case '?': 188 default: 189 usage(NULL); 190 } 191 } 192 if (cflag && argc > optind+1) 193 errx(2, "too many input files for -c option"); 194 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 195 outpath = argv[argc-1]; 196 argc -= 2; 197 } 198 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 199 errx(2, "too many input files for -m option"); 200 for (i = optind; i < argc; i++) { 201 /* allow one occurrence of /dev/stdin */ 202 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 203 if (stdinflag) 204 warnx("ignoring extra \"%s\" in file list", 205 argv[i]); 206 else 207 stdinflag = 1; 208 209 /* change to /dev/stdin if '-' */ 210 if (argv[i][0] == '-') 211 argv[i] = _PATH_STDIN; 212 213 } else if ((ch = access(argv[i], R_OK))) 214 err(2, "%s", argv[i]); 215 } 216 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 217 SINGL_FLD = 1; 218 fldtab[0].icol.num = 1; 219 } else { 220 if (!fldtab[1].icol.num) { 221 fldtab[0].flags &= ~(BI|BT); 222 setfield("1", ++ftpos, fldtab->flags); 223 } 224 fldreset(fldtab); 225 fldtab[0].flags &= ~F; 226 } 227 settables(fldtab[0].flags); 228 num_init(); 229 fldtab->weights = gweights; 230 if (optind == argc) { 231 static const char * const names[] = { _PATH_STDIN, NULL }; 232 233 filelist.names = names; 234 optind--; 235 } else 236 filelist.names = (const char * const *) &argv[optind]; 237 238 if (SINGL_FLD) 239 get = makeline; 240 else 241 get = makekey; 242 243 if (cflag) { 244 order(&filelist, get, fldtab); 245 /* NOT REACHED */ 246 } 247 if (!outpath) { 248 (void)snprintf(toutpath, 249 sizeof(toutpath), "%sstdout", _PATH_DEV); 250 outfile = outpath = toutpath; 251 outfp = stdout; 252 } else if (!(ch = access(outpath, 0)) && 253 strncmp(_PATH_DEV, outpath, 5)) { 254 static const struct sigaction act = 255 { onsignal, {{0}}, SA_RESTART | SA_RESETHAND }; 256 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 257 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 258 int outfd; 259 errno = 0; 260 if (access(outpath, W_OK)) 261 err(2, "%s", outpath); 262 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 263 outpath); 264 if ((outfd = mkstemp(toutpath)) == -1) 265 err(2, "Cannot create temporary file `%s'", toutpath); 266 if ((outfp = fdopen(outfd, "w")) == NULL) 267 err(2, "Cannot open temporary file `%s'", toutpath); 268 outfile = toutpath; 269 (void)atexit(cleanup); 270 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 271 sigaction(sigtable[i], &act, 0); 272 } else 273 outfile = outpath; 274 275 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 276 err(2, "output file %s", outfile); 277 278 if (mflag) { 279 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 280 fldtab); 281 } else 282 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 283 284 if (outfile != outpath) { 285 if (access(outfile, 0)) 286 err(2, "%s", outfile); 287 (void)unlink(outpath); 288 if (link(outfile, outpath)) 289 err(2, "cannot link %s: output left in %s", 290 outpath, outfile); 291 (void)unlink(outfile); 292 } 293 exit(0); 294 } 295 296 static void 297 onsignal(sig) 298 int sig; 299 { 300 cleanup(); 301 } 302 303 static void 304 cleanup() 305 { 306 if (toutpath[0]) 307 (void)unlink(toutpath); 308 } 309 310 static void 311 usage(msg) 312 const char *msg; 313 { 314 if (msg != NULL) 315 (void)fprintf(stderr, "sort: %s\n", msg); 316 (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] "); 317 (void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n"); 318 exit(2); 319 } 320 321 static void 322 many_files() 323 { 324 #if 0 325 struct rlimit rlp_many_files[1]; 326 327 if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) { 328 rlp_many_files->rlim_cur = rlp_many_files->rlim_max; 329 setrlimit(RLIMIT_NOFILE, rlp_many_files); 330 } 331 #endif 332 } 333