1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <md5.h> 41 #include <regex.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <wchar.h> 49 #include <wctype.h> 50 51 #include "coll.h" 52 #include "file.h" 53 #include "sort.h" 54 55 #ifndef WITHOUT_NLS 56 #include <nl_types.h> 57 nl_catd catalog; 58 #endif 59 60 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 61 62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 64 65 static bool need_random; 66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 67 static const void *random_seed; 68 static size_t random_seed_size; 69 70 MD5_CTX md5_ctx; 71 72 /* 73 * Default messages to use when NLS is disabled or no catalogue 74 * is found. 75 */ 76 const char *nlsstr[] = { "", 77 /* 1*/"mutually exclusive flags", 78 /* 2*/"extra argument not allowed with -c", 79 /* 3*/"Unknown feature", 80 /* 4*/"Wrong memory buffer specification", 81 /* 5*/"0 field in key specs", 82 /* 6*/"0 column in key specs", 83 /* 7*/"Wrong file mode", 84 /* 8*/"Cannot open file for reading", 85 /* 9*/"Radix sort cannot be used with these sort options", 86 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 87 /*11*/"Invalid key position", 88 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 89 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 90 "[-o outfile] [--batch-size size] [--files0-from file] " 91 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 92 "[--mmap] " 93 #if defined(SORT_THREADS) 94 "[--parallel thread_no] " 95 #endif 96 "[--human-numeric-sort] " 97 "[--version-sort] [--random-sort [--random-source file]] " 98 "[--compress-program program] [file ...]\n" }; 99 100 struct sort_opts sort_opts_vals; 101 102 bool debug_sort; 103 bool need_hint; 104 105 #if defined(SORT_THREADS) 106 size_t ncpu = 1; 107 size_t nthreads = 1; 108 #endif 109 110 static bool gnusort_numeric_compatibility; 111 112 static struct sort_mods default_sort_mods_object; 113 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 114 115 static bool print_symbols_on_debug; 116 117 /* 118 * Arguments from file (when file0-from option is used: 119 */ 120 static size_t argc_from_file0 = (size_t)-1; 121 static char **argv_from_file0; 122 123 /* 124 * Placeholder symbols for options which have no single-character equivalent 125 */ 126 enum 127 { 128 SORT_OPT = CHAR_MAX + 1, 129 HELP_OPT, 130 FF_OPT, 131 BS_OPT, 132 VERSION_OPT, 133 DEBUG_OPT, 134 #if defined(SORT_THREADS) 135 PARALLEL_OPT, 136 #endif 137 RANDOMSOURCE_OPT, 138 COMPRESSPROGRAM_OPT, 139 QSORT_OPT, 140 MERGESORT_OPT, 141 HEAPSORT_OPT, 142 RADIXSORT_OPT, 143 MMAP_OPT 144 }; 145 146 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 147 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 148 149 static struct option long_options[] = { 150 { "batch-size", required_argument, NULL, BS_OPT }, 151 { "buffer-size", required_argument, NULL, 'S' }, 152 { "check", optional_argument, NULL, 'c' }, 153 { "check=silent|quiet", optional_argument, NULL, 'C' }, 154 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 155 { "debug", no_argument, NULL, DEBUG_OPT }, 156 { "dictionary-order", no_argument, NULL, 'd' }, 157 { "field-separator", required_argument, NULL, 't' }, 158 { "files0-from", required_argument, NULL, FF_OPT }, 159 { "general-numeric-sort", no_argument, NULL, 'g' }, 160 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 161 { "help",no_argument, NULL, HELP_OPT }, 162 { "human-numeric-sort", no_argument, NULL, 'h' }, 163 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 164 { "ignore-case", no_argument, NULL, 'f' }, 165 { "ignore-nonprinting", no_argument, NULL, 'i' }, 166 { "key", required_argument, NULL, 'k' }, 167 { "merge", no_argument, NULL, 'm' }, 168 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 169 { "mmap", no_argument, NULL, MMAP_OPT }, 170 { "month-sort", no_argument, NULL, 'M' }, 171 { "numeric-sort", no_argument, NULL, 'n' }, 172 { "output", required_argument, NULL, 'o' }, 173 #if defined(SORT_THREADS) 174 { "parallel", required_argument, NULL, PARALLEL_OPT }, 175 #endif 176 { "qsort", no_argument, NULL, QSORT_OPT }, 177 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 178 { "random-sort", no_argument, NULL, 'R' }, 179 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 180 { "reverse", no_argument, NULL, 'r' }, 181 { "sort", required_argument, NULL, SORT_OPT }, 182 { "stable", no_argument, NULL, 's' }, 183 { "temporary-directory",required_argument, NULL, 'T' }, 184 { "unique", no_argument, NULL, 'u' }, 185 { "version", no_argument, NULL, VERSION_OPT }, 186 { "version-sort",no_argument, NULL, 'V' }, 187 { "zero-terminated", no_argument, NULL, 'z' }, 188 { NULL, no_argument, NULL, 0 } 189 }; 190 191 void fix_obsolete_keys(int *argc, char **argv); 192 193 /* 194 * Check where sort modifier is present 195 */ 196 static bool 197 sort_modifier_empty(struct sort_mods *sm) 198 { 199 200 if (sm == NULL) 201 return (true); 202 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 203 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 204 } 205 206 /* 207 * Print out usage text. 208 */ 209 static void 210 usage(bool opt_err) 211 { 212 struct option *o; 213 FILE *out; 214 215 out = stdout; 216 o = &(long_options[0]); 217 218 if (opt_err) 219 out = stderr; 220 fprintf(out, getstr(12), getprogname()); 221 if (opt_err) 222 exit(2); 223 exit(0); 224 } 225 226 /* 227 * Read input file names from a file (file0-from option). 228 */ 229 static void 230 read_fns_from_file0(const char *fn) 231 { 232 if (fn) { 233 struct file0_reader f0r; 234 FILE *f; 235 236 f = fopen(fn, "r"); 237 if (f == NULL) 238 err(2, NULL); 239 240 memset(&f0r, 0, sizeof(f0r)); 241 f0r.f = f; 242 243 while (!feof(f)) { 244 char *line = read_file0_line(&f0r); 245 246 if (line && *line) { 247 if (argc_from_file0 == (size_t)-1) 248 argc_from_file0 = 0; 249 ++argc_from_file0; 250 argv_from_file0 = sort_realloc(argv_from_file0, 251 argc_from_file0 * sizeof(char *)); 252 if (argv_from_file0 == NULL) 253 err(2, NULL); 254 argv_from_file0[argc_from_file0 - 1] = 255 sort_strdup(line); 256 } 257 } 258 closefile(f, fn); 259 } 260 } 261 262 /* 263 * Check how much RAM is available for the sort. 264 */ 265 static void 266 set_hw_params(void) 267 { 268 #if defined(SORT_THREADS) 269 size_t ncpusz; 270 #endif 271 size_t pages, psize, psz, pszsz; 272 273 pages = psize = 0; 274 #if defined(SORT_THREADS) 275 ncpu = 1; 276 ncpusz = sizeof(size_t); 277 #endif 278 psz = pszsz = sizeof(size_t); 279 280 if (sysctlbyname("vm.stats.vm.v_free_count", &pages, &psz, 281 NULL, 0) < 0) { 282 perror("vm.stats.vm.v_free_count"); 283 return; 284 } 285 if (sysctlbyname("vm.stats.vm.v_page_size", &psize, &pszsz, 286 NULL, 0) < 0) { 287 perror("vm.stats.vm.v_page_size"); 288 return; 289 } 290 #if defined(SORT_THREADS) 291 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, 292 NULL, 0) < 0) 293 ncpu = 1; 294 else if(ncpu > 32) 295 ncpu = 32; 296 297 nthreads = ncpu; 298 #endif 299 300 free_memory = (unsigned long long) pages * (unsigned long long) psize; 301 available_free_memory = (free_memory * 9) / 10; 302 } 303 304 /* 305 * Convert "plain" symbol to wide symbol, with default value. 306 */ 307 static void 308 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 309 { 310 311 if (wc && c) { 312 int res; 313 314 res = mbtowc(wc, c, MB_CUR_MAX); 315 if (res < 1) 316 *wc = def; 317 } 318 } 319 320 /* 321 * Set current locale symbols. 322 */ 323 static void 324 set_locale(void) 325 { 326 struct lconv *lc; 327 const char *locale; 328 329 setlocale(LC_ALL, ""); 330 331 lc = localeconv(); 332 333 if (lc) { 334 /* obtain LC_NUMERIC info */ 335 /* Convert to wide char form */ 336 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 337 symbol_decimal_point); 338 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 339 symbol_thousands_sep); 340 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 341 symbol_positive_sign); 342 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 343 symbol_negative_sign); 344 } 345 346 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 347 gnusort_numeric_compatibility = true; 348 349 locale = setlocale(LC_COLLATE, NULL); 350 351 if (locale) { 352 char *tmpl; 353 const char *cclocale; 354 355 tmpl = sort_strdup(locale); 356 cclocale = setlocale(LC_COLLATE, "C"); 357 if (cclocale && !strcmp(cclocale, tmpl)) 358 byte_sort = true; 359 else { 360 const char *pclocale; 361 362 pclocale = setlocale(LC_COLLATE, "POSIX"); 363 if (pclocale && !strcmp(pclocale, tmpl)) 364 byte_sort = true; 365 } 366 setlocale(LC_COLLATE, tmpl); 367 sort_free(tmpl); 368 } 369 } 370 371 /* 372 * Set directory temporary files. 373 */ 374 static void 375 set_tmpdir(void) 376 { 377 char *td; 378 379 td = getenv("TMPDIR"); 380 if (td != NULL) 381 tmpdir = sort_strdup(td); 382 } 383 384 /* 385 * Parse -S option. 386 */ 387 static unsigned long long 388 parse_memory_buffer_value(const char *value) 389 { 390 391 if (value == NULL) 392 return (available_free_memory); 393 else { 394 char *endptr; 395 unsigned long long membuf; 396 397 endptr = NULL; 398 errno = 0; 399 membuf = strtoll(value, &endptr, 10); 400 401 if (errno != 0) { 402 warn("%s",getstr(4)); 403 membuf = available_free_memory; 404 } else { 405 switch (*endptr){ 406 case 'Y': 407 membuf *= 1024; 408 /* FALLTHROUGH */ 409 case 'Z': 410 membuf *= 1024; 411 /* FALLTHROUGH */ 412 case 'E': 413 membuf *= 1024; 414 /* FALLTHROUGH */ 415 case 'P': 416 membuf *= 1024; 417 /* FALLTHROUGH */ 418 case 'T': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case 'G': 422 membuf *= 1024; 423 /* FALLTHROUGH */ 424 case 'M': 425 membuf *= 1024; 426 /* FALLTHROUGH */ 427 case '\0': 428 case 'K': 429 membuf *= 1024; 430 /* FALLTHROUGH */ 431 case 'b': 432 break; 433 case '%': 434 membuf = (available_free_memory * membuf) / 435 100; 436 break; 437 default: 438 fprintf(stderr, "%s: %s\n", strerror(EINVAL), 439 optarg); 440 membuf = available_free_memory; 441 } 442 } 443 return (membuf); 444 } 445 } 446 447 /* 448 * Signal handler that clears the temporary files. 449 */ 450 static void 451 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 452 void *context __unused) 453 { 454 455 clear_tmp_files(); 456 exit(-1); 457 } 458 459 /* 460 * Set signal handler on panic signals. 461 */ 462 static void 463 set_signal_handler(void) 464 { 465 struct sigaction sa; 466 467 memset(&sa, 0, sizeof(sa)); 468 sa.sa_sigaction = &sig_handler; 469 sa.sa_flags = SA_SIGINFO; 470 471 if (sigaction(SIGTERM, &sa, NULL) < 0) { 472 perror("sigaction"); 473 return; 474 } 475 if (sigaction(SIGHUP, &sa, NULL) < 0) { 476 perror("sigaction"); 477 return; 478 } 479 if (sigaction(SIGINT, &sa, NULL) < 0) { 480 perror("sigaction"); 481 return; 482 } 483 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 484 perror("sigaction"); 485 return; 486 } 487 if (sigaction(SIGABRT, &sa, NULL) < 0) { 488 perror("sigaction"); 489 return; 490 } 491 if (sigaction(SIGBUS, &sa, NULL) < 0) { 492 perror("sigaction"); 493 return; 494 } 495 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 496 perror("sigaction"); 497 return; 498 } 499 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 500 perror("sigaction"); 501 return; 502 } 503 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 504 perror("sigaction"); 505 return; 506 } 507 } 508 509 /* 510 * Print "unknown" message and exit with status 2. 511 */ 512 static void 513 unknown(const char *what) 514 { 515 516 errx(2, "%s: %s", getstr(3), what); 517 } 518 519 /* 520 * Check whether contradictory input options are used. 521 */ 522 static void 523 check_mutually_exclusive_flags(char c, bool *mef_flags) 524 { 525 int fo_index, mec; 526 bool found_others, found_this; 527 528 found_others = found_this =false; 529 fo_index = 0; 530 531 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 532 mec = mutually_exclusive_flags[i]; 533 534 if (mec != c) { 535 if (mef_flags[i]) { 536 if (found_this) 537 errx(1, "%c:%c: %s", c, mec, getstr(1)); 538 found_others = true; 539 fo_index = i; 540 } 541 } else { 542 if (found_others) 543 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 544 mef_flags[i] = true; 545 found_this = true; 546 } 547 } 548 } 549 550 /* 551 * Initialise sort opts data. 552 */ 553 static void 554 set_sort_opts(void) 555 { 556 557 memset(&default_sort_mods_object, 0, 558 sizeof(default_sort_mods_object)); 559 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 560 default_sort_mods_object.func = 561 get_sort_func(&default_sort_mods_object); 562 } 563 564 /* 565 * Set a sort modifier on a sort modifiers object. 566 */ 567 static bool 568 set_sort_modifier(struct sort_mods *sm, int c) 569 { 570 571 if (sm) { 572 switch (c){ 573 case 'b': 574 sm->bflag = true; 575 break; 576 case 'd': 577 sm->dflag = true; 578 break; 579 case 'f': 580 sm->fflag = true; 581 break; 582 case 'g': 583 sm->gflag = true; 584 need_hint = true; 585 break; 586 case 'i': 587 sm->iflag = true; 588 break; 589 case 'R': 590 sm->Rflag = true; 591 need_random = true; 592 break; 593 case 'M': 594 initialise_months(); 595 sm->Mflag = true; 596 need_hint = true; 597 break; 598 case 'n': 599 sm->nflag = true; 600 need_hint = true; 601 print_symbols_on_debug = true; 602 break; 603 case 'r': 604 sm->rflag = true; 605 break; 606 case 'V': 607 sm->Vflag = true; 608 break; 609 case 'h': 610 sm->hflag = true; 611 need_hint = true; 612 print_symbols_on_debug = true; 613 break; 614 default: 615 return false; 616 } 617 sort_opts_vals.complex_sort = true; 618 sm->func = get_sort_func(sm); 619 } 620 return (true); 621 } 622 623 /* 624 * Parse POS in -k option. 625 */ 626 static int 627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 628 { 629 regmatch_t pmatch[4]; 630 regex_t re; 631 char *c, *f; 632 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 633 size_t len, nmatch; 634 int ret; 635 636 ret = -1; 637 nmatch = 4; 638 c = f = NULL; 639 640 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 641 return (-1); 642 643 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 644 goto end; 645 646 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 647 goto end; 648 649 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 650 goto end; 651 652 len = pmatch[1].rm_eo - pmatch[1].rm_so; 653 f = sort_malloc((len + 1) * sizeof(char)); 654 655 strncpy(f, s + pmatch[1].rm_so, len); 656 f[len] = '\0'; 657 658 if (second) { 659 errno = 0; 660 ks->f2 = (size_t) strtoul(f, NULL, 10); 661 if (errno != 0) 662 errx(2, "%s: -k", strerror(errno)); 663 if (ks->f2 == 0) { 664 warn("%s",getstr(5)); 665 goto end; 666 } 667 } else { 668 errno = 0; 669 ks->f1 = (size_t) strtoul(f, NULL, 10); 670 if (errno != 0) 671 errx(2, "%s: -k", strerror(errno)); 672 if (ks->f1 == 0) { 673 warn("%s",getstr(5)); 674 goto end; 675 } 676 } 677 678 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 679 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 680 c = sort_malloc((len + 1) * sizeof(char)); 681 682 strncpy(c, s + pmatch[2].rm_so + 1, len); 683 c[len] = '\0'; 684 685 if (second) { 686 errno = 0; 687 ks->c2 = (size_t) strtoul(c, NULL, 10); 688 if (errno != 0) 689 errx(2, "%s: -k", strerror(errno)); 690 } else { 691 errno = 0; 692 ks->c1 = (size_t) strtoul(c, NULL, 10); 693 if (errno != 0) 694 errx(2, "%s: -k", strerror(errno)); 695 if (ks->c1 == 0) { 696 warn("%s",getstr(6)); 697 goto end; 698 } 699 } 700 } else { 701 if (second) 702 ks->c2 = 0; 703 else 704 ks->c1 = 1; 705 } 706 707 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 708 regoff_t i = 0; 709 710 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 711 check_mutually_exclusive_flags(s[i], mef_flags); 712 if (s[i] == 'b') { 713 if (second) 714 ks->pos2b = true; 715 else 716 ks->pos1b = true; 717 } else if (!set_sort_modifier(&(ks->sm), s[i])) 718 goto end; 719 } 720 } 721 722 ret = 0; 723 724 end: 725 726 if (c) 727 sort_free(c); 728 if (f) 729 sort_free(f); 730 regfree(&re); 731 732 return (ret); 733 } 734 735 /* 736 * Parse -k option value. 737 */ 738 static int 739 parse_k(const char *s, struct key_specs *ks) 740 { 741 int ret = -1; 742 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 743 { false, false, false, false, false, false }; 744 745 if (s && *s) { 746 char *sptr; 747 748 sptr = strchr(s, ','); 749 if (sptr) { 750 size_t size1; 751 char *pos1, *pos2; 752 753 size1 = sptr - s; 754 755 if (size1 < 1) 756 return (-1); 757 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 758 759 strncpy(pos1, s, size1); 760 pos1[size1] = '\0'; 761 762 ret = parse_pos(pos1, ks, mef_flags, false); 763 764 sort_free(pos1); 765 if (ret < 0) 766 return (ret); 767 768 pos2 = sort_strdup(sptr + 1); 769 ret = parse_pos(pos2, ks, mef_flags, true); 770 sort_free(pos2); 771 } else 772 ret = parse_pos(s, ks, mef_flags, false); 773 } 774 775 return (ret); 776 } 777 778 /* 779 * Parse POS in +POS -POS option. 780 */ 781 static int 782 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 783 { 784 regex_t re; 785 regmatch_t pmatch[4]; 786 char *c, *f; 787 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 788 int ret; 789 size_t len, nmatch; 790 791 ret = -1; 792 nmatch = 4; 793 c = f = NULL; 794 *nc = *nf = 0; 795 796 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 797 return (-1); 798 799 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 800 goto end; 801 802 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 803 goto end; 804 805 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 806 goto end; 807 808 len = pmatch[1].rm_eo - pmatch[1].rm_so; 809 f = sort_malloc((len + 1) * sizeof(char)); 810 811 strncpy(f, s + pmatch[1].rm_so, len); 812 f[len] = '\0'; 813 814 errno = 0; 815 *nf = (size_t) strtoul(f, NULL, 10); 816 if (errno != 0) 817 errx(2, "%s", getstr(11)); 818 819 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 820 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 821 c = sort_malloc((len + 1) * sizeof(char)); 822 823 strncpy(c, s + pmatch[2].rm_so + 1, len); 824 c[len] = '\0'; 825 826 errno = 0; 827 *nc = (size_t) strtoul(c, NULL, 10); 828 if (errno != 0) 829 errx(2, "%s", getstr(11)); 830 } 831 832 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 833 834 len = pmatch[3].rm_eo - pmatch[3].rm_so; 835 836 strncpy(sopts, s + pmatch[3].rm_so, len); 837 sopts[len] = '\0'; 838 } 839 840 ret = 0; 841 842 end: 843 if (c) 844 sort_free(c); 845 if (f) 846 sort_free(f); 847 regfree(&re); 848 849 return (ret); 850 } 851 852 /* 853 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 854 */ 855 void 856 fix_obsolete_keys(int *argc, char **argv) 857 { 858 char sopt[129]; 859 860 for (int i = 1; i < *argc; i++) { 861 char *arg1; 862 863 arg1 = argv[i]; 864 865 if (strlen(arg1) > 1 && arg1[0] == '+') { 866 int c1, f1; 867 char sopts1[128]; 868 869 sopts1[0] = 0; 870 c1 = f1 = 0; 871 872 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 873 continue; 874 else { 875 f1 += 1; 876 c1 += 1; 877 if (i + 1 < *argc) { 878 char *arg2 = argv[i + 1]; 879 880 if (strlen(arg2) > 1 && 881 arg2[0] == '-') { 882 int c2, f2; 883 char sopts2[128]; 884 885 sopts2[0] = 0; 886 c2 = f2 = 0; 887 888 if (parse_pos_obs(arg2 + 1, 889 &f2, &c2, sopts2) >= 0) { 890 if (c2 > 0) 891 f2 += 1; 892 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 893 f1, c1, sopts1, f2, c2, sopts2); 894 argv[i] = sort_strdup(sopt); 895 for (int j = i + 1; j + 1 < *argc; j++) 896 argv[j] = argv[j + 1]; 897 *argc -= 1; 898 continue; 899 } 900 } 901 } 902 sprintf(sopt, "-k%d.%d", f1, c1); 903 argv[i] = sort_strdup(sopt); 904 } 905 } 906 } 907 } 908 909 /* 910 * Set random seed 911 */ 912 static void 913 set_random_seed(void) 914 { 915 if (need_random) { 916 917 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 918 FILE* fseed; 919 MD5_CTX ctx; 920 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 921 size_t sz = 0; 922 923 fseed = openfile(random_source, "r"); 924 while (!feof(fseed)) { 925 int cr; 926 927 cr = fgetc(fseed); 928 if (cr == EOF) 929 break; 930 931 rsd[sz++] = (char) cr; 932 933 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 934 break; 935 } 936 937 closefile(fseed, random_source); 938 939 MD5Init(&ctx); 940 MD5Update(&ctx, rsd, sz); 941 942 random_seed = MD5End(&ctx, NULL); 943 random_seed_size = strlen(random_seed); 944 945 } else { 946 MD5_CTX ctx; 947 char *b; 948 949 MD5Init(&ctx); 950 b = MD5File(random_source, NULL); 951 if (b == NULL) 952 err(2, NULL); 953 954 random_seed = b; 955 random_seed_size = strlen(b); 956 } 957 958 MD5Init(&md5_ctx); 959 if(random_seed_size>0) { 960 MD5Update(&md5_ctx, random_seed, random_seed_size); 961 } 962 } 963 } 964 965 /* 966 * Main function. 967 */ 968 int 969 main(int argc, char **argv) 970 { 971 char *outfile, *real_outfile; 972 int c, result; 973 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 974 { false, false, false, false, false, false }; 975 976 result = 0; 977 outfile = sort_strdup("-"); 978 real_outfile = NULL; 979 980 struct sort_mods *sm = &default_sort_mods_object; 981 982 init_tmp_files(); 983 984 set_signal_handler(); 985 986 set_hw_params(); 987 set_locale(); 988 set_tmpdir(); 989 set_sort_opts(); 990 991 #if 0 992 { 993 static int counter = 0; 994 char fn[128]; 995 sprintf(fn, "/var/tmp/debug.sort.%d", counter++); 996 FILE* f = fopen(fn, "w"); 997 fprintf(f, ">>sort>>"); 998 for (int i = 0; i < argc; i++) { 999 fprintf(f, "<%s>", argv[i]); 1000 } 1001 fprintf(f, "<<sort<<\n"); 1002 fclose(f); 1003 } 1004 #endif 1005 1006 fix_obsolete_keys(&argc, argv); 1007 1008 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1009 != -1)) { 1010 1011 check_mutually_exclusive_flags(c, mef_flags); 1012 1013 if (!set_sort_modifier(sm, c)) { 1014 1015 switch (c) { 1016 case 'c': 1017 sort_opts_vals.cflag = true; 1018 if (optarg) { 1019 if (!strcmp(optarg, "diagnose-first")) 1020 ; 1021 else if (!strcmp(optarg, "silent") || 1022 !strcmp(optarg, "quiet")) 1023 sort_opts_vals.csilentflag = true; 1024 else if (*optarg) 1025 unknown(optarg); 1026 } 1027 break; 1028 case 'C': 1029 sort_opts_vals.cflag = true; 1030 sort_opts_vals.csilentflag = true; 1031 break; 1032 case 'k': 1033 { 1034 sort_opts_vals.complex_sort = true; 1035 sort_opts_vals.kflag = true; 1036 1037 keys_num++; 1038 keys = sort_realloc(keys, keys_num * 1039 sizeof(struct key_specs)); 1040 memset(&(keys[keys_num - 1]), 0, 1041 sizeof(struct key_specs)); 1042 1043 if (parse_k(optarg, &(keys[keys_num - 1])) 1044 < 0) { 1045 errx(2, "%s: -k %s\n", 1046 strerror(EINVAL), optarg); 1047 } 1048 1049 break; 1050 } 1051 case 'm': 1052 sort_opts_vals.mflag = true; 1053 break; 1054 case 'o': 1055 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1056 strcpy(outfile, optarg); 1057 break; 1058 case 's': 1059 sort_opts_vals.sflag = true; 1060 break; 1061 case 'S': 1062 available_free_memory = 1063 parse_memory_buffer_value(optarg); 1064 break; 1065 case 'T': 1066 tmpdir = sort_strdup(optarg); 1067 break; 1068 case 't': 1069 while (strlen(optarg) > 1) { 1070 if (optarg[0] != '\\') { 1071 errx(2, "%s: %s\n", 1072 strerror(EINVAL), optarg); 1073 } 1074 optarg += 1; 1075 if (*optarg == '0') { 1076 *optarg = 0; 1077 break; 1078 } 1079 } 1080 sort_opts_vals.tflag = true; 1081 sort_opts_vals.field_sep = btowc(optarg[0]); 1082 if (sort_opts_vals.field_sep == WEOF) { 1083 errno = EINVAL; 1084 err(2, NULL); 1085 } 1086 if (!gnusort_numeric_compatibility) { 1087 if (symbol_decimal_point == sort_opts_vals.field_sep) 1088 symbol_decimal_point = WEOF; 1089 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1090 symbol_thousands_sep = WEOF; 1091 if (symbol_negative_sign == sort_opts_vals.field_sep) 1092 symbol_negative_sign = WEOF; 1093 if (symbol_positive_sign == sort_opts_vals.field_sep) 1094 symbol_positive_sign = WEOF; 1095 } 1096 break; 1097 case 'u': 1098 sort_opts_vals.uflag = true; 1099 /* stable sort for the correct unique val */ 1100 sort_opts_vals.sflag = true; 1101 break; 1102 case 'z': 1103 sort_opts_vals.zflag = true; 1104 break; 1105 case SORT_OPT: 1106 if (optarg) { 1107 if (!strcmp(optarg, "general-numeric")) 1108 set_sort_modifier(sm, 'g'); 1109 else if (!strcmp(optarg, "human-numeric")) 1110 set_sort_modifier(sm, 'h'); 1111 else if (!strcmp(optarg, "numeric")) 1112 set_sort_modifier(sm, 'n'); 1113 else if (!strcmp(optarg, "month")) 1114 set_sort_modifier(sm, 'M'); 1115 else if (!strcmp(optarg, "random")) 1116 set_sort_modifier(sm, 'R'); 1117 else 1118 unknown(optarg); 1119 } 1120 break; 1121 #if defined(SORT_THREADS) 1122 case PARALLEL_OPT: 1123 nthreads = (size_t)(atoi(optarg)); 1124 if (nthreads < 1) 1125 nthreads = 1; 1126 if (nthreads > 1024) 1127 nthreads = 1024; 1128 break; 1129 #endif 1130 case QSORT_OPT: 1131 sort_opts_vals.sort_method = SORT_QSORT; 1132 break; 1133 case MERGESORT_OPT: 1134 sort_opts_vals.sort_method = SORT_MERGESORT; 1135 break; 1136 case MMAP_OPT: 1137 use_mmap = true; 1138 break; 1139 case HEAPSORT_OPT: 1140 sort_opts_vals.sort_method = SORT_HEAPSORT; 1141 break; 1142 case RADIXSORT_OPT: 1143 sort_opts_vals.sort_method = SORT_RADIXSORT; 1144 break; 1145 case RANDOMSOURCE_OPT: 1146 random_source = strdup(optarg); 1147 break; 1148 case COMPRESSPROGRAM_OPT: 1149 compress_program = strdup(optarg); 1150 break; 1151 case FF_OPT: 1152 read_fns_from_file0(optarg); 1153 break; 1154 case BS_OPT: 1155 { 1156 errno = 0; 1157 long mof = strtol(optarg, NULL, 10); 1158 if (errno != 0) 1159 errx(2, "--batch-size: %s", 1160 strerror(errno)); 1161 if (mof >= 2) 1162 max_open_files = (size_t) mof + 1; 1163 } 1164 break; 1165 case VERSION_OPT: 1166 printf("%s\n", VERSION); 1167 exit(EXIT_SUCCESS); 1168 /* NOTREACHED */ 1169 break; 1170 case DEBUG_OPT: 1171 debug_sort = true; 1172 break; 1173 case HELP_OPT: 1174 usage(false); 1175 /* NOTREACHED */ 1176 break; 1177 default: 1178 usage(true); 1179 /* NOTREACHED */ 1180 } 1181 } 1182 } 1183 1184 argc -= optind; 1185 argv += optind; 1186 1187 #ifndef WITHOUT_NLS 1188 catalog = catopen("sort", NL_CAT_LOCALE); 1189 #endif 1190 1191 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1192 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1193 1194 #ifndef WITHOUT_NLS 1195 catclose(catalog); 1196 #endif 1197 1198 if (keys_num == 0) { 1199 keys_num = 1; 1200 keys = sort_realloc(keys, sizeof(struct key_specs)); 1201 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1202 keys[0].c1 = 1; 1203 keys[0].pos1b = default_sort_mods->bflag; 1204 keys[0].pos2b = default_sort_mods->bflag; 1205 memcpy(&(keys[0].sm), default_sort_mods, 1206 sizeof(struct sort_mods)); 1207 } 1208 1209 for (size_t i = 0; i < keys_num; i++) { 1210 struct key_specs *ks; 1211 1212 ks = &(keys[i]); 1213 1214 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1215 !(ks->pos2b)) { 1216 ks->pos1b = sm->bflag; 1217 ks->pos2b = sm->bflag; 1218 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1219 } 1220 1221 ks->sm.func = get_sort_func(&(ks->sm)); 1222 } 1223 1224 if (argv_from_file0) { 1225 argc = argc_from_file0; 1226 argv = argv_from_file0; 1227 } 1228 1229 if (debug_sort) { 1230 #if defined(SORT_THREADS) 1231 nthreads = 1; 1232 #endif 1233 printf("Using collate rules of %s locale\n", 1234 setlocale(LC_COLLATE, NULL)); 1235 if (byte_sort) 1236 printf("Byte sort is used\n"); 1237 if (print_symbols_on_debug) { 1238 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1239 if (symbol_thousands_sep) 1240 printf("Thousands separator: <%lc>\n", 1241 symbol_thousands_sep); 1242 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1243 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1244 } 1245 } 1246 1247 set_random_seed(); 1248 1249 /* Case when the outfile equals one of the input files: */ 1250 if (strcmp(outfile, "-")) { 1251 1252 for(int i = 0; i < argc; ++i) { 1253 if (strcmp(argv[i], outfile) == 0) { 1254 real_outfile = sort_strdup(outfile); 1255 for(;;) { 1256 char* tmp = sort_malloc(strlen(outfile) + 1257 strlen(".tmp") + 1); 1258 1259 strcpy(tmp, outfile); 1260 strcpy(tmp + strlen(tmp), ".tmp"); 1261 sort_free(outfile); 1262 outfile = tmp; 1263 if (access(outfile, F_OK) < 0) 1264 break; 1265 } 1266 tmp_file_atexit(outfile); 1267 } 1268 } 1269 } 1270 1271 #if defined(SORT_THREADS) 1272 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1273 nthreads = 1; 1274 #endif 1275 1276 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1277 struct file_list fl; 1278 struct sort_list list; 1279 1280 sort_list_init(&list); 1281 file_list_init(&fl, true); 1282 1283 if (argc < 1) 1284 procfile("-", &list, &fl); 1285 else { 1286 while (argc > 0) { 1287 procfile(*argv, &list, &fl); 1288 --argc; 1289 ++argv; 1290 } 1291 } 1292 1293 if (fl.count < 1) 1294 sort_list_to_file(&list, outfile); 1295 else { 1296 if (list.count > 0) { 1297 char *flast = new_tmp_file_name(); 1298 1299 sort_list_to_file(&list, flast); 1300 file_list_add(&fl, flast, false); 1301 } 1302 merge_files(&fl, outfile); 1303 } 1304 1305 file_list_clean(&fl); 1306 1307 /* 1308 * We are about to exit the program, so we can ignore 1309 * the clean-up for speed 1310 * 1311 * sort_list_clean(&list); 1312 */ 1313 1314 } else if (sort_opts_vals.cflag) { 1315 result = (argc == 0) ? (check("-")) : (check(*argv)); 1316 } else if (sort_opts_vals.mflag) { 1317 struct file_list fl; 1318 1319 file_list_init(&fl, false); 1320 file_list_populate(&fl, argc, argv, true); 1321 merge_files(&fl, outfile); 1322 file_list_clean(&fl); 1323 } 1324 1325 if (real_outfile) { 1326 unlink(real_outfile); 1327 if (rename(outfile, real_outfile) < 0) 1328 err(2, NULL); 1329 sort_free(real_outfile); 1330 } 1331 1332 sort_free(outfile); 1333 1334 return (result); 1335 } 1336