1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <md5.h> 41 #include <regex.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <wchar.h> 49 #include <wctype.h> 50 51 #include "coll.h" 52 #include "file.h" 53 #include "sort.h" 54 55 #ifndef WITHOUT_NLS 56 #include <nl_types.h> 57 nl_catd catalog; 58 #endif 59 60 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 61 62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 64 65 static bool need_random; 66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 67 static const void *random_seed; 68 static size_t random_seed_size; 69 70 MD5_CTX md5_ctx; 71 72 /* 73 * Default messages to use when NLS is disabled or no catalogue 74 * is found. 75 */ 76 const char *nlsstr[] = { "", 77 /* 1*/"mutually exclusive flags", 78 /* 2*/"extra argument not allowed with -c", 79 /* 3*/"Unknown feature", 80 /* 4*/"Wrong memory buffer specification", 81 /* 5*/"0 field in key specs", 82 /* 6*/"0 column in key specs", 83 /* 7*/"Wrong file mode", 84 /* 8*/"Cannot open file for reading", 85 /* 9*/"Radix sort cannot be used with these sort options", 86 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 87 /*11*/"Invalid key position", 88 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 89 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 90 "[-o outfile] [--batch-size size] [--files0-from file] " 91 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 92 #if defined(SORT_THREADS) 93 "[--nthreads thread_no] " 94 #endif 95 "[--human-numeric-sort] " 96 "[--version-sort] [--random-sort [--random-source file]] " 97 "[--compress-program program] [file ...]\n" }; 98 99 struct sort_opts sort_opts_vals; 100 101 bool debug_sort; 102 bool need_hint; 103 104 #if defined(SORT_THREADS) 105 size_t ncpu = 1; 106 size_t nthreads = 1; 107 #endif 108 109 static bool gnusort_numeric_compatibility; 110 111 static struct sort_mods default_sort_mods_object; 112 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 113 114 static bool print_symbols_on_debug; 115 116 /* 117 * Arguments from file (when file0-from option is used: 118 */ 119 static int argc_from_file0 = -1; 120 static char **argv_from_file0; 121 122 /* 123 * Placeholder symbols for options which have no single-character equivalent 124 */ 125 enum 126 { 127 SORT_OPT = CHAR_MAX + 1, 128 HELP_OPT, 129 FF_OPT, 130 BS_OPT, 131 VERSION_OPT, 132 DEBUG_OPT, 133 #if defined(SORT_THREADS) 134 NTHREADS_OPT, 135 #endif 136 RANDOMSOURCE_OPT, 137 COMPRESSPROGRAM_OPT, 138 QSORT_OPT, 139 MERGESORT_OPT, 140 HEAPSORT_OPT, 141 RADIXSORT_OPT 142 }; 143 144 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 145 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 146 147 struct option long_options[] = { 148 { "batch-size", required_argument, NULL, BS_OPT }, 149 { "buffer-size", required_argument, NULL, 'S' }, 150 { "check", optional_argument, NULL, 'c' }, 151 { "check=silent|quiet", optional_argument, NULL, 'C' }, 152 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 153 { "debug", no_argument, NULL, DEBUG_OPT }, 154 { "dictionary-order", no_argument, NULL, 'd' }, 155 { "field-separator", required_argument, NULL, 't' }, 156 { "files0-from", required_argument, NULL, FF_OPT }, 157 { "general-numeric-sort", no_argument, NULL, 'g' }, 158 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 159 { "help",no_argument, NULL, HELP_OPT }, 160 { "human-numeric-sort", no_argument, NULL, 'h' }, 161 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 162 { "ignore-case", no_argument, NULL, 'f' }, 163 { "ignore-nonprinting", no_argument, NULL, 'i' }, 164 { "key", required_argument, NULL, 'k' }, 165 { "merge", no_argument, NULL, 'm' }, 166 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 167 { "month-sort", no_argument, NULL, 'M' }, 168 { "numeric-sort", no_argument, NULL, 'n' }, 169 { "output", required_argument, NULL, 'o' }, 170 #if defined(SORT_THREADS) 171 { "nthreads", required_argument, NULL, NTHREADS_OPT }, 172 #endif 173 { "qsort", no_argument, NULL, QSORT_OPT }, 174 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 175 { "random-sort", no_argument, NULL, 'R' }, 176 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 177 { "reverse", no_argument, NULL, 'r' }, 178 { "sort", required_argument, NULL, SORT_OPT }, 179 { "stable", no_argument, NULL, 's' }, 180 { "temporary-directory",required_argument, NULL, 'T' }, 181 { "unique", no_argument, NULL, 'u' }, 182 { "version", no_argument, NULL, VERSION_OPT }, 183 { "version-sort",no_argument, NULL, 'V' }, 184 { "zero-terminated", no_argument, NULL, 'z' }, 185 { NULL, no_argument, NULL, 0 } 186 }; 187 188 void fix_obsolete_keys(int *argc, char **argv); 189 190 /* 191 * Check where sort modifier is present 192 */ 193 static bool 194 sort_modifier_empty(struct sort_mods *sm) 195 { 196 197 if (sm == NULL) 198 return (true); 199 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 200 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 201 } 202 203 /* 204 * Print out usage text. 205 */ 206 static void 207 usage(bool opt_err) 208 { 209 struct option *o; 210 FILE *out; 211 212 out = stdout; 213 o = &(long_options[0]); 214 215 if (opt_err) 216 out = stderr; 217 fprintf(out, getstr(12), getprogname()); 218 if (opt_err) 219 exit(2); 220 exit(0); 221 } 222 223 /* 224 * Read input file names from a file (file0-from option). 225 */ 226 static void 227 read_fns_from_file0(const char *fn) 228 { 229 if (fn) { 230 struct file0_reader f0r; 231 FILE *f; 232 233 f = fopen(fn, "r"); 234 if (f == NULL) 235 err(2, NULL); 236 237 memset(&f0r, 0, sizeof(f0r)); 238 f0r.f = f; 239 240 while (!feof(f)) { 241 char *line = read_file0_line(&f0r); 242 243 if (line && *line) { 244 ++argc_from_file0; 245 if (argc_from_file0 < 1) 246 argc_from_file0 = 1; 247 argv_from_file0 = sort_realloc(argv_from_file0, 248 argc_from_file0 * sizeof(char *)); 249 if (argv_from_file0 == NULL) 250 err(2, NULL); 251 argv_from_file0[argc_from_file0 - 1] = 252 sort_strdup(line); 253 } 254 } 255 closefile(f, fn); 256 } 257 } 258 259 /* 260 * Check how much RAM is available for the sort. 261 */ 262 static void 263 set_hw_params(void) 264 { 265 #if defined(SORT_THREADS) 266 size_t ncpusz; 267 #endif 268 size_t pages, psize, psz, pszsz; 269 270 pages = psize = 0; 271 #if defined(SORT_THREADS) 272 ncpu = 1; 273 ncpusz = sizeof(size_t); 274 #endif 275 psz = pszsz = sizeof(size_t); 276 277 if (sysctlbyname("vm.stats.vm.v_free_count", &pages, &psz, 278 NULL, 0) < 0) { 279 perror("vm.stats.vm.v_free_count"); 280 return; 281 } 282 if (sysctlbyname("vm.stats.vm.v_page_size", &psize, &pszsz, 283 NULL, 0) < 0) { 284 perror("vm.stats.vm.v_page_size"); 285 return; 286 } 287 #if defined(SORT_THREADS) 288 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, 289 NULL, 0) < 0) 290 ncpu = 1; 291 else if(ncpu > 32) 292 ncpu = 32; 293 294 nthreads = ncpu; 295 #endif 296 297 free_memory = (unsigned long long) pages * (unsigned long long) psize; 298 available_free_memory = (free_memory * 9) / 10; 299 } 300 301 /* 302 * Convert "plain" symbol to wide symbol, with default value. 303 */ 304 static void 305 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 306 { 307 308 if (wc && c) { 309 int res; 310 311 res = mbtowc(wc, c, MB_CUR_MAX); 312 if (res < 1) 313 *wc = def; 314 } 315 } 316 317 /* 318 * Set current locale symbols. 319 */ 320 static void 321 set_locale(void) 322 { 323 struct lconv *lc; 324 const char *locale; 325 326 setlocale(LC_ALL, ""); 327 328 lc = localeconv(); 329 330 if (lc) { 331 /* obtain LC_NUMERIC info */ 332 /* Convert to wide char form */ 333 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 334 symbol_decimal_point); 335 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 336 symbol_thousands_sep); 337 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 338 symbol_positive_sign); 339 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 340 symbol_negative_sign); 341 } 342 343 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 344 gnusort_numeric_compatibility = true; 345 346 locale = setlocale(LC_COLLATE, NULL); 347 348 if (locale) { 349 char *tmpl; 350 const char *cclocale; 351 352 tmpl = sort_strdup(locale); 353 cclocale = setlocale(LC_COLLATE, "C"); 354 if (cclocale && !strcmp(cclocale, tmpl)) 355 byte_sort = true; 356 else { 357 const char *pclocale; 358 359 pclocale = setlocale(LC_COLLATE, "POSIX"); 360 if (pclocale && !strcmp(pclocale, tmpl)) 361 byte_sort = true; 362 } 363 setlocale(LC_COLLATE, tmpl); 364 sort_free(tmpl); 365 } 366 } 367 368 /* 369 * Set directory temporary files. 370 */ 371 static void 372 set_tmpdir(void) 373 { 374 char *td; 375 376 td = getenv("TMPDIR"); 377 if (td != NULL) 378 tmpdir = sort_strdup(td); 379 } 380 381 /* 382 * Parse -S option. 383 */ 384 static unsigned long long 385 parse_memory_buffer_value(const char *value) 386 { 387 388 if (value == NULL) 389 return (available_free_memory); 390 else { 391 char *endptr; 392 unsigned long long membuf; 393 394 endptr = NULL; 395 errno = 0; 396 membuf = strtoll(value, &endptr, 10); 397 398 if (errno != 0) { 399 warn("%s",getstr(4)); 400 membuf = available_free_memory; 401 } else { 402 switch (*endptr){ 403 case 'Y': 404 membuf *= 1024; 405 /* FALLTHROUGH */ 406 case 'Z': 407 membuf *= 1024; 408 /* FALLTHROUGH */ 409 case 'E': 410 membuf *= 1024; 411 /* FALLTHROUGH */ 412 case 'P': 413 membuf *= 1024; 414 /* FALLTHROUGH */ 415 case 'T': 416 membuf *= 1024; 417 /* FALLTHROUGH */ 418 case 'G': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case 'M': 422 membuf *= 1024; 423 /* FALLTHROUGH */ 424 case '\0': 425 case 'K': 426 membuf *= 1024; 427 /* FALLTHROUGH */ 428 case 'b': 429 break; 430 case '%': 431 membuf = (available_free_memory * membuf) / 432 100; 433 break; 434 default: 435 fprintf(stderr, "%s: %s\n", strerror(EINVAL), 436 optarg); 437 membuf = available_free_memory; 438 } 439 } 440 return (membuf); 441 } 442 } 443 444 /* 445 * Signal handler that clears the temporary files. 446 */ 447 static void 448 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 449 void *context __unused) 450 { 451 452 clear_tmp_files(); 453 exit(-1); 454 } 455 456 /* 457 * Set signal handler on panic signals. 458 */ 459 static void 460 set_signal_handler(void) 461 { 462 struct sigaction sa; 463 464 memset(&sa, 0, sizeof(sa)); 465 sa.sa_sigaction = &sig_handler; 466 sa.sa_flags = SA_SIGINFO; 467 468 if (sigaction(SIGTERM, &sa, NULL) < 0) { 469 perror("sigaction"); 470 return; 471 } 472 if (sigaction(SIGHUP, &sa, NULL) < 0) { 473 perror("sigaction"); 474 return; 475 } 476 if (sigaction(SIGINT, &sa, NULL) < 0) { 477 perror("sigaction"); 478 return; 479 } 480 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 481 perror("sigaction"); 482 return; 483 } 484 if (sigaction(SIGABRT, &sa, NULL) < 0) { 485 perror("sigaction"); 486 return; 487 } 488 if (sigaction(SIGBUS, &sa, NULL) < 0) { 489 perror("sigaction"); 490 return; 491 } 492 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 497 perror("sigaction"); 498 return; 499 } 500 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 501 perror("sigaction"); 502 return; 503 } 504 } 505 506 /* 507 * Print "unknown" message and exit with status 2. 508 */ 509 static void 510 unknown(const char *what) 511 { 512 513 errx(2, "%s: %s", getstr(3), what); 514 } 515 516 /* 517 * Check whether contradictory input options are used. 518 */ 519 static void 520 check_mutually_exclusive_flags(char c, bool *mef_flags) 521 { 522 int fo_index, mec; 523 bool found_others, found_this; 524 525 found_others = found_this =false; 526 fo_index = 0; 527 528 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 529 mec = mutually_exclusive_flags[i]; 530 531 if (mec != c) { 532 if (mef_flags[i]) { 533 if (found_this) 534 errx(1, "%c:%c: %s", c, mec, getstr(1)); 535 found_others = true; 536 fo_index = i; 537 } 538 } else { 539 if (found_others) 540 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 541 mef_flags[i] = true; 542 found_this = true; 543 } 544 } 545 } 546 547 /* 548 * Initialise sort opts data. 549 */ 550 static void 551 set_sort_opts(void) 552 { 553 554 memset(&default_sort_mods_object, 0, 555 sizeof(default_sort_mods_object)); 556 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 557 default_sort_mods_object.func = 558 get_sort_func(&default_sort_mods_object); 559 } 560 561 /* 562 * Set a sort modifier on a sort modifiers object. 563 */ 564 static bool 565 set_sort_modifier(struct sort_mods *sm, int c) 566 { 567 568 if (sm) { 569 switch (c){ 570 case 'b': 571 sm->bflag = true; 572 break; 573 case 'd': 574 sm->dflag = true; 575 break; 576 case 'f': 577 sm->fflag = true; 578 break; 579 case 'g': 580 sm->gflag = true; 581 need_hint = true; 582 break; 583 case 'i': 584 sm->iflag = true; 585 break; 586 case 'R': 587 sm->Rflag = true; 588 need_random = true; 589 break; 590 case 'M': 591 initialise_months(); 592 sm->Mflag = true; 593 need_hint = true; 594 break; 595 case 'n': 596 sm->nflag = true; 597 need_hint = true; 598 print_symbols_on_debug = true; 599 break; 600 case 'r': 601 sm->rflag = true; 602 break; 603 case 'V': 604 sm->Vflag = true; 605 break; 606 case 'h': 607 sm->hflag = true; 608 need_hint = true; 609 print_symbols_on_debug = true; 610 break; 611 default: 612 return false; 613 } 614 sort_opts_vals.complex_sort = true; 615 sm->func = get_sort_func(sm); 616 } 617 return (true); 618 } 619 620 /* 621 * Parse POS in -k option. 622 */ 623 static int 624 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 625 { 626 regmatch_t pmatch[4]; 627 regex_t re; 628 char *c, *f; 629 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 630 size_t len, nmatch; 631 int ret; 632 633 ret = -1; 634 nmatch = 4; 635 c = f = NULL; 636 637 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 638 return (-1); 639 640 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 641 goto end; 642 643 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 644 goto end; 645 646 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 647 goto end; 648 649 len = pmatch[1].rm_eo - pmatch[1].rm_so; 650 f = sort_malloc((len + 1) * sizeof(char)); 651 652 strncpy(f, s + pmatch[1].rm_so, len); 653 f[len] = '\0'; 654 655 if (second) { 656 errno = 0; 657 ks->f2 = (size_t) strtoul(f, NULL, 10); 658 if (errno != 0) 659 errx(2, "%s: -k", strerror(errno)); 660 if (ks->f2 == 0) { 661 warn("%s",getstr(5)); 662 goto end; 663 } 664 } else { 665 errno = 0; 666 ks->f1 = (size_t) strtoul(f, NULL, 10); 667 if (errno != 0) 668 errx(2, "%s: -k", strerror(errno)); 669 if (ks->f1 == 0) { 670 warn("%s",getstr(5)); 671 goto end; 672 } 673 } 674 675 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 676 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 677 c = sort_malloc((len + 1) * sizeof(char)); 678 679 strncpy(c, s + pmatch[2].rm_so + 1, len); 680 c[len] = '\0'; 681 682 if (second) { 683 errno = 0; 684 ks->c2 = (size_t) strtoul(c, NULL, 10); 685 if (errno != 0) 686 errx(2, "%s: -k", strerror(errno)); 687 } else { 688 errno = 0; 689 ks->c1 = (size_t) strtoul(c, NULL, 10); 690 if (errno != 0) 691 errx(2, "%s: -k", strerror(errno)); 692 if (ks->c1 == 0) { 693 warn("%s",getstr(6)); 694 goto end; 695 } 696 } 697 } else { 698 if (second) 699 ks->c2 = 0; 700 else 701 ks->c1 = 1; 702 } 703 704 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 705 regoff_t i = 0; 706 707 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 708 check_mutually_exclusive_flags(s[i], mef_flags); 709 if (s[i] == 'b') { 710 if (second) 711 ks->pos2b = true; 712 else 713 ks->pos1b = true; 714 } else if (!set_sort_modifier(&(ks->sm), s[i])) 715 goto end; 716 } 717 } 718 719 ret = 0; 720 721 end: 722 723 if (c) 724 sort_free(c); 725 if (f) 726 sort_free(f); 727 regfree(&re); 728 729 return (ret); 730 } 731 732 /* 733 * Parse -k option value. 734 */ 735 static int 736 parse_k(const char *s, struct key_specs *ks) 737 { 738 int ret = -1; 739 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 740 { false, false, false, false, false, false }; 741 742 if (s && *s) { 743 char *sptr; 744 745 sptr = strchr(s, ','); 746 if (sptr) { 747 size_t size1; 748 char *pos1, *pos2; 749 750 size1 = sptr - s; 751 752 if (size1 < 1) 753 return (-1); 754 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 755 756 strncpy(pos1, s, size1); 757 pos1[size1] = '\0'; 758 759 ret = parse_pos(pos1, ks, mef_flags, false); 760 761 sort_free(pos1); 762 if (ret < 0) 763 return (ret); 764 765 pos2 = sort_strdup(sptr + 1); 766 ret = parse_pos(pos2, ks, mef_flags, true); 767 sort_free(pos2); 768 } else 769 ret = parse_pos(s, ks, mef_flags, false); 770 } 771 772 return (ret); 773 } 774 775 /* 776 * Parse POS in +POS -POS option. 777 */ 778 static int 779 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 780 { 781 regex_t re; 782 regmatch_t pmatch[4]; 783 char *c, *f; 784 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 785 int ret; 786 size_t len, nmatch; 787 788 ret = -1; 789 nmatch = 4; 790 c = f = NULL; 791 *nc = *nf = 0; 792 793 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 794 return (-1); 795 796 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 797 goto end; 798 799 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 800 goto end; 801 802 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 803 goto end; 804 805 len = pmatch[1].rm_eo - pmatch[1].rm_so; 806 f = sort_malloc((len + 1) * sizeof(char)); 807 808 strncpy(f, s + pmatch[1].rm_so, len); 809 f[len] = '\0'; 810 811 errno = 0; 812 *nf = (size_t) strtoul(f, NULL, 10); 813 if (errno != 0) 814 errx(2, "%s", getstr(11)); 815 816 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 817 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 818 c = sort_malloc((len + 1) * sizeof(char)); 819 820 strncpy(c, s + pmatch[2].rm_so + 1, len); 821 c[len] = '\0'; 822 823 errno = 0; 824 *nc = (size_t) strtoul(c, NULL, 10); 825 if (errno != 0) 826 errx(2, "%s", getstr(11)); 827 } 828 829 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 830 831 len = pmatch[3].rm_eo - pmatch[3].rm_so; 832 833 strncpy(sopts, s + pmatch[3].rm_so, len); 834 sopts[len] = '\0'; 835 } 836 837 ret = 0; 838 839 end: 840 if (c) 841 sort_free(c); 842 if (f) 843 sort_free(f); 844 regfree(&re); 845 846 return (ret); 847 } 848 849 /* 850 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 851 */ 852 void 853 fix_obsolete_keys(int *argc, char **argv) 854 { 855 char sopt[129]; 856 857 for (int i = 1; i < *argc; i++) { 858 char *arg1; 859 860 arg1 = argv[i]; 861 862 if (strlen(arg1) > 1 && arg1[0] == '+') { 863 int c1, f1; 864 char sopts1[128]; 865 866 sopts1[0] = 0; 867 c1 = f1 = 0; 868 869 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 870 continue; 871 else { 872 f1 += 1; 873 c1 += 1; 874 if (i + 1 < *argc) { 875 char *arg2 = argv[i + 1]; 876 877 if (strlen(arg2) > 1 && 878 arg2[0] == '-') { 879 int c2, f2; 880 char sopts2[128]; 881 882 sopts2[0] = 0; 883 c2 = f2 = 0; 884 885 if (parse_pos_obs(arg2 + 1, 886 &f2, &c2, sopts2) >= 0) { 887 if (c2 > 0) 888 f2 += 1; 889 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 890 f1, c1, sopts1, f2, c2, sopts2); 891 argv[i] = sort_strdup(sopt); 892 for (int j = i + 1; j + 1 < *argc; j++) 893 argv[j] = argv[j + 1]; 894 *argc -= 1; 895 continue; 896 } 897 } 898 } 899 sprintf(sopt, "-k%d.%d", f1, c1); 900 argv[i] = sort_strdup(sopt); 901 } 902 } 903 } 904 } 905 906 /* 907 * Set random seed 908 */ 909 static void 910 set_random_seed(void) 911 { 912 if (need_random) { 913 914 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 915 FILE* fseed; 916 MD5_CTX ctx; 917 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 918 size_t sz = 0; 919 920 fseed = openfile(random_source, "r"); 921 while (!feof(fseed)) { 922 int cr; 923 924 cr = fgetc(fseed); 925 if (cr == EOF) 926 break; 927 928 rsd[sz++] = (char) cr; 929 930 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 931 break; 932 } 933 934 closefile(fseed, random_source); 935 936 MD5Init(&ctx); 937 MD5Update(&ctx, rsd, sz); 938 939 random_seed = MD5End(&ctx, NULL); 940 random_seed_size = strlen(random_seed); 941 942 } else { 943 MD5_CTX ctx; 944 char *b; 945 946 MD5Init(&ctx); 947 b = MD5File(random_source, NULL); 948 if (b == NULL) 949 err(2, NULL); 950 951 random_seed = b; 952 random_seed_size = strlen(b); 953 } 954 955 MD5Init(&md5_ctx); 956 if(random_seed_size>0) { 957 MD5Update(&md5_ctx, random_seed, random_seed_size); 958 } 959 } 960 } 961 962 /* 963 * Main function. 964 */ 965 int 966 main(int argc, char **argv) 967 { 968 char *outfile, *real_outfile; 969 int c, result; 970 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 971 { false, false, false, false, false, false }; 972 973 result = 0; 974 outfile = sort_strdup("-"); 975 real_outfile = NULL; 976 977 struct sort_mods *sm = &default_sort_mods_object; 978 979 init_tmp_files(); 980 981 set_signal_handler(); 982 983 set_hw_params(); 984 set_locale(); 985 set_tmpdir(); 986 set_sort_opts(); 987 988 #if 0 989 { 990 static int counter = 0; 991 char fn[128]; 992 sprintf(fn, "/var/tmp/debug.sort.%d", counter++); 993 FILE* f = fopen(fn, "w"); 994 fprintf(f, ">>sort>>"); 995 for (int i = 0; i < argc; i++) { 996 fprintf(f, "<%s>", argv[i]); 997 } 998 fprintf(f, "<<sort<<\n"); 999 fclose(f); 1000 } 1001 #endif 1002 1003 fix_obsolete_keys(&argc, argv); 1004 1005 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1006 != -1)) { 1007 1008 check_mutually_exclusive_flags(c, mef_flags); 1009 1010 if (!set_sort_modifier(sm, c)) { 1011 1012 switch (c) { 1013 case 'c': 1014 sort_opts_vals.cflag = true; 1015 if (optarg) { 1016 if (!strcmp(optarg, "diagnose-first")) 1017 ; 1018 else if (!strcmp(optarg, "silent") || 1019 !strcmp(optarg, "quiet")) 1020 sort_opts_vals.csilentflag = true; 1021 else if (*optarg) 1022 unknown(optarg); 1023 } 1024 break; 1025 case 'C': 1026 sort_opts_vals.cflag = true; 1027 sort_opts_vals.csilentflag = true; 1028 break; 1029 case 'k': 1030 { 1031 sort_opts_vals.complex_sort = true; 1032 sort_opts_vals.kflag = true; 1033 1034 keys_num++; 1035 keys = sort_realloc(keys, keys_num * 1036 sizeof(struct key_specs)); 1037 memset(&(keys[keys_num - 1]), 0, 1038 sizeof(struct key_specs)); 1039 1040 if (parse_k(optarg, &(keys[keys_num - 1])) 1041 < 0) { 1042 errx(2, "%s: -k %s\n", 1043 strerror(EINVAL), optarg); 1044 } 1045 1046 break; 1047 } 1048 case 'm': 1049 sort_opts_vals.mflag = true; 1050 break; 1051 case 'o': 1052 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1053 strcpy(outfile, optarg); 1054 break; 1055 case 's': 1056 sort_opts_vals.sflag = true; 1057 break; 1058 case 'S': 1059 available_free_memory = 1060 parse_memory_buffer_value(optarg); 1061 break; 1062 case 'T': 1063 tmpdir = sort_strdup(optarg); 1064 break; 1065 case 't': 1066 if (strlen(optarg) > 1) { 1067 if (strcmp(optarg, "\\0")) { 1068 errx(2, "%s: %s\n", 1069 strerror(EINVAL), optarg); 1070 } 1071 *optarg = 0; 1072 } 1073 sort_opts_vals.tflag = true; 1074 sort_opts_vals.field_sep = btowc(optarg[0]); 1075 if (sort_opts_vals.field_sep == WEOF) { 1076 errno = EINVAL; 1077 err(2, NULL); 1078 } 1079 if (!gnusort_numeric_compatibility) { 1080 if (symbol_decimal_point == sort_opts_vals.field_sep) 1081 symbol_decimal_point = WEOF; 1082 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1083 symbol_thousands_sep = WEOF; 1084 if (symbol_negative_sign == sort_opts_vals.field_sep) 1085 symbol_negative_sign = WEOF; 1086 if (symbol_positive_sign == sort_opts_vals.field_sep) 1087 symbol_positive_sign = WEOF; 1088 } 1089 break; 1090 case 'u': 1091 sort_opts_vals.uflag = true; 1092 /* stable sort for the correct unique val */ 1093 sort_opts_vals.sflag = true; 1094 break; 1095 case 'z': 1096 sort_opts_vals.zflag = true; 1097 break; 1098 case SORT_OPT: 1099 if (optarg) { 1100 if (!strcmp(optarg, "general-numeric")) 1101 set_sort_modifier(sm, 'g'); 1102 else if (!strcmp(optarg, "human-numeric")) 1103 set_sort_modifier(sm, 'h'); 1104 else if (!strcmp(optarg, "numeric")) 1105 set_sort_modifier(sm, 'n'); 1106 else if (!strcmp(optarg, "month")) 1107 set_sort_modifier(sm, 'M'); 1108 else if (!strcmp(optarg, "random")) 1109 set_sort_modifier(sm, 'R'); 1110 else 1111 unknown(optarg); 1112 } 1113 break; 1114 #if defined(SORT_THREADS) 1115 case NTHREADS_OPT: 1116 nthreads = (size_t)(atoi(optarg)); 1117 if (nthreads < 1) 1118 nthreads = 1; 1119 if (nthreads > 1024) 1120 nthreads = 1024; 1121 break; 1122 #endif 1123 case QSORT_OPT: 1124 sort_opts_vals.sort_method = SORT_QSORT; 1125 break; 1126 case MERGESORT_OPT: 1127 sort_opts_vals.sort_method = SORT_MERGESORT; 1128 break; 1129 case HEAPSORT_OPT: 1130 sort_opts_vals.sort_method = SORT_HEAPSORT; 1131 break; 1132 case RADIXSORT_OPT: 1133 sort_opts_vals.sort_method = SORT_RADIXSORT; 1134 break; 1135 case RANDOMSOURCE_OPT: 1136 random_source = strdup(optarg); 1137 break; 1138 case COMPRESSPROGRAM_OPT: 1139 compress_program = strdup(optarg); 1140 break; 1141 case FF_OPT: 1142 read_fns_from_file0(optarg); 1143 break; 1144 case BS_OPT: 1145 { 1146 errno = 0; 1147 long mof = strtol(optarg, NULL, 10); 1148 if (errno != 0) 1149 errx(2, "--batch-size: %s", 1150 strerror(errno)); 1151 if (mof >= 2) 1152 max_open_files = (size_t) mof + 1; 1153 } 1154 break; 1155 case VERSION_OPT: 1156 printf("%s\n", VERSION); 1157 exit(EXIT_SUCCESS); 1158 /* NOTREACHED */ 1159 break; 1160 case DEBUG_OPT: 1161 debug_sort = true; 1162 break; 1163 case HELP_OPT: 1164 usage(false); 1165 /* NOTREACHED */ 1166 break; 1167 default: 1168 usage(true); 1169 /* NOTREACHED */ 1170 } 1171 } 1172 } 1173 1174 argc -= optind; 1175 argv += optind; 1176 1177 #ifndef WITHOUT_NLS 1178 catalog = catopen("sort", NL_CAT_LOCALE); 1179 #endif 1180 1181 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1182 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1183 1184 #ifndef WITHOUT_NLS 1185 catclose(catalog); 1186 #endif 1187 1188 if (keys_num == 0) { 1189 keys_num = 1; 1190 keys = sort_realloc(keys, sizeof(struct key_specs)); 1191 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1192 keys[0].c1 = 1; 1193 keys[0].pos1b = default_sort_mods->bflag; 1194 keys[0].pos2b = default_sort_mods->bflag; 1195 memcpy(&(keys[0].sm), default_sort_mods, 1196 sizeof(struct sort_mods)); 1197 } 1198 1199 for (size_t i = 0; i < keys_num; i++) { 1200 struct key_specs *ks; 1201 1202 ks = &(keys[i]); 1203 1204 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1205 !(ks->pos2b)) { 1206 ks->pos1b = sm->bflag; 1207 ks->pos2b = sm->bflag; 1208 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1209 } 1210 1211 ks->sm.func = get_sort_func(&(ks->sm)); 1212 } 1213 1214 if (argc_from_file0 >= 0) { 1215 argc = argc_from_file0; 1216 argv = argv_from_file0; 1217 } 1218 1219 if (debug_sort) { 1220 #if defined(SORT_THREADS) 1221 nthreads = 1; 1222 #endif 1223 printf("Using collate rules of %s locale\n", 1224 setlocale(LC_COLLATE, NULL)); 1225 if (byte_sort) 1226 printf("Byte sort is used\n"); 1227 if (print_symbols_on_debug) { 1228 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1229 if (symbol_thousands_sep) 1230 printf("Thousands separator: <%lc>\n", 1231 symbol_thousands_sep); 1232 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1233 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1234 } 1235 } 1236 1237 set_random_seed(); 1238 1239 /* Case when the outfile equals one of the input files: */ 1240 if (strcmp(outfile, "-")) { 1241 1242 for(int i = 0; i < argc; ++i) { 1243 if (strcmp(argv[i], outfile) == 0) { 1244 real_outfile = sort_strdup(outfile); 1245 for(;;) { 1246 char* tmp = sort_malloc(strlen(outfile) + 1247 strlen(".tmp") + 1); 1248 1249 strcpy(tmp, outfile); 1250 strcpy(tmp + strlen(tmp), ".tmp"); 1251 sort_free(outfile); 1252 outfile = tmp; 1253 if (access(outfile, F_OK) < 0) 1254 break; 1255 } 1256 tmp_file_atexit(outfile); 1257 } 1258 } 1259 } 1260 1261 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1262 struct file_list fl; 1263 struct sort_list list; 1264 1265 sort_list_init(&list); 1266 file_list_init(&fl, true); 1267 1268 if (argc < 1) 1269 procfile("-", &list, &fl); 1270 else { 1271 while (argc > 0) { 1272 procfile(*argv, &list, &fl); 1273 --argc; 1274 ++argv; 1275 } 1276 } 1277 1278 if (fl.count < 1) 1279 sort_list_to_file(&list, outfile); 1280 else { 1281 if (list.count > 0) { 1282 char *flast = new_tmp_file_name(); 1283 1284 sort_list_to_file(&list, flast); 1285 file_list_add(&fl, flast, false); 1286 } 1287 merge_files(&fl, outfile); 1288 } 1289 1290 file_list_clean(&fl); 1291 1292 /* 1293 * We are about to exit the program, so we can ignore 1294 * the clean-up for speed 1295 * 1296 * sort_list_clean(&list); 1297 */ 1298 1299 } else if (sort_opts_vals.cflag) { 1300 result = (argc == 0) ? (check("-")) : (check(*argv)); 1301 } else if (sort_opts_vals.mflag) { 1302 struct file_list fl; 1303 1304 file_list_init(&fl, false); 1305 file_list_populate(&fl, argc, argv, true); 1306 merge_files(&fl, outfile); 1307 file_list_clean(&fl); 1308 } 1309 1310 if (real_outfile) { 1311 unlink(real_outfile); 1312 if (rename(outfile, real_outfile) < 0) 1313 err(2, NULL); 1314 sort_free(real_outfile); 1315 } 1316 1317 sort_free(outfile); 1318 1319 return (result); 1320 } 1321