1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <locale.h>
#include <md5.h>
#include <regex.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>

#include "coll.h"
#include "file.h"
#include "sort.h"

#ifndef WITHOUT_NLS
#include <nl_types.h>
nl_catd catalog;
#endif

/* Short-option string handed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
/* Upper bound on the bytes read from the default seed file. */
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set by set_sort_modifier() when -R is requested. */
static bool need_random;
/* Seed file; replaced by the --random-source argument in main(). */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* Seed data and its length, filled in by set_random_seed(). */
static const void *random_seed;
static size_t random_seed_size;

/* MD5 context pre-loaded with the random seed by set_random_seed(). */
MD5_CTX md5_ctx;

/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  The index of each entry is the message number that is
 * passed to getstr().
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };

struct sort_opts sort_opts_vals;

/* Set by --debug. */
bool debug_sort;
/* Set by set_sort_modifier() for the -g, -M, -n and -h modifiers. */
bool need_hint;

#if defined(SORT_THREADS)
/* Both are recomputed by set_hw_params(); --parallel overrides nthreads. */
unsigned int ncpu = 1;
size_t nthreads = 1;
#endif

/* Set when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain command-line options (not attached to a -k key). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for -n and -h; makes --debug print the numeric locale symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments from file (when the files0-from option is used).
 * argc_from_file0 stays at (size_t)-1 until the first name is read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;

/*
 * Placeholder symbols for options which have no single-character equivalent
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};

/*
 * Sort-type flags of which at most one may be given; the mef_flags arrays
 * used by check_mutually_exclusive_flags() are indexed in the same order.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };

/* Long-option table handed to getopt_long() in main(). */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};

void fix_obsolete_keys(int *argc, char **argv);

/*
 * Check whether a sort modifier object has no modifier set.
 *
 * Returns true for a NULL object or when none of the M, V, n, g, r, R,
 * h, d and f flags is set.  The b and i flags are not consulted here.
 */
static bool
sort_modifier_empty(struct sort_mods *sm)
{

	if (sm == NULL)
		return (true);
	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
}

/*
 * Print out usage text.
208 */ 209 static void 210 usage(bool opt_err) 211 { 212 struct option *o; 213 FILE *out; 214 215 out = stdout; 216 o = &(long_options[0]); 217 218 if (opt_err) 219 out = stderr; 220 fprintf(out, getstr(12), getprogname()); 221 if (opt_err) 222 exit(2); 223 exit(0); 224 } 225 226 /* 227 * Read input file names from a file (file0-from option). 228 */ 229 static void 230 read_fns_from_file0(const char *fn) 231 { 232 if (fn) { 233 struct file0_reader f0r; 234 FILE *f; 235 236 f = fopen(fn, "r"); 237 if (f == NULL) 238 err(2, NULL); 239 240 memset(&f0r, 0, sizeof(f0r)); 241 f0r.f = f; 242 243 while (!feof(f)) { 244 char *line = read_file0_line(&f0r); 245 246 if (line && *line) { 247 if (argc_from_file0 == (size_t)-1) 248 argc_from_file0 = 0; 249 ++argc_from_file0; 250 argv_from_file0 = sort_realloc(argv_from_file0, 251 argc_from_file0 * sizeof(char *)); 252 if (argv_from_file0 == NULL) 253 err(2, NULL); 254 argv_from_file0[argc_from_file0 - 1] = 255 sort_strdup(line); 256 } 257 } 258 closefile(f, fn); 259 } 260 } 261 262 /* 263 * Check how much RAM is available for the sort. 264 */ 265 static void 266 set_hw_params(void) 267 { 268 long pages, psize; 269 270 pages = psize = 0; 271 272 #if defined(SORT_THREADS) 273 ncpu = 1; 274 #endif 275 276 pages = sysconf(_SC_PHYS_PAGES); 277 if (pages < 1) { 278 perror("sysconf pages"); 279 psize = 1; 280 } 281 psize = sysconf(_SC_PAGESIZE); 282 if (psize < 1) { 283 perror("sysconf psize"); 284 psize = 4096; 285 } 286 #if defined(SORT_THREADS) 287 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 288 if (ncpu < 1) 289 ncpu = 1; 290 else if(ncpu > 32) 291 ncpu = 32; 292 293 nthreads = ncpu; 294 #endif 295 296 free_memory = (unsigned long long) pages * (unsigned long long) psize; 297 available_free_memory = free_memory / 2; 298 299 if (available_free_memory < 1024) 300 available_free_memory = 1024; 301 } 302 303 /* 304 * Convert "plain" symbol to wide symbol, with default value. 
305 */ 306 static void 307 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 308 { 309 310 if (wc && c) { 311 int res; 312 313 res = mbtowc(wc, c, MB_CUR_MAX); 314 if (res < 1) 315 *wc = def; 316 } 317 } 318 319 /* 320 * Set current locale symbols. 321 */ 322 static void 323 set_locale(void) 324 { 325 struct lconv *lc; 326 const char *locale; 327 328 setlocale(LC_ALL, ""); 329 330 lc = localeconv(); 331 332 if (lc) { 333 /* obtain LC_NUMERIC info */ 334 /* Convert to wide char form */ 335 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 336 symbol_decimal_point); 337 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 338 symbol_thousands_sep); 339 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 340 symbol_positive_sign); 341 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 342 symbol_negative_sign); 343 } 344 345 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 346 gnusort_numeric_compatibility = true; 347 348 locale = setlocale(LC_COLLATE, NULL); 349 350 if (locale) { 351 char *tmpl; 352 const char *cclocale; 353 354 tmpl = sort_strdup(locale); 355 cclocale = setlocale(LC_COLLATE, "C"); 356 if (cclocale && !strcmp(cclocale, tmpl)) 357 byte_sort = true; 358 else { 359 const char *pclocale; 360 361 pclocale = setlocale(LC_COLLATE, "POSIX"); 362 if (pclocale && !strcmp(pclocale, tmpl)) 363 byte_sort = true; 364 } 365 setlocale(LC_COLLATE, tmpl); 366 sort_free(tmpl); 367 } 368 } 369 370 /* 371 * Set directory temporary files. 372 */ 373 static void 374 set_tmpdir(void) 375 { 376 char *td; 377 378 td = getenv("TMPDIR"); 379 if (td != NULL) 380 tmpdir = sort_strdup(td); 381 } 382 383 /* 384 * Parse -S option. 
385 */ 386 static unsigned long long 387 parse_memory_buffer_value(const char *value) 388 { 389 390 if (value == NULL) 391 return (available_free_memory); 392 else { 393 char *endptr; 394 unsigned long long membuf; 395 396 endptr = NULL; 397 errno = 0; 398 membuf = strtoll(value, &endptr, 10); 399 400 if (errno != 0) { 401 warn("%s",getstr(4)); 402 membuf = available_free_memory; 403 } else { 404 switch (*endptr){ 405 case 'Y': 406 membuf *= 1024; 407 /* FALLTHROUGH */ 408 case 'Z': 409 membuf *= 1024; 410 /* FALLTHROUGH */ 411 case 'E': 412 membuf *= 1024; 413 /* FALLTHROUGH */ 414 case 'P': 415 membuf *= 1024; 416 /* FALLTHROUGH */ 417 case 'T': 418 membuf *= 1024; 419 /* FALLTHROUGH */ 420 case 'G': 421 membuf *= 1024; 422 /* FALLTHROUGH */ 423 case 'M': 424 membuf *= 1024; 425 /* FALLTHROUGH */ 426 case '\0': 427 case 'K': 428 membuf *= 1024; 429 /* FALLTHROUGH */ 430 case 'b': 431 break; 432 case '%': 433 membuf = (available_free_memory * membuf) / 434 100; 435 break; 436 default: 437 fprintf(stderr, "%s: %s\n", strerror(EINVAL), 438 optarg); 439 membuf = available_free_memory; 440 } 441 } 442 return (membuf); 443 } 444 } 445 446 /* 447 * Signal handler that clears the temporary files. 448 */ 449 static void 450 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 451 void *context __unused) 452 { 453 454 clear_tmp_files(); 455 exit(-1); 456 } 457 458 /* 459 * Set signal handler on panic signals. 
460 */ 461 static void 462 set_signal_handler(void) 463 { 464 struct sigaction sa; 465 466 memset(&sa, 0, sizeof(sa)); 467 sa.sa_sigaction = &sig_handler; 468 sa.sa_flags = SA_SIGINFO; 469 470 if (sigaction(SIGTERM, &sa, NULL) < 0) { 471 perror("sigaction"); 472 return; 473 } 474 if (sigaction(SIGHUP, &sa, NULL) < 0) { 475 perror("sigaction"); 476 return; 477 } 478 if (sigaction(SIGINT, &sa, NULL) < 0) { 479 perror("sigaction"); 480 return; 481 } 482 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 483 perror("sigaction"); 484 return; 485 } 486 if (sigaction(SIGABRT, &sa, NULL) < 0) { 487 perror("sigaction"); 488 return; 489 } 490 if (sigaction(SIGBUS, &sa, NULL) < 0) { 491 perror("sigaction"); 492 return; 493 } 494 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 495 perror("sigaction"); 496 return; 497 } 498 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 499 perror("sigaction"); 500 return; 501 } 502 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 503 perror("sigaction"); 504 return; 505 } 506 } 507 508 /* 509 * Print "unknown" message and exit with status 2. 510 */ 511 static void 512 unknown(const char *what) 513 { 514 515 errx(2, "%s: %s", getstr(3), what); 516 } 517 518 /* 519 * Check whether contradictory input options are used. 520 */ 521 static void 522 check_mutually_exclusive_flags(char c, bool *mef_flags) 523 { 524 int fo_index, mec; 525 bool found_others, found_this; 526 527 found_others = found_this =false; 528 fo_index = 0; 529 530 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 531 mec = mutually_exclusive_flags[i]; 532 533 if (mec != c) { 534 if (mef_flags[i]) { 535 if (found_this) 536 errx(1, "%c:%c: %s", c, mec, getstr(1)); 537 found_others = true; 538 fo_index = i; 539 } 540 } else { 541 if (found_others) 542 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 543 mef_flags[i] = true; 544 found_this = true; 545 } 546 } 547 } 548 549 /* 550 * Initialise sort opts data. 
551 */ 552 static void 553 set_sort_opts(void) 554 { 555 556 memset(&default_sort_mods_object, 0, 557 sizeof(default_sort_mods_object)); 558 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 559 default_sort_mods_object.func = 560 get_sort_func(&default_sort_mods_object); 561 } 562 563 /* 564 * Set a sort modifier on a sort modifiers object. 565 */ 566 static bool 567 set_sort_modifier(struct sort_mods *sm, int c) 568 { 569 570 if (sm) { 571 switch (c){ 572 case 'b': 573 sm->bflag = true; 574 break; 575 case 'd': 576 sm->dflag = true; 577 break; 578 case 'f': 579 sm->fflag = true; 580 break; 581 case 'g': 582 sm->gflag = true; 583 need_hint = true; 584 break; 585 case 'i': 586 sm->iflag = true; 587 break; 588 case 'R': 589 sm->Rflag = true; 590 need_random = true; 591 break; 592 case 'M': 593 initialise_months(); 594 sm->Mflag = true; 595 need_hint = true; 596 break; 597 case 'n': 598 sm->nflag = true; 599 need_hint = true; 600 print_symbols_on_debug = true; 601 break; 602 case 'r': 603 sm->rflag = true; 604 break; 605 case 'V': 606 sm->Vflag = true; 607 break; 608 case 'h': 609 sm->hflag = true; 610 need_hint = true; 611 print_symbols_on_debug = true; 612 break; 613 default: 614 return false; 615 } 616 sort_opts_vals.complex_sort = true; 617 sm->func = get_sort_func(sm); 618 } 619 return (true); 620 } 621 622 /* 623 * Parse POS in -k option. 
624 */ 625 static int 626 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 627 { 628 regmatch_t pmatch[4]; 629 regex_t re; 630 char *c, *f; 631 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 632 size_t len, nmatch; 633 int ret; 634 635 ret = -1; 636 nmatch = 4; 637 c = f = NULL; 638 639 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 640 return (-1); 641 642 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 643 goto end; 644 645 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 646 goto end; 647 648 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 649 goto end; 650 651 len = pmatch[1].rm_eo - pmatch[1].rm_so; 652 f = sort_malloc((len + 1) * sizeof(char)); 653 654 strncpy(f, s + pmatch[1].rm_so, len); 655 f[len] = '\0'; 656 657 if (second) { 658 errno = 0; 659 ks->f2 = (size_t) strtoul(f, NULL, 10); 660 if (errno != 0) 661 errx(2, "%s: -k", strerror(errno)); 662 if (ks->f2 == 0) { 663 warn("%s",getstr(5)); 664 goto end; 665 } 666 } else { 667 errno = 0; 668 ks->f1 = (size_t) strtoul(f, NULL, 10); 669 if (errno != 0) 670 errx(2, "%s: -k", strerror(errno)); 671 if (ks->f1 == 0) { 672 warn("%s",getstr(5)); 673 goto end; 674 } 675 } 676 677 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 678 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 679 c = sort_malloc((len + 1) * sizeof(char)); 680 681 strncpy(c, s + pmatch[2].rm_so + 1, len); 682 c[len] = '\0'; 683 684 if (second) { 685 errno = 0; 686 ks->c2 = (size_t) strtoul(c, NULL, 10); 687 if (errno != 0) 688 errx(2, "%s: -k", strerror(errno)); 689 } else { 690 errno = 0; 691 ks->c1 = (size_t) strtoul(c, NULL, 10); 692 if (errno != 0) 693 errx(2, "%s: -k", strerror(errno)); 694 if (ks->c1 == 0) { 695 warn("%s",getstr(6)); 696 goto end; 697 } 698 } 699 } else { 700 if (second) 701 ks->c2 = 0; 702 else 703 ks->c1 = 1; 704 } 705 706 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 707 regoff_t i = 0; 708 709 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 710 check_mutually_exclusive_flags(s[i], mef_flags); 711 if 
(s[i] == 'b') { 712 if (second) 713 ks->pos2b = true; 714 else 715 ks->pos1b = true; 716 } else if (!set_sort_modifier(&(ks->sm), s[i])) 717 goto end; 718 } 719 } 720 721 ret = 0; 722 723 end: 724 725 if (c) 726 sort_free(c); 727 if (f) 728 sort_free(f); 729 regfree(&re); 730 731 return (ret); 732 } 733 734 /* 735 * Parse -k option value. 736 */ 737 static int 738 parse_k(const char *s, struct key_specs *ks) 739 { 740 int ret = -1; 741 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 742 { false, false, false, false, false, false }; 743 744 if (s && *s) { 745 char *sptr; 746 747 sptr = strchr(s, ','); 748 if (sptr) { 749 size_t size1; 750 char *pos1, *pos2; 751 752 size1 = sptr - s; 753 754 if (size1 < 1) 755 return (-1); 756 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 757 758 strncpy(pos1, s, size1); 759 pos1[size1] = '\0'; 760 761 ret = parse_pos(pos1, ks, mef_flags, false); 762 763 sort_free(pos1); 764 if (ret < 0) 765 return (ret); 766 767 pos2 = sort_strdup(sptr + 1); 768 ret = parse_pos(pos2, ks, mef_flags, true); 769 sort_free(pos2); 770 } else 771 ret = parse_pos(s, ks, mef_flags, false); 772 } 773 774 return (ret); 775 } 776 777 /* 778 * Parse POS in +POS -POS option. 
779 */ 780 static int 781 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 782 { 783 regex_t re; 784 regmatch_t pmatch[4]; 785 char *c, *f; 786 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 787 int ret; 788 size_t len, nmatch; 789 790 ret = -1; 791 nmatch = 4; 792 c = f = NULL; 793 *nc = *nf = 0; 794 795 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 796 return (-1); 797 798 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 799 goto end; 800 801 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 802 goto end; 803 804 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 805 goto end; 806 807 len = pmatch[1].rm_eo - pmatch[1].rm_so; 808 f = sort_malloc((len + 1) * sizeof(char)); 809 810 strncpy(f, s + pmatch[1].rm_so, len); 811 f[len] = '\0'; 812 813 errno = 0; 814 *nf = (size_t) strtoul(f, NULL, 10); 815 if (errno != 0) 816 errx(2, "%s", getstr(11)); 817 818 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 819 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 820 c = sort_malloc((len + 1) * sizeof(char)); 821 822 strncpy(c, s + pmatch[2].rm_so + 1, len); 823 c[len] = '\0'; 824 825 errno = 0; 826 *nc = (size_t) strtoul(c, NULL, 10); 827 if (errno != 0) 828 errx(2, "%s", getstr(11)); 829 } 830 831 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 832 833 len = pmatch[3].rm_eo - pmatch[3].rm_so; 834 835 strncpy(sopts, s + pmatch[3].rm_so, len); 836 sopts[len] = '\0'; 837 } 838 839 ret = 0; 840 841 end: 842 if (c) 843 sort_free(c); 844 if (f) 845 sort_free(f); 846 regfree(&re); 847 848 return (ret); 849 } 850 851 /* 852 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 853 */ 854 void 855 fix_obsolete_keys(int *argc, char **argv) 856 { 857 char sopt[129]; 858 859 for (int i = 1; i < *argc; i++) { 860 char *arg1; 861 862 arg1 = argv[i]; 863 864 if (strlen(arg1) > 1 && arg1[0] == '+') { 865 int c1, f1; 866 char sopts1[128]; 867 868 sopts1[0] = 0; 869 c1 = f1 = 0; 870 871 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 872 continue; 873 else { 874 f1 += 1; 875 c1 += 1; 876 if 
(i + 1 < *argc) { 877 char *arg2 = argv[i + 1]; 878 879 if (strlen(arg2) > 1 && 880 arg2[0] == '-') { 881 int c2, f2; 882 char sopts2[128]; 883 884 sopts2[0] = 0; 885 c2 = f2 = 0; 886 887 if (parse_pos_obs(arg2 + 1, 888 &f2, &c2, sopts2) >= 0) { 889 if (c2 > 0) 890 f2 += 1; 891 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 892 f1, c1, sopts1, f2, c2, sopts2); 893 argv[i] = sort_strdup(sopt); 894 for (int j = i + 1; j + 1 < *argc; j++) 895 argv[j] = argv[j + 1]; 896 *argc -= 1; 897 continue; 898 } 899 } 900 } 901 sprintf(sopt, "-k%d.%d", f1, c1); 902 argv[i] = sort_strdup(sopt); 903 } 904 } 905 } 906 } 907 908 /* 909 * Set random seed 910 */ 911 static void 912 set_random_seed(void) 913 { 914 if (need_random) { 915 916 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 917 FILE* fseed; 918 MD5_CTX ctx; 919 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 920 size_t sz = 0; 921 922 fseed = openfile(random_source, "r"); 923 while (!feof(fseed)) { 924 int cr; 925 926 cr = fgetc(fseed); 927 if (cr == EOF) 928 break; 929 930 rsd[sz++] = (char) cr; 931 932 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 933 break; 934 } 935 936 closefile(fseed, random_source); 937 938 MD5Init(&ctx); 939 MD5Update(&ctx, rsd, sz); 940 941 random_seed = MD5End(&ctx, NULL); 942 random_seed_size = strlen(random_seed); 943 944 } else { 945 MD5_CTX ctx; 946 char *b; 947 948 MD5Init(&ctx); 949 b = MD5File(random_source, NULL); 950 if (b == NULL) 951 err(2, NULL); 952 953 random_seed = b; 954 random_seed_size = strlen(b); 955 } 956 957 MD5Init(&md5_ctx); 958 if(random_seed_size>0) { 959 MD5Update(&md5_ctx, random_seed, random_seed_size); 960 } 961 } 962 } 963 964 /* 965 * Main function. 
966 */ 967 int 968 main(int argc, char **argv) 969 { 970 char *outfile, *real_outfile; 971 int c, result; 972 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 973 { false, false, false, false, false, false }; 974 975 result = 0; 976 outfile = sort_strdup("-"); 977 real_outfile = NULL; 978 979 struct sort_mods *sm = &default_sort_mods_object; 980 981 init_tmp_files(); 982 983 set_signal_handler(); 984 985 set_hw_params(); 986 set_locale(); 987 set_tmpdir(); 988 set_sort_opts(); 989 990 #if 0 991 { 992 static int counter = 0; 993 char fn[128]; 994 sprintf(fn, "/var/tmp/debug.sort.%d", counter++); 995 FILE* f = fopen(fn, "w"); 996 fprintf(f, ">>sort>>"); 997 for (int i = 0; i < argc; i++) { 998 fprintf(f, "<%s>", argv[i]); 999 } 1000 fprintf(f, "<<sort<<\n"); 1001 fclose(f); 1002 } 1003 #endif 1004 1005 fix_obsolete_keys(&argc, argv); 1006 1007 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1008 != -1)) { 1009 1010 check_mutually_exclusive_flags(c, mef_flags); 1011 1012 if (!set_sort_modifier(sm, c)) { 1013 1014 switch (c) { 1015 case 'c': 1016 sort_opts_vals.cflag = true; 1017 if (optarg) { 1018 if (!strcmp(optarg, "diagnose-first")) 1019 ; 1020 else if (!strcmp(optarg, "silent") || 1021 !strcmp(optarg, "quiet")) 1022 sort_opts_vals.csilentflag = true; 1023 else if (*optarg) 1024 unknown(optarg); 1025 } 1026 break; 1027 case 'C': 1028 sort_opts_vals.cflag = true; 1029 sort_opts_vals.csilentflag = true; 1030 break; 1031 case 'k': 1032 { 1033 sort_opts_vals.complex_sort = true; 1034 sort_opts_vals.kflag = true; 1035 1036 keys_num++; 1037 keys = sort_realloc(keys, keys_num * 1038 sizeof(struct key_specs)); 1039 memset(&(keys[keys_num - 1]), 0, 1040 sizeof(struct key_specs)); 1041 1042 if (parse_k(optarg, &(keys[keys_num - 1])) 1043 < 0) { 1044 errx(2, "%s: -k %s\n", 1045 strerror(EINVAL), optarg); 1046 } 1047 1048 break; 1049 } 1050 case 'm': 1051 sort_opts_vals.mflag = true; 1052 break; 1053 case 'o': 1054 outfile = sort_realloc(outfile, 
(strlen(optarg) + 1)); 1055 strcpy(outfile, optarg); 1056 break; 1057 case 's': 1058 sort_opts_vals.sflag = true; 1059 break; 1060 case 'S': 1061 available_free_memory = 1062 parse_memory_buffer_value(optarg); 1063 break; 1064 case 'T': 1065 tmpdir = sort_strdup(optarg); 1066 break; 1067 case 't': 1068 while (strlen(optarg) > 1) { 1069 if (optarg[0] != '\\') { 1070 errx(2, "%s: %s\n", 1071 strerror(EINVAL), optarg); 1072 } 1073 optarg += 1; 1074 if (*optarg == '0') { 1075 *optarg = 0; 1076 break; 1077 } 1078 } 1079 sort_opts_vals.tflag = true; 1080 sort_opts_vals.field_sep = btowc(optarg[0]); 1081 if (sort_opts_vals.field_sep == WEOF) { 1082 errno = EINVAL; 1083 err(2, NULL); 1084 } 1085 if (!gnusort_numeric_compatibility) { 1086 if (symbol_decimal_point == sort_opts_vals.field_sep) 1087 symbol_decimal_point = WEOF; 1088 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1089 symbol_thousands_sep = WEOF; 1090 if (symbol_negative_sign == sort_opts_vals.field_sep) 1091 symbol_negative_sign = WEOF; 1092 if (symbol_positive_sign == sort_opts_vals.field_sep) 1093 symbol_positive_sign = WEOF; 1094 } 1095 break; 1096 case 'u': 1097 sort_opts_vals.uflag = true; 1098 /* stable sort for the correct unique val */ 1099 sort_opts_vals.sflag = true; 1100 break; 1101 case 'z': 1102 sort_opts_vals.zflag = true; 1103 break; 1104 case SORT_OPT: 1105 if (optarg) { 1106 if (!strcmp(optarg, "general-numeric")) 1107 set_sort_modifier(sm, 'g'); 1108 else if (!strcmp(optarg, "human-numeric")) 1109 set_sort_modifier(sm, 'h'); 1110 else if (!strcmp(optarg, "numeric")) 1111 set_sort_modifier(sm, 'n'); 1112 else if (!strcmp(optarg, "month")) 1113 set_sort_modifier(sm, 'M'); 1114 else if (!strcmp(optarg, "random")) 1115 set_sort_modifier(sm, 'R'); 1116 else 1117 unknown(optarg); 1118 } 1119 break; 1120 #if defined(SORT_THREADS) 1121 case PARALLEL_OPT: 1122 nthreads = (size_t)(atoi(optarg)); 1123 if (nthreads < 1) 1124 nthreads = 1; 1125 if (nthreads > 1024) 1126 nthreads = 1024; 1127 break; 
1128 #endif 1129 case QSORT_OPT: 1130 sort_opts_vals.sort_method = SORT_QSORT; 1131 break; 1132 case MERGESORT_OPT: 1133 sort_opts_vals.sort_method = SORT_MERGESORT; 1134 break; 1135 case MMAP_OPT: 1136 use_mmap = true; 1137 break; 1138 case HEAPSORT_OPT: 1139 sort_opts_vals.sort_method = SORT_HEAPSORT; 1140 break; 1141 case RADIXSORT_OPT: 1142 sort_opts_vals.sort_method = SORT_RADIXSORT; 1143 break; 1144 case RANDOMSOURCE_OPT: 1145 random_source = strdup(optarg); 1146 break; 1147 case COMPRESSPROGRAM_OPT: 1148 compress_program = strdup(optarg); 1149 break; 1150 case FF_OPT: 1151 read_fns_from_file0(optarg); 1152 break; 1153 case BS_OPT: 1154 { 1155 errno = 0; 1156 long mof = strtol(optarg, NULL, 10); 1157 if (errno != 0) 1158 errx(2, "--batch-size: %s", 1159 strerror(errno)); 1160 if (mof >= 2) 1161 max_open_files = (size_t) mof + 1; 1162 } 1163 break; 1164 case VERSION_OPT: 1165 printf("%s\n", VERSION); 1166 exit(EXIT_SUCCESS); 1167 /* NOTREACHED */ 1168 break; 1169 case DEBUG_OPT: 1170 debug_sort = true; 1171 break; 1172 case HELP_OPT: 1173 usage(false); 1174 /* NOTREACHED */ 1175 break; 1176 default: 1177 usage(true); 1178 /* NOTREACHED */ 1179 } 1180 } 1181 } 1182 1183 argc -= optind; 1184 argv += optind; 1185 1186 #ifndef WITHOUT_NLS 1187 catalog = catopen("sort", NL_CAT_LOCALE); 1188 #endif 1189 1190 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1191 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1192 1193 #ifndef WITHOUT_NLS 1194 catclose(catalog); 1195 #endif 1196 1197 if (keys_num == 0) { 1198 keys_num = 1; 1199 keys = sort_realloc(keys, sizeof(struct key_specs)); 1200 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1201 keys[0].c1 = 1; 1202 keys[0].pos1b = default_sort_mods->bflag; 1203 keys[0].pos2b = default_sort_mods->bflag; 1204 memcpy(&(keys[0].sm), default_sort_mods, 1205 sizeof(struct sort_mods)); 1206 } 1207 1208 for (size_t i = 0; i < keys_num; i++) { 1209 struct key_specs *ks; 1210 1211 ks = &(keys[i]); 1212 1213 if 
(sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1214 !(ks->pos2b)) { 1215 ks->pos1b = sm->bflag; 1216 ks->pos2b = sm->bflag; 1217 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1218 } 1219 1220 ks->sm.func = get_sort_func(&(ks->sm)); 1221 } 1222 1223 if (argv_from_file0) { 1224 argc = argc_from_file0; 1225 argv = argv_from_file0; 1226 } 1227 1228 if (debug_sort) { 1229 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1230 #if defined(SORT_THREADS) 1231 printf("Number of CPUs: %d\n",(int)ncpu); 1232 nthreads = 1; 1233 #endif 1234 printf("Using collate rules of %s locale\n", 1235 setlocale(LC_COLLATE, NULL)); 1236 if (byte_sort) 1237 printf("Byte sort is used\n"); 1238 if (print_symbols_on_debug) { 1239 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1240 if (symbol_thousands_sep) 1241 printf("Thousands separator: <%lc>\n", 1242 symbol_thousands_sep); 1243 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1244 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1245 } 1246 } 1247 1248 set_random_seed(); 1249 1250 /* Case when the outfile equals one of the input files: */ 1251 if (strcmp(outfile, "-")) { 1252 1253 for(int i = 0; i < argc; ++i) { 1254 if (strcmp(argv[i], outfile) == 0) { 1255 real_outfile = sort_strdup(outfile); 1256 for(;;) { 1257 char* tmp = sort_malloc(strlen(outfile) + 1258 strlen(".tmp") + 1); 1259 1260 strcpy(tmp, outfile); 1261 strcpy(tmp + strlen(tmp), ".tmp"); 1262 sort_free(outfile); 1263 outfile = tmp; 1264 if (access(outfile, F_OK) < 0) 1265 break; 1266 } 1267 tmp_file_atexit(outfile); 1268 } 1269 } 1270 } 1271 1272 #if defined(SORT_THREADS) 1273 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1274 nthreads = 1; 1275 #endif 1276 1277 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1278 struct file_list fl; 1279 struct sort_list list; 1280 1281 sort_list_init(&list); 1282 file_list_init(&fl, true); 1283 1284 if (argc < 1) 1285 procfile("-", &list, &fl); 1286 else { 1287 
while (argc > 0) { 1288 procfile(*argv, &list, &fl); 1289 --argc; 1290 ++argv; 1291 } 1292 } 1293 1294 if (fl.count < 1) 1295 sort_list_to_file(&list, outfile); 1296 else { 1297 if (list.count > 0) { 1298 char *flast = new_tmp_file_name(); 1299 1300 sort_list_to_file(&list, flast); 1301 file_list_add(&fl, flast, false); 1302 } 1303 merge_files(&fl, outfile); 1304 } 1305 1306 file_list_clean(&fl); 1307 1308 /* 1309 * We are about to exit the program, so we can ignore 1310 * the clean-up for speed 1311 * 1312 * sort_list_clean(&list); 1313 */ 1314 1315 } else if (sort_opts_vals.cflag) { 1316 result = (argc == 0) ? (check("-")) : (check(*argv)); 1317 } else if (sort_opts_vals.mflag) { 1318 struct file_list fl; 1319 1320 file_list_init(&fl, false); 1321 file_list_populate(&fl, argc, argv, true); 1322 merge_files(&fl, outfile); 1323 file_list_clean(&fl); 1324 } 1325 1326 if (real_outfile) { 1327 unlink(real_outfile); 1328 if (rename(outfile, real_outfile) < 0) 1329 err(2, NULL); 1330 sort_free(real_outfile); 1331 } 1332 1333 sort_free(outfile); 1334 1335 return (result); 1336 } 1337