1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <md5.h> 41 #include <regex.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <wchar.h> 49 #include <wctype.h> 50 51 #include "coll.h" 52 #include "file.h" 53 #include "sort.h" 54 55 #ifndef WITHOUT_NLS 56 #include <nl_types.h> 57 nl_catd catalog; 58 #endif 59 60 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 61 62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 64 65 static bool need_random; 66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 67 static const void *random_seed; 68 static size_t random_seed_size; 69 70 MD5_CTX md5_ctx; 71 72 /* 73 * Default messages to use when NLS is disabled or no catalogue 74 * is found. 75 */ 76 const char *nlsstr[] = { "", 77 /* 1*/"mutually exclusive flags", 78 /* 2*/"extra argument not allowed with -c", 79 /* 3*/"Unknown feature", 80 /* 4*/"Wrong memory buffer specification", 81 /* 5*/"0 field in key specs", 82 /* 6*/"0 column in key specs", 83 /* 7*/"Wrong file mode", 84 /* 8*/"Cannot open file for reading", 85 /* 9*/"Radix sort cannot be used with these sort options", 86 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 87 /*11*/"Invalid key position", 88 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 89 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 90 "[-o outfile] [--batch-size size] [--files0-from file] " 91 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 92 "[--mmap] " 93 #if defined(SORT_THREADS) 94 "[--parallel thread_no] " 95 #endif 96 "[--human-numeric-sort] " 97 "[--version-sort] [--random-sort [--random-source file]] " 98 "[--compress-program program] [file ...]\n" }; 99 100 struct sort_opts sort_opts_vals; 101 102 bool debug_sort; 103 bool need_hint; 104 105 #if defined(SORT_THREADS) 106 unsigned int ncpu = 1; 107 size_t nthreads = 1; 108 #endif 109 110 static bool gnusort_numeric_compatibility; 111 112 static struct sort_mods default_sort_mods_object; 113 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 114 115 static bool print_symbols_on_debug; 116 117 /* 118 * Arguments from file (when file0-from option is used: 119 */ 120 static size_t argc_from_file0 = (size_t)-1; 121 static char **argv_from_file0; 122 123 /* 124 * Placeholder symbols for options which have no single-character equivalent 125 */ 126 enum 127 { 128 SORT_OPT = CHAR_MAX + 1, 129 HELP_OPT, 130 FF_OPT, 131 BS_OPT, 132 VERSION_OPT, 133 DEBUG_OPT, 134 #if defined(SORT_THREADS) 135 PARALLEL_OPT, 136 #endif 137 RANDOMSOURCE_OPT, 138 COMPRESSPROGRAM_OPT, 139 QSORT_OPT, 140 MERGESORT_OPT, 141 HEAPSORT_OPT, 142 RADIXSORT_OPT, 143 MMAP_OPT 144 }; 145 146 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 147 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 148 149 static struct option long_options[] = { 150 { "batch-size", required_argument, NULL, BS_OPT }, 151 { "buffer-size", required_argument, NULL, 'S' }, 152 { "check", optional_argument, NULL, 'c' }, 153 { "check=silent|quiet", optional_argument, NULL, 'C' }, 154 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 155 { "debug", no_argument, NULL, DEBUG_OPT }, 156 { "dictionary-order", no_argument, NULL, 'd' }, 157 { "field-separator", required_argument, NULL, 't' }, 158 { "files0-from", required_argument, NULL, FF_OPT }, 159 { "general-numeric-sort", no_argument, NULL, 'g' }, 160 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 161 { "help",no_argument, NULL, HELP_OPT }, 162 { "human-numeric-sort", no_argument, NULL, 'h' }, 163 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 164 { "ignore-case", no_argument, NULL, 'f' }, 165 { "ignore-nonprinting", no_argument, NULL, 'i' }, 166 { "key", required_argument, NULL, 'k' }, 167 { "merge", no_argument, NULL, 'm' }, 168 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 169 { "mmap", no_argument, NULL, MMAP_OPT }, 170 { "month-sort", no_argument, NULL, 'M' }, 171 { "numeric-sort", no_argument, NULL, 'n' }, 172 { "output", required_argument, NULL, 'o' }, 173 #if defined(SORT_THREADS) 174 { "parallel", required_argument, NULL, PARALLEL_OPT }, 175 #endif 176 { "qsort", no_argument, NULL, QSORT_OPT }, 177 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 178 { "random-sort", no_argument, NULL, 'R' }, 179 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 180 { "reverse", no_argument, NULL, 'r' }, 181 { "sort", required_argument, NULL, SORT_OPT }, 182 { "stable", no_argument, NULL, 's' }, 183 { "temporary-directory",required_argument, NULL, 'T' }, 184 { "unique", no_argument, NULL, 'u' }, 185 { "version", no_argument, NULL, VERSION_OPT }, 186 { "version-sort",no_argument, NULL, 'V' }, 187 { "zero-terminated", no_argument, NULL, 'z' }, 188 { NULL, no_argument, NULL, 0 } 189 }; 190 191 void fix_obsolete_keys(int *argc, char **argv); 192 193 /* 194 * Check where sort modifier is present 195 */ 196 static bool 197 sort_modifier_empty(struct sort_mods *sm) 198 { 199 200 if (sm == NULL) 201 return (true); 202 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 203 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 204 } 205 206 /* 207 * Print out usage text. 208 */ 209 static void 210 usage(bool opt_err) 211 { 212 struct option *o; 213 FILE *out; 214 215 out = stdout; 216 o = &(long_options[0]); 217 218 if (opt_err) 219 out = stderr; 220 fprintf(out, getstr(12), getprogname()); 221 if (opt_err) 222 exit(2); 223 exit(0); 224 } 225 226 /* 227 * Read input file names from a file (file0-from option). 228 */ 229 static void 230 read_fns_from_file0(const char *fn) 231 { 232 FILE *f; 233 char *line = NULL; 234 size_t linesize = 0; 235 ssize_t linelen; 236 237 if (fn == NULL) 238 return; 239 240 f = fopen(fn, "r"); 241 if (f == NULL) 242 err(2, "%s", fn); 243 244 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 245 if (*line != '\0') { 246 if (argc_from_file0 == (size_t) - 1) 247 argc_from_file0 = 0; 248 ++argc_from_file0; 249 argv_from_file0 = sort_realloc(argv_from_file0, 250 argc_from_file0 * sizeof(char *)); 251 if (argv_from_file0 == NULL) 252 err(2, NULL); 253 argv_from_file0[argc_from_file0 - 1] = line; 254 } else { 255 free(line); 256 } 257 line = NULL; 258 linesize = 0; 259 } 260 if (ferror(f)) 261 err(2, "%s: getdelim", fn); 262 263 closefile(f, fn); 264 } 265 266 /* 267 * Check how much RAM is available for the sort. 268 */ 269 static void 270 set_hw_params(void) 271 { 272 long pages, psize; 273 274 pages = psize = 0; 275 276 #if defined(SORT_THREADS) 277 ncpu = 1; 278 #endif 279 280 pages = sysconf(_SC_PHYS_PAGES); 281 if (pages < 1) { 282 perror("sysconf pages"); 283 psize = 1; 284 } 285 psize = sysconf(_SC_PAGESIZE); 286 if (psize < 1) { 287 perror("sysconf psize"); 288 psize = 4096; 289 } 290 #if defined(SORT_THREADS) 291 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 292 if (ncpu < 1) 293 ncpu = 1; 294 else if(ncpu > 32) 295 ncpu = 32; 296 297 nthreads = ncpu; 298 #endif 299 300 free_memory = (unsigned long long) pages * (unsigned long long) psize; 301 available_free_memory = free_memory / 2; 302 303 if (available_free_memory < 1024) 304 available_free_memory = 1024; 305 } 306 307 /* 308 * Convert "plain" symbol to wide symbol, with default value. 309 */ 310 static void 311 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 312 { 313 314 if (wc && c) { 315 int res; 316 317 res = mbtowc(wc, c, MB_CUR_MAX); 318 if (res < 1) 319 *wc = def; 320 } 321 } 322 323 /* 324 * Set current locale symbols. 325 */ 326 static void 327 set_locale(void) 328 { 329 struct lconv *lc; 330 const char *locale; 331 332 setlocale(LC_ALL, ""); 333 334 lc = localeconv(); 335 336 if (lc) { 337 /* obtain LC_NUMERIC info */ 338 /* Convert to wide char form */ 339 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 340 symbol_decimal_point); 341 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 342 symbol_thousands_sep); 343 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 344 symbol_positive_sign); 345 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 346 symbol_negative_sign); 347 } 348 349 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 350 gnusort_numeric_compatibility = true; 351 352 locale = setlocale(LC_COLLATE, NULL); 353 354 if (locale) { 355 char *tmpl; 356 const char *cclocale; 357 358 tmpl = sort_strdup(locale); 359 cclocale = setlocale(LC_COLLATE, "C"); 360 if (cclocale && !strcmp(cclocale, tmpl)) 361 byte_sort = true; 362 else { 363 const char *pclocale; 364 365 pclocale = setlocale(LC_COLLATE, "POSIX"); 366 if (pclocale && !strcmp(pclocale, tmpl)) 367 byte_sort = true; 368 } 369 setlocale(LC_COLLATE, tmpl); 370 sort_free(tmpl); 371 } 372 } 373 374 /* 375 * Set directory temporary files. 376 */ 377 static void 378 set_tmpdir(void) 379 { 380 char *td; 381 382 td = getenv("TMPDIR"); 383 if (td != NULL) 384 tmpdir = sort_strdup(td); 385 } 386 387 /* 388 * Parse -S option. 389 */ 390 static unsigned long long 391 parse_memory_buffer_value(const char *value) 392 { 393 394 if (value == NULL) 395 return (available_free_memory); 396 else { 397 char *endptr; 398 unsigned long long membuf; 399 400 endptr = NULL; 401 errno = 0; 402 membuf = strtoll(value, &endptr, 10); 403 404 if (errno != 0) { 405 warn("%s",getstr(4)); 406 membuf = available_free_memory; 407 } else { 408 switch (*endptr){ 409 case 'Y': 410 membuf *= 1024; 411 /* FALLTHROUGH */ 412 case 'Z': 413 membuf *= 1024; 414 /* FALLTHROUGH */ 415 case 'E': 416 membuf *= 1024; 417 /* FALLTHROUGH */ 418 case 'P': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case 'T': 422 membuf *= 1024; 423 /* FALLTHROUGH */ 424 case 'G': 425 membuf *= 1024; 426 /* FALLTHROUGH */ 427 case 'M': 428 membuf *= 1024; 429 /* FALLTHROUGH */ 430 case '\0': 431 case 'K': 432 membuf *= 1024; 433 /* FALLTHROUGH */ 434 case 'b': 435 break; 436 case '%': 437 membuf = (available_free_memory * membuf) / 438 100; 439 break; 440 default: 441 warnc(EINVAL, "%s", optarg); 442 membuf = available_free_memory; 443 } 444 } 445 return (membuf); 446 } 447 } 448 449 /* 450 * Signal handler that clears the temporary files. 451 */ 452 static void 453 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 454 void *context __unused) 455 { 456 457 clear_tmp_files(); 458 exit(-1); 459 } 460 461 /* 462 * Set signal handler on panic signals. 463 */ 464 static void 465 set_signal_handler(void) 466 { 467 struct sigaction sa; 468 469 memset(&sa, 0, sizeof(sa)); 470 sa.sa_sigaction = &sig_handler; 471 sa.sa_flags = SA_SIGINFO; 472 473 if (sigaction(SIGTERM, &sa, NULL) < 0) { 474 perror("sigaction"); 475 return; 476 } 477 if (sigaction(SIGHUP, &sa, NULL) < 0) { 478 perror("sigaction"); 479 return; 480 } 481 if (sigaction(SIGINT, &sa, NULL) < 0) { 482 perror("sigaction"); 483 return; 484 } 485 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 486 perror("sigaction"); 487 return; 488 } 489 if (sigaction(SIGABRT, &sa, NULL) < 0) { 490 perror("sigaction"); 491 return; 492 } 493 if (sigaction(SIGBUS, &sa, NULL) < 0) { 494 perror("sigaction"); 495 return; 496 } 497 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 498 perror("sigaction"); 499 return; 500 } 501 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 502 perror("sigaction"); 503 return; 504 } 505 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 506 perror("sigaction"); 507 return; 508 } 509 } 510 511 /* 512 * Print "unknown" message and exit with status 2. 513 */ 514 static void 515 unknown(const char *what) 516 { 517 518 errx(2, "%s: %s", getstr(3), what); 519 } 520 521 /* 522 * Check whether contradictory input options are used. 523 */ 524 static void 525 check_mutually_exclusive_flags(char c, bool *mef_flags) 526 { 527 int fo_index, mec; 528 bool found_others, found_this; 529 530 found_others = found_this = false; 531 fo_index = 0; 532 533 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 534 mec = mutually_exclusive_flags[i]; 535 536 if (mec != c) { 537 if (mef_flags[i]) { 538 if (found_this) 539 errx(1, "%c:%c: %s", c, mec, getstr(1)); 540 found_others = true; 541 fo_index = i; 542 } 543 } else { 544 if (found_others) 545 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 546 mef_flags[i] = true; 547 found_this = true; 548 } 549 } 550 } 551 552 /* 553 * Initialise sort opts data. 554 */ 555 static void 556 set_sort_opts(void) 557 { 558 559 memset(&default_sort_mods_object, 0, 560 sizeof(default_sort_mods_object)); 561 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 562 default_sort_mods_object.func = 563 get_sort_func(&default_sort_mods_object); 564 } 565 566 /* 567 * Set a sort modifier on a sort modifiers object. 568 */ 569 static bool 570 set_sort_modifier(struct sort_mods *sm, int c) 571 { 572 573 if (sm) { 574 switch (c){ 575 case 'b': 576 sm->bflag = true; 577 break; 578 case 'd': 579 sm->dflag = true; 580 break; 581 case 'f': 582 sm->fflag = true; 583 break; 584 case 'g': 585 sm->gflag = true; 586 need_hint = true; 587 break; 588 case 'i': 589 sm->iflag = true; 590 break; 591 case 'R': 592 sm->Rflag = true; 593 need_random = true; 594 break; 595 case 'M': 596 initialise_months(); 597 sm->Mflag = true; 598 need_hint = true; 599 break; 600 case 'n': 601 sm->nflag = true; 602 need_hint = true; 603 print_symbols_on_debug = true; 604 break; 605 case 'r': 606 sm->rflag = true; 607 break; 608 case 'V': 609 sm->Vflag = true; 610 break; 611 case 'h': 612 sm->hflag = true; 613 need_hint = true; 614 print_symbols_on_debug = true; 615 break; 616 default: 617 return false; 618 } 619 sort_opts_vals.complex_sort = true; 620 sm->func = get_sort_func(sm); 621 } 622 return (true); 623 } 624 625 /* 626 * Parse POS in -k option. 627 */ 628 static int 629 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 630 { 631 regmatch_t pmatch[4]; 632 regex_t re; 633 char *c, *f; 634 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 635 size_t len, nmatch; 636 int ret; 637 638 ret = -1; 639 nmatch = 4; 640 c = f = NULL; 641 642 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 643 return (-1); 644 645 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 646 goto end; 647 648 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 649 goto end; 650 651 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 652 goto end; 653 654 len = pmatch[1].rm_eo - pmatch[1].rm_so; 655 f = sort_malloc((len + 1) * sizeof(char)); 656 657 strncpy(f, s + pmatch[1].rm_so, len); 658 f[len] = '\0'; 659 660 if (second) { 661 errno = 0; 662 ks->f2 = (size_t) strtoul(f, NULL, 10); 663 if (errno != 0) 664 err(2, "-k"); 665 if (ks->f2 == 0) { 666 warn("%s",getstr(5)); 667 goto end; 668 } 669 } else { 670 errno = 0; 671 ks->f1 = (size_t) strtoul(f, NULL, 10); 672 if (errno != 0) 673 err(2, "-k"); 674 if (ks->f1 == 0) { 675 warn("%s",getstr(5)); 676 goto end; 677 } 678 } 679 680 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 681 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 682 c = sort_malloc((len + 1) * sizeof(char)); 683 684 strncpy(c, s + pmatch[2].rm_so + 1, len); 685 c[len] = '\0'; 686 687 if (second) { 688 errno = 0; 689 ks->c2 = (size_t) strtoul(c, NULL, 10); 690 if (errno != 0) 691 err(2, "-k"); 692 } else { 693 errno = 0; 694 ks->c1 = (size_t) strtoul(c, NULL, 10); 695 if (errno != 0) 696 err(2, "-k"); 697 if (ks->c1 == 0) { 698 warn("%s",getstr(6)); 699 goto end; 700 } 701 } 702 } else { 703 if (second) 704 ks->c2 = 0; 705 else 706 ks->c1 = 1; 707 } 708 709 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 710 regoff_t i = 0; 711 712 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 713 check_mutually_exclusive_flags(s[i], mef_flags); 714 if (s[i] == 'b') { 715 if (second) 716 ks->pos2b = true; 717 else 718 ks->pos1b = true; 719 } else if (!set_sort_modifier(&(ks->sm), s[i])) 720 goto end; 721 } 722 } 723 724 ret = 0; 725 726 end: 727 728 if (c) 729 sort_free(c); 730 if (f) 731 sort_free(f); 732 regfree(&re); 733 734 return (ret); 735 } 736 737 /* 738 * Parse -k option value. 739 */ 740 static int 741 parse_k(const char *s, struct key_specs *ks) 742 { 743 int ret = -1; 744 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 745 { false, false, false, false, false, false }; 746 747 if (s && *s) { 748 char *sptr; 749 750 sptr = strchr(s, ','); 751 if (sptr) { 752 size_t size1; 753 char *pos1, *pos2; 754 755 size1 = sptr - s; 756 757 if (size1 < 1) 758 return (-1); 759 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 760 761 strncpy(pos1, s, size1); 762 pos1[size1] = '\0'; 763 764 ret = parse_pos(pos1, ks, mef_flags, false); 765 766 sort_free(pos1); 767 if (ret < 0) 768 return (ret); 769 770 pos2 = sort_strdup(sptr + 1); 771 ret = parse_pos(pos2, ks, mef_flags, true); 772 sort_free(pos2); 773 } else 774 ret = parse_pos(s, ks, mef_flags, false); 775 } 776 777 return (ret); 778 } 779 780 /* 781 * Parse POS in +POS -POS option. 782 */ 783 static int 784 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 785 { 786 regex_t re; 787 regmatch_t pmatch[4]; 788 char *c, *f; 789 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 790 int ret; 791 size_t len, nmatch; 792 793 ret = -1; 794 nmatch = 4; 795 c = f = NULL; 796 *nc = *nf = 0; 797 798 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 799 return (-1); 800 801 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 802 goto end; 803 804 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 805 goto end; 806 807 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 808 goto end; 809 810 len = pmatch[1].rm_eo - pmatch[1].rm_so; 811 f = sort_malloc((len + 1) * sizeof(char)); 812 813 strncpy(f, s + pmatch[1].rm_so, len); 814 f[len] = '\0'; 815 816 errno = 0; 817 *nf = (size_t) strtoul(f, NULL, 10); 818 if (errno != 0) 819 errx(2, "%s", getstr(11)); 820 821 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 822 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 823 c = sort_malloc((len + 1) * sizeof(char)); 824 825 strncpy(c, s + pmatch[2].rm_so + 1, len); 826 c[len] = '\0'; 827 828 errno = 0; 829 *nc = (size_t) strtoul(c, NULL, 10); 830 if (errno != 0) 831 errx(2, "%s", getstr(11)); 832 } 833 834 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 835 836 len = pmatch[3].rm_eo - pmatch[3].rm_so; 837 838 strncpy(sopts, s + pmatch[3].rm_so, len); 839 sopts[len] = '\0'; 840 } 841 842 ret = 0; 843 844 end: 845 if (c) 846 sort_free(c); 847 if (f) 848 sort_free(f); 849 regfree(&re); 850 851 return (ret); 852 } 853 854 /* 855 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 856 */ 857 void 858 fix_obsolete_keys(int *argc, char **argv) 859 { 860 char sopt[129]; 861 862 for (int i = 1; i < *argc; i++) { 863 char *arg1; 864 865 arg1 = argv[i]; 866 867 if (strlen(arg1) > 1 && arg1[0] == '+') { 868 int c1, f1; 869 char sopts1[128]; 870 871 sopts1[0] = 0; 872 c1 = f1 = 0; 873 874 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 875 continue; 876 else { 877 f1 += 1; 878 c1 += 1; 879 if (i + 1 < *argc) { 880 char *arg2 = argv[i + 1]; 881 882 if (strlen(arg2) > 1 && 883 arg2[0] == '-') { 884 int c2, f2; 885 char sopts2[128]; 886 887 sopts2[0] = 0; 888 c2 = f2 = 0; 889 890 if (parse_pos_obs(arg2 + 1, 891 &f2, &c2, sopts2) >= 0) { 892 if (c2 > 0) 893 f2 += 1; 894 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 895 f1, c1, sopts1, f2, c2, sopts2); 896 argv[i] = sort_strdup(sopt); 897 for (int j = i + 1; j + 1 < *argc; j++) 898 argv[j] = argv[j + 1]; 899 *argc -= 1; 900 continue; 901 } 902 } 903 } 904 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 905 argv[i] = sort_strdup(sopt); 906 } 907 } 908 } 909 } 910 911 /* 912 * Set random seed 913 */ 914 static void 915 set_random_seed(void) 916 { 917 if (need_random) { 918 919 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 920 FILE* fseed; 921 MD5_CTX ctx; 922 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 923 size_t sz = 0; 924 925 fseed = openfile(random_source, "r"); 926 while (!feof(fseed)) { 927 int cr; 928 929 cr = fgetc(fseed); 930 if (cr == EOF) 931 break; 932 933 rsd[sz++] = (char) cr; 934 935 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 936 break; 937 } 938 939 closefile(fseed, random_source); 940 941 MD5Init(&ctx); 942 MD5Update(&ctx, rsd, sz); 943 944 random_seed = MD5End(&ctx, NULL); 945 random_seed_size = strlen(random_seed); 946 947 } else { 948 MD5_CTX ctx; 949 char *b; 950 951 MD5Init(&ctx); 952 b = MD5File(random_source, NULL); 953 if (b == NULL) 954 err(2, NULL); 955 956 random_seed = b; 957 random_seed_size = strlen(b); 958 } 959 960 MD5Init(&md5_ctx); 961 if(random_seed_size>0) { 962 MD5Update(&md5_ctx, random_seed, random_seed_size); 963 } 964 } 965 } 966 967 /* 968 * Main function. 969 */ 970 int 971 main(int argc, char **argv) 972 { 973 char *outfile, *real_outfile; 974 int c, result; 975 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 976 { false, false, false, false, false, false }; 977 978 result = 0; 979 outfile = sort_strdup("-"); 980 real_outfile = NULL; 981 982 struct sort_mods *sm = &default_sort_mods_object; 983 984 init_tmp_files(); 985 986 set_signal_handler(); 987 988 set_hw_params(); 989 set_locale(); 990 set_tmpdir(); 991 set_sort_opts(); 992 993 fix_obsolete_keys(&argc, argv); 994 995 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 996 != -1)) { 997 998 check_mutually_exclusive_flags(c, mef_flags); 999 1000 if (!set_sort_modifier(sm, c)) { 1001 1002 switch (c) { 1003 case 'c': 1004 sort_opts_vals.cflag = true; 1005 if (optarg) { 1006 if (!strcmp(optarg, "diagnose-first")) 1007 ; 1008 else if (!strcmp(optarg, "silent") || 1009 !strcmp(optarg, "quiet")) 1010 sort_opts_vals.csilentflag = true; 1011 else if (*optarg) 1012 unknown(optarg); 1013 } 1014 break; 1015 case 'C': 1016 sort_opts_vals.cflag = true; 1017 sort_opts_vals.csilentflag = true; 1018 break; 1019 case 'k': 1020 { 1021 sort_opts_vals.complex_sort = true; 1022 sort_opts_vals.kflag = true; 1023 1024 keys_num++; 1025 keys = sort_realloc(keys, keys_num * 1026 sizeof(struct key_specs)); 1027 memset(&(keys[keys_num - 1]), 0, 1028 sizeof(struct key_specs)); 1029 1030 if (parse_k(optarg, &(keys[keys_num - 1])) 1031 < 0) { 1032 errc(2, EINVAL, "-k %s", optarg); 1033 } 1034 1035 break; 1036 } 1037 case 'm': 1038 sort_opts_vals.mflag = true; 1039 break; 1040 case 'o': 1041 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1042 strcpy(outfile, optarg); 1043 break; 1044 case 's': 1045 sort_opts_vals.sflag = true; 1046 break; 1047 case 'S': 1048 available_free_memory = 1049 parse_memory_buffer_value(optarg); 1050 break; 1051 case 'T': 1052 tmpdir = sort_strdup(optarg); 1053 break; 1054 case 't': 1055 while (strlen(optarg) > 1) { 1056 if (optarg[0] != '\\') { 1057 errc(2, EINVAL, "%s", optarg); 1058 } 1059 optarg += 1; 1060 if (*optarg == '0') { 1061 *optarg = 0; 1062 break; 1063 } 1064 } 1065 sort_opts_vals.tflag = true; 1066 sort_opts_vals.field_sep = btowc(optarg[0]); 1067 if (sort_opts_vals.field_sep == WEOF) { 1068 errno = EINVAL; 1069 err(2, NULL); 1070 } 1071 if (!gnusort_numeric_compatibility) { 1072 if (symbol_decimal_point == sort_opts_vals.field_sep) 1073 symbol_decimal_point = WEOF; 1074 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1075 symbol_thousands_sep = WEOF; 1076 if (symbol_negative_sign == sort_opts_vals.field_sep) 1077 symbol_negative_sign = WEOF; 1078 if (symbol_positive_sign == sort_opts_vals.field_sep) 1079 symbol_positive_sign = WEOF; 1080 } 1081 break; 1082 case 'u': 1083 sort_opts_vals.uflag = true; 1084 /* stable sort for the correct unique val */ 1085 sort_opts_vals.sflag = true; 1086 break; 1087 case 'z': 1088 sort_opts_vals.zflag = true; 1089 break; 1090 case SORT_OPT: 1091 if (optarg) { 1092 if (!strcmp(optarg, "general-numeric")) 1093 set_sort_modifier(sm, 'g'); 1094 else if (!strcmp(optarg, "human-numeric")) 1095 set_sort_modifier(sm, 'h'); 1096 else if (!strcmp(optarg, "numeric")) 1097 set_sort_modifier(sm, 'n'); 1098 else if (!strcmp(optarg, "month")) 1099 set_sort_modifier(sm, 'M'); 1100 else if (!strcmp(optarg, "random")) 1101 set_sort_modifier(sm, 'R'); 1102 else 1103 unknown(optarg); 1104 } 1105 break; 1106 #if defined(SORT_THREADS) 1107 case PARALLEL_OPT: 1108 nthreads = (size_t)(atoi(optarg)); 1109 if (nthreads < 1) 1110 nthreads = 1; 1111 if (nthreads > 1024) 1112 nthreads = 1024; 1113 break; 1114 #endif 1115 case QSORT_OPT: 1116 sort_opts_vals.sort_method = SORT_QSORT; 1117 break; 1118 case MERGESORT_OPT: 1119 sort_opts_vals.sort_method = SORT_MERGESORT; 1120 break; 1121 case MMAP_OPT: 1122 use_mmap = true; 1123 break; 1124 case HEAPSORT_OPT: 1125 sort_opts_vals.sort_method = SORT_HEAPSORT; 1126 break; 1127 case RADIXSORT_OPT: 1128 sort_opts_vals.sort_method = SORT_RADIXSORT; 1129 break; 1130 case RANDOMSOURCE_OPT: 1131 random_source = strdup(optarg); 1132 break; 1133 case COMPRESSPROGRAM_OPT: 1134 compress_program = strdup(optarg); 1135 break; 1136 case FF_OPT: 1137 read_fns_from_file0(optarg); 1138 break; 1139 case BS_OPT: 1140 { 1141 errno = 0; 1142 long mof = strtol(optarg, NULL, 10); 1143 if (errno != 0) 1144 err(2, "--batch-size"); 1145 if (mof >= 2) 1146 max_open_files = (size_t) mof + 1; 1147 } 1148 break; 1149 case VERSION_OPT: 1150 printf("%s\n", VERSION); 1151 exit(EXIT_SUCCESS); 1152 /* NOTREACHED */ 1153 break; 1154 case DEBUG_OPT: 1155 debug_sort = true; 1156 break; 1157 case HELP_OPT: 1158 usage(false); 1159 /* NOTREACHED */ 1160 break; 1161 default: 1162 usage(true); 1163 /* NOTREACHED */ 1164 } 1165 } 1166 } 1167 1168 argc -= optind; 1169 argv += optind; 1170 1171 #ifndef WITHOUT_NLS 1172 catalog = catopen("sort", NL_CAT_LOCALE); 1173 #endif 1174 1175 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1176 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1177 1178 #ifndef WITHOUT_NLS 1179 catclose(catalog); 1180 #endif 1181 1182 if (keys_num == 0) { 1183 keys_num = 1; 1184 keys = sort_realloc(keys, sizeof(struct key_specs)); 1185 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1186 keys[0].c1 = 1; 1187 keys[0].pos1b = default_sort_mods->bflag; 1188 keys[0].pos2b = default_sort_mods->bflag; 1189 memcpy(&(keys[0].sm), default_sort_mods, 1190 sizeof(struct sort_mods)); 1191 } 1192 1193 for (size_t i = 0; i < keys_num; i++) { 1194 struct key_specs *ks; 1195 1196 ks = &(keys[i]); 1197 1198 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1199 !(ks->pos2b)) { 1200 ks->pos1b = sm->bflag; 1201 ks->pos2b = sm->bflag; 1202 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1203 } 1204 1205 ks->sm.func = get_sort_func(&(ks->sm)); 1206 } 1207 1208 if (argv_from_file0) { 1209 argc = argc_from_file0; 1210 argv = argv_from_file0; 1211 } 1212 1213 if (debug_sort) { 1214 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1215 #if defined(SORT_THREADS) 1216 printf("Number of CPUs: %d\n",(int)ncpu); 1217 nthreads = 1; 1218 #endif 1219 printf("Using collate rules of %s locale\n", 1220 setlocale(LC_COLLATE, NULL)); 1221 if (byte_sort) 1222 printf("Byte sort is used\n"); 1223 if (print_symbols_on_debug) { 1224 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1225 if (symbol_thousands_sep) 1226 printf("Thousands separator: <%lc>\n", 1227 symbol_thousands_sep); 1228 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1229 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1230 } 1231 } 1232 1233 set_random_seed(); 1234 1235 /* Case when the outfile equals one of the input files: */ 1236 if (strcmp(outfile, "-")) { 1237 1238 for(int i = 0; i < argc; ++i) { 1239 if (strcmp(argv[i], outfile) == 0) { 1240 real_outfile = sort_strdup(outfile); 1241 for(;;) { 1242 char* tmp = sort_malloc(strlen(outfile) + 1243 strlen(".tmp") + 1); 1244 1245 strcpy(tmp, outfile); 1246 strcpy(tmp + strlen(tmp), ".tmp"); 1247 sort_free(outfile); 1248 outfile = tmp; 1249 if (access(outfile, F_OK) < 0) 1250 break; 1251 } 1252 tmp_file_atexit(outfile); 1253 } 1254 } 1255 } 1256 1257 #if defined(SORT_THREADS) 1258 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1259 nthreads = 1; 1260 #endif 1261 1262 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1263 struct file_list fl; 1264 struct sort_list list; 1265 1266 sort_list_init(&list); 1267 file_list_init(&fl, true); 1268 1269 if (argc < 1) 1270 procfile("-", &list, &fl); 1271 else { 1272 while (argc > 0) { 1273 procfile(*argv, &list, &fl); 1274 --argc; 1275 ++argv; 1276 } 1277 } 1278 1279 if (fl.count < 1) 1280 sort_list_to_file(&list, outfile); 1281 else { 1282 if (list.count > 0) { 1283 char *flast = new_tmp_file_name(); 1284 1285 sort_list_to_file(&list, flast); 1286 file_list_add(&fl, flast, false); 1287 } 1288 merge_files(&fl, outfile); 1289 } 1290 1291 file_list_clean(&fl); 1292 1293 /* 1294 * We are about to exit the program, so we can ignore 1295 * the clean-up for speed 1296 * 1297 * sort_list_clean(&list); 1298 */ 1299 1300 } else if (sort_opts_vals.cflag) { 1301 result = (argc == 0) ? (check("-")) : (check(*argv)); 1302 } else if (sort_opts_vals.mflag) { 1303 struct file_list fl; 1304 1305 file_list_init(&fl, false); 1306 file_list_populate(&fl, argc, argv, true); 1307 merge_files(&fl, outfile); 1308 file_list_clean(&fl); 1309 } 1310 1311 if (real_outfile) { 1312 unlink(real_outfile); 1313 if (rename(outfile, real_outfile) < 0) 1314 err(2, NULL); 1315 sort_free(real_outfile); 1316 } 1317 1318 sort_free(outfile); 1319 1320 return (result); 1321 } 1322