1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <md5.h> 41 #include <regex.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <wchar.h> 49 #include <wctype.h> 50 51 #include "coll.h" 52 #include "file.h" 53 #include "sort.h" 54 55 #ifndef WITHOUT_NLS 56 #include <nl_types.h> 57 nl_catd catalog; 58 #endif 59 60 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 61 62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 64 65 static bool need_random; 66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 67 static const void *random_seed; 68 static size_t random_seed_size; 69 70 MD5_CTX md5_ctx; 71 72 /* 73 * Default messages to use when NLS is disabled or no catalogue 74 * is found. 75 */ 76 const char *nlsstr[] = { "", 77 /* 1*/"mutually exclusive flags", 78 /* 2*/"extra argument not allowed with -c", 79 /* 3*/"Unknown feature", 80 /* 4*/"Wrong memory buffer specification", 81 /* 5*/"0 field in key specs", 82 /* 6*/"0 column in key specs", 83 /* 7*/"Wrong file mode", 84 /* 8*/"Cannot open file for reading", 85 /* 9*/"Radix sort cannot be used with these sort options", 86 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 87 /*11*/"Invalid key position", 88 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 89 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 90 "[-o outfile] [--batch-size size] [--files0-from file] " 91 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 92 "[--mmap] " 93 #if defined(SORT_THREADS) 94 "[--parallel thread_no] " 95 #endif 96 "[--human-numeric-sort] " 97 "[--version-sort] [--random-sort [--random-source file]] " 98 "[--compress-program program] [file ...]\n" }; 99 100 struct sort_opts sort_opts_vals; 101 102 bool debug_sort; 103 bool need_hint; 104 105 #if defined(SORT_THREADS) 106 unsigned int ncpu = 1; 107 size_t nthreads = 1; 108 #endif 109 110 static bool gnusort_numeric_compatibility; 111 112 static struct sort_mods default_sort_mods_object; 113 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 114 115 static bool print_symbols_on_debug; 116 117 /* 118 * Arguments from file (when file0-from option is used: 119 */ 120 static size_t argc_from_file0 = (size_t)-1; 121 static char **argv_from_file0; 122 123 /* 124 * Placeholder symbols for options which have no single-character equivalent 125 */ 126 enum 127 { 128 SORT_OPT = CHAR_MAX + 1, 129 HELP_OPT, 130 FF_OPT, 131 BS_OPT, 132 VERSION_OPT, 133 DEBUG_OPT, 134 #if defined(SORT_THREADS) 135 PARALLEL_OPT, 136 #endif 137 RANDOMSOURCE_OPT, 138 COMPRESSPROGRAM_OPT, 139 QSORT_OPT, 140 MERGESORT_OPT, 141 HEAPSORT_OPT, 142 RADIXSORT_OPT, 143 MMAP_OPT 144 }; 145 146 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 147 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 148 149 static struct option long_options[] = { 150 { "batch-size", required_argument, NULL, BS_OPT }, 151 { "buffer-size", required_argument, NULL, 'S' }, 152 { "check", optional_argument, NULL, 'c' }, 153 { "check=silent|quiet", optional_argument, NULL, 'C' }, 154 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 155 { "debug", no_argument, NULL, DEBUG_OPT }, 156 { "dictionary-order", no_argument, NULL, 'd' }, 157 { "field-separator", required_argument, NULL, 't' }, 158 { "files0-from", required_argument, NULL, FF_OPT }, 159 { "general-numeric-sort", no_argument, NULL, 'g' }, 160 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 161 { "help",no_argument, NULL, HELP_OPT }, 162 { "human-numeric-sort", no_argument, NULL, 'h' }, 163 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 164 { "ignore-case", no_argument, NULL, 'f' }, 165 { "ignore-nonprinting", no_argument, NULL, 'i' }, 166 { "key", required_argument, NULL, 'k' }, 167 { "merge", no_argument, NULL, 'm' }, 168 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 169 { "mmap", no_argument, NULL, MMAP_OPT }, 170 { "month-sort", no_argument, NULL, 'M' }, 171 { "numeric-sort", no_argument, NULL, 'n' }, 172 { "output", required_argument, NULL, 'o' }, 173 #if defined(SORT_THREADS) 174 { "parallel", required_argument, NULL, PARALLEL_OPT }, 175 #endif 176 { "qsort", no_argument, NULL, QSORT_OPT }, 177 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 178 { "random-sort", no_argument, NULL, 'R' }, 179 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 180 { "reverse", no_argument, NULL, 'r' }, 181 { "sort", required_argument, NULL, SORT_OPT }, 182 { "stable", no_argument, NULL, 's' }, 183 { "temporary-directory",required_argument, NULL, 'T' }, 184 { "unique", no_argument, NULL, 'u' }, 185 { "version", no_argument, NULL, VERSION_OPT }, 186 { "version-sort",no_argument, NULL, 'V' }, 187 { "zero-terminated", no_argument, NULL, 'z' }, 188 { NULL, no_argument, NULL, 0 } 189 }; 190 191 void fix_obsolete_keys(int *argc, char **argv); 192 193 /* 194 * Check where sort modifier is present 195 */ 196 static bool 197 sort_modifier_empty(struct sort_mods *sm) 198 { 199 200 if (sm == NULL) 201 return (true); 202 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 203 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 204 } 205 206 /* 207 * Print out usage text. 208 */ 209 static void 210 usage(bool opt_err) 211 { 212 FILE *out; 213 214 out = opt_err ? stderr : stdout; 215 216 fprintf(out, getstr(12), getprogname()); 217 if (opt_err) 218 exit(2); 219 exit(0); 220 } 221 222 /* 223 * Read input file names from a file (file0-from option). 224 */ 225 static void 226 read_fns_from_file0(const char *fn) 227 { 228 FILE *f; 229 char *line = NULL; 230 size_t linesize = 0; 231 ssize_t linelen; 232 233 if (fn == NULL) 234 return; 235 236 f = fopen(fn, "r"); 237 if (f == NULL) 238 err(2, "%s", fn); 239 240 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 241 if (*line != '\0') { 242 if (argc_from_file0 == (size_t) - 1) 243 argc_from_file0 = 0; 244 ++argc_from_file0; 245 argv_from_file0 = sort_realloc(argv_from_file0, 246 argc_from_file0 * sizeof(char *)); 247 if (argv_from_file0 == NULL) 248 err(2, NULL); 249 argv_from_file0[argc_from_file0 - 1] = line; 250 } else { 251 free(line); 252 } 253 line = NULL; 254 linesize = 0; 255 } 256 if (ferror(f)) 257 err(2, "%s: getdelim", fn); 258 259 closefile(f, fn); 260 } 261 262 /* 263 * Check how much RAM is available for the sort. 264 */ 265 static void 266 set_hw_params(void) 267 { 268 long pages, psize; 269 270 #if defined(SORT_THREADS) 271 ncpu = 1; 272 #endif 273 274 pages = sysconf(_SC_PHYS_PAGES); 275 if (pages < 1) { 276 perror("sysconf pages"); 277 pages = 1; 278 } 279 psize = sysconf(_SC_PAGESIZE); 280 if (psize < 1) { 281 perror("sysconf psize"); 282 psize = 4096; 283 } 284 #if defined(SORT_THREADS) 285 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 286 if (ncpu < 1) 287 ncpu = 1; 288 else if(ncpu > 32) 289 ncpu = 32; 290 291 nthreads = ncpu; 292 #endif 293 294 free_memory = (unsigned long long) pages * (unsigned long long) psize; 295 available_free_memory = free_memory / 2; 296 297 if (available_free_memory < 1024) 298 available_free_memory = 1024; 299 } 300 301 /* 302 * Convert "plain" symbol to wide symbol, with default value. 303 */ 304 static void 305 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 306 { 307 308 if (wc && c) { 309 int res; 310 311 res = mbtowc(wc, c, MB_CUR_MAX); 312 if (res < 1) 313 *wc = def; 314 } 315 } 316 317 /* 318 * Set current locale symbols. 319 */ 320 static void 321 set_locale(void) 322 { 323 struct lconv *lc; 324 const char *locale; 325 326 setlocale(LC_ALL, ""); 327 328 lc = localeconv(); 329 330 if (lc) { 331 /* obtain LC_NUMERIC info */ 332 /* Convert to wide char form */ 333 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 334 symbol_decimal_point); 335 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 336 symbol_thousands_sep); 337 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 338 symbol_positive_sign); 339 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 340 symbol_negative_sign); 341 } 342 343 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 344 gnusort_numeric_compatibility = true; 345 346 locale = setlocale(LC_COLLATE, NULL); 347 348 if (locale) { 349 char *tmpl; 350 const char *cclocale; 351 352 tmpl = sort_strdup(locale); 353 cclocale = setlocale(LC_COLLATE, "C"); 354 if (cclocale && !strcmp(cclocale, tmpl)) 355 byte_sort = true; 356 else { 357 const char *pclocale; 358 359 pclocale = setlocale(LC_COLLATE, "POSIX"); 360 if (pclocale && !strcmp(pclocale, tmpl)) 361 byte_sort = true; 362 } 363 setlocale(LC_COLLATE, tmpl); 364 sort_free(tmpl); 365 } 366 } 367 368 /* 369 * Set directory temporary files. 370 */ 371 static void 372 set_tmpdir(void) 373 { 374 char *td; 375 376 td = getenv("TMPDIR"); 377 if (td != NULL) 378 tmpdir = sort_strdup(td); 379 } 380 381 /* 382 * Parse -S option. 383 */ 384 static unsigned long long 385 parse_memory_buffer_value(const char *value) 386 { 387 388 if (value == NULL) 389 return (available_free_memory); 390 else { 391 char *endptr; 392 unsigned long long membuf; 393 394 endptr = NULL; 395 errno = 0; 396 membuf = strtoll(value, &endptr, 10); 397 398 if (errno != 0) { 399 warn("%s",getstr(4)); 400 membuf = available_free_memory; 401 } else { 402 switch (*endptr){ 403 case 'Y': 404 membuf *= 1024; 405 /* FALLTHROUGH */ 406 case 'Z': 407 membuf *= 1024; 408 /* FALLTHROUGH */ 409 case 'E': 410 membuf *= 1024; 411 /* FALLTHROUGH */ 412 case 'P': 413 membuf *= 1024; 414 /* FALLTHROUGH */ 415 case 'T': 416 membuf *= 1024; 417 /* FALLTHROUGH */ 418 case 'G': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case 'M': 422 membuf *= 1024; 423 /* FALLTHROUGH */ 424 case '\0': 425 case 'K': 426 membuf *= 1024; 427 /* FALLTHROUGH */ 428 case 'b': 429 break; 430 case '%': 431 membuf = (available_free_memory * membuf) / 432 100; 433 break; 434 default: 435 warnc(EINVAL, "%s", optarg); 436 membuf = available_free_memory; 437 } 438 } 439 return (membuf); 440 } 441 } 442 443 /* 444 * Signal handler that clears the temporary files. 445 */ 446 static void 447 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 448 void *context __unused) 449 { 450 451 clear_tmp_files(); 452 exit(-1); 453 } 454 455 /* 456 * Set signal handler on panic signals. 457 */ 458 static void 459 set_signal_handler(void) 460 { 461 struct sigaction sa; 462 463 memset(&sa, 0, sizeof(sa)); 464 sa.sa_sigaction = &sig_handler; 465 sa.sa_flags = SA_SIGINFO; 466 467 if (sigaction(SIGTERM, &sa, NULL) < 0) { 468 perror("sigaction"); 469 return; 470 } 471 if (sigaction(SIGHUP, &sa, NULL) < 0) { 472 perror("sigaction"); 473 return; 474 } 475 if (sigaction(SIGINT, &sa, NULL) < 0) { 476 perror("sigaction"); 477 return; 478 } 479 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 480 perror("sigaction"); 481 return; 482 } 483 if (sigaction(SIGABRT, &sa, NULL) < 0) { 484 perror("sigaction"); 485 return; 486 } 487 if (sigaction(SIGBUS, &sa, NULL) < 0) { 488 perror("sigaction"); 489 return; 490 } 491 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 492 perror("sigaction"); 493 return; 494 } 495 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 496 perror("sigaction"); 497 return; 498 } 499 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 500 perror("sigaction"); 501 return; 502 } 503 } 504 505 /* 506 * Print "unknown" message and exit with status 2. 507 */ 508 static void 509 unknown(const char *what) 510 { 511 512 errx(2, "%s: %s", getstr(3), what); 513 } 514 515 /* 516 * Check whether contradictory input options are used. 517 */ 518 static void 519 check_mutually_exclusive_flags(char c, bool *mef_flags) 520 { 521 int fo_index, mec; 522 bool found_others, found_this; 523 524 found_others = found_this = false; 525 fo_index = 0; 526 527 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 528 mec = mutually_exclusive_flags[i]; 529 530 if (mec != c) { 531 if (mef_flags[i]) { 532 if (found_this) 533 errx(1, "%c:%c: %s", c, mec, getstr(1)); 534 found_others = true; 535 fo_index = i; 536 } 537 } else { 538 if (found_others) 539 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 540 mef_flags[i] = true; 541 found_this = true; 542 } 543 } 544 } 545 546 /* 547 * Initialise sort opts data. 548 */ 549 static void 550 set_sort_opts(void) 551 { 552 553 memset(&default_sort_mods_object, 0, 554 sizeof(default_sort_mods_object)); 555 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 556 default_sort_mods_object.func = 557 get_sort_func(&default_sort_mods_object); 558 } 559 560 /* 561 * Set a sort modifier on a sort modifiers object. 562 */ 563 static bool 564 set_sort_modifier(struct sort_mods *sm, int c) 565 { 566 567 if (sm) { 568 switch (c){ 569 case 'b': 570 sm->bflag = true; 571 break; 572 case 'd': 573 sm->dflag = true; 574 break; 575 case 'f': 576 sm->fflag = true; 577 break; 578 case 'g': 579 sm->gflag = true; 580 need_hint = true; 581 break; 582 case 'i': 583 sm->iflag = true; 584 break; 585 case 'R': 586 sm->Rflag = true; 587 need_random = true; 588 break; 589 case 'M': 590 initialise_months(); 591 sm->Mflag = true; 592 need_hint = true; 593 break; 594 case 'n': 595 sm->nflag = true; 596 need_hint = true; 597 print_symbols_on_debug = true; 598 break; 599 case 'r': 600 sm->rflag = true; 601 break; 602 case 'V': 603 sm->Vflag = true; 604 break; 605 case 'h': 606 sm->hflag = true; 607 need_hint = true; 608 print_symbols_on_debug = true; 609 break; 610 default: 611 return false; 612 } 613 sort_opts_vals.complex_sort = true; 614 sm->func = get_sort_func(sm); 615 } 616 return (true); 617 } 618 619 /* 620 * Parse POS in -k option. 621 */ 622 static int 623 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 624 { 625 regmatch_t pmatch[4]; 626 regex_t re; 627 char *c, *f; 628 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 629 size_t len, nmatch; 630 int ret; 631 632 ret = -1; 633 nmatch = 4; 634 c = f = NULL; 635 636 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 637 return (-1); 638 639 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 640 goto end; 641 642 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 643 goto end; 644 645 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 646 goto end; 647 648 len = pmatch[1].rm_eo - pmatch[1].rm_so; 649 f = sort_malloc((len + 1) * sizeof(char)); 650 651 strncpy(f, s + pmatch[1].rm_so, len); 652 f[len] = '\0'; 653 654 if (second) { 655 errno = 0; 656 ks->f2 = (size_t) strtoul(f, NULL, 10); 657 if (errno != 0) 658 err(2, "-k"); 659 if (ks->f2 == 0) { 660 warn("%s",getstr(5)); 661 goto end; 662 } 663 } else { 664 errno = 0; 665 ks->f1 = (size_t) strtoul(f, NULL, 10); 666 if (errno != 0) 667 err(2, "-k"); 668 if (ks->f1 == 0) { 669 warn("%s",getstr(5)); 670 goto end; 671 } 672 } 673 674 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 675 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 676 c = sort_malloc((len + 1) * sizeof(char)); 677 678 strncpy(c, s + pmatch[2].rm_so + 1, len); 679 c[len] = '\0'; 680 681 if (second) { 682 errno = 0; 683 ks->c2 = (size_t) strtoul(c, NULL, 10); 684 if (errno != 0) 685 err(2, "-k"); 686 } else { 687 errno = 0; 688 ks->c1 = (size_t) strtoul(c, NULL, 10); 689 if (errno != 0) 690 err(2, "-k"); 691 if (ks->c1 == 0) { 692 warn("%s",getstr(6)); 693 goto end; 694 } 695 } 696 } else { 697 if (second) 698 ks->c2 = 0; 699 else 700 ks->c1 = 1; 701 } 702 703 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 704 regoff_t i = 0; 705 706 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 707 check_mutually_exclusive_flags(s[i], mef_flags); 708 if (s[i] == 'b') { 709 if (second) 710 ks->pos2b = true; 711 else 712 ks->pos1b = true; 713 } else if (!set_sort_modifier(&(ks->sm), s[i])) 714 goto end; 715 } 716 } 717 718 ret = 0; 719 720 end: 721 722 if (c) 723 sort_free(c); 724 if (f) 725 sort_free(f); 726 regfree(&re); 727 728 return (ret); 729 } 730 731 /* 732 * Parse -k option value. 733 */ 734 static int 735 parse_k(const char *s, struct key_specs *ks) 736 { 737 int ret = -1; 738 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 739 { false, false, false, false, false, false }; 740 741 if (s && *s) { 742 char *sptr; 743 744 sptr = strchr(s, ','); 745 if (sptr) { 746 size_t size1; 747 char *pos1, *pos2; 748 749 size1 = sptr - s; 750 751 if (size1 < 1) 752 return (-1); 753 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 754 755 strncpy(pos1, s, size1); 756 pos1[size1] = '\0'; 757 758 ret = parse_pos(pos1, ks, mef_flags, false); 759 760 sort_free(pos1); 761 if (ret < 0) 762 return (ret); 763 764 pos2 = sort_strdup(sptr + 1); 765 ret = parse_pos(pos2, ks, mef_flags, true); 766 sort_free(pos2); 767 } else 768 ret = parse_pos(s, ks, mef_flags, false); 769 } 770 771 return (ret); 772 } 773 774 /* 775 * Parse POS in +POS -POS option. 776 */ 777 static int 778 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 779 { 780 regex_t re; 781 regmatch_t pmatch[4]; 782 char *c, *f; 783 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 784 int ret; 785 size_t len, nmatch; 786 787 ret = -1; 788 nmatch = 4; 789 c = f = NULL; 790 *nc = *nf = 0; 791 792 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 793 return (-1); 794 795 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 796 goto end; 797 798 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 799 goto end; 800 801 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 802 goto end; 803 804 len = pmatch[1].rm_eo - pmatch[1].rm_so; 805 f = sort_malloc((len + 1) * sizeof(char)); 806 807 strncpy(f, s + pmatch[1].rm_so, len); 808 f[len] = '\0'; 809 810 errno = 0; 811 *nf = (size_t) strtoul(f, NULL, 10); 812 if (errno != 0) 813 errx(2, "%s", getstr(11)); 814 815 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 816 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 817 c = sort_malloc((len + 1) * sizeof(char)); 818 819 strncpy(c, s + pmatch[2].rm_so + 1, len); 820 c[len] = '\0'; 821 822 errno = 0; 823 *nc = (size_t) strtoul(c, NULL, 10); 824 if (errno != 0) 825 errx(2, "%s", getstr(11)); 826 } 827 828 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 829 830 len = pmatch[3].rm_eo - pmatch[3].rm_so; 831 832 strncpy(sopts, s + pmatch[3].rm_so, len); 833 sopts[len] = '\0'; 834 } 835 836 ret = 0; 837 838 end: 839 if (c) 840 sort_free(c); 841 if (f) 842 sort_free(f); 843 regfree(&re); 844 845 return (ret); 846 } 847 848 /* 849 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 850 */ 851 void 852 fix_obsolete_keys(int *argc, char **argv) 853 { 854 char sopt[129]; 855 856 for (int i = 1; i < *argc; i++) { 857 char *arg1; 858 859 arg1 = argv[i]; 860 861 if (strlen(arg1) > 1 && arg1[0] == '+') { 862 int c1, f1; 863 char sopts1[128]; 864 865 sopts1[0] = 0; 866 c1 = f1 = 0; 867 868 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 869 continue; 870 else { 871 f1 += 1; 872 c1 += 1; 873 if (i + 1 < *argc) { 874 char *arg2 = argv[i + 1]; 875 876 if (strlen(arg2) > 1 && 877 arg2[0] == '-') { 878 int c2, f2; 879 char sopts2[128]; 880 881 sopts2[0] = 0; 882 c2 = f2 = 0; 883 884 if (parse_pos_obs(arg2 + 1, 885 &f2, &c2, sopts2) >= 0) { 886 if (c2 > 0) 887 f2 += 1; 888 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 889 f1, c1, sopts1, f2, c2, sopts2); 890 argv[i] = sort_strdup(sopt); 891 for (int j = i + 1; j + 1 < *argc; j++) 892 argv[j] = argv[j + 1]; 893 *argc -= 1; 894 continue; 895 } 896 } 897 } 898 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 899 argv[i] = sort_strdup(sopt); 900 } 901 } 902 } 903 } 904 905 /* 906 * Set random seed 907 */ 908 static void 909 set_random_seed(void) 910 { 911 if (need_random) { 912 913 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 914 FILE* fseed; 915 MD5_CTX ctx; 916 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 917 size_t sz = 0; 918 919 fseed = openfile(random_source, "r"); 920 while (!feof(fseed)) { 921 int cr; 922 923 cr = fgetc(fseed); 924 if (cr == EOF) 925 break; 926 927 rsd[sz++] = (char) cr; 928 929 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 930 break; 931 } 932 933 closefile(fseed, random_source); 934 935 MD5Init(&ctx); 936 MD5Update(&ctx, rsd, sz); 937 938 random_seed = MD5End(&ctx, NULL); 939 random_seed_size = strlen(random_seed); 940 941 } else { 942 MD5_CTX ctx; 943 char *b; 944 945 MD5Init(&ctx); 946 b = MD5File(random_source, NULL); 947 if (b == NULL) 948 err(2, NULL); 949 950 random_seed = b; 951 random_seed_size = strlen(b); 952 } 953 954 MD5Init(&md5_ctx); 955 if(random_seed_size>0) { 956 MD5Update(&md5_ctx, random_seed, random_seed_size); 957 } 958 } 959 } 960 961 /* 962 * Main function. 963 */ 964 int 965 main(int argc, char **argv) 966 { 967 char *outfile, *real_outfile; 968 int c, result; 969 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 970 { false, false, false, false, false, false }; 971 972 result = 0; 973 outfile = sort_strdup("-"); 974 real_outfile = NULL; 975 976 struct sort_mods *sm = &default_sort_mods_object; 977 978 init_tmp_files(); 979 980 set_signal_handler(); 981 982 set_hw_params(); 983 set_locale(); 984 set_tmpdir(); 985 set_sort_opts(); 986 987 fix_obsolete_keys(&argc, argv); 988 989 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 990 != -1)) { 991 992 check_mutually_exclusive_flags(c, mef_flags); 993 994 if (!set_sort_modifier(sm, c)) { 995 996 switch (c) { 997 case 'c': 998 sort_opts_vals.cflag = true; 999 if (optarg) { 1000 if (!strcmp(optarg, "diagnose-first")) 1001 ; 1002 else if (!strcmp(optarg, "silent") || 1003 !strcmp(optarg, "quiet")) 1004 sort_opts_vals.csilentflag = true; 1005 else if (*optarg) 1006 unknown(optarg); 1007 } 1008 break; 1009 case 'C': 1010 sort_opts_vals.cflag = true; 1011 sort_opts_vals.csilentflag = true; 1012 break; 1013 case 'k': 1014 { 1015 sort_opts_vals.complex_sort = true; 1016 sort_opts_vals.kflag = true; 1017 1018 keys_num++; 1019 keys = sort_realloc(keys, keys_num * 1020 sizeof(struct key_specs)); 1021 memset(&(keys[keys_num - 1]), 0, 1022 sizeof(struct key_specs)); 1023 1024 if (parse_k(optarg, &(keys[keys_num - 1])) 1025 < 0) { 1026 errc(2, EINVAL, "-k %s", optarg); 1027 } 1028 1029 break; 1030 } 1031 case 'm': 1032 sort_opts_vals.mflag = true; 1033 break; 1034 case 'o': 1035 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1036 strcpy(outfile, optarg); 1037 break; 1038 case 's': 1039 sort_opts_vals.sflag = true; 1040 break; 1041 case 'S': 1042 available_free_memory = 1043 parse_memory_buffer_value(optarg); 1044 break; 1045 case 'T': 1046 tmpdir = sort_strdup(optarg); 1047 break; 1048 case 't': 1049 while (strlen(optarg) > 1) { 1050 if (optarg[0] != '\\') { 1051 errc(2, EINVAL, "%s", optarg); 1052 } 1053 optarg += 1; 1054 if (*optarg == '0') { 1055 *optarg = 0; 1056 break; 1057 } 1058 } 1059 sort_opts_vals.tflag = true; 1060 sort_opts_vals.field_sep = btowc(optarg[0]); 1061 if (sort_opts_vals.field_sep == WEOF) { 1062 errno = EINVAL; 1063 err(2, NULL); 1064 } 1065 if (!gnusort_numeric_compatibility) { 1066 if (symbol_decimal_point == sort_opts_vals.field_sep) 1067 symbol_decimal_point = WEOF; 1068 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1069 symbol_thousands_sep = WEOF; 1070 if (symbol_negative_sign == sort_opts_vals.field_sep) 1071 symbol_negative_sign = WEOF; 1072 if (symbol_positive_sign == sort_opts_vals.field_sep) 1073 symbol_positive_sign = WEOF; 1074 } 1075 break; 1076 case 'u': 1077 sort_opts_vals.uflag = true; 1078 /* stable sort for the correct unique val */ 1079 sort_opts_vals.sflag = true; 1080 break; 1081 case 'z': 1082 sort_opts_vals.zflag = true; 1083 break; 1084 case SORT_OPT: 1085 if (optarg) { 1086 if (!strcmp(optarg, "general-numeric")) 1087 set_sort_modifier(sm, 'g'); 1088 else if (!strcmp(optarg, "human-numeric")) 1089 set_sort_modifier(sm, 'h'); 1090 else if (!strcmp(optarg, "numeric")) 1091 set_sort_modifier(sm, 'n'); 1092 else if (!strcmp(optarg, "month")) 1093 set_sort_modifier(sm, 'M'); 1094 else if (!strcmp(optarg, "random")) 1095 set_sort_modifier(sm, 'R'); 1096 else 1097 unknown(optarg); 1098 } 1099 break; 1100 #if defined(SORT_THREADS) 1101 case PARALLEL_OPT: 1102 nthreads = (size_t)(atoi(optarg)); 1103 if (nthreads < 1) 1104 nthreads = 1; 1105 if (nthreads > 1024) 1106 nthreads = 1024; 1107 break; 1108 #endif 1109 case QSORT_OPT: 1110 sort_opts_vals.sort_method = SORT_QSORT; 1111 break; 1112 case MERGESORT_OPT: 1113 sort_opts_vals.sort_method = SORT_MERGESORT; 1114 break; 1115 case MMAP_OPT: 1116 use_mmap = true; 1117 break; 1118 case HEAPSORT_OPT: 1119 sort_opts_vals.sort_method = SORT_HEAPSORT; 1120 break; 1121 case RADIXSORT_OPT: 1122 sort_opts_vals.sort_method = SORT_RADIXSORT; 1123 break; 1124 case RANDOMSOURCE_OPT: 1125 random_source = strdup(optarg); 1126 break; 1127 case COMPRESSPROGRAM_OPT: 1128 compress_program = strdup(optarg); 1129 break; 1130 case FF_OPT: 1131 read_fns_from_file0(optarg); 1132 break; 1133 case BS_OPT: 1134 { 1135 errno = 0; 1136 long mof = strtol(optarg, NULL, 10); 1137 if (errno != 0) 1138 err(2, "--batch-size"); 1139 if (mof >= 2) 1140 max_open_files = (size_t) mof + 1; 1141 } 1142 break; 1143 case VERSION_OPT: 1144 printf("%s\n", VERSION); 1145 exit(EXIT_SUCCESS); 1146 /* NOTREACHED */ 1147 break; 1148 case DEBUG_OPT: 1149 debug_sort = true; 1150 break; 1151 case HELP_OPT: 1152 usage(false); 1153 /* NOTREACHED */ 1154 break; 1155 default: 1156 usage(true); 1157 /* NOTREACHED */ 1158 } 1159 } 1160 } 1161 1162 argc -= optind; 1163 argv += optind; 1164 1165 if (argv_from_file0) { 1166 argc = argc_from_file0; 1167 argv = argv_from_file0; 1168 } 1169 1170 #ifndef WITHOUT_NLS 1171 catalog = catopen("sort", NL_CAT_LOCALE); 1172 #endif 1173 1174 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1175 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1176 1177 #ifndef WITHOUT_NLS 1178 catclose(catalog); 1179 #endif 1180 1181 if (keys_num == 0) { 1182 keys_num = 1; 1183 keys = sort_realloc(keys, sizeof(struct key_specs)); 1184 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1185 keys[0].c1 = 1; 1186 keys[0].pos1b = default_sort_mods->bflag; 1187 keys[0].pos2b = default_sort_mods->bflag; 1188 memcpy(&(keys[0].sm), default_sort_mods, 1189 sizeof(struct sort_mods)); 1190 } 1191 1192 for (size_t i = 0; i < keys_num; i++) { 1193 struct key_specs *ks; 1194 1195 ks = &(keys[i]); 1196 1197 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1198 !(ks->pos2b)) { 1199 ks->pos1b = sm->bflag; 1200 ks->pos2b = sm->bflag; 1201 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1202 } 1203 1204 ks->sm.func = get_sort_func(&(ks->sm)); 1205 } 1206 1207 if (debug_sort) { 1208 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1209 #if defined(SORT_THREADS) 1210 printf("Number of CPUs: %d\n",(int)ncpu); 1211 nthreads = 1; 1212 #endif 1213 printf("Using collate rules of %s locale\n", 1214 setlocale(LC_COLLATE, NULL)); 1215 if (byte_sort) 1216 printf("Byte sort is used\n"); 1217 if (print_symbols_on_debug) { 1218 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1219 if (symbol_thousands_sep) 1220 printf("Thousands separator: <%lc>\n", 1221 symbol_thousands_sep); 1222 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1223 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1224 } 1225 } 1226 1227 set_random_seed(); 1228 1229 /* Case when the outfile equals one of the input files: */ 1230 if (strcmp(outfile, "-")) { 1231 1232 for(int i = 0; i < argc; ++i) { 1233 if (strcmp(argv[i], outfile) == 0) { 1234 real_outfile = sort_strdup(outfile); 1235 for(;;) { 1236 char* tmp = sort_malloc(strlen(outfile) + 1237 strlen(".tmp") + 1); 1238 1239 strcpy(tmp, outfile); 1240 strcpy(tmp + strlen(tmp), ".tmp"); 1241 sort_free(outfile); 1242 outfile = tmp; 1243 if (access(outfile, F_OK) < 0) 1244 break; 1245 } 1246 tmp_file_atexit(outfile); 1247 } 1248 } 1249 } 1250 1251 #if defined(SORT_THREADS) 1252 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1253 nthreads = 1; 1254 #endif 1255 1256 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1257 struct file_list fl; 1258 struct sort_list list; 1259 1260 sort_list_init(&list); 1261 file_list_init(&fl, true); 1262 1263 if (argc < 1) 1264 procfile("-", &list, &fl); 1265 else { 1266 while (argc > 0) { 1267 procfile(*argv, &list, &fl); 1268 --argc; 1269 ++argv; 1270 } 1271 } 1272 1273 if (fl.count < 1) 1274 sort_list_to_file(&list, outfile); 1275 else { 1276 if (list.count > 0) { 1277 char *flast = new_tmp_file_name(); 1278 1279 sort_list_to_file(&list, flast); 1280 file_list_add(&fl, flast, false); 1281 } 1282 merge_files(&fl, outfile); 1283 } 1284 1285 file_list_clean(&fl); 1286 1287 /* 1288 * We are about to exit the program, so we can ignore 1289 * the clean-up for speed 1290 * 1291 * sort_list_clean(&list); 1292 */ 1293 1294 } else if (sort_opts_vals.cflag) { 1295 result = (argc == 0) ? (check("-")) : (check(*argv)); 1296 } else if (sort_opts_vals.mflag) { 1297 struct file_list fl; 1298 1299 file_list_init(&fl, false); 1300 file_list_populate(&fl, argc, argv, true); 1301 merge_files(&fl, outfile); 1302 file_list_clean(&fl); 1303 } 1304 1305 if (real_outfile) { 1306 unlink(real_outfile); 1307 if (rename(outfile, real_outfile) < 0) 1308 err(2, NULL); 1309 sort_free(real_outfile); 1310 } 1311 1312 sort_free(outfile); 1313 1314 return (result); 1315 } 1316