1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/stat.h> 34 #include <sys/sysctl.h> 35 #include <sys/types.h> 36 37 #include <err.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <getopt.h> 41 #include <limits.h> 42 #include <locale.h> 43 #include <md5.h> 44 #include <regex.h> 45 #include <signal.h> 46 #include <stdbool.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <wchar.h> 52 #include <wctype.h> 53 54 #include "coll.h" 55 #include "file.h" 56 #include "sort.h" 57 58 #ifndef WITHOUT_NLS 59 #include <nl_types.h> 60 nl_catd catalog; 61 #endif 62 63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 64 65 static bool need_random; 66 67 MD5_CTX md5_ctx; 68 69 /* 70 * Default messages to use when NLS is disabled or no catalogue 71 * is found. 72 */ 73 const char *nlsstr[] = { "", 74 /* 1*/"mutually exclusive flags", 75 /* 2*/"extra argument not allowed with -c", 76 /* 3*/"Unknown feature", 77 /* 4*/"Wrong memory buffer specification", 78 /* 5*/"0 field in key specs", 79 /* 6*/"0 column in key specs", 80 /* 7*/"Wrong file mode", 81 /* 8*/"Cannot open file for reading", 82 /* 9*/"Radix sort cannot be used with these sort options", 83 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 84 /*11*/"Invalid key position", 85 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 86 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 87 "[-o outfile] [--batch-size size] [--files0-from file] " 88 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 89 "[--mmap] " 90 #if defined(SORT_THREADS) 91 "[--parallel thread_no] " 92 #endif 93 "[--human-numeric-sort] " 94 "[--version-sort] [--random-sort [--random-source file]] " 95 "[--compress-program program] [file ...]\n" }; 96 97 struct sort_opts sort_opts_vals; 98 99 bool debug_sort; 100 bool need_hint; 101 102 #if defined(SORT_THREADS) 103 unsigned int ncpu = 1; 104 size_t nthreads = 1; 105 #endif 106 107 static bool gnusort_numeric_compatibility; 108 109 static struct sort_mods default_sort_mods_object; 110 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 111 112 static bool print_symbols_on_debug; 113 114 /* 115 * Arguments from file (when file0-from option is used: 116 */ 117 static size_t argc_from_file0 = (size_t)-1; 118 static char **argv_from_file0; 119 120 /* 121 * Placeholder symbols for options which have no single-character equivalent 122 */ 123 enum 124 { 125 SORT_OPT = CHAR_MAX + 1, 126 HELP_OPT, 127 FF_OPT, 128 BS_OPT, 129 VERSION_OPT, 130 DEBUG_OPT, 131 #if defined(SORT_THREADS) 132 PARALLEL_OPT, 133 #endif 134 RANDOMSOURCE_OPT, 135 COMPRESSPROGRAM_OPT, 136 QSORT_OPT, 137 MERGESORT_OPT, 138 HEAPSORT_OPT, 139 RADIXSORT_OPT, 140 MMAP_OPT 141 }; 142 143 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 144 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 145 146 static struct option long_options[] = { 147 { "batch-size", required_argument, NULL, BS_OPT }, 148 { "buffer-size", required_argument, NULL, 'S' }, 149 { "check", optional_argument, NULL, 'c' }, 150 { "check=silent|quiet", optional_argument, NULL, 'C' }, 151 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 152 { "debug", no_argument, NULL, DEBUG_OPT }, 153 { "dictionary-order", no_argument, NULL, 'd' }, 154 { "field-separator", required_argument, NULL, 't' }, 155 { "files0-from", required_argument, NULL, FF_OPT }, 156 { "general-numeric-sort", no_argument, NULL, 'g' }, 157 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 158 { "help",no_argument, NULL, HELP_OPT }, 159 { "human-numeric-sort", no_argument, NULL, 'h' }, 160 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 161 { "ignore-case", no_argument, NULL, 'f' }, 162 { "ignore-nonprinting", no_argument, NULL, 'i' }, 163 { "key", required_argument, NULL, 'k' }, 164 { "merge", no_argument, NULL, 'm' }, 165 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 166 { "mmap", no_argument, NULL, MMAP_OPT }, 167 { "month-sort", no_argument, NULL, 'M' }, 168 { "numeric-sort", no_argument, NULL, 'n' }, 169 { "output", required_argument, NULL, 'o' }, 170 #if defined(SORT_THREADS) 171 { "parallel", required_argument, NULL, PARALLEL_OPT }, 172 #endif 173 { "qsort", no_argument, NULL, QSORT_OPT }, 174 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 175 { "random-sort", no_argument, NULL, 'R' }, 176 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 177 { "reverse", no_argument, NULL, 'r' }, 178 { "sort", required_argument, NULL, SORT_OPT }, 179 { "stable", no_argument, NULL, 's' }, 180 { "temporary-directory",required_argument, NULL, 'T' }, 181 { "unique", no_argument, NULL, 'u' }, 182 { "version", no_argument, NULL, VERSION_OPT }, 183 { "version-sort",no_argument, NULL, 'V' }, 184 { "zero-terminated", no_argument, NULL, 'z' }, 185 { NULL, no_argument, NULL, 0 } 186 }; 187 188 void fix_obsolete_keys(int *argc, char **argv); 189 190 /* 191 * Check where sort modifier is present 192 */ 193 static bool 194 sort_modifier_empty(struct sort_mods *sm) 195 { 196 197 if (sm == NULL) 198 return (true); 199 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 200 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 201 } 202 203 /* 204 * Print out usage text. 205 */ 206 static void 207 usage(bool opt_err) 208 { 209 FILE *out; 210 211 out = opt_err ? stderr : stdout; 212 213 fprintf(out, getstr(12), getprogname()); 214 if (opt_err) 215 exit(2); 216 exit(0); 217 } 218 219 /* 220 * Read input file names from a file (file0-from option). 221 */ 222 static void 223 read_fns_from_file0(const char *fn) 224 { 225 FILE *f; 226 char *line = NULL; 227 size_t linesize = 0; 228 ssize_t linelen; 229 230 if (fn == NULL) 231 return; 232 233 f = fopen(fn, "r"); 234 if (f == NULL) 235 err(2, "%s", fn); 236 237 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 238 if (*line != '\0') { 239 if (argc_from_file0 == (size_t) - 1) 240 argc_from_file0 = 0; 241 ++argc_from_file0; 242 argv_from_file0 = sort_realloc(argv_from_file0, 243 argc_from_file0 * sizeof(char *)); 244 if (argv_from_file0 == NULL) 245 err(2, NULL); 246 argv_from_file0[argc_from_file0 - 1] = line; 247 } else { 248 free(line); 249 } 250 line = NULL; 251 linesize = 0; 252 } 253 if (ferror(f)) 254 err(2, "%s: getdelim", fn); 255 256 closefile(f, fn); 257 } 258 259 /* 260 * Check how much RAM is available for the sort. 261 */ 262 static void 263 set_hw_params(void) 264 { 265 long pages, psize; 266 267 #if defined(SORT_THREADS) 268 ncpu = 1; 269 #endif 270 271 pages = sysconf(_SC_PHYS_PAGES); 272 if (pages < 1) { 273 perror("sysconf pages"); 274 pages = 1; 275 } 276 psize = sysconf(_SC_PAGESIZE); 277 if (psize < 1) { 278 perror("sysconf psize"); 279 psize = 4096; 280 } 281 #if defined(SORT_THREADS) 282 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 283 if (ncpu < 1) 284 ncpu = 1; 285 else if(ncpu > 32) 286 ncpu = 32; 287 288 nthreads = ncpu; 289 #endif 290 291 free_memory = (unsigned long long) pages * (unsigned long long) psize; 292 available_free_memory = free_memory / 2; 293 294 if (available_free_memory < 1024) 295 available_free_memory = 1024; 296 } 297 298 /* 299 * Convert "plain" symbol to wide symbol, with default value. 300 */ 301 static void 302 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 303 { 304 305 if (wc && c) { 306 int res; 307 308 res = mbtowc(wc, c, MB_CUR_MAX); 309 if (res < 1) 310 *wc = def; 311 } 312 } 313 314 /* 315 * Set current locale symbols. 316 */ 317 static void 318 set_locale(void) 319 { 320 struct lconv *lc; 321 const char *locale; 322 323 setlocale(LC_ALL, ""); 324 325 lc = localeconv(); 326 327 if (lc) { 328 /* obtain LC_NUMERIC info */ 329 /* Convert to wide char form */ 330 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 331 symbol_decimal_point); 332 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 333 symbol_thousands_sep); 334 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 335 symbol_positive_sign); 336 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 337 symbol_negative_sign); 338 } 339 340 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 341 gnusort_numeric_compatibility = true; 342 343 locale = setlocale(LC_COLLATE, NULL); 344 345 if (locale) { 346 char *tmpl; 347 const char *cclocale; 348 349 tmpl = sort_strdup(locale); 350 cclocale = setlocale(LC_COLLATE, "C"); 351 if (cclocale && !strcmp(cclocale, tmpl)) 352 byte_sort = true; 353 else { 354 const char *pclocale; 355 356 pclocale = setlocale(LC_COLLATE, "POSIX"); 357 if (pclocale && !strcmp(pclocale, tmpl)) 358 byte_sort = true; 359 } 360 setlocale(LC_COLLATE, tmpl); 361 sort_free(tmpl); 362 } 363 } 364 365 /* 366 * Set directory temporary files. 367 */ 368 static void 369 set_tmpdir(void) 370 { 371 char *td; 372 373 td = getenv("TMPDIR"); 374 if (td != NULL) 375 tmpdir = sort_strdup(td); 376 } 377 378 /* 379 * Parse -S option. 380 */ 381 static unsigned long long 382 parse_memory_buffer_value(const char *value) 383 { 384 385 if (value == NULL) 386 return (available_free_memory); 387 else { 388 char *endptr; 389 unsigned long long membuf; 390 391 endptr = NULL; 392 errno = 0; 393 membuf = strtoll(value, &endptr, 10); 394 395 if (errno != 0) { 396 warn("%s",getstr(4)); 397 membuf = available_free_memory; 398 } else { 399 switch (*endptr){ 400 case 'Y': 401 membuf *= 1024; 402 /* FALLTHROUGH */ 403 case 'Z': 404 membuf *= 1024; 405 /* FALLTHROUGH */ 406 case 'E': 407 membuf *= 1024; 408 /* FALLTHROUGH */ 409 case 'P': 410 membuf *= 1024; 411 /* FALLTHROUGH */ 412 case 'T': 413 membuf *= 1024; 414 /* FALLTHROUGH */ 415 case 'G': 416 membuf *= 1024; 417 /* FALLTHROUGH */ 418 case 'M': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case '\0': 422 case 'K': 423 membuf *= 1024; 424 /* FALLTHROUGH */ 425 case 'b': 426 break; 427 case '%': 428 membuf = (available_free_memory * membuf) / 429 100; 430 break; 431 default: 432 warnc(EINVAL, "%s", optarg); 433 membuf = available_free_memory; 434 } 435 } 436 return (membuf); 437 } 438 } 439 440 /* 441 * Signal handler that clears the temporary files. 442 */ 443 static void 444 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 445 void *context __unused) 446 { 447 448 clear_tmp_files(); 449 exit(-1); 450 } 451 452 /* 453 * Set signal handler on panic signals. 454 */ 455 static void 456 set_signal_handler(void) 457 { 458 struct sigaction sa; 459 460 memset(&sa, 0, sizeof(sa)); 461 sa.sa_sigaction = &sig_handler; 462 sa.sa_flags = SA_SIGINFO; 463 464 if (sigaction(SIGTERM, &sa, NULL) < 0) { 465 perror("sigaction"); 466 return; 467 } 468 if (sigaction(SIGHUP, &sa, NULL) < 0) { 469 perror("sigaction"); 470 return; 471 } 472 if (sigaction(SIGINT, &sa, NULL) < 0) { 473 perror("sigaction"); 474 return; 475 } 476 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 477 perror("sigaction"); 478 return; 479 } 480 if (sigaction(SIGABRT, &sa, NULL) < 0) { 481 perror("sigaction"); 482 return; 483 } 484 if (sigaction(SIGBUS, &sa, NULL) < 0) { 485 perror("sigaction"); 486 return; 487 } 488 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 489 perror("sigaction"); 490 return; 491 } 492 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 497 perror("sigaction"); 498 return; 499 } 500 } 501 502 /* 503 * Print "unknown" message and exit with status 2. 504 */ 505 static void 506 unknown(const char *what) 507 { 508 509 errx(2, "%s: %s", getstr(3), what); 510 } 511 512 /* 513 * Check whether contradictory input options are used. 514 */ 515 static void 516 check_mutually_exclusive_flags(char c, bool *mef_flags) 517 { 518 int fo_index, mec; 519 bool found_others, found_this; 520 521 found_others = found_this = false; 522 fo_index = 0; 523 524 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 525 mec = mutually_exclusive_flags[i]; 526 527 if (mec != c) { 528 if (mef_flags[i]) { 529 if (found_this) 530 errx(1, "%c:%c: %s", c, mec, getstr(1)); 531 found_others = true; 532 fo_index = i; 533 } 534 } else { 535 if (found_others) 536 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 537 mef_flags[i] = true; 538 found_this = true; 539 } 540 } 541 } 542 543 /* 544 * Initialise sort opts data. 545 */ 546 static void 547 set_sort_opts(void) 548 { 549 550 memset(&default_sort_mods_object, 0, 551 sizeof(default_sort_mods_object)); 552 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 553 default_sort_mods_object.func = 554 get_sort_func(&default_sort_mods_object); 555 } 556 557 /* 558 * Set a sort modifier on a sort modifiers object. 559 */ 560 static bool 561 set_sort_modifier(struct sort_mods *sm, int c) 562 { 563 564 if (sm == NULL) 565 return (true); 566 567 switch (c){ 568 case 'b': 569 sm->bflag = true; 570 break; 571 case 'd': 572 sm->dflag = true; 573 break; 574 case 'f': 575 sm->fflag = true; 576 break; 577 case 'g': 578 sm->gflag = true; 579 need_hint = true; 580 break; 581 case 'i': 582 sm->iflag = true; 583 break; 584 case 'R': 585 sm->Rflag = true; 586 need_hint = true; 587 need_random = true; 588 break; 589 case 'M': 590 initialise_months(); 591 sm->Mflag = true; 592 need_hint = true; 593 break; 594 case 'n': 595 sm->nflag = true; 596 need_hint = true; 597 print_symbols_on_debug = true; 598 break; 599 case 'r': 600 sm->rflag = true; 601 break; 602 case 'V': 603 sm->Vflag = true; 604 break; 605 case 'h': 606 sm->hflag = true; 607 need_hint = true; 608 print_symbols_on_debug = true; 609 break; 610 default: 611 return (false); 612 } 613 614 sort_opts_vals.complex_sort = true; 615 sm->func = get_sort_func(sm); 616 return (true); 617 } 618 619 /* 620 * Parse POS in -k option. 621 */ 622 static int 623 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 624 { 625 regmatch_t pmatch[4]; 626 regex_t re; 627 char *c, *f; 628 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 629 size_t len, nmatch; 630 int ret; 631 632 ret = -1; 633 nmatch = 4; 634 c = f = NULL; 635 636 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 637 return (-1); 638 639 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 640 goto end; 641 642 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 643 goto end; 644 645 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 646 goto end; 647 648 len = pmatch[1].rm_eo - pmatch[1].rm_so; 649 f = sort_malloc((len + 1) * sizeof(char)); 650 651 strncpy(f, s + pmatch[1].rm_so, len); 652 f[len] = '\0'; 653 654 if (second) { 655 errno = 0; 656 ks->f2 = (size_t) strtoul(f, NULL, 10); 657 if (errno != 0) 658 err(2, "-k"); 659 if (ks->f2 == 0) { 660 warn("%s",getstr(5)); 661 goto end; 662 } 663 } else { 664 errno = 0; 665 ks->f1 = (size_t) strtoul(f, NULL, 10); 666 if (errno != 0) 667 err(2, "-k"); 668 if (ks->f1 == 0) { 669 warn("%s",getstr(5)); 670 goto end; 671 } 672 } 673 674 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 675 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 676 c = sort_malloc((len + 1) * sizeof(char)); 677 678 strncpy(c, s + pmatch[2].rm_so + 1, len); 679 c[len] = '\0'; 680 681 if (second) { 682 errno = 0; 683 ks->c2 = (size_t) strtoul(c, NULL, 10); 684 if (errno != 0) 685 err(2, "-k"); 686 } else { 687 errno = 0; 688 ks->c1 = (size_t) strtoul(c, NULL, 10); 689 if (errno != 0) 690 err(2, "-k"); 691 if (ks->c1 == 0) { 692 warn("%s",getstr(6)); 693 goto end; 694 } 695 } 696 } else { 697 if (second) 698 ks->c2 = 0; 699 else 700 ks->c1 = 1; 701 } 702 703 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 704 regoff_t i = 0; 705 706 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 707 check_mutually_exclusive_flags(s[i], mef_flags); 708 if (s[i] == 'b') { 709 if (second) 710 ks->pos2b = true; 711 else 712 ks->pos1b = true; 713 } else if (!set_sort_modifier(&(ks->sm), s[i])) 714 goto end; 715 } 716 } 717 718 ret = 0; 719 720 end: 721 722 if (c) 723 sort_free(c); 724 if (f) 725 sort_free(f); 726 regfree(&re); 727 728 return (ret); 729 } 730 731 /* 732 * Parse -k option value. 733 */ 734 static int 735 parse_k(const char *s, struct key_specs *ks) 736 { 737 int ret = -1; 738 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 739 { false, false, false, false, false, false }; 740 741 if (s && *s) { 742 char *sptr; 743 744 sptr = strchr(s, ','); 745 if (sptr) { 746 size_t size1; 747 char *pos1, *pos2; 748 749 size1 = sptr - s; 750 751 if (size1 < 1) 752 return (-1); 753 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 754 755 strncpy(pos1, s, size1); 756 pos1[size1] = '\0'; 757 758 ret = parse_pos(pos1, ks, mef_flags, false); 759 760 sort_free(pos1); 761 if (ret < 0) 762 return (ret); 763 764 pos2 = sort_strdup(sptr + 1); 765 ret = parse_pos(pos2, ks, mef_flags, true); 766 sort_free(pos2); 767 } else 768 ret = parse_pos(s, ks, mef_flags, false); 769 } 770 771 return (ret); 772 } 773 774 /* 775 * Parse POS in +POS -POS option. 776 */ 777 static int 778 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 779 { 780 regex_t re; 781 regmatch_t pmatch[4]; 782 char *c, *f; 783 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 784 int ret; 785 size_t len, nmatch; 786 787 ret = -1; 788 nmatch = 4; 789 c = f = NULL; 790 *nc = *nf = 0; 791 792 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 793 return (-1); 794 795 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 796 goto end; 797 798 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 799 goto end; 800 801 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 802 goto end; 803 804 len = pmatch[1].rm_eo - pmatch[1].rm_so; 805 f = sort_malloc((len + 1) * sizeof(char)); 806 807 strncpy(f, s + pmatch[1].rm_so, len); 808 f[len] = '\0'; 809 810 errno = 0; 811 *nf = (size_t) strtoul(f, NULL, 10); 812 if (errno != 0) 813 errx(2, "%s", getstr(11)); 814 815 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 816 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 817 c = sort_malloc((len + 1) * sizeof(char)); 818 819 strncpy(c, s + pmatch[2].rm_so + 1, len); 820 c[len] = '\0'; 821 822 errno = 0; 823 *nc = (size_t) strtoul(c, NULL, 10); 824 if (errno != 0) 825 errx(2, "%s", getstr(11)); 826 } 827 828 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 829 830 len = pmatch[3].rm_eo - pmatch[3].rm_so; 831 832 strncpy(sopts, s + pmatch[3].rm_so, len); 833 sopts[len] = '\0'; 834 } 835 836 ret = 0; 837 838 end: 839 if (c) 840 sort_free(c); 841 if (f) 842 sort_free(f); 843 regfree(&re); 844 845 return (ret); 846 } 847 848 /* 849 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 850 */ 851 void 852 fix_obsolete_keys(int *argc, char **argv) 853 { 854 char sopt[129]; 855 856 for (int i = 1; i < *argc; i++) { 857 char *arg1; 858 859 arg1 = argv[i]; 860 861 if (strlen(arg1) > 1 && arg1[0] == '+') { 862 int c1, f1; 863 char sopts1[128]; 864 865 sopts1[0] = 0; 866 c1 = f1 = 0; 867 868 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 869 continue; 870 else { 871 f1 += 1; 872 c1 += 1; 873 if (i + 1 < *argc) { 874 char *arg2 = argv[i + 1]; 875 876 if (strlen(arg2) > 1 && 877 arg2[0] == '-') { 878 int c2, f2; 879 char sopts2[128]; 880 881 sopts2[0] = 0; 882 c2 = f2 = 0; 883 884 if (parse_pos_obs(arg2 + 1, 885 &f2, &c2, sopts2) >= 0) { 886 if (c2 > 0) 887 f2 += 1; 888 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 889 f1, c1, sopts1, f2, c2, sopts2); 890 argv[i] = sort_strdup(sopt); 891 for (int j = i + 1; j + 1 < *argc; j++) 892 argv[j] = argv[j + 1]; 893 *argc -= 1; 894 continue; 895 } 896 } 897 } 898 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 899 argv[i] = sort_strdup(sopt); 900 } 901 } 902 } 903 } 904 905 /* 906 * Seed random sort 907 */ 908 static void 909 get_random_seed(const char *random_source) 910 { 911 char randseed[32]; 912 struct stat fsb, rsb; 913 ssize_t rd; 914 int rsfd; 915 916 rsfd = -1; 917 rd = sizeof(randseed); 918 919 if (random_source == NULL) { 920 if (getentropy(randseed, sizeof(randseed)) < 0) 921 err(EX_SOFTWARE, "getentropy"); 922 goto out; 923 } 924 925 rsfd = open(random_source, O_RDONLY | O_CLOEXEC); 926 if (rsfd < 0) 927 err(EX_NOINPUT, "open: %s", random_source); 928 929 if (fstat(rsfd, &fsb) != 0) 930 err(EX_SOFTWARE, "fstat"); 931 932 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode)) 933 err(EX_USAGE, 934 "random seed isn't a regular file or /dev/random"); 935 936 /* 937 * Regular files: read up to maximum seed size and explicitly 938 * reject longer files. 939 */ 940 if (S_ISREG(fsb.st_mode)) { 941 if (fsb.st_size > (off_t)sizeof(randseed)) 942 errx(EX_USAGE, "random seed is too large (%jd >" 943 " %zu)!", (intmax_t)fsb.st_size, 944 sizeof(randseed)); 945 else if (fsb.st_size < 1) 946 errx(EX_USAGE, "random seed is too small (" 947 "0 bytes)"); 948 949 memset(randseed, 0, sizeof(randseed)); 950 951 rd = read(rsfd, randseed, fsb.st_size); 952 if (rd < 0) 953 err(EX_SOFTWARE, "reading random seed file %s", 954 random_source); 955 if (rd < (ssize_t)fsb.st_size) 956 errx(EX_SOFTWARE, "short read from %s", random_source); 957 } else if (S_ISCHR(fsb.st_mode)) { 958 if (stat("/dev/random", &rsb) < 0) 959 err(EX_SOFTWARE, "stat"); 960 961 if (fsb.st_dev != rsb.st_dev || 962 fsb.st_ino != rsb.st_ino) 963 errx(EX_USAGE, "random seed is a character " 964 "device other than /dev/random"); 965 966 if (getentropy(randseed, sizeof(randseed)) < 0) 967 err(EX_SOFTWARE, "getentropy"); 968 } 969 970 out: 971 if (rsfd >= 0) 972 close(rsfd); 973 974 MD5Init(&md5_ctx); 975 MD5Update(&md5_ctx, randseed, rd); 976 } 977 978 /* 979 * Main function. 980 */ 981 int 982 main(int argc, char **argv) 983 { 984 char *outfile, *real_outfile; 985 char *random_source = NULL; 986 int c, result; 987 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 988 { false, false, false, false, false, false }; 989 990 result = 0; 991 outfile = sort_strdup("-"); 992 real_outfile = NULL; 993 994 struct sort_mods *sm = &default_sort_mods_object; 995 996 init_tmp_files(); 997 998 set_signal_handler(); 999 1000 set_hw_params(); 1001 set_locale(); 1002 set_tmpdir(); 1003 set_sort_opts(); 1004 1005 fix_obsolete_keys(&argc, argv); 1006 1007 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1008 != -1)) { 1009 1010 check_mutually_exclusive_flags(c, mef_flags); 1011 1012 if (!set_sort_modifier(sm, c)) { 1013 1014 switch (c) { 1015 case 'c': 1016 sort_opts_vals.cflag = true; 1017 if (optarg) { 1018 if (!strcmp(optarg, "diagnose-first")) 1019 ; 1020 else if (!strcmp(optarg, "silent") || 1021 !strcmp(optarg, "quiet")) 1022 sort_opts_vals.csilentflag = true; 1023 else if (*optarg) 1024 unknown(optarg); 1025 } 1026 break; 1027 case 'C': 1028 sort_opts_vals.cflag = true; 1029 sort_opts_vals.csilentflag = true; 1030 break; 1031 case 'k': 1032 { 1033 sort_opts_vals.complex_sort = true; 1034 sort_opts_vals.kflag = true; 1035 1036 keys_num++; 1037 keys = sort_realloc(keys, keys_num * 1038 sizeof(struct key_specs)); 1039 memset(&(keys[keys_num - 1]), 0, 1040 sizeof(struct key_specs)); 1041 1042 if (parse_k(optarg, &(keys[keys_num - 1])) 1043 < 0) { 1044 errc(2, EINVAL, "-k %s", optarg); 1045 } 1046 1047 break; 1048 } 1049 case 'm': 1050 sort_opts_vals.mflag = true; 1051 break; 1052 case 'o': 1053 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1054 strcpy(outfile, optarg); 1055 break; 1056 case 's': 1057 sort_opts_vals.sflag = true; 1058 break; 1059 case 'S': 1060 available_free_memory = 1061 parse_memory_buffer_value(optarg); 1062 break; 1063 case 'T': 1064 tmpdir = sort_strdup(optarg); 1065 break; 1066 case 't': 1067 while (strlen(optarg) > 1) { 1068 if (optarg[0] != '\\') { 1069 errc(2, EINVAL, "%s", optarg); 1070 } 1071 optarg += 1; 1072 if (*optarg == '0') { 1073 *optarg = 0; 1074 break; 1075 } 1076 } 1077 sort_opts_vals.tflag = true; 1078 sort_opts_vals.field_sep = btowc(optarg[0]); 1079 if (sort_opts_vals.field_sep == WEOF) { 1080 errno = EINVAL; 1081 err(2, NULL); 1082 } 1083 if (!gnusort_numeric_compatibility) { 1084 if (symbol_decimal_point == sort_opts_vals.field_sep) 1085 symbol_decimal_point = WEOF; 1086 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1087 symbol_thousands_sep = WEOF; 1088 if (symbol_negative_sign == sort_opts_vals.field_sep) 1089 symbol_negative_sign = WEOF; 1090 if (symbol_positive_sign == sort_opts_vals.field_sep) 1091 symbol_positive_sign = WEOF; 1092 } 1093 break; 1094 case 'u': 1095 sort_opts_vals.uflag = true; 1096 /* stable sort for the correct unique val */ 1097 sort_opts_vals.sflag = true; 1098 break; 1099 case 'z': 1100 sort_opts_vals.zflag = true; 1101 break; 1102 case SORT_OPT: 1103 if (optarg) { 1104 if (!strcmp(optarg, "general-numeric")) 1105 set_sort_modifier(sm, 'g'); 1106 else if (!strcmp(optarg, "human-numeric")) 1107 set_sort_modifier(sm, 'h'); 1108 else if (!strcmp(optarg, "numeric")) 1109 set_sort_modifier(sm, 'n'); 1110 else if (!strcmp(optarg, "month")) 1111 set_sort_modifier(sm, 'M'); 1112 else if (!strcmp(optarg, "random")) 1113 set_sort_modifier(sm, 'R'); 1114 else 1115 unknown(optarg); 1116 } 1117 break; 1118 #if defined(SORT_THREADS) 1119 case PARALLEL_OPT: 1120 nthreads = (size_t)(atoi(optarg)); 1121 if (nthreads < 1) 1122 nthreads = 1; 1123 if (nthreads > 1024) 1124 nthreads = 1024; 1125 break; 1126 #endif 1127 case QSORT_OPT: 1128 sort_opts_vals.sort_method = SORT_QSORT; 1129 break; 1130 case MERGESORT_OPT: 1131 sort_opts_vals.sort_method = SORT_MERGESORT; 1132 break; 1133 case MMAP_OPT: 1134 use_mmap = true; 1135 break; 1136 case HEAPSORT_OPT: 1137 sort_opts_vals.sort_method = SORT_HEAPSORT; 1138 break; 1139 case RADIXSORT_OPT: 1140 sort_opts_vals.sort_method = SORT_RADIXSORT; 1141 break; 1142 case RANDOMSOURCE_OPT: 1143 random_source = strdup(optarg); 1144 break; 1145 case COMPRESSPROGRAM_OPT: 1146 compress_program = strdup(optarg); 1147 break; 1148 case FF_OPT: 1149 read_fns_from_file0(optarg); 1150 break; 1151 case BS_OPT: 1152 { 1153 errno = 0; 1154 long mof = strtol(optarg, NULL, 10); 1155 if (errno != 0) 1156 err(2, "--batch-size"); 1157 if (mof >= 2) 1158 max_open_files = (size_t) mof + 1; 1159 } 1160 break; 1161 case VERSION_OPT: 1162 printf("%s\n", VERSION); 1163 exit(EXIT_SUCCESS); 1164 /* NOTREACHED */ 1165 break; 1166 case DEBUG_OPT: 1167 debug_sort = true; 1168 break; 1169 case HELP_OPT: 1170 usage(false); 1171 /* NOTREACHED */ 1172 break; 1173 default: 1174 usage(true); 1175 /* NOTREACHED */ 1176 } 1177 } 1178 } 1179 1180 argc -= optind; 1181 argv += optind; 1182 1183 if (argv_from_file0) { 1184 argc = argc_from_file0; 1185 argv = argv_from_file0; 1186 } 1187 1188 #ifndef WITHOUT_NLS 1189 catalog = catopen("sort", NL_CAT_LOCALE); 1190 #endif 1191 1192 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1193 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1194 1195 #ifndef WITHOUT_NLS 1196 catclose(catalog); 1197 #endif 1198 1199 if (keys_num == 0) { 1200 keys_num = 1; 1201 keys = sort_realloc(keys, sizeof(struct key_specs)); 1202 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1203 keys[0].c1 = 1; 1204 keys[0].pos1b = default_sort_mods->bflag; 1205 keys[0].pos2b = default_sort_mods->bflag; 1206 memcpy(&(keys[0].sm), default_sort_mods, 1207 sizeof(struct sort_mods)); 1208 } 1209 1210 for (size_t i = 0; i < keys_num; i++) { 1211 struct key_specs *ks; 1212 1213 ks = &(keys[i]); 1214 1215 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1216 !(ks->pos2b)) { 1217 ks->pos1b = sm->bflag; 1218 ks->pos2b = sm->bflag; 1219 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1220 } 1221 1222 ks->sm.func = get_sort_func(&(ks->sm)); 1223 } 1224 1225 if (debug_sort) { 1226 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1227 #if defined(SORT_THREADS) 1228 printf("Number of CPUs: %d\n",(int)ncpu); 1229 nthreads = 1; 1230 #endif 1231 printf("Using collate rules of %s locale\n", 1232 setlocale(LC_COLLATE, NULL)); 1233 if (byte_sort) 1234 printf("Byte sort is used\n"); 1235 if (print_symbols_on_debug) { 1236 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1237 if (symbol_thousands_sep) 1238 printf("Thousands separator: <%lc>\n", 1239 symbol_thousands_sep); 1240 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1241 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1242 } 1243 } 1244 1245 if (need_random) 1246 get_random_seed(random_source); 1247 1248 /* Case when the outfile equals one of the input files: */ 1249 if (strcmp(outfile, "-")) { 1250 1251 for(int i = 0; i < argc; ++i) { 1252 if (strcmp(argv[i], outfile) == 0) { 1253 real_outfile = sort_strdup(outfile); 1254 for(;;) { 1255 char* tmp = sort_malloc(strlen(outfile) + 1256 strlen(".tmp") + 1); 1257 1258 strcpy(tmp, outfile); 1259 strcpy(tmp + strlen(tmp), ".tmp"); 1260 sort_free(outfile); 1261 outfile = tmp; 1262 if (access(outfile, F_OK) < 0) 1263 break; 1264 } 1265 tmp_file_atexit(outfile); 1266 } 1267 } 1268 } 1269 1270 #if defined(SORT_THREADS) 1271 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1272 nthreads = 1; 1273 #endif 1274 1275 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1276 struct file_list fl; 1277 struct sort_list list; 1278 1279 sort_list_init(&list); 1280 file_list_init(&fl, true); 1281 1282 if (argc < 1) 1283 procfile("-", &list, &fl); 1284 else { 1285 while (argc > 0) { 1286 procfile(*argv, &list, &fl); 1287 --argc; 1288 ++argv; 1289 } 1290 } 1291 1292 if (fl.count < 1) 1293 sort_list_to_file(&list, outfile); 1294 else { 1295 if (list.count > 0) { 1296 char *flast = new_tmp_file_name(); 1297 1298 sort_list_to_file(&list, flast); 1299 file_list_add(&fl, flast, false); 1300 } 1301 merge_files(&fl, outfile); 1302 } 1303 1304 file_list_clean(&fl); 1305 1306 /* 1307 * We are about to exit the program, so we can ignore 1308 * the clean-up for speed 1309 * 1310 * sort_list_clean(&list); 1311 */ 1312 1313 } else if (sort_opts_vals.cflag) { 1314 result = (argc == 0) ? (check("-")) : (check(*argv)); 1315 } else if (sort_opts_vals.mflag) { 1316 struct file_list fl; 1317 1318 file_list_init(&fl, false); 1319 /* No file arguments remaining means "read from stdin." */ 1320 if (argc == 0) 1321 file_list_add(&fl, "-", true); 1322 else 1323 file_list_populate(&fl, argc, argv, true); 1324 merge_files(&fl, outfile); 1325 file_list_clean(&fl); 1326 } 1327 1328 if (real_outfile) { 1329 unlink(real_outfile); 1330 if (rename(outfile, real_outfile) < 0) 1331 err(2, NULL); 1332 sort_free(real_outfile); 1333 } 1334 1335 sort_free(outfile); 1336 1337 return (result); 1338 } 1339