1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <locale.h>
#include <md5.h>
#include <regex.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>

#include "coll.h"
#include "file.h"
#include "sort.h"

/* Short-option specification handed to getopt_long() in main(). */
#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/*
 * Set by set_sort_modifier() when the 'R' modifier is selected; main()
 * calls get_random_seed() only when this is true.
 */
static bool need_random;

/* MD5 context that get_random_seed() initialises and feeds with the seed. */
MD5_CTX md5_ctx;

/*
 * Default messages to use.
 *
 * The number in the comment in front of each string is its index in the
 * array; the rest of this file refers to the messages by those numbers
 * through getstr() (presumably getstr(n) resolves to nlsstr[n] -- confirm
 * against the header that defines it).  Message 12 is used as a printf
 * format by usage(), hence the "%s" for the program name.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };

/* Global (non-key) sort options; zeroed by set_sort_opts(). */
struct sort_opts sort_opts_vals;

/* Set by the --debug option; enables the diagnostic printout in main(). */
bool debug_sort;
/* Set by set_sort_modifier() for the g, R, M, n and h modifiers. */
bool need_hint;

/* Copy of MB_CUR_MAX taken in set_locale() after the locale is installed. */
size_t mb_cur_max;

#if defined(SORT_THREADS)
/* CPU count detected by set_hw_params(). */
unsigned int ncpu = 1;
/* Worker thread count: defaults to ncpu, overridden by --parallel. */
size_t nthreads = 1;
#endif

/* True when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as global options (as opposed to inside a -k key). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments from file (when the files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;

/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Numbering starts above CHAR_MAX so no value can collide with a short
 * option character.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};

/*
 * Sort-type flags of which at most one may be given; the array order defines
 * the index used in the bool "mef_flags" arrays that record which of them
 * have been seen (see check_mutually_exclusive_flags()).
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };

/*
 * Long option table for getopt_long().  Entries map either to the
 * equivalent short option character or to one of the *_OPT placeholders
 * above; the all-zero entry terminates the table.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};

/* Defined further down; declared here because it is not static. */
void fix_obsolete_keys(int *argc, char **argv);

/*
 * Check whether a sort modifier object is empty.
 *
 * Returns true when sm is NULL or when none of the M, V, n, g, r, R, h, d
 * and f flags is set.  The b and i flags are not consulted here; main()
 * tests the per-position blank flags separately.
 */
static bool
sort_modifier_empty(struct sort_mods *sm)
{

	if (sm == NULL)
		return (true);
	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
}

/*
 * Print out usage text.
199 */ 200 static void 201 usage(bool opt_err) 202 { 203 FILE *out; 204 205 out = opt_err ? stderr : stdout; 206 207 fprintf(out, getstr(12), getprogname()); 208 if (opt_err) 209 exit(2); 210 exit(0); 211 } 212 213 /* 214 * Read input file names from a file (file0-from option). 215 */ 216 static void 217 read_fns_from_file0(const char *fn) 218 { 219 FILE *f; 220 char *line = NULL; 221 size_t linesize = 0; 222 ssize_t linelen; 223 224 if (fn == NULL) 225 return; 226 227 f = fopen(fn, "r"); 228 if (f == NULL) 229 err(2, "%s", fn); 230 231 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 232 if (*line != '\0') { 233 if (argc_from_file0 == (size_t) - 1) 234 argc_from_file0 = 0; 235 ++argc_from_file0; 236 argv_from_file0 = sort_realloc(argv_from_file0, 237 argc_from_file0 * sizeof(char *)); 238 if (argv_from_file0 == NULL) 239 err(2, NULL); 240 argv_from_file0[argc_from_file0 - 1] = line; 241 } else { 242 free(line); 243 } 244 line = NULL; 245 linesize = 0; 246 } 247 if (ferror(f)) 248 err(2, "%s: getdelim", fn); 249 250 closefile(f, fn); 251 } 252 253 /* 254 * Check how much RAM is available for the sort. 255 */ 256 static void 257 set_hw_params(void) 258 { 259 long pages, psize; 260 261 #if defined(SORT_THREADS) 262 ncpu = 1; 263 #endif 264 265 pages = sysconf(_SC_PHYS_PAGES); 266 if (pages < 1) { 267 perror("sysconf pages"); 268 pages = 1; 269 } 270 psize = sysconf(_SC_PAGESIZE); 271 if (psize < 1) { 272 perror("sysconf psize"); 273 psize = 4096; 274 } 275 #if defined(SORT_THREADS) 276 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 277 if (ncpu < 1) 278 ncpu = 1; 279 else if(ncpu > 32) 280 ncpu = 32; 281 282 nthreads = ncpu; 283 #endif 284 285 free_memory = (unsigned long long) pages * (unsigned long long) psize; 286 available_free_memory = free_memory / 2; 287 288 if (available_free_memory < 1024) 289 available_free_memory = 1024; 290 } 291 292 /* 293 * Convert "plain" symbol to wide symbol, with default value. 
294 */ 295 static void 296 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 297 { 298 299 if (wc && c) { 300 int res; 301 302 res = mbtowc(wc, c, mb_cur_max); 303 if (res < 1) 304 *wc = def; 305 } 306 } 307 308 /* 309 * Set current locale symbols. 310 */ 311 static void 312 set_locale(void) 313 { 314 struct lconv *lc; 315 const char *locale; 316 317 setlocale(LC_ALL, ""); 318 319 mb_cur_max = MB_CUR_MAX; 320 321 lc = localeconv(); 322 323 if (lc) { 324 /* obtain LC_NUMERIC info */ 325 /* Convert to wide char form */ 326 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 327 symbol_decimal_point); 328 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 329 symbol_thousands_sep); 330 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 331 symbol_positive_sign); 332 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 333 symbol_negative_sign); 334 } 335 336 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 337 gnusort_numeric_compatibility = true; 338 339 locale = setlocale(LC_COLLATE, NULL); 340 341 if (locale) { 342 char *tmpl; 343 const char *cclocale; 344 345 tmpl = sort_strdup(locale); 346 cclocale = setlocale(LC_COLLATE, "C"); 347 if (cclocale && !strcmp(cclocale, tmpl)) 348 byte_sort = true; 349 else { 350 const char *pclocale; 351 352 pclocale = setlocale(LC_COLLATE, "POSIX"); 353 if (pclocale && !strcmp(pclocale, tmpl)) 354 byte_sort = true; 355 } 356 setlocale(LC_COLLATE, tmpl); 357 sort_free(tmpl); 358 } 359 } 360 361 /* 362 * Set directory temporary files. 363 */ 364 static void 365 set_tmpdir(void) 366 { 367 char *td; 368 369 td = getenv("TMPDIR"); 370 if (td != NULL) 371 tmpdir = sort_strdup(td); 372 } 373 374 /* 375 * Parse -S option. 
376 */ 377 static unsigned long long 378 parse_memory_buffer_value(const char *value) 379 { 380 381 if (value == NULL) 382 return (available_free_memory); 383 else { 384 char *endptr; 385 unsigned long long membuf; 386 387 endptr = NULL; 388 errno = 0; 389 membuf = strtoll(value, &endptr, 10); 390 391 if (errno != 0) { 392 warn("%s",getstr(4)); 393 membuf = available_free_memory; 394 } else { 395 switch (*endptr){ 396 case 'Y': 397 membuf *= 1024; 398 /* FALLTHROUGH */ 399 case 'Z': 400 membuf *= 1024; 401 /* FALLTHROUGH */ 402 case 'E': 403 membuf *= 1024; 404 /* FALLTHROUGH */ 405 case 'P': 406 membuf *= 1024; 407 /* FALLTHROUGH */ 408 case 'T': 409 membuf *= 1024; 410 /* FALLTHROUGH */ 411 case 'G': 412 membuf *= 1024; 413 /* FALLTHROUGH */ 414 case 'M': 415 membuf *= 1024; 416 /* FALLTHROUGH */ 417 case '\0': 418 case 'K': 419 membuf *= 1024; 420 /* FALLTHROUGH */ 421 case 'b': 422 break; 423 case '%': 424 membuf = (available_free_memory * membuf) / 425 100; 426 break; 427 default: 428 warnc(EINVAL, "%s", optarg); 429 membuf = available_free_memory; 430 } 431 } 432 return (membuf); 433 } 434 } 435 436 /* 437 * Signal handler that clears the temporary files. 438 */ 439 static void 440 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 441 void *context __unused) 442 { 443 444 clear_tmp_files(); 445 exit(-1); 446 } 447 448 /* 449 * Set signal handler on panic signals. 
450 */ 451 static void 452 set_signal_handler(void) 453 { 454 struct sigaction sa; 455 456 memset(&sa, 0, sizeof(sa)); 457 sa.sa_sigaction = &sig_handler; 458 sa.sa_flags = SA_SIGINFO; 459 460 if (sigaction(SIGTERM, &sa, NULL) < 0) { 461 perror("sigaction"); 462 return; 463 } 464 if (sigaction(SIGHUP, &sa, NULL) < 0) { 465 perror("sigaction"); 466 return; 467 } 468 if (sigaction(SIGINT, &sa, NULL) < 0) { 469 perror("sigaction"); 470 return; 471 } 472 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 473 perror("sigaction"); 474 return; 475 } 476 if (sigaction(SIGABRT, &sa, NULL) < 0) { 477 perror("sigaction"); 478 return; 479 } 480 if (sigaction(SIGBUS, &sa, NULL) < 0) { 481 perror("sigaction"); 482 return; 483 } 484 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 485 perror("sigaction"); 486 return; 487 } 488 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 489 perror("sigaction"); 490 return; 491 } 492 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 } 497 498 /* 499 * Print "unknown" message and exit with status 2. 500 */ 501 static void 502 unknown(const char *what) 503 { 504 505 errx(2, "%s: %s", getstr(3), what); 506 } 507 508 /* 509 * Check whether contradictory input options are used. 510 */ 511 static void 512 check_mutually_exclusive_flags(char c, bool *mef_flags) 513 { 514 int fo_index, mec; 515 bool found_others, found_this; 516 517 found_others = found_this = false; 518 fo_index = 0; 519 520 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 521 mec = mutually_exclusive_flags[i]; 522 523 if (mec != c) { 524 if (mef_flags[i]) { 525 if (found_this) 526 errx(1, "%c:%c: %s", c, mec, getstr(1)); 527 found_others = true; 528 fo_index = i; 529 } 530 } else { 531 if (found_others) 532 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 533 mef_flags[i] = true; 534 found_this = true; 535 } 536 } 537 } 538 539 /* 540 * Initialise sort opts data. 
541 */ 542 static void 543 set_sort_opts(void) 544 { 545 546 memset(&default_sort_mods_object, 0, 547 sizeof(default_sort_mods_object)); 548 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 549 default_sort_mods_object.func = 550 get_sort_func(&default_sort_mods_object); 551 } 552 553 /* 554 * Set a sort modifier on a sort modifiers object. 555 */ 556 static bool 557 set_sort_modifier(struct sort_mods *sm, int c) 558 { 559 560 if (sm == NULL) 561 return (true); 562 563 switch (c){ 564 case 'b': 565 sm->bflag = true; 566 break; 567 case 'd': 568 sm->dflag = true; 569 break; 570 case 'f': 571 sm->fflag = true; 572 break; 573 case 'g': 574 sm->gflag = true; 575 need_hint = true; 576 break; 577 case 'i': 578 sm->iflag = true; 579 break; 580 case 'R': 581 sm->Rflag = true; 582 need_hint = true; 583 need_random = true; 584 break; 585 case 'M': 586 initialise_months(); 587 sm->Mflag = true; 588 need_hint = true; 589 break; 590 case 'n': 591 sm->nflag = true; 592 need_hint = true; 593 print_symbols_on_debug = true; 594 break; 595 case 'r': 596 sm->rflag = true; 597 break; 598 case 'V': 599 sm->Vflag = true; 600 break; 601 case 'h': 602 sm->hflag = true; 603 need_hint = true; 604 print_symbols_on_debug = true; 605 break; 606 default: 607 return (false); 608 } 609 610 sort_opts_vals.complex_sort = true; 611 sm->func = get_sort_func(sm); 612 return (true); 613 } 614 615 /* 616 * Parse POS in -k option. 
617 */ 618 static int 619 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 620 { 621 regmatch_t pmatch[4]; 622 regex_t re; 623 char *c, *f; 624 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 625 size_t len, nmatch; 626 int ret; 627 628 ret = -1; 629 nmatch = 4; 630 c = f = NULL; 631 632 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 633 return (-1); 634 635 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 636 goto end; 637 638 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 639 goto end; 640 641 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 642 goto end; 643 644 len = pmatch[1].rm_eo - pmatch[1].rm_so; 645 f = sort_malloc((len + 1) * sizeof(char)); 646 647 strncpy(f, s + pmatch[1].rm_so, len); 648 f[len] = '\0'; 649 650 if (second) { 651 errno = 0; 652 ks->f2 = (size_t) strtoul(f, NULL, 10); 653 if (errno != 0) 654 err(2, "-k"); 655 if (ks->f2 == 0) { 656 warn("%s",getstr(5)); 657 goto end; 658 } 659 } else { 660 errno = 0; 661 ks->f1 = (size_t) strtoul(f, NULL, 10); 662 if (errno != 0) 663 err(2, "-k"); 664 if (ks->f1 == 0) { 665 warn("%s",getstr(5)); 666 goto end; 667 } 668 } 669 670 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 671 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 672 c = sort_malloc((len + 1) * sizeof(char)); 673 674 strncpy(c, s + pmatch[2].rm_so + 1, len); 675 c[len] = '\0'; 676 677 if (second) { 678 errno = 0; 679 ks->c2 = (size_t) strtoul(c, NULL, 10); 680 if (errno != 0) 681 err(2, "-k"); 682 } else { 683 errno = 0; 684 ks->c1 = (size_t) strtoul(c, NULL, 10); 685 if (errno != 0) 686 err(2, "-k"); 687 if (ks->c1 == 0) { 688 warn("%s",getstr(6)); 689 goto end; 690 } 691 } 692 } else { 693 if (second) 694 ks->c2 = 0; 695 else 696 ks->c1 = 1; 697 } 698 699 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 700 regoff_t i = 0; 701 702 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 703 check_mutually_exclusive_flags(s[i], mef_flags); 704 if (s[i] == 'b') { 705 if (second) 706 ks->pos2b = true; 707 else 708 ks->pos1b = true; 709 } 
else if (!set_sort_modifier(&(ks->sm), s[i])) 710 goto end; 711 } 712 } 713 714 ret = 0; 715 716 end: 717 718 if (c) 719 sort_free(c); 720 if (f) 721 sort_free(f); 722 regfree(&re); 723 724 return (ret); 725 } 726 727 /* 728 * Parse -k option value. 729 */ 730 static int 731 parse_k(const char *s, struct key_specs *ks) 732 { 733 int ret = -1; 734 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 735 { false, false, false, false, false, false }; 736 737 if (s && *s) { 738 char *sptr; 739 740 sptr = strchr(s, ','); 741 if (sptr) { 742 size_t size1; 743 char *pos1, *pos2; 744 745 size1 = sptr - s; 746 747 if (size1 < 1) 748 return (-1); 749 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 750 751 strncpy(pos1, s, size1); 752 pos1[size1] = '\0'; 753 754 ret = parse_pos(pos1, ks, mef_flags, false); 755 756 sort_free(pos1); 757 if (ret < 0) 758 return (ret); 759 760 pos2 = sort_strdup(sptr + 1); 761 ret = parse_pos(pos2, ks, mef_flags, true); 762 sort_free(pos2); 763 } else 764 ret = parse_pos(s, ks, mef_flags, false); 765 } 766 767 return (ret); 768 } 769 770 /* 771 * Parse POS in +POS -POS option. 
772 */ 773 static int 774 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 775 { 776 regex_t re; 777 regmatch_t pmatch[4]; 778 char *c, *f; 779 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 780 int ret; 781 size_t len, nmatch; 782 783 ret = -1; 784 nmatch = 4; 785 c = f = NULL; 786 *nc = *nf = 0; 787 788 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 789 return (-1); 790 791 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 792 goto end; 793 794 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 795 goto end; 796 797 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 798 goto end; 799 800 len = pmatch[1].rm_eo - pmatch[1].rm_so; 801 f = sort_malloc((len + 1) * sizeof(char)); 802 803 strncpy(f, s + pmatch[1].rm_so, len); 804 f[len] = '\0'; 805 806 errno = 0; 807 *nf = (size_t) strtoul(f, NULL, 10); 808 if (errno != 0) 809 errx(2, "%s", getstr(11)); 810 811 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 812 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 813 c = sort_malloc((len + 1) * sizeof(char)); 814 815 strncpy(c, s + pmatch[2].rm_so + 1, len); 816 c[len] = '\0'; 817 818 errno = 0; 819 *nc = (size_t) strtoul(c, NULL, 10); 820 if (errno != 0) 821 errx(2, "%s", getstr(11)); 822 } 823 824 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 825 826 len = pmatch[3].rm_eo - pmatch[3].rm_so; 827 828 strncpy(sopts, s + pmatch[3].rm_so, len); 829 sopts[len] = '\0'; 830 } 831 832 ret = 0; 833 834 end: 835 if (c) 836 sort_free(c); 837 if (f) 838 sort_free(f); 839 regfree(&re); 840 841 return (ret); 842 } 843 844 /* 845 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 846 */ 847 void 848 fix_obsolete_keys(int *argc, char **argv) 849 { 850 char sopt[129]; 851 852 for (int i = 1; i < *argc; i++) { 853 char *arg1; 854 855 arg1 = argv[i]; 856 857 if (strcmp(arg1, "--") == 0) { 858 /* Following arguments are treated as filenames. 
*/ 859 break; 860 } 861 862 if (strlen(arg1) > 1 && arg1[0] == '+') { 863 int c1, f1; 864 char sopts1[128]; 865 866 sopts1[0] = 0; 867 c1 = f1 = 0; 868 869 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 870 continue; 871 else { 872 f1 += 1; 873 c1 += 1; 874 if (i + 1 < *argc) { 875 char *arg2 = argv[i + 1]; 876 877 if (strlen(arg2) > 1 && 878 arg2[0] == '-') { 879 int c2, f2; 880 char sopts2[128]; 881 882 sopts2[0] = 0; 883 c2 = f2 = 0; 884 885 if (parse_pos_obs(arg2 + 1, 886 &f2, &c2, sopts2) >= 0) { 887 if (c2 > 0) 888 f2 += 1; 889 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 890 f1, c1, sopts1, f2, c2, sopts2); 891 argv[i] = sort_strdup(sopt); 892 for (int j = i + 1; j + 1 < *argc; j++) 893 argv[j] = argv[j + 1]; 894 *argc -= 1; 895 continue; 896 } 897 } 898 } 899 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 900 argv[i] = sort_strdup(sopt); 901 } 902 } 903 } 904 } 905 906 /* 907 * Seed random sort 908 */ 909 static void 910 get_random_seed(const char *random_source) 911 { 912 char randseed[32]; 913 struct stat fsb, rsb; 914 ssize_t rd; 915 int rsfd; 916 917 rsfd = -1; 918 rd = sizeof(randseed); 919 920 if (random_source == NULL) { 921 if (getentropy(randseed, sizeof(randseed)) < 0) 922 err(EX_SOFTWARE, "getentropy"); 923 goto out; 924 } 925 926 rsfd = open(random_source, O_RDONLY | O_CLOEXEC); 927 if (rsfd < 0) 928 err(EX_NOINPUT, "open: %s", random_source); 929 930 if (fstat(rsfd, &fsb) != 0) 931 err(EX_SOFTWARE, "fstat"); 932 933 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode)) 934 err(EX_USAGE, 935 "random seed isn't a regular file or /dev/random"); 936 937 /* 938 * Regular files: read up to maximum seed size and explicitly 939 * reject longer files. 
940 */ 941 if (S_ISREG(fsb.st_mode)) { 942 if (fsb.st_size > (off_t)sizeof(randseed)) 943 errx(EX_USAGE, "random seed is too large (%jd >" 944 " %zu)!", (intmax_t)fsb.st_size, 945 sizeof(randseed)); 946 else if (fsb.st_size < 1) 947 errx(EX_USAGE, "random seed is too small (" 948 "0 bytes)"); 949 950 memset(randseed, 0, sizeof(randseed)); 951 952 rd = read(rsfd, randseed, fsb.st_size); 953 if (rd < 0) 954 err(EX_SOFTWARE, "reading random seed file %s", 955 random_source); 956 if (rd < (ssize_t)fsb.st_size) 957 errx(EX_SOFTWARE, "short read from %s", random_source); 958 } else if (S_ISCHR(fsb.st_mode)) { 959 if (stat("/dev/random", &rsb) < 0) 960 err(EX_SOFTWARE, "stat"); 961 962 if (fsb.st_dev != rsb.st_dev || 963 fsb.st_ino != rsb.st_ino) 964 errx(EX_USAGE, "random seed is a character " 965 "device other than /dev/random"); 966 967 if (getentropy(randseed, sizeof(randseed)) < 0) 968 err(EX_SOFTWARE, "getentropy"); 969 } 970 971 out: 972 if (rsfd >= 0) 973 close(rsfd); 974 975 MD5Init(&md5_ctx); 976 MD5Update(&md5_ctx, randseed, rd); 977 } 978 979 /* 980 * Main function. 
981 */ 982 int 983 main(int argc, char **argv) 984 { 985 char *outfile, *real_outfile; 986 char *random_source = NULL; 987 int c, result; 988 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 989 { false, false, false, false, false, false }; 990 991 result = 0; 992 outfile = sort_strdup("-"); 993 real_outfile = NULL; 994 995 struct sort_mods *sm = &default_sort_mods_object; 996 997 init_tmp_files(); 998 999 set_signal_handler(); 1000 1001 set_hw_params(); 1002 set_locale(); 1003 set_tmpdir(); 1004 set_sort_opts(); 1005 1006 fix_obsolete_keys(&argc, argv); 1007 1008 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1009 != -1)) { 1010 1011 check_mutually_exclusive_flags(c, mef_flags); 1012 1013 if (!set_sort_modifier(sm, c)) { 1014 1015 switch (c) { 1016 case 'c': 1017 sort_opts_vals.cflag = true; 1018 if (optarg) { 1019 if (!strcmp(optarg, "diagnose-first")) 1020 ; 1021 else if (!strcmp(optarg, "silent") || 1022 !strcmp(optarg, "quiet")) 1023 sort_opts_vals.csilentflag = true; 1024 else if (*optarg) 1025 unknown(optarg); 1026 } 1027 break; 1028 case 'C': 1029 sort_opts_vals.cflag = true; 1030 sort_opts_vals.csilentflag = true; 1031 break; 1032 case 'k': 1033 { 1034 sort_opts_vals.complex_sort = true; 1035 sort_opts_vals.kflag = true; 1036 1037 keys_num++; 1038 keys = sort_realloc(keys, keys_num * 1039 sizeof(struct key_specs)); 1040 memset(&(keys[keys_num - 1]), 0, 1041 sizeof(struct key_specs)); 1042 1043 if (parse_k(optarg, &(keys[keys_num - 1])) 1044 < 0) { 1045 errc(2, EINVAL, "-k %s", optarg); 1046 } 1047 1048 break; 1049 } 1050 case 'm': 1051 sort_opts_vals.mflag = true; 1052 break; 1053 case 'o': 1054 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1055 strcpy(outfile, optarg); 1056 break; 1057 case 's': 1058 sort_opts_vals.sflag = true; 1059 break; 1060 case 'S': 1061 available_free_memory = 1062 parse_memory_buffer_value(optarg); 1063 break; 1064 case 'T': 1065 tmpdir = sort_strdup(optarg); 1066 break; 1067 case 't': 1068 while 
(strlen(optarg) > 1) { 1069 if (optarg[0] != '\\') { 1070 errc(2, EINVAL, "%s", optarg); 1071 } 1072 optarg += 1; 1073 if (*optarg == '0') { 1074 *optarg = 0; 1075 break; 1076 } 1077 } 1078 sort_opts_vals.tflag = true; 1079 sort_opts_vals.field_sep = btowc(optarg[0]); 1080 if (sort_opts_vals.field_sep == WEOF) { 1081 errno = EINVAL; 1082 err(2, NULL); 1083 } 1084 if (!gnusort_numeric_compatibility) { 1085 if (symbol_decimal_point == sort_opts_vals.field_sep) 1086 symbol_decimal_point = WEOF; 1087 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1088 symbol_thousands_sep = WEOF; 1089 if (symbol_negative_sign == sort_opts_vals.field_sep) 1090 symbol_negative_sign = WEOF; 1091 if (symbol_positive_sign == sort_opts_vals.field_sep) 1092 symbol_positive_sign = WEOF; 1093 } 1094 break; 1095 case 'u': 1096 sort_opts_vals.uflag = true; 1097 /* stable sort for the correct unique val */ 1098 sort_opts_vals.sflag = true; 1099 break; 1100 case 'z': 1101 sort_opts_vals.zflag = true; 1102 break; 1103 case SORT_OPT: 1104 if (optarg) { 1105 if (!strcmp(optarg, "general-numeric")) 1106 set_sort_modifier(sm, 'g'); 1107 else if (!strcmp(optarg, "human-numeric")) 1108 set_sort_modifier(sm, 'h'); 1109 else if (!strcmp(optarg, "numeric")) 1110 set_sort_modifier(sm, 'n'); 1111 else if (!strcmp(optarg, "month")) 1112 set_sort_modifier(sm, 'M'); 1113 else if (!strcmp(optarg, "random")) 1114 set_sort_modifier(sm, 'R'); 1115 else 1116 unknown(optarg); 1117 } 1118 break; 1119 #if defined(SORT_THREADS) 1120 case PARALLEL_OPT: 1121 nthreads = (size_t)(atoi(optarg)); 1122 if (nthreads < 1) 1123 nthreads = 1; 1124 if (nthreads > 1024) 1125 nthreads = 1024; 1126 break; 1127 #endif 1128 case QSORT_OPT: 1129 sort_opts_vals.sort_method = SORT_QSORT; 1130 break; 1131 case MERGESORT_OPT: 1132 sort_opts_vals.sort_method = SORT_MERGESORT; 1133 break; 1134 case MMAP_OPT: 1135 use_mmap = true; 1136 break; 1137 case HEAPSORT_OPT: 1138 sort_opts_vals.sort_method = SORT_HEAPSORT; 1139 break; 1140 case 
RADIXSORT_OPT: 1141 sort_opts_vals.sort_method = SORT_RADIXSORT; 1142 break; 1143 case RANDOMSOURCE_OPT: 1144 random_source = strdup(optarg); 1145 break; 1146 case COMPRESSPROGRAM_OPT: 1147 compress_program = strdup(optarg); 1148 break; 1149 case FF_OPT: 1150 read_fns_from_file0(optarg); 1151 break; 1152 case BS_OPT: 1153 { 1154 errno = 0; 1155 long mof = strtol(optarg, NULL, 10); 1156 if (errno != 0) 1157 err(2, "--batch-size"); 1158 if (mof >= 2) 1159 max_open_files = (size_t) mof + 1; 1160 } 1161 break; 1162 case VERSION_OPT: 1163 printf("%s\n", VERSION); 1164 exit(EXIT_SUCCESS); 1165 /* NOTREACHED */ 1166 break; 1167 case DEBUG_OPT: 1168 debug_sort = true; 1169 break; 1170 case HELP_OPT: 1171 usage(false); 1172 /* NOTREACHED */ 1173 break; 1174 default: 1175 usage(true); 1176 /* NOTREACHED */ 1177 } 1178 } 1179 } 1180 1181 argc -= optind; 1182 argv += optind; 1183 1184 if (argv_from_file0) { 1185 argc = argc_from_file0; 1186 argv = argv_from_file0; 1187 } 1188 1189 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1190 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1191 1192 if (keys_num == 0) { 1193 keys_num = 1; 1194 keys = sort_realloc(keys, sizeof(struct key_specs)); 1195 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1196 keys[0].c1 = 1; 1197 keys[0].pos1b = default_sort_mods->bflag; 1198 keys[0].pos2b = default_sort_mods->bflag; 1199 memcpy(&(keys[0].sm), default_sort_mods, 1200 sizeof(struct sort_mods)); 1201 } 1202 1203 for (size_t i = 0; i < keys_num; i++) { 1204 struct key_specs *ks; 1205 1206 ks = &(keys[i]); 1207 1208 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1209 !(ks->pos2b)) { 1210 ks->pos1b = sm->bflag; 1211 ks->pos2b = sm->bflag; 1212 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1213 } 1214 1215 ks->sm.func = get_sort_func(&(ks->sm)); 1216 } 1217 1218 if (debug_sort) { 1219 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1220 #if defined(SORT_THREADS) 1221 printf("Number of CPUs: %d\n",(int)ncpu); 1222 
nthreads = 1; 1223 #endif 1224 printf("Using collate rules of %s locale\n", 1225 setlocale(LC_COLLATE, NULL)); 1226 if (byte_sort) 1227 printf("Byte sort is used\n"); 1228 if (print_symbols_on_debug) { 1229 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1230 if (symbol_thousands_sep) 1231 printf("Thousands separator: <%lc>\n", 1232 symbol_thousands_sep); 1233 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1234 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1235 } 1236 } 1237 1238 if (need_random) 1239 get_random_seed(random_source); 1240 1241 /* Case when the outfile equals one of the input files: */ 1242 if (strcmp(outfile, "-")) { 1243 1244 for(int i = 0; i < argc; ++i) { 1245 if (strcmp(argv[i], outfile) == 0) { 1246 real_outfile = sort_strdup(outfile); 1247 for(;;) { 1248 char* tmp = sort_malloc(strlen(outfile) + 1249 strlen(".tmp") + 1); 1250 1251 strcpy(tmp, outfile); 1252 strcpy(tmp + strlen(tmp), ".tmp"); 1253 sort_free(outfile); 1254 outfile = tmp; 1255 if (access(outfile, F_OK) < 0) 1256 break; 1257 } 1258 tmp_file_atexit(outfile); 1259 } 1260 } 1261 } 1262 1263 #if defined(SORT_THREADS) 1264 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1265 nthreads = 1; 1266 #endif 1267 1268 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1269 struct file_list fl; 1270 struct sort_list list; 1271 1272 sort_list_init(&list); 1273 file_list_init(&fl, true); 1274 1275 if (argc < 1) 1276 procfile("-", &list, &fl); 1277 else { 1278 while (argc > 0) { 1279 procfile(*argv, &list, &fl); 1280 --argc; 1281 ++argv; 1282 } 1283 } 1284 1285 if (fl.count < 1) 1286 sort_list_to_file(&list, outfile); 1287 else { 1288 if (list.count > 0) { 1289 char *flast = new_tmp_file_name(); 1290 1291 sort_list_to_file(&list, flast); 1292 file_list_add(&fl, flast, false); 1293 } 1294 merge_files(&fl, outfile); 1295 } 1296 1297 file_list_clean(&fl); 1298 1299 /* 1300 * We are about to exit the program, so we can ignore 1301 * the clean-up for 
speed 1302 * 1303 * sort_list_clean(&list); 1304 */ 1305 1306 } else if (sort_opts_vals.cflag) { 1307 result = (argc == 0) ? (check("-")) : (check(*argv)); 1308 } else if (sort_opts_vals.mflag) { 1309 struct file_list fl; 1310 1311 file_list_init(&fl, false); 1312 /* No file arguments remaining means "read from stdin." */ 1313 if (argc == 0) 1314 file_list_add(&fl, "-", true); 1315 else 1316 file_list_populate(&fl, argc, argv, true); 1317 merge_files(&fl, outfile); 1318 file_list_clean(&fl); 1319 } 1320 1321 if (real_outfile) { 1322 unlink(real_outfile); 1323 if (rename(outfile, real_outfile) < 0) 1324 err(2, NULL); 1325 sort_free(real_outfile); 1326 } 1327 1328 sort_free(outfile); 1329 1330 return (result); 1331 } 1332