1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/stat.h> 34 #include <sys/sysctl.h> 35 #include <sys/types.h> 36 37 #include <err.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <getopt.h> 41 #include <limits.h> 42 #include <locale.h> 43 #include <md5.h> 44 #include <regex.h> 45 #include <signal.h> 46 #include <stdbool.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <wchar.h> 52 #include <wctype.h> 53 54 #include "coll.h" 55 #include "file.h" 56 #include "sort.h" 57 58 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 59 60 static bool need_random; 61 62 MD5_CTX md5_ctx; 63 64 /* 65 * Default messages to use 66 */ 67 const char *nlsstr[] = { "", 68 /* 1*/"mutually exclusive flags", 69 /* 2*/"extra argument not allowed with -c", 70 /* 3*/"Unknown feature", 71 /* 4*/"Wrong memory buffer specification", 72 /* 5*/"0 field in key specs", 73 /* 6*/"0 column in key specs", 74 /* 7*/"Wrong file mode", 75 /* 8*/"Cannot open file for reading", 76 /* 9*/"Radix sort cannot be used with these sort options", 77 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 78 /*11*/"Invalid key position", 79 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 80 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 81 "[-o outfile] [--batch-size size] [--files0-from file] " 82 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 83 "[--mmap] " 84 #if defined(SORT_THREADS) 85 "[--parallel thread_no] " 86 #endif 87 "[--human-numeric-sort] " 88 "[--version-sort] [--random-sort [--random-source file]] " 89 "[--compress-program program] [file ...]\n" }; 90 91 struct sort_opts sort_opts_vals; 92 93 bool debug_sort; 94 bool need_hint; 95 96 size_t mb_cur_max; 97 98 #if defined(SORT_THREADS) 99 unsigned int ncpu = 1; 100 size_t nthreads = 1; 101 #endif 102 103 static bool gnusort_numeric_compatibility; 104 105 static struct sort_mods default_sort_mods_object; 106 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 107 108 static bool print_symbols_on_debug; 109 110 /* 111 * Arguments from file (when file0-from option is used: 112 */ 113 static size_t argc_from_file0 = (size_t)-1; 114 static char **argv_from_file0; 115 116 /* 117 * Placeholder symbols for options which have no single-character equivalent 118 */ 119 enum 120 { 121 SORT_OPT = CHAR_MAX + 1, 122 HELP_OPT, 123 FF_OPT, 124 BS_OPT, 125 VERSION_OPT, 126 DEBUG_OPT, 127 #if defined(SORT_THREADS) 128 PARALLEL_OPT, 129 #endif 130 RANDOMSOURCE_OPT, 131 COMPRESSPROGRAM_OPT, 132 QSORT_OPT, 133 MERGESORT_OPT, 134 HEAPSORT_OPT, 135 RADIXSORT_OPT, 136 MMAP_OPT 137 }; 138 139 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 140 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 141 142 static struct option long_options[] = { 143 { "batch-size", required_argument, NULL, BS_OPT }, 144 { "buffer-size", required_argument, NULL, 'S' }, 145 { "check", optional_argument, NULL, 'c' }, 146 { "check=silent|quiet", optional_argument, NULL, 'C' }, 147 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 148 { "debug", no_argument, NULL, DEBUG_OPT }, 149 { "dictionary-order", no_argument, NULL, 'd' }, 150 { "field-separator", required_argument, NULL, 't' }, 151 { "files0-from", required_argument, NULL, FF_OPT }, 152 { "general-numeric-sort", no_argument, NULL, 'g' }, 153 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 154 { "help",no_argument, NULL, HELP_OPT }, 155 { "human-numeric-sort", no_argument, NULL, 'h' }, 156 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 157 { "ignore-case", no_argument, NULL, 'f' }, 158 { "ignore-nonprinting", no_argument, NULL, 'i' }, 159 { "key", required_argument, NULL, 'k' }, 160 { "merge", no_argument, NULL, 'm' }, 161 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 162 { "mmap", no_argument, NULL, MMAP_OPT }, 163 { "month-sort", no_argument, NULL, 'M' }, 164 { "numeric-sort", no_argument, NULL, 'n' }, 165 { "output", required_argument, NULL, 'o' }, 166 #if defined(SORT_THREADS) 167 { "parallel", required_argument, NULL, PARALLEL_OPT }, 168 #endif 169 { "qsort", no_argument, NULL, QSORT_OPT }, 170 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 171 { "random-sort", no_argument, NULL, 'R' }, 172 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 173 { "reverse", no_argument, NULL, 'r' }, 174 { "sort", required_argument, NULL, SORT_OPT }, 175 { "stable", no_argument, NULL, 's' }, 176 { "temporary-directory",required_argument, NULL, 'T' }, 177 { "unique", no_argument, NULL, 'u' }, 178 { "version", no_argument, NULL, VERSION_OPT }, 179 { "version-sort",no_argument, NULL, 'V' }, 180 { "zero-terminated", no_argument, NULL, 'z' }, 181 { NULL, no_argument, NULL, 0 } 182 }; 183 184 void fix_obsolete_keys(int *argc, char **argv); 185 186 /* 187 * Check where sort modifier is present 188 */ 189 static bool 190 sort_modifier_empty(struct sort_mods *sm) 191 { 192 193 if (sm == NULL) 194 return (true); 195 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 196 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 197 } 198 199 /* 200 * Print out usage text. 201 */ 202 static void 203 usage(bool opt_err) 204 { 205 FILE *out; 206 207 out = opt_err ? stderr : stdout; 208 209 fprintf(out, getstr(12), getprogname()); 210 if (opt_err) 211 exit(2); 212 exit(0); 213 } 214 215 /* 216 * Read input file names from a file (file0-from option). 217 */ 218 static void 219 read_fns_from_file0(const char *fn) 220 { 221 FILE *f; 222 char *line = NULL; 223 size_t linesize = 0; 224 ssize_t linelen; 225 226 if (fn == NULL) 227 return; 228 229 f = fopen(fn, "r"); 230 if (f == NULL) 231 err(2, "%s", fn); 232 233 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 234 if (*line != '\0') { 235 if (argc_from_file0 == (size_t) - 1) 236 argc_from_file0 = 0; 237 ++argc_from_file0; 238 argv_from_file0 = sort_realloc(argv_from_file0, 239 argc_from_file0 * sizeof(char *)); 240 if (argv_from_file0 == NULL) 241 err(2, NULL); 242 argv_from_file0[argc_from_file0 - 1] = line; 243 } else { 244 free(line); 245 } 246 line = NULL; 247 linesize = 0; 248 } 249 if (ferror(f)) 250 err(2, "%s: getdelim", fn); 251 252 closefile(f, fn); 253 } 254 255 /* 256 * Check how much RAM is available for the sort. 257 */ 258 static void 259 set_hw_params(void) 260 { 261 long pages, psize; 262 263 #if defined(SORT_THREADS) 264 ncpu = 1; 265 #endif 266 267 pages = sysconf(_SC_PHYS_PAGES); 268 if (pages < 1) { 269 perror("sysconf pages"); 270 pages = 1; 271 } 272 psize = sysconf(_SC_PAGESIZE); 273 if (psize < 1) { 274 perror("sysconf psize"); 275 psize = 4096; 276 } 277 #if defined(SORT_THREADS) 278 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 279 if (ncpu < 1) 280 ncpu = 1; 281 else if(ncpu > 32) 282 ncpu = 32; 283 284 nthreads = ncpu; 285 #endif 286 287 free_memory = (unsigned long long) pages * (unsigned long long) psize; 288 available_free_memory = free_memory / 2; 289 290 if (available_free_memory < 1024) 291 available_free_memory = 1024; 292 } 293 294 /* 295 * Convert "plain" symbol to wide symbol, with default value. 296 */ 297 static void 298 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 299 { 300 301 if (wc && c) { 302 int res; 303 304 res = mbtowc(wc, c, mb_cur_max); 305 if (res < 1) 306 *wc = def; 307 } 308 } 309 310 /* 311 * Set current locale symbols. 312 */ 313 static void 314 set_locale(void) 315 { 316 struct lconv *lc; 317 const char *locale; 318 319 setlocale(LC_ALL, ""); 320 321 mb_cur_max = MB_CUR_MAX; 322 323 lc = localeconv(); 324 325 if (lc) { 326 /* obtain LC_NUMERIC info */ 327 /* Convert to wide char form */ 328 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 329 symbol_decimal_point); 330 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 331 symbol_thousands_sep); 332 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 333 symbol_positive_sign); 334 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 335 symbol_negative_sign); 336 } 337 338 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 339 gnusort_numeric_compatibility = true; 340 341 locale = setlocale(LC_COLLATE, NULL); 342 343 if (locale) { 344 char *tmpl; 345 const char *cclocale; 346 347 tmpl = sort_strdup(locale); 348 cclocale = setlocale(LC_COLLATE, "C"); 349 if (cclocale && !strcmp(cclocale, tmpl)) 350 byte_sort = true; 351 else { 352 const char *pclocale; 353 354 pclocale = setlocale(LC_COLLATE, "POSIX"); 355 if (pclocale && !strcmp(pclocale, tmpl)) 356 byte_sort = true; 357 } 358 setlocale(LC_COLLATE, tmpl); 359 sort_free(tmpl); 360 } 361 } 362 363 /* 364 * Set directory temporary files. 365 */ 366 static void 367 set_tmpdir(void) 368 { 369 char *td; 370 371 td = getenv("TMPDIR"); 372 if (td != NULL) 373 tmpdir = sort_strdup(td); 374 } 375 376 /* 377 * Parse -S option. 378 */ 379 static unsigned long long 380 parse_memory_buffer_value(const char *value) 381 { 382 383 if (value == NULL) 384 return (available_free_memory); 385 else { 386 char *endptr; 387 unsigned long long membuf; 388 389 endptr = NULL; 390 errno = 0; 391 membuf = strtoll(value, &endptr, 10); 392 393 if (errno != 0) { 394 warn("%s",getstr(4)); 395 membuf = available_free_memory; 396 } else { 397 switch (*endptr){ 398 case 'Y': 399 membuf *= 1024; 400 /* FALLTHROUGH */ 401 case 'Z': 402 membuf *= 1024; 403 /* FALLTHROUGH */ 404 case 'E': 405 membuf *= 1024; 406 /* FALLTHROUGH */ 407 case 'P': 408 membuf *= 1024; 409 /* FALLTHROUGH */ 410 case 'T': 411 membuf *= 1024; 412 /* FALLTHROUGH */ 413 case 'G': 414 membuf *= 1024; 415 /* FALLTHROUGH */ 416 case 'M': 417 membuf *= 1024; 418 /* FALLTHROUGH */ 419 case '\0': 420 case 'K': 421 membuf *= 1024; 422 /* FALLTHROUGH */ 423 case 'b': 424 break; 425 case '%': 426 membuf = (available_free_memory * membuf) / 427 100; 428 break; 429 default: 430 warnc(EINVAL, "%s", optarg); 431 membuf = available_free_memory; 432 } 433 } 434 return (membuf); 435 } 436 } 437 438 /* 439 * Signal handler that clears the temporary files. 440 */ 441 static void 442 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 443 void *context __unused) 444 { 445 446 clear_tmp_files(); 447 exit(-1); 448 } 449 450 /* 451 * Set signal handler on panic signals. 452 */ 453 static void 454 set_signal_handler(void) 455 { 456 struct sigaction sa; 457 458 memset(&sa, 0, sizeof(sa)); 459 sa.sa_sigaction = &sig_handler; 460 sa.sa_flags = SA_SIGINFO; 461 462 if (sigaction(SIGTERM, &sa, NULL) < 0) { 463 perror("sigaction"); 464 return; 465 } 466 if (sigaction(SIGHUP, &sa, NULL) < 0) { 467 perror("sigaction"); 468 return; 469 } 470 if (sigaction(SIGINT, &sa, NULL) < 0) { 471 perror("sigaction"); 472 return; 473 } 474 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 475 perror("sigaction"); 476 return; 477 } 478 if (sigaction(SIGABRT, &sa, NULL) < 0) { 479 perror("sigaction"); 480 return; 481 } 482 if (sigaction(SIGBUS, &sa, NULL) < 0) { 483 perror("sigaction"); 484 return; 485 } 486 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 487 perror("sigaction"); 488 return; 489 } 490 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 491 perror("sigaction"); 492 return; 493 } 494 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 495 perror("sigaction"); 496 return; 497 } 498 } 499 500 /* 501 * Print "unknown" message and exit with status 2. 502 */ 503 static void 504 unknown(const char *what) 505 { 506 507 errx(2, "%s: %s", getstr(3), what); 508 } 509 510 /* 511 * Check whether contradictory input options are used. 512 */ 513 static void 514 check_mutually_exclusive_flags(char c, bool *mef_flags) 515 { 516 int fo_index, mec; 517 bool found_others, found_this; 518 519 found_others = found_this = false; 520 fo_index = 0; 521 522 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 523 mec = mutually_exclusive_flags[i]; 524 525 if (mec != c) { 526 if (mef_flags[i]) { 527 if (found_this) 528 errx(1, "%c:%c: %s", c, mec, getstr(1)); 529 found_others = true; 530 fo_index = i; 531 } 532 } else { 533 if (found_others) 534 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 535 mef_flags[i] = true; 536 found_this = true; 537 } 538 } 539 } 540 541 /* 542 * Initialise sort opts data. 543 */ 544 static void 545 set_sort_opts(void) 546 { 547 548 memset(&default_sort_mods_object, 0, 549 sizeof(default_sort_mods_object)); 550 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 551 default_sort_mods_object.func = 552 get_sort_func(&default_sort_mods_object); 553 } 554 555 /* 556 * Set a sort modifier on a sort modifiers object. 557 */ 558 static bool 559 set_sort_modifier(struct sort_mods *sm, int c) 560 { 561 562 if (sm == NULL) 563 return (true); 564 565 switch (c){ 566 case 'b': 567 sm->bflag = true; 568 break; 569 case 'd': 570 sm->dflag = true; 571 break; 572 case 'f': 573 sm->fflag = true; 574 break; 575 case 'g': 576 sm->gflag = true; 577 need_hint = true; 578 break; 579 case 'i': 580 sm->iflag = true; 581 break; 582 case 'R': 583 sm->Rflag = true; 584 need_hint = true; 585 need_random = true; 586 break; 587 case 'M': 588 initialise_months(); 589 sm->Mflag = true; 590 need_hint = true; 591 break; 592 case 'n': 593 sm->nflag = true; 594 need_hint = true; 595 print_symbols_on_debug = true; 596 break; 597 case 'r': 598 sm->rflag = true; 599 break; 600 case 'V': 601 sm->Vflag = true; 602 break; 603 case 'h': 604 sm->hflag = true; 605 need_hint = true; 606 print_symbols_on_debug = true; 607 break; 608 default: 609 return (false); 610 } 611 612 sort_opts_vals.complex_sort = true; 613 sm->func = get_sort_func(sm); 614 return (true); 615 } 616 617 /* 618 * Parse POS in -k option. 619 */ 620 static int 621 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 622 { 623 regmatch_t pmatch[4]; 624 regex_t re; 625 char *c, *f; 626 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 627 size_t len, nmatch; 628 int ret; 629 630 ret = -1; 631 nmatch = 4; 632 c = f = NULL; 633 634 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 635 return (-1); 636 637 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 638 goto end; 639 640 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 641 goto end; 642 643 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 644 goto end; 645 646 len = pmatch[1].rm_eo - pmatch[1].rm_so; 647 f = sort_malloc((len + 1) * sizeof(char)); 648 649 strncpy(f, s + pmatch[1].rm_so, len); 650 f[len] = '\0'; 651 652 if (second) { 653 errno = 0; 654 ks->f2 = (size_t) strtoul(f, NULL, 10); 655 if (errno != 0) 656 err(2, "-k"); 657 if (ks->f2 == 0) { 658 warn("%s",getstr(5)); 659 goto end; 660 } 661 } else { 662 errno = 0; 663 ks->f1 = (size_t) strtoul(f, NULL, 10); 664 if (errno != 0) 665 err(2, "-k"); 666 if (ks->f1 == 0) { 667 warn("%s",getstr(5)); 668 goto end; 669 } 670 } 671 672 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 673 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 674 c = sort_malloc((len + 1) * sizeof(char)); 675 676 strncpy(c, s + pmatch[2].rm_so + 1, len); 677 c[len] = '\0'; 678 679 if (second) { 680 errno = 0; 681 ks->c2 = (size_t) strtoul(c, NULL, 10); 682 if (errno != 0) 683 err(2, "-k"); 684 } else { 685 errno = 0; 686 ks->c1 = (size_t) strtoul(c, NULL, 10); 687 if (errno != 0) 688 err(2, "-k"); 689 if (ks->c1 == 0) { 690 warn("%s",getstr(6)); 691 goto end; 692 } 693 } 694 } else { 695 if (second) 696 ks->c2 = 0; 697 else 698 ks->c1 = 1; 699 } 700 701 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 702 regoff_t i = 0; 703 704 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 705 check_mutually_exclusive_flags(s[i], mef_flags); 706 if (s[i] == 'b') { 707 if (second) 708 ks->pos2b = true; 709 else 710 ks->pos1b = true; 711 } else if (!set_sort_modifier(&(ks->sm), s[i])) 712 goto end; 713 } 714 } 715 716 ret = 0; 717 718 end: 719 720 if (c) 721 sort_free(c); 722 if (f) 723 sort_free(f); 724 regfree(&re); 725 726 return (ret); 727 } 728 729 /* 730 * Parse -k option value. 731 */ 732 static int 733 parse_k(const char *s, struct key_specs *ks) 734 { 735 int ret = -1; 736 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 737 { false, false, false, false, false, false }; 738 739 if (s && *s) { 740 char *sptr; 741 742 sptr = strchr(s, ','); 743 if (sptr) { 744 size_t size1; 745 char *pos1, *pos2; 746 747 size1 = sptr - s; 748 749 if (size1 < 1) 750 return (-1); 751 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 752 753 strncpy(pos1, s, size1); 754 pos1[size1] = '\0'; 755 756 ret = parse_pos(pos1, ks, mef_flags, false); 757 758 sort_free(pos1); 759 if (ret < 0) 760 return (ret); 761 762 pos2 = sort_strdup(sptr + 1); 763 ret = parse_pos(pos2, ks, mef_flags, true); 764 sort_free(pos2); 765 } else 766 ret = parse_pos(s, ks, mef_flags, false); 767 } 768 769 return (ret); 770 } 771 772 /* 773 * Parse POS in +POS -POS option. 774 */ 775 static int 776 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 777 { 778 regex_t re; 779 regmatch_t pmatch[4]; 780 char *c, *f; 781 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 782 int ret; 783 size_t len, nmatch; 784 785 ret = -1; 786 nmatch = 4; 787 c = f = NULL; 788 *nc = *nf = 0; 789 790 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 791 return (-1); 792 793 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 794 goto end; 795 796 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 797 goto end; 798 799 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 800 goto end; 801 802 len = pmatch[1].rm_eo - pmatch[1].rm_so; 803 f = sort_malloc((len + 1) * sizeof(char)); 804 805 strncpy(f, s + pmatch[1].rm_so, len); 806 f[len] = '\0'; 807 808 errno = 0; 809 *nf = (size_t) strtoul(f, NULL, 10); 810 if (errno != 0) 811 errx(2, "%s", getstr(11)); 812 813 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 814 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 815 c = sort_malloc((len + 1) * sizeof(char)); 816 817 strncpy(c, s + pmatch[2].rm_so + 1, len); 818 c[len] = '\0'; 819 820 errno = 0; 821 *nc = (size_t) strtoul(c, NULL, 10); 822 if (errno != 0) 823 errx(2, "%s", getstr(11)); 824 } 825 826 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 827 828 len = pmatch[3].rm_eo - pmatch[3].rm_so; 829 830 strncpy(sopts, s + pmatch[3].rm_so, len); 831 sopts[len] = '\0'; 832 } 833 834 ret = 0; 835 836 end: 837 if (c) 838 sort_free(c); 839 if (f) 840 sort_free(f); 841 regfree(&re); 842 843 return (ret); 844 } 845 846 /* 847 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 848 */ 849 void 850 fix_obsolete_keys(int *argc, char **argv) 851 { 852 char sopt[129]; 853 854 for (int i = 1; i < *argc; i++) { 855 char *arg1; 856 857 arg1 = argv[i]; 858 859 if (strcmp(arg1, "--") == 0) { 860 /* Following arguments are treated as filenames. */ 861 break; 862 } 863 864 if (strlen(arg1) > 1 && arg1[0] == '+') { 865 int c1, f1; 866 char sopts1[128]; 867 868 sopts1[0] = 0; 869 c1 = f1 = 0; 870 871 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 872 continue; 873 else { 874 f1 += 1; 875 c1 += 1; 876 if (i + 1 < *argc) { 877 char *arg2 = argv[i + 1]; 878 879 if (strlen(arg2) > 1 && 880 arg2[0] == '-') { 881 int c2, f2; 882 char sopts2[128]; 883 884 sopts2[0] = 0; 885 c2 = f2 = 0; 886 887 if (parse_pos_obs(arg2 + 1, 888 &f2, &c2, sopts2) >= 0) { 889 if (c2 > 0) 890 f2 += 1; 891 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 892 f1, c1, sopts1, f2, c2, sopts2); 893 argv[i] = sort_strdup(sopt); 894 for (int j = i + 1; j + 1 < *argc; j++) 895 argv[j] = argv[j + 1]; 896 *argc -= 1; 897 continue; 898 } 899 } 900 } 901 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 902 argv[i] = sort_strdup(sopt); 903 } 904 } 905 } 906 } 907 908 /* 909 * Seed random sort 910 */ 911 static void 912 get_random_seed(const char *random_source) 913 { 914 char randseed[32]; 915 struct stat fsb, rsb; 916 ssize_t rd; 917 int rsfd; 918 919 rsfd = -1; 920 rd = sizeof(randseed); 921 922 if (random_source == NULL) { 923 if (getentropy(randseed, sizeof(randseed)) < 0) 924 err(EX_SOFTWARE, "getentropy"); 925 goto out; 926 } 927 928 rsfd = open(random_source, O_RDONLY | O_CLOEXEC); 929 if (rsfd < 0) 930 err(EX_NOINPUT, "open: %s", random_source); 931 932 if (fstat(rsfd, &fsb) != 0) 933 err(EX_SOFTWARE, "fstat"); 934 935 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode)) 936 err(EX_USAGE, 937 "random seed isn't a regular file or /dev/random"); 938 939 /* 940 * Regular files: read up to maximum seed size and explicitly 941 * reject longer files. 942 */ 943 if (S_ISREG(fsb.st_mode)) { 944 if (fsb.st_size > (off_t)sizeof(randseed)) 945 errx(EX_USAGE, "random seed is too large (%jd >" 946 " %zu)!", (intmax_t)fsb.st_size, 947 sizeof(randseed)); 948 else if (fsb.st_size < 1) 949 errx(EX_USAGE, "random seed is too small (" 950 "0 bytes)"); 951 952 memset(randseed, 0, sizeof(randseed)); 953 954 rd = read(rsfd, randseed, fsb.st_size); 955 if (rd < 0) 956 err(EX_SOFTWARE, "reading random seed file %s", 957 random_source); 958 if (rd < (ssize_t)fsb.st_size) 959 errx(EX_SOFTWARE, "short read from %s", random_source); 960 } else if (S_ISCHR(fsb.st_mode)) { 961 if (stat("/dev/random", &rsb) < 0) 962 err(EX_SOFTWARE, "stat"); 963 964 if (fsb.st_dev != rsb.st_dev || 965 fsb.st_ino != rsb.st_ino) 966 errx(EX_USAGE, "random seed is a character " 967 "device other than /dev/random"); 968 969 if (getentropy(randseed, sizeof(randseed)) < 0) 970 err(EX_SOFTWARE, "getentropy"); 971 } 972 973 out: 974 if (rsfd >= 0) 975 close(rsfd); 976 977 MD5Init(&md5_ctx); 978 MD5Update(&md5_ctx, randseed, rd); 979 } 980 981 /* 982 * Main function. 983 */ 984 int 985 main(int argc, char **argv) 986 { 987 char *outfile, *real_outfile; 988 char *random_source = NULL; 989 int c, result; 990 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 991 { false, false, false, false, false, false }; 992 993 result = 0; 994 outfile = sort_strdup("-"); 995 real_outfile = NULL; 996 997 struct sort_mods *sm = &default_sort_mods_object; 998 999 init_tmp_files(); 1000 1001 set_signal_handler(); 1002 1003 set_hw_params(); 1004 set_locale(); 1005 set_tmpdir(); 1006 set_sort_opts(); 1007 1008 fix_obsolete_keys(&argc, argv); 1009 1010 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1011 != -1)) { 1012 1013 check_mutually_exclusive_flags(c, mef_flags); 1014 1015 if (!set_sort_modifier(sm, c)) { 1016 1017 switch (c) { 1018 case 'c': 1019 sort_opts_vals.cflag = true; 1020 if (optarg) { 1021 if (!strcmp(optarg, "diagnose-first")) 1022 ; 1023 else if (!strcmp(optarg, "silent") || 1024 !strcmp(optarg, "quiet")) 1025 sort_opts_vals.csilentflag = true; 1026 else if (*optarg) 1027 unknown(optarg); 1028 } 1029 break; 1030 case 'C': 1031 sort_opts_vals.cflag = true; 1032 sort_opts_vals.csilentflag = true; 1033 break; 1034 case 'k': 1035 { 1036 sort_opts_vals.complex_sort = true; 1037 sort_opts_vals.kflag = true; 1038 1039 keys_num++; 1040 keys = sort_realloc(keys, keys_num * 1041 sizeof(struct key_specs)); 1042 memset(&(keys[keys_num - 1]), 0, 1043 sizeof(struct key_specs)); 1044 1045 if (parse_k(optarg, &(keys[keys_num - 1])) 1046 < 0) { 1047 errc(2, EINVAL, "-k %s", optarg); 1048 } 1049 1050 break; 1051 } 1052 case 'm': 1053 sort_opts_vals.mflag = true; 1054 break; 1055 case 'o': 1056 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1057 strcpy(outfile, optarg); 1058 break; 1059 case 's': 1060 sort_opts_vals.sflag = true; 1061 break; 1062 case 'S': 1063 available_free_memory = 1064 parse_memory_buffer_value(optarg); 1065 break; 1066 case 'T': 1067 tmpdir = sort_strdup(optarg); 1068 break; 1069 case 't': 1070 while (strlen(optarg) > 1) { 1071 if (optarg[0] != '\\') { 1072 errc(2, EINVAL, "%s", optarg); 1073 } 1074 optarg += 1; 1075 if (*optarg == '0') { 1076 *optarg = 0; 1077 break; 1078 } 1079 } 1080 sort_opts_vals.tflag = true; 1081 sort_opts_vals.field_sep = btowc(optarg[0]); 1082 if (sort_opts_vals.field_sep == WEOF) { 1083 errno = EINVAL; 1084 err(2, NULL); 1085 } 1086 if (!gnusort_numeric_compatibility) { 1087 if (symbol_decimal_point == sort_opts_vals.field_sep) 1088 symbol_decimal_point = WEOF; 1089 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1090 symbol_thousands_sep = WEOF; 1091 if (symbol_negative_sign == sort_opts_vals.field_sep) 1092 symbol_negative_sign = WEOF; 1093 if (symbol_positive_sign == sort_opts_vals.field_sep) 1094 symbol_positive_sign = WEOF; 1095 } 1096 break; 1097 case 'u': 1098 sort_opts_vals.uflag = true; 1099 /* stable sort for the correct unique val */ 1100 sort_opts_vals.sflag = true; 1101 break; 1102 case 'z': 1103 sort_opts_vals.zflag = true; 1104 break; 1105 case SORT_OPT: 1106 if (optarg) { 1107 if (!strcmp(optarg, "general-numeric")) 1108 set_sort_modifier(sm, 'g'); 1109 else if (!strcmp(optarg, "human-numeric")) 1110 set_sort_modifier(sm, 'h'); 1111 else if (!strcmp(optarg, "numeric")) 1112 set_sort_modifier(sm, 'n'); 1113 else if (!strcmp(optarg, "month")) 1114 set_sort_modifier(sm, 'M'); 1115 else if (!strcmp(optarg, "random")) 1116 set_sort_modifier(sm, 'R'); 1117 else 1118 unknown(optarg); 1119 } 1120 break; 1121 #if defined(SORT_THREADS) 1122 case PARALLEL_OPT: 1123 nthreads = (size_t)(atoi(optarg)); 1124 if (nthreads < 1) 1125 nthreads = 1; 1126 if (nthreads > 1024) 1127 nthreads = 1024; 1128 break; 1129 #endif 1130 case QSORT_OPT: 1131 sort_opts_vals.sort_method = SORT_QSORT; 1132 break; 1133 case MERGESORT_OPT: 1134 sort_opts_vals.sort_method = SORT_MERGESORT; 1135 break; 1136 case MMAP_OPT: 1137 use_mmap = true; 1138 break; 1139 case HEAPSORT_OPT: 1140 sort_opts_vals.sort_method = SORT_HEAPSORT; 1141 break; 1142 case RADIXSORT_OPT: 1143 sort_opts_vals.sort_method = SORT_RADIXSORT; 1144 break; 1145 case RANDOMSOURCE_OPT: 1146 random_source = strdup(optarg); 1147 break; 1148 case COMPRESSPROGRAM_OPT: 1149 compress_program = strdup(optarg); 1150 break; 1151 case FF_OPT: 1152 read_fns_from_file0(optarg); 1153 break; 1154 case BS_OPT: 1155 { 1156 errno = 0; 1157 long mof = strtol(optarg, NULL, 10); 1158 if (errno != 0) 1159 err(2, "--batch-size"); 1160 if (mof >= 2) 1161 max_open_files = (size_t) mof + 1; 1162 } 1163 break; 1164 case VERSION_OPT: 1165 printf("%s\n", VERSION); 1166 exit(EXIT_SUCCESS); 1167 /* NOTREACHED */ 1168 break; 1169 case DEBUG_OPT: 1170 debug_sort = true; 1171 break; 1172 case HELP_OPT: 1173 usage(false); 1174 /* NOTREACHED */ 1175 break; 1176 default: 1177 usage(true); 1178 /* NOTREACHED */ 1179 } 1180 } 1181 } 1182 1183 argc -= optind; 1184 argv += optind; 1185 1186 if (argv_from_file0) { 1187 argc = argc_from_file0; 1188 argv = argv_from_file0; 1189 } 1190 1191 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1192 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1193 1194 if (keys_num == 0) { 1195 keys_num = 1; 1196 keys = sort_realloc(keys, sizeof(struct key_specs)); 1197 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1198 keys[0].c1 = 1; 1199 keys[0].pos1b = default_sort_mods->bflag; 1200 keys[0].pos2b = default_sort_mods->bflag; 1201 memcpy(&(keys[0].sm), default_sort_mods, 1202 sizeof(struct sort_mods)); 1203 } 1204 1205 for (size_t i = 0; i < keys_num; i++) { 1206 struct key_specs *ks; 1207 1208 ks = &(keys[i]); 1209 1210 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1211 !(ks->pos2b)) { 1212 ks->pos1b = sm->bflag; 1213 ks->pos2b = sm->bflag; 1214 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1215 } 1216 1217 ks->sm.func = get_sort_func(&(ks->sm)); 1218 } 1219 1220 if (debug_sort) { 1221 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1222 #if defined(SORT_THREADS) 1223 printf("Number of CPUs: %d\n",(int)ncpu); 1224 nthreads = 1; 1225 #endif 1226 printf("Using collate rules of %s locale\n", 1227 setlocale(LC_COLLATE, NULL)); 1228 if (byte_sort) 1229 printf("Byte sort is used\n"); 1230 if (print_symbols_on_debug) { 1231 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1232 if (symbol_thousands_sep) 1233 printf("Thousands separator: <%lc>\n", 1234 symbol_thousands_sep); 1235 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1236 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1237 } 1238 } 1239 1240 if (need_random) 1241 get_random_seed(random_source); 1242 1243 /* Case when the outfile equals one of the input files: */ 1244 if (strcmp(outfile, "-")) { 1245 1246 for(int i = 0; i < argc; ++i) { 1247 if (strcmp(argv[i], outfile) == 0) { 1248 real_outfile = sort_strdup(outfile); 1249 for(;;) { 1250 char* tmp = sort_malloc(strlen(outfile) + 1251 strlen(".tmp") + 1); 1252 1253 strcpy(tmp, outfile); 1254 strcpy(tmp + strlen(tmp), ".tmp"); 1255 sort_free(outfile); 1256 outfile = tmp; 1257 if (access(outfile, F_OK) < 0) 1258 break; 1259 } 1260 tmp_file_atexit(outfile); 1261 } 1262 } 1263 } 1264 1265 #if defined(SORT_THREADS) 1266 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1267 nthreads = 1; 1268 #endif 1269 1270 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1271 struct file_list fl; 1272 struct sort_list list; 1273 1274 sort_list_init(&list); 1275 file_list_init(&fl, true); 1276 1277 if (argc < 1) 1278 procfile("-", &list, &fl); 1279 else { 1280 while (argc > 0) { 1281 procfile(*argv, &list, &fl); 1282 --argc; 1283 ++argv; 1284 } 1285 } 1286 1287 if (fl.count < 1) 1288 sort_list_to_file(&list, outfile); 1289 else { 1290 if (list.count > 0) { 1291 char *flast = new_tmp_file_name(); 1292 1293 sort_list_to_file(&list, flast); 1294 file_list_add(&fl, flast, false); 1295 } 1296 merge_files(&fl, outfile); 1297 } 1298 1299 file_list_clean(&fl); 1300 1301 /* 1302 * We are about to exit the program, so we can ignore 1303 * the clean-up for speed 1304 * 1305 * sort_list_clean(&list); 1306 */ 1307 1308 } else if (sort_opts_vals.cflag) { 1309 result = (argc == 0) ? (check("-")) : (check(*argv)); 1310 } else if (sort_opts_vals.mflag) { 1311 struct file_list fl; 1312 1313 file_list_init(&fl, false); 1314 /* No file arguments remaining means "read from stdin." */ 1315 if (argc == 0) 1316 file_list_add(&fl, "-", true); 1317 else 1318 file_list_populate(&fl, argc, argv, true); 1319 merge_files(&fl, outfile); 1320 file_list_clean(&fl); 1321 } 1322 1323 if (real_outfile) { 1324 unlink(real_outfile); 1325 if (rename(outfile, real_outfile) < 0) 1326 err(2, NULL); 1327 sort_free(real_outfile); 1328 } 1329 1330 sort_free(outfile); 1331 1332 return (result); 1333 } 1334