1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/stat.h> 34 #include <sys/sysctl.h> 35 #include <sys/types.h> 36 37 #include <err.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <getopt.h> 41 #include <limits.h> 42 #include <locale.h> 43 #include <md5.h> 44 #include <regex.h> 45 #include <signal.h> 46 #include <stdbool.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <wchar.h> 52 #include <wctype.h> 53 54 #include "coll.h" 55 #include "file.h" 56 #include "sort.h" 57 58 #ifndef WITHOUT_NLS 59 #include <nl_types.h> 60 nl_catd catalog; 61 #endif 62 63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 64 65 static bool need_random; 66 67 MD5_CTX md5_ctx; 68 69 /* 70 * Default messages to use when NLS is disabled or no catalogue 71 * is found. 72 */ 73 const char *nlsstr[] = { "", 74 /* 1*/"mutually exclusive flags", 75 /* 2*/"extra argument not allowed with -c", 76 /* 3*/"Unknown feature", 77 /* 4*/"Wrong memory buffer specification", 78 /* 5*/"0 field in key specs", 79 /* 6*/"0 column in key specs", 80 /* 7*/"Wrong file mode", 81 /* 8*/"Cannot open file for reading", 82 /* 9*/"Radix sort cannot be used with these sort options", 83 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 84 /*11*/"Invalid key position", 85 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 86 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 87 "[-o outfile] [--batch-size size] [--files0-from file] " 88 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 89 "[--mmap] " 90 #if defined(SORT_THREADS) 91 "[--parallel thread_no] " 92 #endif 93 "[--human-numeric-sort] " 94 "[--version-sort] [--random-sort [--random-source file]] " 95 "[--compress-program program] [file ...]\n" }; 96 97 struct sort_opts sort_opts_vals; 98 99 bool debug_sort; 100 bool need_hint; 101 102 size_t mb_cur_max; 103 104 #if defined(SORT_THREADS) 105 unsigned int ncpu = 1; 106 size_t nthreads = 1; 107 #endif 108 109 static bool gnusort_numeric_compatibility; 110 111 static struct sort_mods default_sort_mods_object; 112 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 113 114 static bool print_symbols_on_debug; 115 116 /* 117 * Arguments from file (when file0-from option is used: 118 */ 119 static size_t argc_from_file0 = (size_t)-1; 120 static char **argv_from_file0; 121 122 /* 123 * Placeholder symbols for options which have no single-character equivalent 124 */ 125 enum 126 { 127 SORT_OPT = CHAR_MAX + 1, 128 HELP_OPT, 129 FF_OPT, 130 BS_OPT, 131 VERSION_OPT, 132 DEBUG_OPT, 133 #if defined(SORT_THREADS) 134 PARALLEL_OPT, 135 #endif 136 RANDOMSOURCE_OPT, 137 COMPRESSPROGRAM_OPT, 138 QSORT_OPT, 139 MERGESORT_OPT, 140 HEAPSORT_OPT, 141 RADIXSORT_OPT, 142 MMAP_OPT 143 }; 144 145 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 146 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 147 148 static struct option long_options[] = { 149 { "batch-size", required_argument, NULL, BS_OPT }, 150 { "buffer-size", required_argument, NULL, 'S' }, 151 { "check", optional_argument, NULL, 'c' }, 152 { "check=silent|quiet", optional_argument, NULL, 'C' }, 153 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 154 { "debug", no_argument, NULL, DEBUG_OPT }, 155 { "dictionary-order", no_argument, NULL, 'd' }, 156 { "field-separator", required_argument, NULL, 't' }, 157 { "files0-from", required_argument, NULL, FF_OPT }, 158 { "general-numeric-sort", no_argument, NULL, 'g' }, 159 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 160 { "help",no_argument, NULL, HELP_OPT }, 161 { "human-numeric-sort", no_argument, NULL, 'h' }, 162 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 163 { "ignore-case", no_argument, NULL, 'f' }, 164 { "ignore-nonprinting", no_argument, NULL, 'i' }, 165 { "key", required_argument, NULL, 'k' }, 166 { "merge", no_argument, NULL, 'm' }, 167 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 168 { "mmap", no_argument, NULL, MMAP_OPT }, 169 { "month-sort", no_argument, NULL, 'M' }, 170 { "numeric-sort", no_argument, NULL, 'n' }, 171 { "output", required_argument, NULL, 'o' }, 172 #if defined(SORT_THREADS) 173 { "parallel", required_argument, NULL, PARALLEL_OPT }, 174 #endif 175 { "qsort", no_argument, NULL, QSORT_OPT }, 176 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 177 { "random-sort", no_argument, NULL, 'R' }, 178 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 179 { "reverse", no_argument, NULL, 'r' }, 180 { "sort", required_argument, NULL, SORT_OPT }, 181 { "stable", no_argument, NULL, 's' }, 182 { "temporary-directory",required_argument, NULL, 'T' }, 183 { "unique", no_argument, NULL, 'u' }, 184 { "version", no_argument, NULL, VERSION_OPT }, 185 { "version-sort",no_argument, NULL, 'V' }, 186 { "zero-terminated", no_argument, NULL, 'z' }, 187 { NULL, no_argument, NULL, 0 } 188 }; 189 190 void fix_obsolete_keys(int *argc, char **argv); 191 192 /* 193 * Check where sort modifier is present 194 */ 195 static bool 196 sort_modifier_empty(struct sort_mods *sm) 197 { 198 199 if (sm == NULL) 200 return (true); 201 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 202 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); 203 } 204 205 /* 206 * Print out usage text. 207 */ 208 static void 209 usage(bool opt_err) 210 { 211 FILE *out; 212 213 out = opt_err ? stderr : stdout; 214 215 fprintf(out, getstr(12), getprogname()); 216 if (opt_err) 217 exit(2); 218 exit(0); 219 } 220 221 /* 222 * Read input file names from a file (file0-from option). 223 */ 224 static void 225 read_fns_from_file0(const char *fn) 226 { 227 FILE *f; 228 char *line = NULL; 229 size_t linesize = 0; 230 ssize_t linelen; 231 232 if (fn == NULL) 233 return; 234 235 f = fopen(fn, "r"); 236 if (f == NULL) 237 err(2, "%s", fn); 238 239 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 240 if (*line != '\0') { 241 if (argc_from_file0 == (size_t) - 1) 242 argc_from_file0 = 0; 243 ++argc_from_file0; 244 argv_from_file0 = sort_realloc(argv_from_file0, 245 argc_from_file0 * sizeof(char *)); 246 if (argv_from_file0 == NULL) 247 err(2, NULL); 248 argv_from_file0[argc_from_file0 - 1] = line; 249 } else { 250 free(line); 251 } 252 line = NULL; 253 linesize = 0; 254 } 255 if (ferror(f)) 256 err(2, "%s: getdelim", fn); 257 258 closefile(f, fn); 259 } 260 261 /* 262 * Check how much RAM is available for the sort. 263 */ 264 static void 265 set_hw_params(void) 266 { 267 long pages, psize; 268 269 #if defined(SORT_THREADS) 270 ncpu = 1; 271 #endif 272 273 pages = sysconf(_SC_PHYS_PAGES); 274 if (pages < 1) { 275 perror("sysconf pages"); 276 pages = 1; 277 } 278 psize = sysconf(_SC_PAGESIZE); 279 if (psize < 1) { 280 perror("sysconf psize"); 281 psize = 4096; 282 } 283 #if defined(SORT_THREADS) 284 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 285 if (ncpu < 1) 286 ncpu = 1; 287 else if(ncpu > 32) 288 ncpu = 32; 289 290 nthreads = ncpu; 291 #endif 292 293 free_memory = (unsigned long long) pages * (unsigned long long) psize; 294 available_free_memory = free_memory / 2; 295 296 if (available_free_memory < 1024) 297 available_free_memory = 1024; 298 } 299 300 /* 301 * Convert "plain" symbol to wide symbol, with default value. 302 */ 303 static void 304 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 305 { 306 307 if (wc && c) { 308 int res; 309 310 res = mbtowc(wc, c, mb_cur_max); 311 if (res < 1) 312 *wc = def; 313 } 314 } 315 316 /* 317 * Set current locale symbols. 318 */ 319 static void 320 set_locale(void) 321 { 322 struct lconv *lc; 323 const char *locale; 324 325 setlocale(LC_ALL, ""); 326 327 mb_cur_max = MB_CUR_MAX; 328 329 lc = localeconv(); 330 331 if (lc) { 332 /* obtain LC_NUMERIC info */ 333 /* Convert to wide char form */ 334 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 335 symbol_decimal_point); 336 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 337 symbol_thousands_sep); 338 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 339 symbol_positive_sign); 340 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 341 symbol_negative_sign); 342 } 343 344 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 345 gnusort_numeric_compatibility = true; 346 347 locale = setlocale(LC_COLLATE, NULL); 348 349 if (locale) { 350 char *tmpl; 351 const char *cclocale; 352 353 tmpl = sort_strdup(locale); 354 cclocale = setlocale(LC_COLLATE, "C"); 355 if (cclocale && !strcmp(cclocale, tmpl)) 356 byte_sort = true; 357 else { 358 const char *pclocale; 359 360 pclocale = setlocale(LC_COLLATE, "POSIX"); 361 if (pclocale && !strcmp(pclocale, tmpl)) 362 byte_sort = true; 363 } 364 setlocale(LC_COLLATE, tmpl); 365 sort_free(tmpl); 366 } 367 } 368 369 /* 370 * Set directory temporary files. 371 */ 372 static void 373 set_tmpdir(void) 374 { 375 char *td; 376 377 td = getenv("TMPDIR"); 378 if (td != NULL) 379 tmpdir = sort_strdup(td); 380 } 381 382 /* 383 * Parse -S option. 384 */ 385 static unsigned long long 386 parse_memory_buffer_value(const char *value) 387 { 388 389 if (value == NULL) 390 return (available_free_memory); 391 else { 392 char *endptr; 393 unsigned long long membuf; 394 395 endptr = NULL; 396 errno = 0; 397 membuf = strtoll(value, &endptr, 10); 398 399 if (errno != 0) { 400 warn("%s",getstr(4)); 401 membuf = available_free_memory; 402 } else { 403 switch (*endptr){ 404 case 'Y': 405 membuf *= 1024; 406 /* FALLTHROUGH */ 407 case 'Z': 408 membuf *= 1024; 409 /* FALLTHROUGH */ 410 case 'E': 411 membuf *= 1024; 412 /* FALLTHROUGH */ 413 case 'P': 414 membuf *= 1024; 415 /* FALLTHROUGH */ 416 case 'T': 417 membuf *= 1024; 418 /* FALLTHROUGH */ 419 case 'G': 420 membuf *= 1024; 421 /* FALLTHROUGH */ 422 case 'M': 423 membuf *= 1024; 424 /* FALLTHROUGH */ 425 case '\0': 426 case 'K': 427 membuf *= 1024; 428 /* FALLTHROUGH */ 429 case 'b': 430 break; 431 case '%': 432 membuf = (available_free_memory * membuf) / 433 100; 434 break; 435 default: 436 warnc(EINVAL, "%s", optarg); 437 membuf = available_free_memory; 438 } 439 } 440 return (membuf); 441 } 442 } 443 444 /* 445 * Signal handler that clears the temporary files. 446 */ 447 static void 448 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 449 void *context __unused) 450 { 451 452 clear_tmp_files(); 453 exit(-1); 454 } 455 456 /* 457 * Set signal handler on panic signals. 458 */ 459 static void 460 set_signal_handler(void) 461 { 462 struct sigaction sa; 463 464 memset(&sa, 0, sizeof(sa)); 465 sa.sa_sigaction = &sig_handler; 466 sa.sa_flags = SA_SIGINFO; 467 468 if (sigaction(SIGTERM, &sa, NULL) < 0) { 469 perror("sigaction"); 470 return; 471 } 472 if (sigaction(SIGHUP, &sa, NULL) < 0) { 473 perror("sigaction"); 474 return; 475 } 476 if (sigaction(SIGINT, &sa, NULL) < 0) { 477 perror("sigaction"); 478 return; 479 } 480 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 481 perror("sigaction"); 482 return; 483 } 484 if (sigaction(SIGABRT, &sa, NULL) < 0) { 485 perror("sigaction"); 486 return; 487 } 488 if (sigaction(SIGBUS, &sa, NULL) < 0) { 489 perror("sigaction"); 490 return; 491 } 492 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 497 perror("sigaction"); 498 return; 499 } 500 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 501 perror("sigaction"); 502 return; 503 } 504 } 505 506 /* 507 * Print "unknown" message and exit with status 2. 508 */ 509 static void 510 unknown(const char *what) 511 { 512 513 errx(2, "%s: %s", getstr(3), what); 514 } 515 516 /* 517 * Check whether contradictory input options are used. 518 */ 519 static void 520 check_mutually_exclusive_flags(char c, bool *mef_flags) 521 { 522 int fo_index, mec; 523 bool found_others, found_this; 524 525 found_others = found_this = false; 526 fo_index = 0; 527 528 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 529 mec = mutually_exclusive_flags[i]; 530 531 if (mec != c) { 532 if (mef_flags[i]) { 533 if (found_this) 534 errx(1, "%c:%c: %s", c, mec, getstr(1)); 535 found_others = true; 536 fo_index = i; 537 } 538 } else { 539 if (found_others) 540 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 541 mef_flags[i] = true; 542 found_this = true; 543 } 544 } 545 } 546 547 /* 548 * Initialise sort opts data. 549 */ 550 static void 551 set_sort_opts(void) 552 { 553 554 memset(&default_sort_mods_object, 0, 555 sizeof(default_sort_mods_object)); 556 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 557 default_sort_mods_object.func = 558 get_sort_func(&default_sort_mods_object); 559 } 560 561 /* 562 * Set a sort modifier on a sort modifiers object. 563 */ 564 static bool 565 set_sort_modifier(struct sort_mods *sm, int c) 566 { 567 568 if (sm == NULL) 569 return (true); 570 571 switch (c){ 572 case 'b': 573 sm->bflag = true; 574 break; 575 case 'd': 576 sm->dflag = true; 577 break; 578 case 'f': 579 sm->fflag = true; 580 break; 581 case 'g': 582 sm->gflag = true; 583 need_hint = true; 584 break; 585 case 'i': 586 sm->iflag = true; 587 break; 588 case 'R': 589 sm->Rflag = true; 590 need_hint = true; 591 need_random = true; 592 break; 593 case 'M': 594 initialise_months(); 595 sm->Mflag = true; 596 need_hint = true; 597 break; 598 case 'n': 599 sm->nflag = true; 600 need_hint = true; 601 print_symbols_on_debug = true; 602 break; 603 case 'r': 604 sm->rflag = true; 605 break; 606 case 'V': 607 sm->Vflag = true; 608 break; 609 case 'h': 610 sm->hflag = true; 611 need_hint = true; 612 print_symbols_on_debug = true; 613 break; 614 default: 615 return (false); 616 } 617 618 sort_opts_vals.complex_sort = true; 619 sm->func = get_sort_func(sm); 620 return (true); 621 } 622 623 /* 624 * Parse POS in -k option. 625 */ 626 static int 627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 628 { 629 regmatch_t pmatch[4]; 630 regex_t re; 631 char *c, *f; 632 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 633 size_t len, nmatch; 634 int ret; 635 636 ret = -1; 637 nmatch = 4; 638 c = f = NULL; 639 640 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 641 return (-1); 642 643 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 644 goto end; 645 646 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 647 goto end; 648 649 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 650 goto end; 651 652 len = pmatch[1].rm_eo - pmatch[1].rm_so; 653 f = sort_malloc((len + 1) * sizeof(char)); 654 655 strncpy(f, s + pmatch[1].rm_so, len); 656 f[len] = '\0'; 657 658 if (second) { 659 errno = 0; 660 ks->f2 = (size_t) strtoul(f, NULL, 10); 661 if (errno != 0) 662 err(2, "-k"); 663 if (ks->f2 == 0) { 664 warn("%s",getstr(5)); 665 goto end; 666 } 667 } else { 668 errno = 0; 669 ks->f1 = (size_t) strtoul(f, NULL, 10); 670 if (errno != 0) 671 err(2, "-k"); 672 if (ks->f1 == 0) { 673 warn("%s",getstr(5)); 674 goto end; 675 } 676 } 677 678 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 679 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 680 c = sort_malloc((len + 1) * sizeof(char)); 681 682 strncpy(c, s + pmatch[2].rm_so + 1, len); 683 c[len] = '\0'; 684 685 if (second) { 686 errno = 0; 687 ks->c2 = (size_t) strtoul(c, NULL, 10); 688 if (errno != 0) 689 err(2, "-k"); 690 } else { 691 errno = 0; 692 ks->c1 = (size_t) strtoul(c, NULL, 10); 693 if (errno != 0) 694 err(2, "-k"); 695 if (ks->c1 == 0) { 696 warn("%s",getstr(6)); 697 goto end; 698 } 699 } 700 } else { 701 if (second) 702 ks->c2 = 0; 703 else 704 ks->c1 = 1; 705 } 706 707 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 708 regoff_t i = 0; 709 710 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 711 check_mutually_exclusive_flags(s[i], mef_flags); 712 if (s[i] == 'b') { 713 if (second) 714 ks->pos2b = true; 715 else 716 ks->pos1b = true; 717 } else if (!set_sort_modifier(&(ks->sm), s[i])) 718 goto end; 719 } 720 } 721 722 ret = 0; 723 724 end: 725 726 if (c) 727 sort_free(c); 728 if (f) 729 sort_free(f); 730 regfree(&re); 731 732 return (ret); 733 } 734 735 /* 736 * Parse -k option value. 737 */ 738 static int 739 parse_k(const char *s, struct key_specs *ks) 740 { 741 int ret = -1; 742 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 743 { false, false, false, false, false, false }; 744 745 if (s && *s) { 746 char *sptr; 747 748 sptr = strchr(s, ','); 749 if (sptr) { 750 size_t size1; 751 char *pos1, *pos2; 752 753 size1 = sptr - s; 754 755 if (size1 < 1) 756 return (-1); 757 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 758 759 strncpy(pos1, s, size1); 760 pos1[size1] = '\0'; 761 762 ret = parse_pos(pos1, ks, mef_flags, false); 763 764 sort_free(pos1); 765 if (ret < 0) 766 return (ret); 767 768 pos2 = sort_strdup(sptr + 1); 769 ret = parse_pos(pos2, ks, mef_flags, true); 770 sort_free(pos2); 771 } else 772 ret = parse_pos(s, ks, mef_flags, false); 773 } 774 775 return (ret); 776 } 777 778 /* 779 * Parse POS in +POS -POS option. 780 */ 781 static int 782 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 783 { 784 regex_t re; 785 regmatch_t pmatch[4]; 786 char *c, *f; 787 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 788 int ret; 789 size_t len, nmatch; 790 791 ret = -1; 792 nmatch = 4; 793 c = f = NULL; 794 *nc = *nf = 0; 795 796 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 797 return (-1); 798 799 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 800 goto end; 801 802 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 803 goto end; 804 805 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 806 goto end; 807 808 len = pmatch[1].rm_eo - pmatch[1].rm_so; 809 f = sort_malloc((len + 1) * sizeof(char)); 810 811 strncpy(f, s + pmatch[1].rm_so, len); 812 f[len] = '\0'; 813 814 errno = 0; 815 *nf = (size_t) strtoul(f, NULL, 10); 816 if (errno != 0) 817 errx(2, "%s", getstr(11)); 818 819 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 820 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 821 c = sort_malloc((len + 1) * sizeof(char)); 822 823 strncpy(c, s + pmatch[2].rm_so + 1, len); 824 c[len] = '\0'; 825 826 errno = 0; 827 *nc = (size_t) strtoul(c, NULL, 10); 828 if (errno != 0) 829 errx(2, "%s", getstr(11)); 830 } 831 832 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 833 834 len = pmatch[3].rm_eo - pmatch[3].rm_so; 835 836 strncpy(sopts, s + pmatch[3].rm_so, len); 837 sopts[len] = '\0'; 838 } 839 840 ret = 0; 841 842 end: 843 if (c) 844 sort_free(c); 845 if (f) 846 sort_free(f); 847 regfree(&re); 848 849 return (ret); 850 } 851 852 /* 853 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 854 */ 855 void 856 fix_obsolete_keys(int *argc, char **argv) 857 { 858 char sopt[129]; 859 860 for (int i = 1; i < *argc; i++) { 861 char *arg1; 862 863 arg1 = argv[i]; 864 865 if (strcmp(arg1, "--") == 0) { 866 /* Following arguments are treated as filenames. */ 867 break; 868 } 869 870 if (strlen(arg1) > 1 && arg1[0] == '+') { 871 int c1, f1; 872 char sopts1[128]; 873 874 sopts1[0] = 0; 875 c1 = f1 = 0; 876 877 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 878 continue; 879 else { 880 f1 += 1; 881 c1 += 1; 882 if (i + 1 < *argc) { 883 char *arg2 = argv[i + 1]; 884 885 if (strlen(arg2) > 1 && 886 arg2[0] == '-') { 887 int c2, f2; 888 char sopts2[128]; 889 890 sopts2[0] = 0; 891 c2 = f2 = 0; 892 893 if (parse_pos_obs(arg2 + 1, 894 &f2, &c2, sopts2) >= 0) { 895 if (c2 > 0) 896 f2 += 1; 897 sprintf(sopt, "-k%d.%d%s,%d.%d%s", 898 f1, c1, sopts1, f2, c2, sopts2); 899 argv[i] = sort_strdup(sopt); 900 for (int j = i + 1; j + 1 < *argc; j++) 901 argv[j] = argv[j + 1]; 902 *argc -= 1; 903 continue; 904 } 905 } 906 } 907 sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); 908 argv[i] = sort_strdup(sopt); 909 } 910 } 911 } 912 } 913 914 /* 915 * Seed random sort 916 */ 917 static void 918 get_random_seed(const char *random_source) 919 { 920 char randseed[32]; 921 struct stat fsb, rsb; 922 ssize_t rd; 923 int rsfd; 924 925 rsfd = -1; 926 rd = sizeof(randseed); 927 928 if (random_source == NULL) { 929 if (getentropy(randseed, sizeof(randseed)) < 0) 930 err(EX_SOFTWARE, "getentropy"); 931 goto out; 932 } 933 934 rsfd = open(random_source, O_RDONLY | O_CLOEXEC); 935 if (rsfd < 0) 936 err(EX_NOINPUT, "open: %s", random_source); 937 938 if (fstat(rsfd, &fsb) != 0) 939 err(EX_SOFTWARE, "fstat"); 940 941 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode)) 942 err(EX_USAGE, 943 "random seed isn't a regular file or /dev/random"); 944 945 /* 946 * Regular files: read up to maximum seed size and explicitly 947 * reject longer files. 948 */ 949 if (S_ISREG(fsb.st_mode)) { 950 if (fsb.st_size > (off_t)sizeof(randseed)) 951 errx(EX_USAGE, "random seed is too large (%jd >" 952 " %zu)!", (intmax_t)fsb.st_size, 953 sizeof(randseed)); 954 else if (fsb.st_size < 1) 955 errx(EX_USAGE, "random seed is too small (" 956 "0 bytes)"); 957 958 memset(randseed, 0, sizeof(randseed)); 959 960 rd = read(rsfd, randseed, fsb.st_size); 961 if (rd < 0) 962 err(EX_SOFTWARE, "reading random seed file %s", 963 random_source); 964 if (rd < (ssize_t)fsb.st_size) 965 errx(EX_SOFTWARE, "short read from %s", random_source); 966 } else if (S_ISCHR(fsb.st_mode)) { 967 if (stat("/dev/random", &rsb) < 0) 968 err(EX_SOFTWARE, "stat"); 969 970 if (fsb.st_dev != rsb.st_dev || 971 fsb.st_ino != rsb.st_ino) 972 errx(EX_USAGE, "random seed is a character " 973 "device other than /dev/random"); 974 975 if (getentropy(randseed, sizeof(randseed)) < 0) 976 err(EX_SOFTWARE, "getentropy"); 977 } 978 979 out: 980 if (rsfd >= 0) 981 close(rsfd); 982 983 MD5Init(&md5_ctx); 984 MD5Update(&md5_ctx, randseed, rd); 985 } 986 987 /* 988 * Main function. 989 */ 990 int 991 main(int argc, char **argv) 992 { 993 char *outfile, *real_outfile; 994 char *random_source = NULL; 995 int c, result; 996 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 997 { false, false, false, false, false, false }; 998 999 result = 0; 1000 outfile = sort_strdup("-"); 1001 real_outfile = NULL; 1002 1003 struct sort_mods *sm = &default_sort_mods_object; 1004 1005 init_tmp_files(); 1006 1007 set_signal_handler(); 1008 1009 set_hw_params(); 1010 set_locale(); 1011 set_tmpdir(); 1012 set_sort_opts(); 1013 1014 fix_obsolete_keys(&argc, argv); 1015 1016 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1017 != -1)) { 1018 1019 check_mutually_exclusive_flags(c, mef_flags); 1020 1021 if (!set_sort_modifier(sm, c)) { 1022 1023 switch (c) { 1024 case 'c': 1025 sort_opts_vals.cflag = true; 1026 if (optarg) { 1027 if (!strcmp(optarg, "diagnose-first")) 1028 ; 1029 else if (!strcmp(optarg, "silent") || 1030 !strcmp(optarg, "quiet")) 1031 sort_opts_vals.csilentflag = true; 1032 else if (*optarg) 1033 unknown(optarg); 1034 } 1035 break; 1036 case 'C': 1037 sort_opts_vals.cflag = true; 1038 sort_opts_vals.csilentflag = true; 1039 break; 1040 case 'k': 1041 { 1042 sort_opts_vals.complex_sort = true; 1043 sort_opts_vals.kflag = true; 1044 1045 keys_num++; 1046 keys = sort_realloc(keys, keys_num * 1047 sizeof(struct key_specs)); 1048 memset(&(keys[keys_num - 1]), 0, 1049 sizeof(struct key_specs)); 1050 1051 if (parse_k(optarg, &(keys[keys_num - 1])) 1052 < 0) { 1053 errc(2, EINVAL, "-k %s", optarg); 1054 } 1055 1056 break; 1057 } 1058 case 'm': 1059 sort_opts_vals.mflag = true; 1060 break; 1061 case 'o': 1062 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1063 strcpy(outfile, optarg); 1064 break; 1065 case 's': 1066 sort_opts_vals.sflag = true; 1067 break; 1068 case 'S': 1069 available_free_memory = 1070 parse_memory_buffer_value(optarg); 1071 break; 1072 case 'T': 1073 tmpdir = sort_strdup(optarg); 1074 break; 1075 case 't': 1076 while (strlen(optarg) > 1) { 1077 if (optarg[0] != '\\') { 1078 errc(2, EINVAL, "%s", optarg); 1079 } 1080 optarg += 1; 1081 if (*optarg == '0') { 1082 *optarg = 0; 1083 break; 1084 } 1085 } 1086 sort_opts_vals.tflag = true; 1087 sort_opts_vals.field_sep = btowc(optarg[0]); 1088 if (sort_opts_vals.field_sep == WEOF) { 1089 errno = EINVAL; 1090 err(2, NULL); 1091 } 1092 if (!gnusort_numeric_compatibility) { 1093 if (symbol_decimal_point == sort_opts_vals.field_sep) 1094 symbol_decimal_point = WEOF; 1095 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1096 symbol_thousands_sep = WEOF; 1097 if (symbol_negative_sign == sort_opts_vals.field_sep) 1098 symbol_negative_sign = WEOF; 1099 if (symbol_positive_sign == sort_opts_vals.field_sep) 1100 symbol_positive_sign = WEOF; 1101 } 1102 break; 1103 case 'u': 1104 sort_opts_vals.uflag = true; 1105 /* stable sort for the correct unique val */ 1106 sort_opts_vals.sflag = true; 1107 break; 1108 case 'z': 1109 sort_opts_vals.zflag = true; 1110 break; 1111 case SORT_OPT: 1112 if (optarg) { 1113 if (!strcmp(optarg, "general-numeric")) 1114 set_sort_modifier(sm, 'g'); 1115 else if (!strcmp(optarg, "human-numeric")) 1116 set_sort_modifier(sm, 'h'); 1117 else if (!strcmp(optarg, "numeric")) 1118 set_sort_modifier(sm, 'n'); 1119 else if (!strcmp(optarg, "month")) 1120 set_sort_modifier(sm, 'M'); 1121 else if (!strcmp(optarg, "random")) 1122 set_sort_modifier(sm, 'R'); 1123 else 1124 unknown(optarg); 1125 } 1126 break; 1127 #if defined(SORT_THREADS) 1128 case PARALLEL_OPT: 1129 nthreads = (size_t)(atoi(optarg)); 1130 if (nthreads < 1) 1131 nthreads = 1; 1132 if (nthreads > 1024) 1133 nthreads = 1024; 1134 break; 1135 #endif 1136 case QSORT_OPT: 1137 sort_opts_vals.sort_method = SORT_QSORT; 1138 break; 1139 case MERGESORT_OPT: 1140 sort_opts_vals.sort_method = SORT_MERGESORT; 1141 break; 1142 case MMAP_OPT: 1143 use_mmap = true; 1144 break; 1145 case HEAPSORT_OPT: 1146 sort_opts_vals.sort_method = SORT_HEAPSORT; 1147 break; 1148 case RADIXSORT_OPT: 1149 sort_opts_vals.sort_method = SORT_RADIXSORT; 1150 break; 1151 case RANDOMSOURCE_OPT: 1152 random_source = strdup(optarg); 1153 break; 1154 case COMPRESSPROGRAM_OPT: 1155 compress_program = strdup(optarg); 1156 break; 1157 case FF_OPT: 1158 read_fns_from_file0(optarg); 1159 break; 1160 case BS_OPT: 1161 { 1162 errno = 0; 1163 long mof = strtol(optarg, NULL, 10); 1164 if (errno != 0) 1165 err(2, "--batch-size"); 1166 if (mof >= 2) 1167 max_open_files = (size_t) mof + 1; 1168 } 1169 break; 1170 case VERSION_OPT: 1171 printf("%s\n", VERSION); 1172 exit(EXIT_SUCCESS); 1173 /* NOTREACHED */ 1174 break; 1175 case DEBUG_OPT: 1176 debug_sort = true; 1177 break; 1178 case HELP_OPT: 1179 usage(false); 1180 /* NOTREACHED */ 1181 break; 1182 default: 1183 usage(true); 1184 /* NOTREACHED */ 1185 } 1186 } 1187 } 1188 1189 argc -= optind; 1190 argv += optind; 1191 1192 if (argv_from_file0) { 1193 argc = argc_from_file0; 1194 argv = argv_from_file0; 1195 } 1196 1197 #ifndef WITHOUT_NLS 1198 catalog = catopen("sort", NL_CAT_LOCALE); 1199 #endif 1200 1201 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1202 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1203 1204 #ifndef WITHOUT_NLS 1205 catclose(catalog); 1206 #endif 1207 1208 if (keys_num == 0) { 1209 keys_num = 1; 1210 keys = sort_realloc(keys, sizeof(struct key_specs)); 1211 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1212 keys[0].c1 = 1; 1213 keys[0].pos1b = default_sort_mods->bflag; 1214 keys[0].pos2b = default_sort_mods->bflag; 1215 memcpy(&(keys[0].sm), default_sort_mods, 1216 sizeof(struct sort_mods)); 1217 } 1218 1219 for (size_t i = 0; i < keys_num; i++) { 1220 struct key_specs *ks; 1221 1222 ks = &(keys[i]); 1223 1224 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1225 !(ks->pos2b)) { 1226 ks->pos1b = sm->bflag; 1227 ks->pos2b = sm->bflag; 1228 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1229 } 1230 1231 ks->sm.func = get_sort_func(&(ks->sm)); 1232 } 1233 1234 if (debug_sort) { 1235 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1236 #if defined(SORT_THREADS) 1237 printf("Number of CPUs: %d\n",(int)ncpu); 1238 nthreads = 1; 1239 #endif 1240 printf("Using collate rules of %s locale\n", 1241 setlocale(LC_COLLATE, NULL)); 1242 if (byte_sort) 1243 printf("Byte sort is used\n"); 1244 if (print_symbols_on_debug) { 1245 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1246 if (symbol_thousands_sep) 1247 printf("Thousands separator: <%lc>\n", 1248 symbol_thousands_sep); 1249 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1250 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1251 } 1252 } 1253 1254 if (need_random) 1255 get_random_seed(random_source); 1256 1257 /* Case when the outfile equals one of the input files: */ 1258 if (strcmp(outfile, "-")) { 1259 1260 for(int i = 0; i < argc; ++i) { 1261 if (strcmp(argv[i], outfile) == 0) { 1262 real_outfile = sort_strdup(outfile); 1263 for(;;) { 1264 char* tmp = sort_malloc(strlen(outfile) + 1265 strlen(".tmp") + 1); 1266 1267 strcpy(tmp, outfile); 1268 strcpy(tmp + strlen(tmp), ".tmp"); 1269 sort_free(outfile); 1270 outfile = tmp; 1271 if (access(outfile, F_OK) < 0) 1272 break; 1273 } 1274 tmp_file_atexit(outfile); 1275 } 1276 } 1277 } 1278 1279 #if defined(SORT_THREADS) 1280 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1281 nthreads = 1; 1282 #endif 1283 1284 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1285 struct file_list fl; 1286 struct sort_list list; 1287 1288 sort_list_init(&list); 1289 file_list_init(&fl, true); 1290 1291 if (argc < 1) 1292 procfile("-", &list, &fl); 1293 else { 1294 while (argc > 0) { 1295 procfile(*argv, &list, &fl); 1296 --argc; 1297 ++argv; 1298 } 1299 } 1300 1301 if (fl.count < 1) 1302 sort_list_to_file(&list, outfile); 1303 else { 1304 if (list.count > 0) { 1305 char *flast = new_tmp_file_name(); 1306 1307 sort_list_to_file(&list, flast); 1308 file_list_add(&fl, flast, false); 1309 } 1310 merge_files(&fl, outfile); 1311 } 1312 1313 file_list_clean(&fl); 1314 1315 /* 1316 * We are about to exit the program, so we can ignore 1317 * the clean-up for speed 1318 * 1319 * sort_list_clean(&list); 1320 */ 1321 1322 } else if (sort_opts_vals.cflag) { 1323 result = (argc == 0) ? (check("-")) : (check(*argv)); 1324 } else if (sort_opts_vals.mflag) { 1325 struct file_list fl; 1326 1327 file_list_init(&fl, false); 1328 /* No file arguments remaining means "read from stdin." */ 1329 if (argc == 0) 1330 file_list_add(&fl, "-", true); 1331 else 1332 file_list_populate(&fl, argc, argv, true); 1333 merge_files(&fl, outfile); 1334 file_list_clean(&fl); 1335 } 1336 1337 if (real_outfile) { 1338 unlink(real_outfile); 1339 if (rename(outfile, real_outfile) < 0) 1340 err(2, NULL); 1341 sort_free(real_outfile); 1342 } 1343 1344 sort_free(outfile); 1345 1346 return (result); 1347 } 1348