1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34
35 #include <err.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <getopt.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <md5.h>
42 #include <regex.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <wchar.h>
50 #include <wctype.h>
51
52 #include "coll.h"
53 #include "file.h"
54 #include "sort.h"
55
/*
 * Short-option string handed to getopt_long() in main(); a ':' after a
 * letter marks an option that takes an argument.
 */
#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Set when an R (random sort) modifier is seen; main() then seeds md5_ctx. */
static bool need_random;

/* MD5 context primed with the random seed by get_random_seed(). */
MD5_CTX md5_ctx;
61
/*
 * Default messages to use.
 *
 * Index 0 is an empty placeholder so that message numbers start at 1; the
 * numbers in the comments below are the values passed to getstr() elsewhere
 * in this file (presumably getstr() indexes this table -- it is defined
 * outside this file).  Message 12 is a printf-style format taking the
 * program name.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
88
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by the --debug option. */
bool debug_sort;
/* Set by the g, R, M, n and h sort modifiers (see set_sort_modifier()). */
bool need_hint;

/* MB_CUR_MAX as sampled in set_locale() after setlocale(LC_ALL, ""). */
size_t mb_cur_max;

#if defined(SORT_THREADS)
/* CPU count (clamped to 1..32 in set_hw_params()) and worker thread count. */
unsigned int ncpu = 1;
size_t nthreads = 1;
#endif

/* True when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as global options; main() copies them into bare keys. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set by the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first file name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
113
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start above CHAR_MAX so they cannot collide with any short option
 * character returned by getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
136
/*
 * Sort modifiers of which at most one may be requested.  The bool arrays
 * passed to check_mutually_exclusive_flags() are indexed in parallel with
 * this table.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
139
/*
 * Long-option table for getopt_long().  Options with a short equivalent map
 * to that character; the rest map to the placeholder enum values above.
 * The table is terminated by an all-zero entry.
 */
static struct option long_options[] = {
	{ "batch-size", required_argument, NULL, BS_OPT },
	{ "buffer-size", required_argument, NULL, 'S' },
	{ "check", optional_argument, NULL, 'c' },
	/*
	 * NOTE(review): this name contains '=' and '|', so it looks like it
	 * can never be matched as typed; "--check=silent" appears to be
	 * served by the "check" entry's optional argument instead -- confirm.
	 */
	{ "check=silent|quiet", optional_argument, NULL, 'C' },
	{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
	{ "debug", no_argument, NULL, DEBUG_OPT },
	{ "dictionary-order", no_argument, NULL, 'd' },
	{ "field-separator", required_argument, NULL, 't' },
	{ "files0-from", required_argument, NULL, FF_OPT },
	{ "general-numeric-sort", no_argument, NULL, 'g' },
	{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
	{ "help",no_argument, NULL, HELP_OPT },
	{ "human-numeric-sort", no_argument, NULL, 'h' },
	{ "ignore-leading-blanks", no_argument, NULL, 'b' },
	{ "ignore-case", no_argument, NULL, 'f' },
	{ "ignore-nonprinting", no_argument, NULL, 'i' },
	{ "key", required_argument, NULL, 'k' },
	{ "merge", no_argument, NULL, 'm' },
	{ "mergesort", no_argument, NULL, MERGESORT_OPT },
	{ "mmap", no_argument, NULL, MMAP_OPT },
	{ "month-sort", no_argument, NULL, 'M' },
	{ "numeric-sort", no_argument, NULL, 'n' },
	{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
	{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
	{ "qsort", no_argument, NULL, QSORT_OPT },
	{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
	{ "random-sort", no_argument, NULL, 'R' },
	{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
	{ "reverse", no_argument, NULL, 'r' },
	{ "sort", required_argument, NULL, SORT_OPT },
	{ "stable", no_argument, NULL, 's' },
	{ "temporary-directory",required_argument, NULL, 'T' },
	{ "unique", no_argument, NULL, 'u' },
	{ "version", no_argument, NULL, VERSION_OPT },
	{ "version-sort",no_argument, NULL, 'V' },
	{ "zero-terminated", no_argument, NULL, 'z' },
	{ NULL, no_argument, NULL, 0 }
};
181
/* Rewrites obsolete "+POS1 [-POS2]" arguments in place; defined below. */
void fix_obsolete_keys(int *argc, char **argv);
183
184 /*
185 * Check where sort modifier is present
186 */
187 static bool
sort_modifier_empty(struct sort_mods * sm)188 sort_modifier_empty(struct sort_mods *sm)
189 {
190
191 if (sm == NULL)
192 return (true);
193 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
194 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
195 }
196
/*
 * Print the usage text and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = stdout;
	int status = 0;

	if (opt_err) {
		stream = stderr;
		status = 2;
	}

	/* Message 12 is the usage format; its only argument is the name. */
	fprintf(stream, getstr(12), getprogname());
	exit(status);
}
212
213 /*
214 * Read input file names from a file (file0-from option).
215 */
216 static void
read_fns_from_file0(const char * fn)217 read_fns_from_file0(const char *fn)
218 {
219 FILE *f;
220 char *line = NULL;
221 size_t linesize = 0;
222 ssize_t linelen;
223
224 if (fn == NULL)
225 return;
226
227 f = fopen(fn, "r");
228 if (f == NULL)
229 err(2, "%s", fn);
230
231 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
232 if (*line != '\0') {
233 if (argc_from_file0 == (size_t) - 1)
234 argc_from_file0 = 0;
235 ++argc_from_file0;
236 argv_from_file0 = sort_realloc(argv_from_file0,
237 argc_from_file0 * sizeof(char *));
238 if (argv_from_file0 == NULL)
239 err(2, NULL);
240 argv_from_file0[argc_from_file0 - 1] = line;
241 } else {
242 free(line);
243 }
244 line = NULL;
245 linesize = 0;
246 }
247 if (ferror(f))
248 err(2, "%s: getdelim", fn);
249
250 closefile(f, fn);
251 }
252
253 /*
254 * Check how much RAM is available for the sort.
255 */
256 static void
set_hw_params(void)257 set_hw_params(void)
258 {
259 long pages, psize;
260
261 #if defined(SORT_THREADS)
262 ncpu = 1;
263 #endif
264
265 pages = sysconf(_SC_PHYS_PAGES);
266 if (pages < 1) {
267 perror("sysconf pages");
268 pages = 1;
269 }
270 psize = sysconf(_SC_PAGESIZE);
271 if (psize < 1) {
272 perror("sysconf psize");
273 psize = 4096;
274 }
275 #if defined(SORT_THREADS)
276 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
277 if (ncpu < 1)
278 ncpu = 1;
279 else if(ncpu > 32)
280 ncpu = 32;
281
282 nthreads = ncpu;
283 #endif
284
285 free_memory = (unsigned long long) pages * (unsigned long long) psize;
286 available_free_memory = free_memory / 2;
287
288 if (available_free_memory < 1024)
289 available_free_memory = 1024;
290 }
291
292 /*
293 * Convert "plain" symbol to wide symbol, with default value.
294 */
295 static void
conv_mbtowc(wchar_t * wc,const char * c,const wchar_t def)296 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
297 {
298
299 if (wc && c) {
300 int res;
301
302 res = mbtowc(wc, c, mb_cur_max);
303 if (res < 1)
304 *wc = def;
305 }
306 }
307
308 /*
309 * Set current locale symbols.
310 */
311 static void
set_locale(void)312 set_locale(void)
313 {
314 struct lconv *lc;
315 const char *locale;
316
317 setlocale(LC_ALL, "");
318
319 mb_cur_max = MB_CUR_MAX;
320
321 lc = localeconv();
322
323 if (lc) {
324 /* obtain LC_NUMERIC info */
325 /* Convert to wide char form */
326 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
327 symbol_decimal_point);
328 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
329 symbol_thousands_sep);
330 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
331 symbol_positive_sign);
332 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
333 symbol_negative_sign);
334 }
335
336 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
337 gnusort_numeric_compatibility = true;
338
339 locale = setlocale(LC_COLLATE, NULL);
340
341 if (locale) {
342 char *tmpl;
343 const char *cclocale;
344
345 tmpl = sort_strdup(locale);
346 cclocale = setlocale(LC_COLLATE, "C");
347 if (cclocale && !strcmp(cclocale, tmpl))
348 byte_sort = true;
349 else {
350 const char *pclocale;
351
352 pclocale = setlocale(LC_COLLATE, "POSIX");
353 if (pclocale && !strcmp(pclocale, tmpl))
354 byte_sort = true;
355 }
356 setlocale(LC_COLLATE, tmpl);
357 sort_free(tmpl);
358 }
359 }
360
361 /*
362 * Set directory temporary files.
363 */
364 static void
set_tmpdir(void)365 set_tmpdir(void)
366 {
367 char *td;
368
369 td = getenv("TMPDIR");
370 if (td != NULL)
371 tmpdir = sort_strdup(td);
372 }
373
374 /*
375 * Parse -S option.
376 */
377 static unsigned long long
parse_memory_buffer_value(const char * value)378 parse_memory_buffer_value(const char *value)
379 {
380
381 if (value == NULL)
382 return (available_free_memory);
383 else {
384 char *endptr;
385 unsigned long long membuf;
386
387 endptr = NULL;
388 errno = 0;
389 membuf = strtoll(value, &endptr, 10);
390
391 if (errno != 0) {
392 warn("%s",getstr(4));
393 membuf = available_free_memory;
394 } else {
395 switch (*endptr){
396 case 'Y':
397 membuf *= 1024;
398 /* FALLTHROUGH */
399 case 'Z':
400 membuf *= 1024;
401 /* FALLTHROUGH */
402 case 'E':
403 membuf *= 1024;
404 /* FALLTHROUGH */
405 case 'P':
406 membuf *= 1024;
407 /* FALLTHROUGH */
408 case 'T':
409 membuf *= 1024;
410 /* FALLTHROUGH */
411 case 'G':
412 membuf *= 1024;
413 /* FALLTHROUGH */
414 case 'M':
415 membuf *= 1024;
416 /* FALLTHROUGH */
417 case '\0':
418 case 'K':
419 membuf *= 1024;
420 /* FALLTHROUGH */
421 case 'b':
422 break;
423 case '%':
424 membuf = (available_free_memory * membuf) /
425 100;
426 break;
427 default:
428 warnc(EINVAL, "%s", optarg);
429 membuf = available_free_memory;
430 }
431 }
432 return (membuf);
433 }
434 }
435
436 /*
437 * Signal handler that clears the temporary files.
438 */
439 static void
sig_handler(int sig __unused,siginfo_t * siginfo __unused,void * context __unused)440 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
441 void *context __unused)
442 {
443
444 clear_tmp_files();
445 exit(-1);
446 }
447
448 /*
449 * Set signal handler on panic signals.
450 */
451 static void
set_signal_handler(void)452 set_signal_handler(void)
453 {
454 struct sigaction sa;
455
456 memset(&sa, 0, sizeof(sa));
457 sa.sa_sigaction = &sig_handler;
458 sa.sa_flags = SA_SIGINFO;
459
460 if (sigaction(SIGTERM, &sa, NULL) < 0) {
461 perror("sigaction");
462 return;
463 }
464 if (sigaction(SIGHUP, &sa, NULL) < 0) {
465 perror("sigaction");
466 return;
467 }
468 if (sigaction(SIGINT, &sa, NULL) < 0) {
469 perror("sigaction");
470 return;
471 }
472 if (sigaction(SIGQUIT, &sa, NULL) < 0) {
473 perror("sigaction");
474 return;
475 }
476 if (sigaction(SIGABRT, &sa, NULL) < 0) {
477 perror("sigaction");
478 return;
479 }
480 if (sigaction(SIGBUS, &sa, NULL) < 0) {
481 perror("sigaction");
482 return;
483 }
484 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
485 perror("sigaction");
486 return;
487 }
488 if (sigaction(SIGUSR1, &sa, NULL) < 0) {
489 perror("sigaction");
490 return;
491 }
492 if (sigaction(SIGUSR2, &sa, NULL) < 0) {
493 perror("sigaction");
494 return;
495 }
496 }
497
/*
 * Report an unrecognised feature name and exit with status 2.
 * The output is message 3 followed by the offending string.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
507
508 /*
509 * Check whether contradictory input options are used.
510 */
511 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)512 check_mutually_exclusive_flags(char c, bool *mef_flags)
513 {
514 int fo_index, mec;
515 bool found_others, found_this;
516
517 found_others = found_this = false;
518 fo_index = 0;
519
520 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
521 mec = mutually_exclusive_flags[i];
522
523 if (mec != c) {
524 if (mef_flags[i]) {
525 if (found_this)
526 errx(1, "%c:%c: %s", c, mec, getstr(1));
527 found_others = true;
528 fo_index = i;
529 }
530 } else {
531 if (found_others)
532 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
533 mef_flags[i] = true;
534 found_this = true;
535 }
536 }
537 }
538
539 /*
540 * Initialise sort opts data.
541 */
542 static void
set_sort_opts(void)543 set_sort_opts(void)
544 {
545
546 memset(&default_sort_mods_object, 0,
547 sizeof(default_sort_mods_object));
548 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
549 default_sort_mods_object.func =
550 get_sort_func(&default_sort_mods_object);
551 }
552
553 /*
554 * Set a sort modifier on a sort modifiers object.
555 */
556 static bool
set_sort_modifier(struct sort_mods * sm,int c)557 set_sort_modifier(struct sort_mods *sm, int c)
558 {
559
560 if (sm == NULL)
561 return (true);
562
563 switch (c){
564 case 'b':
565 sm->bflag = true;
566 break;
567 case 'd':
568 sm->dflag = true;
569 break;
570 case 'f':
571 sm->fflag = true;
572 break;
573 case 'g':
574 sm->gflag = true;
575 need_hint = true;
576 break;
577 case 'i':
578 sm->iflag = true;
579 break;
580 case 'R':
581 sm->Rflag = true;
582 need_hint = true;
583 need_random = true;
584 break;
585 case 'M':
586 initialise_months();
587 sm->Mflag = true;
588 need_hint = true;
589 break;
590 case 'n':
591 sm->nflag = true;
592 need_hint = true;
593 print_symbols_on_debug = true;
594 break;
595 case 'r':
596 sm->rflag = true;
597 break;
598 case 'V':
599 sm->Vflag = true;
600 break;
601 case 'h':
602 sm->hflag = true;
603 need_hint = true;
604 print_symbols_on_debug = true;
605 break;
606 default:
607 return (false);
608 }
609
610 sort_opts_vals.complex_sort = true;
611 sm->func = get_sort_func(sm);
612 return (true);
613 }
614
615 /*
616 * Parse POS in -k option.
617 */
618 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)619 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
620 {
621 regmatch_t pmatch[4];
622 regex_t re;
623 char *c, *f;
624 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
625 size_t len, nmatch;
626 int ret;
627
628 ret = -1;
629 nmatch = 4;
630 c = f = NULL;
631
632 if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
633 return (-1);
634
635 if (regexec(&re, s, nmatch, pmatch, 0) != 0)
636 goto end;
637
638 if (pmatch[0].rm_eo <= pmatch[0].rm_so)
639 goto end;
640
641 if (pmatch[1].rm_eo <= pmatch[1].rm_so)
642 goto end;
643
644 len = pmatch[1].rm_eo - pmatch[1].rm_so;
645 f = sort_malloc((len + 1) * sizeof(char));
646
647 strncpy(f, s + pmatch[1].rm_so, len);
648 f[len] = '\0';
649
650 if (second) {
651 errno = 0;
652 ks->f2 = (size_t) strtoul(f, NULL, 10);
653 if (errno != 0)
654 err(2, "-k");
655 if (ks->f2 == 0) {
656 warn("%s",getstr(5));
657 goto end;
658 }
659 } else {
660 errno = 0;
661 ks->f1 = (size_t) strtoul(f, NULL, 10);
662 if (errno != 0)
663 err(2, "-k");
664 if (ks->f1 == 0) {
665 warn("%s",getstr(5));
666 goto end;
667 }
668 }
669
670 if (pmatch[2].rm_eo > pmatch[2].rm_so) {
671 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
672 c = sort_malloc((len + 1) * sizeof(char));
673
674 strncpy(c, s + pmatch[2].rm_so + 1, len);
675 c[len] = '\0';
676
677 if (second) {
678 errno = 0;
679 ks->c2 = (size_t) strtoul(c, NULL, 10);
680 if (errno != 0)
681 err(2, "-k");
682 } else {
683 errno = 0;
684 ks->c1 = (size_t) strtoul(c, NULL, 10);
685 if (errno != 0)
686 err(2, "-k");
687 if (ks->c1 == 0) {
688 warn("%s",getstr(6));
689 goto end;
690 }
691 }
692 } else {
693 if (second)
694 ks->c2 = 0;
695 else
696 ks->c1 = 1;
697 }
698
699 if (pmatch[3].rm_eo > pmatch[3].rm_so) {
700 regoff_t i = 0;
701
702 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
703 check_mutually_exclusive_flags(s[i], mef_flags);
704 if (s[i] == 'b') {
705 if (second)
706 ks->pos2b = true;
707 else
708 ks->pos1b = true;
709 } else if (!set_sort_modifier(&(ks->sm), s[i]))
710 goto end;
711 }
712 }
713
714 ret = 0;
715
716 end:
717
718 if (c)
719 sort_free(c);
720 if (f)
721 sort_free(f);
722 regfree(&re);
723
724 return (ret);
725 }
726
727 /*
728 * Parse -k option value.
729 */
730 static int
parse_k(const char * s,struct key_specs * ks)731 parse_k(const char *s, struct key_specs *ks)
732 {
733 int ret = -1;
734 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
735 { false, false, false, false, false, false };
736
737 if (s && *s) {
738 char *sptr;
739
740 sptr = strchr(s, ',');
741 if (sptr) {
742 size_t size1;
743 char *pos1, *pos2;
744
745 size1 = sptr - s;
746
747 if (size1 < 1)
748 return (-1);
749 pos1 = sort_malloc((size1 + 1) * sizeof(char));
750
751 strncpy(pos1, s, size1);
752 pos1[size1] = '\0';
753
754 ret = parse_pos(pos1, ks, mef_flags, false);
755
756 sort_free(pos1);
757 if (ret < 0)
758 return (ret);
759
760 pos2 = sort_strdup(sptr + 1);
761 ret = parse_pos(pos2, ks, mef_flags, true);
762 sort_free(pos2);
763 } else
764 ret = parse_pos(s, ks, mef_flags, false);
765 }
766
767 return (ret);
768 }
769
/*
 * Parse one POS of the obsolete "+POS1 -POS2" syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s      the POS text (without the leading '+' or '-')
 * nf     receives the field number
 * nc     receives the column number (0 when absent)
 * sopts  receives the trailing letters as a NUL-terminated string; it must
 *        point to a buffer of at least 128 bytes and is left untouched when
 *        no letters are present
 *
 * Returns 0 on success.  Returns -1 when s does not have the expected form
 * or when the letters would not fit into sopts.  A number that is out of
 * range terminates the program with status 2 and message 11.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char *sopts)
{
	/* Size of the buffers fix_obsolete_keys() passes in as sopts. */
	enum { SOPTS_CAPACITY = 128 };
	static const char sregexp[] = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regex_t re;
	regmatch_t pmatch[4];
	unsigned long num;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The regex guarantees digits followed by a non-digit or the end of
	 * the string, so the numbers can be converted in place.  Values are
	 * limited to below INT_MAX: they are stored in an int and the caller
	 * adds 1 to turn them into 1-based positions.
	 */
	errno = 0;
	num = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || num >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of group 2. */
		errno = 0;
		num = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || num >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's fixed-size buffer. */
		if (len >= SOPTS_CAPACITY)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
843
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Scans argv[1..*argc-1] up to (not including) a "--" argument.  Every
 * argument of the form +POS1 is replaced by a freshly allocated "-k..."
 * string; if the following argument is a valid -POS2 it is merged into the
 * same key and removed from argv, decrementing *argc.  Obsolete positions
 * are 0-based, so 1 is added when converting (for POS2 the field is only
 * incremented when a column is given).  Arguments that do not parse as an
 * obsolete key are left untouched.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four ints with their separators and two
	 * modifier strings of up to 127 bytes each.
	 */
	char sopt[2 * 128 + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[128];
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[128];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);

					/* Drop argv[i + 1], now merged. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep argv NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
905
906 /*
907 * Seed random sort
908 */
909 static void
get_random_seed(const char * random_source)910 get_random_seed(const char *random_source)
911 {
912 char randseed[32];
913 struct stat fsb, rsb;
914 ssize_t rd;
915 int rsfd;
916
917 rsfd = -1;
918 rd = sizeof(randseed);
919
920 if (random_source == NULL) {
921 if (getentropy(randseed, sizeof(randseed)) < 0)
922 err(EX_SOFTWARE, "getentropy");
923 goto out;
924 }
925
926 rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
927 if (rsfd < 0)
928 err(EX_NOINPUT, "open: %s", random_source);
929
930 if (fstat(rsfd, &fsb) != 0)
931 err(EX_SOFTWARE, "fstat");
932
933 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
934 err(EX_USAGE,
935 "random seed isn't a regular file or /dev/random");
936
937 /*
938 * Regular files: read up to maximum seed size and explicitly
939 * reject longer files.
940 */
941 if (S_ISREG(fsb.st_mode)) {
942 if (fsb.st_size > (off_t)sizeof(randseed))
943 errx(EX_USAGE, "random seed is too large (%jd >"
944 " %zu)!", (intmax_t)fsb.st_size,
945 sizeof(randseed));
946 else if (fsb.st_size < 1)
947 errx(EX_USAGE, "random seed is too small ("
948 "0 bytes)");
949
950 memset(randseed, 0, sizeof(randseed));
951
952 rd = read(rsfd, randseed, fsb.st_size);
953 if (rd < 0)
954 err(EX_SOFTWARE, "reading random seed file %s",
955 random_source);
956 if (rd < (ssize_t)fsb.st_size)
957 errx(EX_SOFTWARE, "short read from %s", random_source);
958 } else if (S_ISCHR(fsb.st_mode)) {
959 if (stat("/dev/random", &rsb) < 0)
960 err(EX_SOFTWARE, "stat");
961
962 if (fsb.st_dev != rsb.st_dev ||
963 fsb.st_ino != rsb.st_ino)
964 errx(EX_USAGE, "random seed is a character "
965 "device other than /dev/random");
966
967 if (getentropy(randseed, sizeof(randseed)) < 0)
968 err(EX_SOFTWARE, "getentropy");
969 }
970
971 out:
972 if (rsfd >= 0)
973 close(rsfd);
974
975 MD5Init(&md5_ctx);
976 MD5Update(&md5_ctx, randseed, rd);
977 }
978
979 /*
980 * Main function.
981 */
982 int
main(int argc,char ** argv)983 main(int argc, char **argv)
984 {
985 char *outfile, *real_outfile;
986 char *random_source = NULL;
987 int c, result;
988 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
989 { false, false, false, false, false, false };
990
991 result = 0;
992 outfile = sort_strdup("-");
993 real_outfile = NULL;
994
995 struct sort_mods *sm = &default_sort_mods_object;
996
997 init_tmp_files();
998
999 set_signal_handler();
1000
1001 set_hw_params();
1002 set_locale();
1003 set_tmpdir();
1004 set_sort_opts();
1005
1006 fix_obsolete_keys(&argc, argv);
1007
1008 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1009 != -1)) {
1010
1011 check_mutually_exclusive_flags(c, mef_flags);
1012
1013 if (!set_sort_modifier(sm, c)) {
1014
1015 switch (c) {
1016 case 'c':
1017 sort_opts_vals.cflag = true;
1018 if (optarg) {
1019 if (!strcmp(optarg, "diagnose-first"))
1020 ;
1021 else if (!strcmp(optarg, "silent") ||
1022 !strcmp(optarg, "quiet"))
1023 sort_opts_vals.csilentflag = true;
1024 else if (*optarg)
1025 unknown(optarg);
1026 }
1027 break;
1028 case 'C':
1029 sort_opts_vals.cflag = true;
1030 sort_opts_vals.csilentflag = true;
1031 break;
1032 case 'k':
1033 {
1034 sort_opts_vals.complex_sort = true;
1035 sort_opts_vals.kflag = true;
1036
1037 keys_num++;
1038 keys = sort_realloc(keys, keys_num *
1039 sizeof(struct key_specs));
1040 memset(&(keys[keys_num - 1]), 0,
1041 sizeof(struct key_specs));
1042
1043 if (parse_k(optarg, &(keys[keys_num - 1]))
1044 < 0) {
1045 errc(2, EINVAL, "-k %s", optarg);
1046 }
1047
1048 break;
1049 }
1050 case 'm':
1051 sort_opts_vals.mflag = true;
1052 break;
1053 case 'o':
1054 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1055 strcpy(outfile, optarg);
1056 break;
1057 case 's':
1058 sort_opts_vals.sflag = true;
1059 break;
1060 case 'S':
1061 available_free_memory =
1062 parse_memory_buffer_value(optarg);
1063 break;
1064 case 'T':
1065 tmpdir = sort_strdup(optarg);
1066 break;
1067 case 't':
1068 while (strlen(optarg) > 1) {
1069 if (optarg[0] != '\\') {
1070 errc(2, EINVAL, "%s", optarg);
1071 }
1072 optarg += 1;
1073 if (*optarg == '0') {
1074 *optarg = 0;
1075 break;
1076 }
1077 }
1078 sort_opts_vals.tflag = true;
1079 sort_opts_vals.field_sep = btowc(optarg[0]);
1080 if (sort_opts_vals.field_sep == WEOF) {
1081 errno = EINVAL;
1082 err(2, NULL);
1083 }
1084 if (!gnusort_numeric_compatibility) {
1085 if (symbol_decimal_point == sort_opts_vals.field_sep)
1086 symbol_decimal_point = WEOF;
1087 if (symbol_thousands_sep == sort_opts_vals.field_sep)
1088 symbol_thousands_sep = WEOF;
1089 if (symbol_negative_sign == sort_opts_vals.field_sep)
1090 symbol_negative_sign = WEOF;
1091 if (symbol_positive_sign == sort_opts_vals.field_sep)
1092 symbol_positive_sign = WEOF;
1093 }
1094 break;
1095 case 'u':
1096 sort_opts_vals.uflag = true;
1097 /* stable sort for the correct unique val */
1098 sort_opts_vals.sflag = true;
1099 break;
1100 case 'z':
1101 sort_opts_vals.zflag = true;
1102 break;
1103 case SORT_OPT:
1104 if (optarg) {
1105 if (!strcmp(optarg, "general-numeric"))
1106 set_sort_modifier(sm, 'g');
1107 else if (!strcmp(optarg, "human-numeric"))
1108 set_sort_modifier(sm, 'h');
1109 else if (!strcmp(optarg, "numeric"))
1110 set_sort_modifier(sm, 'n');
1111 else if (!strcmp(optarg, "month"))
1112 set_sort_modifier(sm, 'M');
1113 else if (!strcmp(optarg, "random"))
1114 set_sort_modifier(sm, 'R');
1115 else
1116 unknown(optarg);
1117 }
1118 break;
1119 #if defined(SORT_THREADS)
1120 case PARALLEL_OPT:
1121 nthreads = (size_t)(atoi(optarg));
1122 if (nthreads < 1)
1123 nthreads = 1;
1124 if (nthreads > 1024)
1125 nthreads = 1024;
1126 break;
1127 #endif
1128 case QSORT_OPT:
1129 sort_opts_vals.sort_method = SORT_QSORT;
1130 break;
1131 case MERGESORT_OPT:
1132 sort_opts_vals.sort_method = SORT_MERGESORT;
1133 break;
1134 case MMAP_OPT:
1135 use_mmap = true;
1136 break;
1137 case HEAPSORT_OPT:
1138 sort_opts_vals.sort_method = SORT_HEAPSORT;
1139 break;
1140 case RADIXSORT_OPT:
1141 sort_opts_vals.sort_method = SORT_RADIXSORT;
1142 break;
1143 case RANDOMSOURCE_OPT:
1144 random_source = strdup(optarg);
1145 break;
1146 case COMPRESSPROGRAM_OPT:
1147 compress_program = strdup(optarg);
1148 break;
1149 case FF_OPT:
1150 read_fns_from_file0(optarg);
1151 break;
1152 case BS_OPT:
1153 {
1154 errno = 0;
1155 long mof = strtol(optarg, NULL, 10);
1156 if (errno != 0)
1157 err(2, "--batch-size");
1158 if (mof >= 2)
1159 max_open_files = (size_t) mof + 1;
1160 }
1161 break;
1162 case VERSION_OPT:
1163 printf("%s\n", VERSION);
1164 exit(EXIT_SUCCESS);
1165 /* NOTREACHED */
1166 break;
1167 case DEBUG_OPT:
1168 debug_sort = true;
1169 break;
1170 case HELP_OPT:
1171 usage(false);
1172 /* NOTREACHED */
1173 break;
1174 default:
1175 usage(true);
1176 /* NOTREACHED */
1177 }
1178 }
1179 }
1180
1181 argc -= optind;
1182 argv += optind;
1183
1184 if (argv_from_file0) {
1185 argc = argc_from_file0;
1186 argv = argv_from_file0;
1187 }
1188
1189 if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1190 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1191
1192 if (keys_num == 0) {
1193 keys_num = 1;
1194 keys = sort_realloc(keys, sizeof(struct key_specs));
1195 memset(&(keys[0]), 0, sizeof(struct key_specs));
1196 keys[0].c1 = 1;
1197 keys[0].pos1b = default_sort_mods->bflag;
1198 keys[0].pos2b = default_sort_mods->bflag;
1199 memcpy(&(keys[0].sm), default_sort_mods,
1200 sizeof(struct sort_mods));
1201 }
1202
1203 for (size_t i = 0; i < keys_num; i++) {
1204 struct key_specs *ks;
1205
1206 ks = &(keys[i]);
1207
1208 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1209 !(ks->pos2b)) {
1210 ks->pos1b = sm->bflag;
1211 ks->pos2b = sm->bflag;
1212 memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1213 }
1214
1215 ks->sm.func = get_sort_func(&(ks->sm));
1216 }
1217
1218 if (debug_sort) {
1219 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1220 #if defined(SORT_THREADS)
1221 printf("Number of CPUs: %d\n",(int)ncpu);
1222 nthreads = 1;
1223 #endif
1224 printf("Using collate rules of %s locale\n",
1225 setlocale(LC_COLLATE, NULL));
1226 if (byte_sort)
1227 printf("Byte sort is used\n");
1228 if (print_symbols_on_debug) {
1229 printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1230 if (symbol_thousands_sep)
1231 printf("Thousands separator: <%lc>\n",
1232 symbol_thousands_sep);
1233 printf("Positive sign: <%lc>\n", symbol_positive_sign);
1234 printf("Negative sign: <%lc>\n", symbol_negative_sign);
1235 }
1236 }
1237
1238 if (need_random)
1239 get_random_seed(random_source);
1240
1241 /* Case when the outfile equals one of the input files: */
1242 if (strcmp(outfile, "-")) {
1243
1244 for(int i = 0; i < argc; ++i) {
1245 if (strcmp(argv[i], outfile) == 0) {
1246 real_outfile = sort_strdup(outfile);
1247 for(;;) {
1248 char* tmp = sort_malloc(strlen(outfile) +
1249 strlen(".tmp") + 1);
1250
1251 strcpy(tmp, outfile);
1252 strcpy(tmp + strlen(tmp), ".tmp");
1253 sort_free(outfile);
1254 outfile = tmp;
1255 if (access(outfile, F_OK) < 0)
1256 break;
1257 }
1258 tmp_file_atexit(outfile);
1259 }
1260 }
1261 }
1262
1263 #if defined(SORT_THREADS)
1264 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1265 nthreads = 1;
1266 #endif
1267
1268 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1269 struct file_list fl;
1270 struct sort_list list;
1271
1272 sort_list_init(&list);
1273 file_list_init(&fl, true);
1274
1275 if (argc < 1)
1276 procfile("-", &list, &fl);
1277 else {
1278 while (argc > 0) {
1279 procfile(*argv, &list, &fl);
1280 --argc;
1281 ++argv;
1282 }
1283 }
1284
1285 if (fl.count < 1)
1286 sort_list_to_file(&list, outfile);
1287 else {
1288 if (list.count > 0) {
1289 char *flast = new_tmp_file_name();
1290
1291 sort_list_to_file(&list, flast);
1292 file_list_add(&fl, flast, false);
1293 }
1294 merge_files(&fl, outfile);
1295 }
1296
1297 file_list_clean(&fl);
1298
1299 /*
1300 * We are about to exit the program, so we can ignore
1301 * the clean-up for speed
1302 *
1303 * sort_list_clean(&list);
1304 */
1305
1306 } else if (sort_opts_vals.cflag) {
1307 result = (argc == 0) ? (check("-")) : (check(*argv));
1308 } else if (sort_opts_vals.mflag) {
1309 struct file_list fl;
1310
1311 file_list_init(&fl, false);
1312 /* No file arguments remaining means "read from stdin." */
1313 if (argc == 0)
1314 file_list_add(&fl, "-", true);
1315 else
1316 file_list_populate(&fl, argc, argv, true);
1317 merge_files(&fl, outfile);
1318 file_list_clean(&fl);
1319 }
1320
1321 if (real_outfile) {
1322 unlink(real_outfile);
1323 if (rename(outfile, real_outfile) < 0)
1324 err(2, NULL);
1325 sort_free(real_outfile);
1326 }
1327
1328 sort_free(outfile);
1329
1330 return (result);
1331 }
1332