xref: /freebsd/usr.bin/sort/sort.c (revision 7bda9663949a80e4e56006369d6df8dc8eeb6cff)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36 
37 #include <err.h>
38 #include <errno.h>
39 #include <getopt.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <md5.h>
43 #include <regex.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
56 
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61 
62 #define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
63 
64 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
65 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
66 
67 static bool need_random;
68 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
69 static const void *random_seed;
70 static size_t random_seed_size;
71 
72 MD5_CTX md5_ctx;
73 
/*
 * Built-in English messages, used when NLS is disabled or no message
 * catalogue is found.  The number beside each entry is its position in
 * the table; slot 0 is an empty placeholder.
 */
const char *nlsstr[] = {
	/*  0 */ "",
	/*  1 */ "mutually exclusive flags",
	/*  2 */ "extra argument not allowed with -c",
	/*  3 */ "Unknown feature",
	/*  4 */ "Wrong memory buffer specification",
	/*  5 */ "0 field in key specs",
	/*  6 */ "0 column in key specs",
	/*  7 */ "Wrong file mode",
	/*  8 */ "Cannot open file for reading",
	/*  9 */ "Radix sort cannot be used with these sort options",
	/* 10 */ "The chosen sort method cannot be used with stable and/or unique sort",
	/* 11 */ "Invalid key position",
	/* 12 */ "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
101 
102 struct sort_opts sort_opts_vals;
103 
104 bool debug_sort;
105 bool need_hint;
106 
107 #if defined(SORT_THREADS)
108 unsigned int ncpu = 1;
109 size_t nthreads = 1;
110 #endif
111 
112 static bool gnusort_numeric_compatibility;
113 
114 static struct sort_mods default_sort_mods_object;
115 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
116 
117 static bool print_symbols_on_debug;
118 
119 /*
120  * Arguments from file (when file0-from option is used:
121  */
122 static size_t argc_from_file0 = (size_t)-1;
123 static char **argv_from_file0;
124 
/*
 * getopt_long() return codes for the long options that have no
 * single-character spelling.  Numbering starts above CHAR_MAX so that no
 * code can collide with a short option letter.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
147 
/*
 * Option letters of which at most one may be in effect at a time.  The
 * bool arrays passed to check_mutually_exclusive_flags() are indexed in
 * the same order as this table.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
150 
151 static struct option long_options[] = {
152 				{ "batch-size", required_argument, NULL, BS_OPT },
153 				{ "buffer-size", required_argument, NULL, 'S' },
154 				{ "check", optional_argument, NULL, 'c' },
155 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
156 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
157 				{ "debug", no_argument, NULL, DEBUG_OPT },
158 				{ "dictionary-order", no_argument, NULL, 'd' },
159 				{ "field-separator", required_argument, NULL, 't' },
160 				{ "files0-from", required_argument, NULL, FF_OPT },
161 				{ "general-numeric-sort", no_argument, NULL, 'g' },
162 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
163 				{ "help",no_argument, NULL, HELP_OPT },
164 				{ "human-numeric-sort", no_argument, NULL, 'h' },
165 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
166 				{ "ignore-case", no_argument, NULL, 'f' },
167 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
168 				{ "key", required_argument, NULL, 'k' },
169 				{ "merge", no_argument, NULL, 'm' },
170 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
171 				{ "mmap", no_argument, NULL, MMAP_OPT },
172 				{ "month-sort", no_argument, NULL, 'M' },
173 				{ "numeric-sort", no_argument, NULL, 'n' },
174 				{ "output", required_argument, NULL, 'o' },
175 #if defined(SORT_THREADS)
176 				{ "parallel", required_argument, NULL, PARALLEL_OPT },
177 #endif
178 				{ "qsort", no_argument, NULL, QSORT_OPT },
179 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
180 				{ "random-sort", no_argument, NULL, 'R' },
181 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
182 				{ "reverse", no_argument, NULL, 'r' },
183 				{ "sort", required_argument, NULL, SORT_OPT },
184 				{ "stable", no_argument, NULL, 's' },
185 				{ "temporary-directory",required_argument, NULL, 'T' },
186 				{ "unique", no_argument, NULL, 'u' },
187 				{ "version", no_argument, NULL, VERSION_OPT },
188 				{ "version-sort",no_argument, NULL, 'V' },
189 				{ "zero-terminated", no_argument, NULL, 'z' },
190 				{ NULL, no_argument, NULL, 0 }
191 };
192 
193 void fix_obsolete_keys(int *argc, char **argv);
194 
195 /*
196  * Check where sort modifier is present
197  */
198 static bool
199 sort_modifier_empty(struct sort_mods *sm)
200 {
201 
202 	if (sm == NULL)
203 		return (true);
204 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
205 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
206 }
207 
/*
 * Print the usage message and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
223 
224 /*
225  * Read input file names from a file (file0-from option).
226  */
227 static void
228 read_fns_from_file0(const char *fn)
229 {
230 	FILE *f;
231 	char *line = NULL;
232 	size_t linesize = 0;
233 	ssize_t linelen;
234 
235 	if (fn == NULL)
236 		return;
237 
238 	f = fopen(fn, "r");
239 	if (f == NULL)
240 		err(2, "%s", fn);
241 
242 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
243 		if (*line != '\0') {
244 			if (argc_from_file0 == (size_t) - 1)
245 				argc_from_file0 = 0;
246 			++argc_from_file0;
247 			argv_from_file0 = sort_realloc(argv_from_file0,
248 			    argc_from_file0 * sizeof(char *));
249 			if (argv_from_file0 == NULL)
250 				err(2, NULL);
251 			argv_from_file0[argc_from_file0 - 1] = line;
252 		} else {
253 			free(line);
254 		}
255 		line = NULL;
256 		linesize = 0;
257 	}
258 	if (ferror(f))
259 		err(2, "%s: getdelim", fn);
260 
261 	closefile(f, fn);
262 }
263 
264 /*
265  * Check how much RAM is available for the sort.
266  */
267 static void
268 set_hw_params(void)
269 {
270 	long pages, psize;
271 
272 #if defined(SORT_THREADS)
273 	ncpu = 1;
274 #endif
275 
276 	pages = sysconf(_SC_PHYS_PAGES);
277 	if (pages < 1) {
278 		perror("sysconf pages");
279 		pages = 1;
280 	}
281 	psize = sysconf(_SC_PAGESIZE);
282 	if (psize < 1) {
283 		perror("sysconf psize");
284 		psize = 4096;
285 	}
286 #if defined(SORT_THREADS)
287 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
288 	if (ncpu < 1)
289 		ncpu = 1;
290 	else if(ncpu > 32)
291 		ncpu = 32;
292 
293 	nthreads = ncpu;
294 #endif
295 
296 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
297 	available_free_memory = free_memory / 2;
298 
299 	if (available_free_memory < 1024)
300 		available_free_memory = 1024;
301 }
302 
/*
 * Convert the first multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  When the conversion fails or c is the
 * empty string, *wc is set to def instead.  Nothing happens when either
 * pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	/* mbtowc() yields 0 for an empty string and -1 for invalid input. */
	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
318 
319 /*
320  * Set current locale symbols.
321  */
322 static void
323 set_locale(void)
324 {
325 	struct lconv *lc;
326 	const char *locale;
327 
328 	setlocale(LC_ALL, "");
329 
330 	lc = localeconv();
331 
332 	if (lc) {
333 		/* obtain LC_NUMERIC info */
334 		/* Convert to wide char form */
335 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
336 		    symbol_decimal_point);
337 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
338 		    symbol_thousands_sep);
339 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
340 		    symbol_positive_sign);
341 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
342 		    symbol_negative_sign);
343 	}
344 
345 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
346 		gnusort_numeric_compatibility = true;
347 
348 	locale = setlocale(LC_COLLATE, NULL);
349 
350 	if (locale) {
351 		char *tmpl;
352 		const char *cclocale;
353 
354 		tmpl = sort_strdup(locale);
355 		cclocale = setlocale(LC_COLLATE, "C");
356 		if (cclocale && !strcmp(cclocale, tmpl))
357 			byte_sort = true;
358 		else {
359 			const char *pclocale;
360 
361 			pclocale = setlocale(LC_COLLATE, "POSIX");
362 			if (pclocale && !strcmp(pclocale, tmpl))
363 				byte_sort = true;
364 		}
365 		setlocale(LC_COLLATE, tmpl);
366 		sort_free(tmpl);
367 	}
368 }
369 
370 /*
371  * Set directory temporary files.
372  */
373 static void
374 set_tmpdir(void)
375 {
376 	char *td;
377 
378 	td = getenv("TMPDIR");
379 	if (td != NULL)
380 		tmpdir = sort_strdup(td);
381 }
382 
383 /*
384  * Parse -S option.
385  */
386 static unsigned long long
387 parse_memory_buffer_value(const char *value)
388 {
389 
390 	if (value == NULL)
391 		return (available_free_memory);
392 	else {
393 		char *endptr;
394 		unsigned long long membuf;
395 
396 		endptr = NULL;
397 		errno = 0;
398 		membuf = strtoll(value, &endptr, 10);
399 
400 		if (errno != 0) {
401 			warn("%s",getstr(4));
402 			membuf = available_free_memory;
403 		} else {
404 			switch (*endptr){
405 			case 'Y':
406 				membuf *= 1024;
407 				/* FALLTHROUGH */
408 			case 'Z':
409 				membuf *= 1024;
410 				/* FALLTHROUGH */
411 			case 'E':
412 				membuf *= 1024;
413 				/* FALLTHROUGH */
414 			case 'P':
415 				membuf *= 1024;
416 				/* FALLTHROUGH */
417 			case 'T':
418 				membuf *= 1024;
419 				/* FALLTHROUGH */
420 			case 'G':
421 				membuf *= 1024;
422 				/* FALLTHROUGH */
423 			case 'M':
424 				membuf *= 1024;
425 				/* FALLTHROUGH */
426 			case '\0':
427 			case 'K':
428 				membuf *= 1024;
429 				/* FALLTHROUGH */
430 			case 'b':
431 				break;
432 			case '%':
433 				membuf = (available_free_memory * membuf) /
434 				    100;
435 				break;
436 			default:
437 				warnc(EINVAL, "%s", optarg);
438 				membuf = available_free_memory;
439 			}
440 		}
441 		return (membuf);
442 	}
443 }
444 
445 /*
446  * Signal handler that clears the temporary files.
447  */
448 static void
449 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
450     void *context __unused)
451 {
452 
453 	clear_tmp_files();
454 	exit(-1);
455 }
456 
457 /*
458  * Set signal handler on panic signals.
459  */
460 static void
461 set_signal_handler(void)
462 {
463 	struct sigaction sa;
464 
465 	memset(&sa, 0, sizeof(sa));
466 	sa.sa_sigaction = &sig_handler;
467 	sa.sa_flags = SA_SIGINFO;
468 
469 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
470 		perror("sigaction");
471 		return;
472 	}
473 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
474 		perror("sigaction");
475 		return;
476 	}
477 	if (sigaction(SIGINT, &sa, NULL) < 0) {
478 		perror("sigaction");
479 		return;
480 	}
481 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
482 		perror("sigaction");
483 		return;
484 	}
485 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
486 		perror("sigaction");
487 		return;
488 	}
489 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
490 		perror("sigaction");
491 		return;
492 	}
493 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
494 		perror("sigaction");
495 		return;
496 	}
497 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
498 		perror("sigaction");
499 		return;
500 	}
501 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
502 		perror("sigaction");
503 		return;
504 	}
505 }
506 
/*
 * Report an unrecognised option argument and exit with status 2.
 * The diagnostic consists of message 3 followed by the offending text.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
516 
517 /*
518  * Check whether contradictory input options are used.
519  */
520 static void
521 check_mutually_exclusive_flags(char c, bool *mef_flags)
522 {
523 	int fo_index, mec;
524 	bool found_others, found_this;
525 
526 	found_others = found_this = false;
527 	fo_index = 0;
528 
529 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
530 		mec = mutually_exclusive_flags[i];
531 
532 		if (mec != c) {
533 			if (mef_flags[i]) {
534 				if (found_this)
535 					errx(1, "%c:%c: %s", c, mec, getstr(1));
536 				found_others = true;
537 				fo_index = i;
538 			}
539 		} else {
540 			if (found_others)
541 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
542 			mef_flags[i] = true;
543 			found_this = true;
544 		}
545 	}
546 }
547 
548 /*
549  * Initialise sort opts data.
550  */
551 static void
552 set_sort_opts(void)
553 {
554 
555 	memset(&default_sort_mods_object, 0,
556 	    sizeof(default_sort_mods_object));
557 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
558 	default_sort_mods_object.func =
559 	    get_sort_func(&default_sort_mods_object);
560 }
561 
562 /*
563  * Set a sort modifier on a sort modifiers object.
564  */
565 static bool
566 set_sort_modifier(struct sort_mods *sm, int c)
567 {
568 
569 	if (sm) {
570 		switch (c){
571 		case 'b':
572 			sm->bflag = true;
573 			break;
574 		case 'd':
575 			sm->dflag = true;
576 			break;
577 		case 'f':
578 			sm->fflag = true;
579 			break;
580 		case 'g':
581 			sm->gflag = true;
582 			need_hint = true;
583 			break;
584 		case 'i':
585 			sm->iflag = true;
586 			break;
587 		case 'R':
588 			sm->Rflag = true;
589 			need_random = true;
590 			break;
591 		case 'M':
592 			initialise_months();
593 			sm->Mflag = true;
594 			need_hint = true;
595 			break;
596 		case 'n':
597 			sm->nflag = true;
598 			need_hint = true;
599 			print_symbols_on_debug = true;
600 			break;
601 		case 'r':
602 			sm->rflag = true;
603 			break;
604 		case 'V':
605 			sm->Vflag = true;
606 			break;
607 		case 'h':
608 			sm->hflag = true;
609 			need_hint = true;
610 			print_symbols_on_debug = true;
611 			break;
612 		default:
613 			return false;
614 		}
615 		sort_opts_vals.complex_sort = true;
616 		sm->func = get_sort_func(sm);
617 	}
618 	return (true);
619 }
620 
621 /*
622  * Parse POS in -k option.
623  */
624 static int
625 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
626 {
627 	regmatch_t pmatch[4];
628 	regex_t re;
629 	char *c, *f;
630 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
631 	size_t len, nmatch;
632 	int ret;
633 
634 	ret = -1;
635 	nmatch = 4;
636 	c = f = NULL;
637 
638 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
639 		return (-1);
640 
641 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
642 		goto end;
643 
644 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
645 		goto end;
646 
647 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
648 		goto end;
649 
650 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
651 	f = sort_malloc((len + 1) * sizeof(char));
652 
653 	strncpy(f, s + pmatch[1].rm_so, len);
654 	f[len] = '\0';
655 
656 	if (second) {
657 		errno = 0;
658 		ks->f2 = (size_t) strtoul(f, NULL, 10);
659 		if (errno != 0)
660 			err(2, "-k");
661 		if (ks->f2 == 0) {
662 			warn("%s",getstr(5));
663 			goto end;
664 		}
665 	} else {
666 		errno = 0;
667 		ks->f1 = (size_t) strtoul(f, NULL, 10);
668 		if (errno != 0)
669 			err(2, "-k");
670 		if (ks->f1 == 0) {
671 			warn("%s",getstr(5));
672 			goto end;
673 		}
674 	}
675 
676 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
677 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
678 		c = sort_malloc((len + 1) * sizeof(char));
679 
680 		strncpy(c, s + pmatch[2].rm_so + 1, len);
681 		c[len] = '\0';
682 
683 		if (second) {
684 			errno = 0;
685 			ks->c2 = (size_t) strtoul(c, NULL, 10);
686 			if (errno != 0)
687 				err(2, "-k");
688 		} else {
689 			errno = 0;
690 			ks->c1 = (size_t) strtoul(c, NULL, 10);
691 			if (errno != 0)
692 				err(2, "-k");
693 			if (ks->c1 == 0) {
694 				warn("%s",getstr(6));
695 				goto end;
696 			}
697 		}
698 	} else {
699 		if (second)
700 			ks->c2 = 0;
701 		else
702 			ks->c1 = 1;
703 	}
704 
705 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
706 		regoff_t i = 0;
707 
708 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
709 			check_mutually_exclusive_flags(s[i], mef_flags);
710 			if (s[i] == 'b') {
711 				if (second)
712 					ks->pos2b = true;
713 				else
714 					ks->pos1b = true;
715 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
716 				goto end;
717 		}
718 	}
719 
720 	ret = 0;
721 
722 end:
723 
724 	if (c)
725 		sort_free(c);
726 	if (f)
727 		sort_free(f);
728 	regfree(&re);
729 
730 	return (ret);
731 }
732 
733 /*
734  * Parse -k option value.
735  */
736 static int
737 parse_k(const char *s, struct key_specs *ks)
738 {
739 	int ret = -1;
740 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
741 	    { false, false, false, false, false, false };
742 
743 	if (s && *s) {
744 		char *sptr;
745 
746 		sptr = strchr(s, ',');
747 		if (sptr) {
748 			size_t size1;
749 			char *pos1, *pos2;
750 
751 			size1 = sptr - s;
752 
753 			if (size1 < 1)
754 				return (-1);
755 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
756 
757 			strncpy(pos1, s, size1);
758 			pos1[size1] = '\0';
759 
760 			ret = parse_pos(pos1, ks, mef_flags, false);
761 
762 			sort_free(pos1);
763 			if (ret < 0)
764 				return (ret);
765 
766 			pos2 = sort_strdup(sptr + 1);
767 			ret = parse_pos(pos2, ks, mef_flags, true);
768 			sort_free(pos2);
769 		} else
770 			ret = parse_pos(s, ks, mef_flags, false);
771 	}
772 
773 	return (ret);
774 }
775 
/*
 * Size, including the terminating NUL, of the buffers that receive the
 * modifier letters of an obsolete +POS / -POS argument.
 */
#define	OBS_SOPTS_SIZE	128

/*
 * Parse POS in +POS -POS option.
 *
 * s is the argument without its leading '+' or '-':
 * FIELD[.COLUMN][LETTERS].  The field number is stored in *nf and the
 * column number in *nc (both 0 when absent or when s does not match).
 * The trailing letters, if any, are copied to sopts, which must have
 * room for OBS_SOPTS_SIZE bytes and is left untouched when there are
 * none.
 *
 * Returns 0 when s has the expected form and -1 otherwise.  A number
 * that is too large to be incremented as an int, or a run of letters
 * that does not fit into sopts, terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, sizeof(pmatch) / sizeof(pmatch[0]), pmatch,
	    0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The pattern guarantees a run of digits ended by a non-digit, so
	 * the numbers can be read in place.  Values are kept below INT_MAX
	 * because the caller adds one to them.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		errno = 0;
		/* The + 1 skips the '.' that starts the group. */
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's fixed-size buffer. */
		if (len >= OBS_SOPTS_SIZE)
			errx(2, "%s", getstr(11));
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-kF.C[letters]" string; field and column are zero-based in the old
 * syntax, so both are incremented.  When the next argument has the form
 * -POS2 it is merged in as ",F.C[letters]" and removed from argv, which
 * lowers *argc by one.  For POS2 the field is incremented only when a
 * non-zero column was given.  Arguments that do not match are left
 * alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * "-k", two "<int>.<int><letters>" halves, a comma and the NUL:
	 * comfortably below twice (OBS_SOPTS_SIZE + 32).
	 */
	char sopt[2 * (OBS_SOPTS_SIZE + 32)];
	int n;

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					n = snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					if (n < 0 || (size_t)n >= sizeof(sopt))
						errx(2, "%s", getstr(11));
					argv[i] = sort_strdup(sopt);

					/* Close the gap left by -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep argv[argc] a null pointer. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		n = snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "%s", getstr(11));
		argv[i] = sort_strdup(sopt);
	}
}
906 
907 /*
908  * Set random seed
909  */
910 static void
911 set_random_seed(void)
912 {
913 	if (need_random) {
914 
915 		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
916 			FILE* fseed;
917 			MD5_CTX ctx;
918 			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
919 			size_t sz = 0;
920 
921 			fseed = openfile(random_source, "r");
922 			while (!feof(fseed)) {
923 				int cr;
924 
925 				cr = fgetc(fseed);
926 				if (cr == EOF)
927 					break;
928 
929 				rsd[sz++] = (char) cr;
930 
931 				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
932 					break;
933 			}
934 
935 			closefile(fseed, random_source);
936 
937 			MD5Init(&ctx);
938 			MD5Update(&ctx, rsd, sz);
939 
940 			random_seed = MD5End(&ctx, NULL);
941 			random_seed_size = strlen(random_seed);
942 
943 		} else {
944 			MD5_CTX ctx;
945 			char *b;
946 
947 			MD5Init(&ctx);
948 			b = MD5File(random_source, NULL);
949 			if (b == NULL)
950 				err(2, NULL);
951 
952 			random_seed = b;
953 			random_seed_size = strlen(b);
954 		}
955 
956 		MD5Init(&md5_ctx);
957 		if(random_seed_size>0) {
958 			MD5Update(&md5_ctx, random_seed, random_seed_size);
959 		}
960 	}
961 }
962 
963 /*
964  * Main function.
965  */
966 int
967 main(int argc, char **argv)
968 {
969 	char *outfile, *real_outfile;
970 	int c, result;
971 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
972 	    { false, false, false, false, false, false };
973 
974 	result = 0;
975 	outfile = sort_strdup("-");
976 	real_outfile = NULL;
977 
978 	struct sort_mods *sm = &default_sort_mods_object;
979 
980 	init_tmp_files();
981 
982 	set_signal_handler();
983 
984 	set_hw_params();
985 	set_locale();
986 	set_tmpdir();
987 	set_sort_opts();
988 
989 	fix_obsolete_keys(&argc, argv);
990 
991 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
992 	    != -1)) {
993 
994 		check_mutually_exclusive_flags(c, mef_flags);
995 
996 		if (!set_sort_modifier(sm, c)) {
997 
998 			switch (c) {
999 			case 'c':
1000 				sort_opts_vals.cflag = true;
1001 				if (optarg) {
1002 					if (!strcmp(optarg, "diagnose-first"))
1003 						;
1004 					else if (!strcmp(optarg, "silent") ||
1005 					    !strcmp(optarg, "quiet"))
1006 						sort_opts_vals.csilentflag = true;
1007 					else if (*optarg)
1008 						unknown(optarg);
1009 				}
1010 				break;
1011 			case 'C':
1012 				sort_opts_vals.cflag = true;
1013 				sort_opts_vals.csilentflag = true;
1014 				break;
1015 			case 'k':
1016 			{
1017 				sort_opts_vals.complex_sort = true;
1018 				sort_opts_vals.kflag = true;
1019 
1020 				keys_num++;
1021 				keys = sort_realloc(keys, keys_num *
1022 				    sizeof(struct key_specs));
1023 				memset(&(keys[keys_num - 1]), 0,
1024 				    sizeof(struct key_specs));
1025 
1026 				if (parse_k(optarg, &(keys[keys_num - 1]))
1027 				    < 0) {
1028 					errc(2, EINVAL, "-k %s", optarg);
1029 				}
1030 
1031 				break;
1032 			}
1033 			case 'm':
1034 				sort_opts_vals.mflag = true;
1035 				break;
1036 			case 'o':
1037 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1038 				strcpy(outfile, optarg);
1039 				break;
1040 			case 's':
1041 				sort_opts_vals.sflag = true;
1042 				break;
1043 			case 'S':
1044 				available_free_memory =
1045 				    parse_memory_buffer_value(optarg);
1046 				break;
1047 			case 'T':
1048 				tmpdir = sort_strdup(optarg);
1049 				break;
1050 			case 't':
1051 				while (strlen(optarg) > 1) {
1052 					if (optarg[0] != '\\') {
1053 						errc(2, EINVAL, "%s", optarg);
1054 					}
1055 					optarg += 1;
1056 					if (*optarg == '0') {
1057 						*optarg = 0;
1058 						break;
1059 					}
1060 				}
1061 				sort_opts_vals.tflag = true;
1062 				sort_opts_vals.field_sep = btowc(optarg[0]);
1063 				if (sort_opts_vals.field_sep == WEOF) {
1064 					errno = EINVAL;
1065 					err(2, NULL);
1066 				}
1067 				if (!gnusort_numeric_compatibility) {
1068 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1069 						symbol_decimal_point = WEOF;
1070 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1071 						symbol_thousands_sep = WEOF;
1072 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1073 						symbol_negative_sign = WEOF;
1074 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1075 						symbol_positive_sign = WEOF;
1076 				}
1077 				break;
1078 			case 'u':
1079 				sort_opts_vals.uflag = true;
1080 				/* stable sort for the correct unique val */
1081 				sort_opts_vals.sflag = true;
1082 				break;
1083 			case 'z':
1084 				sort_opts_vals.zflag = true;
1085 				break;
1086 			case SORT_OPT:
1087 				if (optarg) {
1088 					if (!strcmp(optarg, "general-numeric"))
1089 						set_sort_modifier(sm, 'g');
1090 					else if (!strcmp(optarg, "human-numeric"))
1091 						set_sort_modifier(sm, 'h');
1092 					else if (!strcmp(optarg, "numeric"))
1093 						set_sort_modifier(sm, 'n');
1094 					else if (!strcmp(optarg, "month"))
1095 						set_sort_modifier(sm, 'M');
1096 					else if (!strcmp(optarg, "random"))
1097 						set_sort_modifier(sm, 'R');
1098 					else
1099 						unknown(optarg);
1100 				}
1101 				break;
1102 #if defined(SORT_THREADS)
1103 			case PARALLEL_OPT:
1104 				nthreads = (size_t)(atoi(optarg));
1105 				if (nthreads < 1)
1106 					nthreads = 1;
1107 				if (nthreads > 1024)
1108 					nthreads = 1024;
1109 				break;
1110 #endif
1111 			case QSORT_OPT:
1112 				sort_opts_vals.sort_method = SORT_QSORT;
1113 				break;
1114 			case MERGESORT_OPT:
1115 				sort_opts_vals.sort_method = SORT_MERGESORT;
1116 				break;
1117 			case MMAP_OPT:
1118 				use_mmap = true;
1119 				break;
1120 			case HEAPSORT_OPT:
1121 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1122 				break;
1123 			case RADIXSORT_OPT:
1124 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1125 				break;
1126 			case RANDOMSOURCE_OPT:
1127 				random_source = strdup(optarg);
1128 				break;
1129 			case COMPRESSPROGRAM_OPT:
1130 				compress_program = strdup(optarg);
1131 				break;
1132 			case FF_OPT:
1133 				read_fns_from_file0(optarg);
1134 				break;
1135 			case BS_OPT:
1136 			{
1137 				errno = 0;
1138 				long mof = strtol(optarg, NULL, 10);
1139 				if (errno != 0)
1140 					err(2, "--batch-size");
1141 				if (mof >= 2)
1142 					max_open_files = (size_t) mof + 1;
1143 			}
1144 				break;
1145 			case VERSION_OPT:
1146 				printf("%s\n", VERSION);
1147 				exit(EXIT_SUCCESS);
1148 				/* NOTREACHED */
1149 				break;
1150 			case DEBUG_OPT:
1151 				debug_sort = true;
1152 				break;
1153 			case HELP_OPT:
1154 				usage(false);
1155 				/* NOTREACHED */
1156 				break;
1157 			default:
1158 				usage(true);
1159 				/* NOTREACHED */
1160 			}
1161 		}
1162 	}
1163 
1164 	argc -= optind;
1165 	argv += optind;
1166 
1167 	if (argv_from_file0) {
1168 		argc = argc_from_file0;
1169 		argv = argv_from_file0;
1170 	}
1171 
1172 #ifndef WITHOUT_NLS
1173 	catalog = catopen("sort", NL_CAT_LOCALE);
1174 #endif
1175 
1176 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1177 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1178 
1179 #ifndef WITHOUT_NLS
1180 	catclose(catalog);
1181 #endif
1182 
1183 	if (keys_num == 0) {
1184 		keys_num = 1;
1185 		keys = sort_realloc(keys, sizeof(struct key_specs));
1186 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1187 		keys[0].c1 = 1;
1188 		keys[0].pos1b = default_sort_mods->bflag;
1189 		keys[0].pos2b = default_sort_mods->bflag;
1190 		memcpy(&(keys[0].sm), default_sort_mods,
1191 		    sizeof(struct sort_mods));
1192 	}
1193 
1194 	for (size_t i = 0; i < keys_num; i++) {
1195 		struct key_specs *ks;
1196 
1197 		ks = &(keys[i]);
1198 
1199 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1200 		    !(ks->pos2b)) {
1201 			ks->pos1b = sm->bflag;
1202 			ks->pos2b = sm->bflag;
1203 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1204 		}
1205 
1206 		ks->sm.func = get_sort_func(&(ks->sm));
1207 	}
1208 
1209 	if (debug_sort) {
1210 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1211 #if defined(SORT_THREADS)
1212 		printf("Number of CPUs: %d\n",(int)ncpu);
1213 		nthreads = 1;
1214 #endif
1215 		printf("Using collate rules of %s locale\n",
1216 		    setlocale(LC_COLLATE, NULL));
1217 		if (byte_sort)
1218 			printf("Byte sort is used\n");
1219 		if (print_symbols_on_debug) {
1220 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1221 			if (symbol_thousands_sep)
1222 				printf("Thousands separator: <%lc>\n",
1223 				    symbol_thousands_sep);
1224 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1225 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1226 		}
1227 	}
1228 
1229 	set_random_seed();
1230 
1231 	/* Case when the outfile equals one of the input files: */
1232 	if (strcmp(outfile, "-")) {
1233 
1234 		for(int i = 0; i < argc; ++i) {
1235 			if (strcmp(argv[i], outfile) == 0) {
1236 				real_outfile = sort_strdup(outfile);
1237 				for(;;) {
1238 					char* tmp = sort_malloc(strlen(outfile) +
1239 					    strlen(".tmp") + 1);
1240 
1241 					strcpy(tmp, outfile);
1242 					strcpy(tmp + strlen(tmp), ".tmp");
1243 					sort_free(outfile);
1244 					outfile = tmp;
1245 					if (access(outfile, F_OK) < 0)
1246 						break;
1247 				}
1248 				tmp_file_atexit(outfile);
1249 			}
1250 		}
1251 	}
1252 
1253 #if defined(SORT_THREADS)
1254 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1255 		nthreads = 1;
1256 #endif
1257 
1258 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1259 		struct file_list fl;
1260 		struct sort_list list;
1261 
1262 		sort_list_init(&list);
1263 		file_list_init(&fl, true);
1264 
1265 		if (argc < 1)
1266 			procfile("-", &list, &fl);
1267 		else {
1268 			while (argc > 0) {
1269 				procfile(*argv, &list, &fl);
1270 				--argc;
1271 				++argv;
1272 			}
1273 		}
1274 
1275 		if (fl.count < 1)
1276 			sort_list_to_file(&list, outfile);
1277 		else {
1278 			if (list.count > 0) {
1279 				char *flast = new_tmp_file_name();
1280 
1281 				sort_list_to_file(&list, flast);
1282 				file_list_add(&fl, flast, false);
1283 			}
1284 			merge_files(&fl, outfile);
1285 		}
1286 
1287 		file_list_clean(&fl);
1288 
1289 		/*
1290 		 * We are about to exit the program, so we can ignore
1291 		 * the clean-up for speed
1292 		 *
1293 		 * sort_list_clean(&list);
1294 		 */
1295 
1296 	} else if (sort_opts_vals.cflag) {
1297 		result = (argc == 0) ? (check("-")) : (check(*argv));
1298 	} else if (sort_opts_vals.mflag) {
1299 		struct file_list fl;
1300 
1301 		file_list_init(&fl, false);
1302 		/* No file arguments remaining means "read from stdin." */
1303 		if (argc == 0)
1304 			file_list_add(&fl, "-", true);
1305 		else
1306 			file_list_populate(&fl, argc, argv, true);
1307 		merge_files(&fl, outfile);
1308 		file_list_clean(&fl);
1309 	}
1310 
1311 	if (real_outfile) {
1312 		unlink(real_outfile);
1313 		if (rename(outfile, real_outfile) < 0)
1314 			err(2, NULL);
1315 		sort_free(real_outfile);
1316 	}
1317 
1318 	sort_free(outfile);
1319 
1320 	return (result);
1321 }
1322