xref: /freebsd/usr.bin/sort/sort.c (revision 1f4bcc459a76b7aa664f3fd557684cd0ba6da352)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54 
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
59 
/* Short-option specification handed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/*
 * Default source of random-sort seed data, and the maximum number of bytes
 * set_random_seed() reads from it.
 */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set by set_sort_modifier() when the 'R' (random sort) modifier is seen. */
static bool need_random;
/* File the seed is derived from; replaced by the --random-source argument. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* MD5 digest string computed by set_random_seed(), and its strlen(). */
static const void *random_seed;
static size_t random_seed_size;

/*
 * MD5 context initialised with the seed in set_random_seed().
 * Not static, so presumably consumed by another translation unit
 * -- TODO confirm.
 */
MD5_CTX md5_ctx;
71 
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The array index is the message number passed to getstr() throughout this
 * file (e.g. getstr(1) for the mutually-exclusive-flags diagnostic and
 * getstr(12) for the usage format string, whose single %s receives the
 * program name).  Index 0 is an empty placeholder so numbering starts at 1.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
99 
/* Global (non-key) option state; zeroed by set_sort_opts(), filled in main(). */
struct sort_opts sort_opts_vals;

/* Set by --debug; main() then prints its configuration before sorting. */
bool debug_sort;
/* Set by set_sort_modifier() for the 'g', 'M', 'n' and 'h' modifiers. */
bool need_hint;

#if defined(SORT_THREADS)
/* Online CPU count (clamped in set_hw_params()) and worker thread count. */
unsigned int ncpu = 1;
size_t nthreads = 1;
#endif

/* Set when the GNUSORT_NUMERIC_COMPATIBILITY environment variable exists. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as stand-alone options; main() copies them into keys
 * that carry no modifiers of their own. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the 'n' and 'h' modifiers; makes --debug print numeric symbols. */
static bool print_symbols_on_debug;
116 
/*
 * Arguments read from a file (when the --files0-from option is used).
 * The count stays at (size_t)-1 until read_fns_from_file0() stores the
 * first file name.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
122 
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Numbering starts at CHAR_MAX + 1, i.e. above every value a plain char
 * option letter can take.  These are the codes stored in long_options[]
 * and dispatched on in main().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
145 
/*
 * Option letters of which at most one may be given.  The "seen" arrays
 * passed to check_mutually_exclusive_flags() are indexed in parallel with
 * this table.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
148 
/*
 * Long option table for getopt_long().  Each entry maps a long name either
 * to the equivalent short option letter or to one of the *_OPT placeholder
 * codes; the flag pointer is always NULL, so getopt_long() returns the code
 * in the last column.  The all-NULL entry terminates the table.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};
190 
/* Rewrites obsolete "+POS1 [-POS2]" arguments in place; defined below. */
void fix_obsolete_keys(int *argc, char **argv);
192 
193 /*
194  * Check where sort modifier is present
195  */
196 static bool
197 sort_modifier_empty(struct sort_mods *sm)
198 {
199 
200 	if (sm == NULL)
201 		return (true);
202 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204 }
205 
/*
 * Print out usage text and terminate the program.
 *
 * opt_err: true when called because of an option error; the text then goes
 *          to stderr and the exit status is 2.  Otherwise the text goes to
 *          stdout and the exit status is 0.
 *
 * Message 12 is a format string whose single %s receives the program name.
 * This function does not return.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;
	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
225 
226 /*
227  * Read input file names from a file (file0-from option).
228  */
229 static void
230 read_fns_from_file0(const char *fn)
231 {
232 	FILE *f;
233 	char *line = NULL;
234 	size_t linesize = 0;
235 	ssize_t linelen;
236 
237 	if (fn == NULL)
238 		return;
239 
240 	f = fopen(fn, "r");
241 	if (f == NULL)
242 		err(2, "%s", fn);
243 
244 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
245 		if (*line != '\0') {
246 			if (argc_from_file0 == (size_t) - 1)
247 				argc_from_file0 = 0;
248 			++argc_from_file0;
249 			argv_from_file0 = sort_realloc(argv_from_file0,
250 			    argc_from_file0 * sizeof(char *));
251 			if (argv_from_file0 == NULL)
252 				err(2, NULL);
253 			argv_from_file0[argc_from_file0 - 1] = line;
254 		} else {
255 			free(line);
256 		}
257 		line = NULL;
258 		linesize = 0;
259 	}
260 	if (ferror(f))
261 		err(2, "%s: getdelim", fn);
262 
263 	closefile(f, fn);
264 }
265 
266 /*
267  * Check how much RAM is available for the sort.
268  */
269 static void
270 set_hw_params(void)
271 {
272 	long pages, psize;
273 
274 	pages = psize = 0;
275 
276 #if defined(SORT_THREADS)
277 	ncpu = 1;
278 #endif
279 
280 	pages = sysconf(_SC_PHYS_PAGES);
281 	if (pages < 1) {
282 		perror("sysconf pages");
283 		psize = 1;
284 	}
285 	psize = sysconf(_SC_PAGESIZE);
286 	if (psize < 1) {
287 		perror("sysconf psize");
288 		psize = 4096;
289 	}
290 #if defined(SORT_THREADS)
291 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
292 	if (ncpu < 1)
293 		ncpu = 1;
294 	else if(ncpu > 32)
295 		ncpu = 32;
296 
297 	nthreads = ncpu;
298 #endif
299 
300 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
301 	available_free_memory = free_memory / 2;
302 
303 	if (available_free_memory < 1024)
304 		available_free_memory = 1024;
305 }
306 
/*
 * Convert the first multibyte character of c to a wide character.
 *
 * wc:  destination; nothing happens when it is NULL.
 * c:   multibyte source string; nothing happens when it is NULL.
 * def: value stored in *wc when mbtowc() does not report a converted
 *      character (result below 1, i.e. an empty string or an invalid
 *      sequence).
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
322 
323 /*
324  * Set current locale symbols.
325  */
326 static void
327 set_locale(void)
328 {
329 	struct lconv *lc;
330 	const char *locale;
331 
332 	setlocale(LC_ALL, "");
333 
334 	lc = localeconv();
335 
336 	if (lc) {
337 		/* obtain LC_NUMERIC info */
338 		/* Convert to wide char form */
339 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
340 		    symbol_decimal_point);
341 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
342 		    symbol_thousands_sep);
343 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
344 		    symbol_positive_sign);
345 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
346 		    symbol_negative_sign);
347 	}
348 
349 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
350 		gnusort_numeric_compatibility = true;
351 
352 	locale = setlocale(LC_COLLATE, NULL);
353 
354 	if (locale) {
355 		char *tmpl;
356 		const char *cclocale;
357 
358 		tmpl = sort_strdup(locale);
359 		cclocale = setlocale(LC_COLLATE, "C");
360 		if (cclocale && !strcmp(cclocale, tmpl))
361 			byte_sort = true;
362 		else {
363 			const char *pclocale;
364 
365 			pclocale = setlocale(LC_COLLATE, "POSIX");
366 			if (pclocale && !strcmp(pclocale, tmpl))
367 				byte_sort = true;
368 		}
369 		setlocale(LC_COLLATE, tmpl);
370 		sort_free(tmpl);
371 	}
372 }
373 
374 /*
375  * Set directory temporary files.
376  */
377 static void
378 set_tmpdir(void)
379 {
380 	char *td;
381 
382 	td = getenv("TMPDIR");
383 	if (td != NULL)
384 		tmpdir = sort_strdup(td);
385 }
386 
387 /*
388  * Parse -S option.
389  */
390 static unsigned long long
391 parse_memory_buffer_value(const char *value)
392 {
393 
394 	if (value == NULL)
395 		return (available_free_memory);
396 	else {
397 		char *endptr;
398 		unsigned long long membuf;
399 
400 		endptr = NULL;
401 		errno = 0;
402 		membuf = strtoll(value, &endptr, 10);
403 
404 		if (errno != 0) {
405 			warn("%s",getstr(4));
406 			membuf = available_free_memory;
407 		} else {
408 			switch (*endptr){
409 			case 'Y':
410 				membuf *= 1024;
411 				/* FALLTHROUGH */
412 			case 'Z':
413 				membuf *= 1024;
414 				/* FALLTHROUGH */
415 			case 'E':
416 				membuf *= 1024;
417 				/* FALLTHROUGH */
418 			case 'P':
419 				membuf *= 1024;
420 				/* FALLTHROUGH */
421 			case 'T':
422 				membuf *= 1024;
423 				/* FALLTHROUGH */
424 			case 'G':
425 				membuf *= 1024;
426 				/* FALLTHROUGH */
427 			case 'M':
428 				membuf *= 1024;
429 				/* FALLTHROUGH */
430 			case '\0':
431 			case 'K':
432 				membuf *= 1024;
433 				/* FALLTHROUGH */
434 			case 'b':
435 				break;
436 			case '%':
437 				membuf = (available_free_memory * membuf) /
438 				    100;
439 				break;
440 			default:
441 				warnc(EINVAL, "%s", optarg);
442 				membuf = available_free_memory;
443 			}
444 		}
445 		return (membuf);
446 	}
447 }
448 
/*
 * Signal handler that clears the temporary files.
 *
 * Installed by set_signal_handler() as an SA_SIGINFO handler; all three
 * arguments are ignored.  It calls clear_tmp_files() and then terminates
 * the process through exit(-1).
 *
 * NOTE(review): exit() is not listed as async-signal-safe by POSIX, and
 * whether clear_tmp_files() is cannot be told from this file -- confirm
 * before relying on this handler under load.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
460 
461 /*
462  * Set signal handler on panic signals.
463  */
464 static void
465 set_signal_handler(void)
466 {
467 	struct sigaction sa;
468 
469 	memset(&sa, 0, sizeof(sa));
470 	sa.sa_sigaction = &sig_handler;
471 	sa.sa_flags = SA_SIGINFO;
472 
473 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
474 		perror("sigaction");
475 		return;
476 	}
477 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
478 		perror("sigaction");
479 		return;
480 	}
481 	if (sigaction(SIGINT, &sa, NULL) < 0) {
482 		perror("sigaction");
483 		return;
484 	}
485 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
486 		perror("sigaction");
487 		return;
488 	}
489 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
490 		perror("sigaction");
491 		return;
492 	}
493 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
494 		perror("sigaction");
495 		return;
496 	}
497 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
498 		perror("sigaction");
499 		return;
500 	}
501 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
502 		perror("sigaction");
503 		return;
504 	}
505 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
506 		perror("sigaction");
507 		return;
508 	}
509 }
510 
/*
 * Print "unknown" message and exit with status 2.
 *
 * what: the offending option argument; it is printed after message 3
 *       ("Unknown feature").  errx() terminates the program, so this
 *       function does not return.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
520 
521 /*
522  * Check whether contradictory input options are used.
523  */
524 static void
525 check_mutually_exclusive_flags(char c, bool *mef_flags)
526 {
527 	int fo_index, mec;
528 	bool found_others, found_this;
529 
530 	found_others = found_this = false;
531 	fo_index = 0;
532 
533 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
534 		mec = mutually_exclusive_flags[i];
535 
536 		if (mec != c) {
537 			if (mef_flags[i]) {
538 				if (found_this)
539 					errx(1, "%c:%c: %s", c, mec, getstr(1));
540 				found_others = true;
541 				fo_index = i;
542 			}
543 		} else {
544 			if (found_others)
545 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
546 			mef_flags[i] = true;
547 			found_this = true;
548 		}
549 	}
550 }
551 
/*
 * Initialise sort opts data.
 *
 * Zeroes both the default modifiers object and the global option
 * structure, then stores in the default modifiers whatever comparison
 * function get_sort_func() selects for that all-clear modifier set.
 */
static void
set_sort_opts(void)
{

	memset(&default_sort_mods_object, 0,
	    sizeof(default_sort_mods_object));
	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
	default_sort_mods_object.func =
	    get_sort_func(&default_sort_mods_object);
}
565 
566 /*
567  * Set a sort modifier on a sort modifiers object.
568  */
569 static bool
570 set_sort_modifier(struct sort_mods *sm, int c)
571 {
572 
573 	if (sm) {
574 		switch (c){
575 		case 'b':
576 			sm->bflag = true;
577 			break;
578 		case 'd':
579 			sm->dflag = true;
580 			break;
581 		case 'f':
582 			sm->fflag = true;
583 			break;
584 		case 'g':
585 			sm->gflag = true;
586 			need_hint = true;
587 			break;
588 		case 'i':
589 			sm->iflag = true;
590 			break;
591 		case 'R':
592 			sm->Rflag = true;
593 			need_random = true;
594 			break;
595 		case 'M':
596 			initialise_months();
597 			sm->Mflag = true;
598 			need_hint = true;
599 			break;
600 		case 'n':
601 			sm->nflag = true;
602 			need_hint = true;
603 			print_symbols_on_debug = true;
604 			break;
605 		case 'r':
606 			sm->rflag = true;
607 			break;
608 		case 'V':
609 			sm->Vflag = true;
610 			break;
611 		case 'h':
612 			sm->hflag = true;
613 			need_hint = true;
614 			print_symbols_on_debug = true;
615 			break;
616 		default:
617 			return false;
618 		}
619 		sort_opts_vals.complex_sort = true;
620 		sm->func = get_sort_func(sm);
621 	}
622 	return (true);
623 }
624 
625 /*
626  * Parse POS in -k option.
627  */
628 static int
629 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
630 {
631 	regmatch_t pmatch[4];
632 	regex_t re;
633 	char *c, *f;
634 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
635 	size_t len, nmatch;
636 	int ret;
637 
638 	ret = -1;
639 	nmatch = 4;
640 	c = f = NULL;
641 
642 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
643 		return (-1);
644 
645 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
646 		goto end;
647 
648 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
649 		goto end;
650 
651 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
652 		goto end;
653 
654 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
655 	f = sort_malloc((len + 1) * sizeof(char));
656 
657 	strncpy(f, s + pmatch[1].rm_so, len);
658 	f[len] = '\0';
659 
660 	if (second) {
661 		errno = 0;
662 		ks->f2 = (size_t) strtoul(f, NULL, 10);
663 		if (errno != 0)
664 			err(2, "-k");
665 		if (ks->f2 == 0) {
666 			warn("%s",getstr(5));
667 			goto end;
668 		}
669 	} else {
670 		errno = 0;
671 		ks->f1 = (size_t) strtoul(f, NULL, 10);
672 		if (errno != 0)
673 			err(2, "-k");
674 		if (ks->f1 == 0) {
675 			warn("%s",getstr(5));
676 			goto end;
677 		}
678 	}
679 
680 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
681 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
682 		c = sort_malloc((len + 1) * sizeof(char));
683 
684 		strncpy(c, s + pmatch[2].rm_so + 1, len);
685 		c[len] = '\0';
686 
687 		if (second) {
688 			errno = 0;
689 			ks->c2 = (size_t) strtoul(c, NULL, 10);
690 			if (errno != 0)
691 				err(2, "-k");
692 		} else {
693 			errno = 0;
694 			ks->c1 = (size_t) strtoul(c, NULL, 10);
695 			if (errno != 0)
696 				err(2, "-k");
697 			if (ks->c1 == 0) {
698 				warn("%s",getstr(6));
699 				goto end;
700 			}
701 		}
702 	} else {
703 		if (second)
704 			ks->c2 = 0;
705 		else
706 			ks->c1 = 1;
707 	}
708 
709 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
710 		regoff_t i = 0;
711 
712 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
713 			check_mutually_exclusive_flags(s[i], mef_flags);
714 			if (s[i] == 'b') {
715 				if (second)
716 					ks->pos2b = true;
717 				else
718 					ks->pos1b = true;
719 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
720 				goto end;
721 		}
722 	}
723 
724 	ret = 0;
725 
726 end:
727 
728 	if (c)
729 		sort_free(c);
730 	if (f)
731 		sort_free(f);
732 	regfree(&re);
733 
734 	return (ret);
735 }
736 
737 /*
738  * Parse -k option value.
739  */
740 static int
741 parse_k(const char *s, struct key_specs *ks)
742 {
743 	int ret = -1;
744 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
745 	    { false, false, false, false, false, false };
746 
747 	if (s && *s) {
748 		char *sptr;
749 
750 		sptr = strchr(s, ',');
751 		if (sptr) {
752 			size_t size1;
753 			char *pos1, *pos2;
754 
755 			size1 = sptr - s;
756 
757 			if (size1 < 1)
758 				return (-1);
759 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
760 
761 			strncpy(pos1, s, size1);
762 			pos1[size1] = '\0';
763 
764 			ret = parse_pos(pos1, ks, mef_flags, false);
765 
766 			sort_free(pos1);
767 			if (ret < 0)
768 				return (ret);
769 
770 			pos2 = sort_strdup(sptr + 1);
771 			ret = parse_pos(pos2, ks, mef_flags, true);
772 			sort_free(pos2);
773 		} else
774 			ret = parse_pos(s, ks, mef_flags, false);
775 	}
776 
777 	return (ret);
778 }
779 
/*
 * Capacity, including the terminating NUL, of the buffers that receive the
 * modifier letters of an obsolete +POS / -POS argument.
 */
#define	OBS_SOPTS_SIZE	128

/*
 * Parse POS in +POS -POS option: FIELD[.COLUMN][LETTERS].
 *
 * s:     text after the leading '+' or '-'.
 * nf:    receives the field number (0 when absent or on failure).
 * nc:    receives the column number (0 when absent or on failure).
 * sopts: buffer of at least OBS_SOPTS_SIZE bytes that receives the trailing
 *        letters; left untouched when there are none.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * the letters would not fit into sopts.  A number that cannot be
 * represented, or that could not be incremented by the caller without
 * overflowing an int, terminates the program with message 11.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* strtoul() stops at the first non-digit; no copy is required. */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's fixed-size buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * argc, argv: the program's argument vector, modified in place.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-kF.C[letters]" string; obsolete positions are zero-based, so field and
 * column are incremented.  When the next argument is a valid -POS2 it is
 * folded into the same key ("-kF1.C1...,F2.C2...") and removed from argv,
 * which decrements *argc.  Arguments that do not parse are left alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* "-k", four decimal ints, ".", ",", two letter strings, NUL. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;

		arg1 = argv[i];

		if (strlen(arg1) > 1 && arg1[0] == '+') {
			int c1, f1;
			char sopts1[OBS_SOPTS_SIZE];

			sopts1[0] = 0;
			c1 = f1 = 0;

			if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
				continue;
			else {
				f1 += 1;
				c1 += 1;
				if (i + 1 < *argc) {
					char *arg2 = argv[i + 1];

					if (strlen(arg2) > 1 &&
					    arg2[0] == '-') {
						int c2, f2;
						char sopts2[OBS_SOPTS_SIZE];

						sopts2[0] = 0;
						c2 = f2 = 0;

						if (parse_pos_obs(arg2 + 1,
						    &f2, &c2, sopts2) >= 0) {
							if (c2 > 0)
								f2 += 1;
							snprintf(sopt, sizeof(sopt),
							    "-k%d.%d%s,%d.%d%s",
							    f1, c1, sopts1, f2, c2, sopts2);
							argv[i] = sort_strdup(sopt);
							/* Drop the consumed -POS2. */
							for (int j = i + 1; j + 1 < *argc; j++)
								argv[j] = argv[j + 1];
							*argc -= 1;
							/* Keep argv[argc] == NULL. */
							argv[*argc] = NULL;
							continue;
						}
					}
				}
				snprintf(sopt, sizeof(sopt), "-k%d.%d%s",
				    f1, c1, sopts1);
				argv[i] = sort_strdup(sopt);
			}
		}
	}
}
910 
911 /*
912  * Set random seed
913  */
914 static void
915 set_random_seed(void)
916 {
917 	if (need_random) {
918 
919 		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
920 			FILE* fseed;
921 			MD5_CTX ctx;
922 			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
923 			size_t sz = 0;
924 
925 			fseed = openfile(random_source, "r");
926 			while (!feof(fseed)) {
927 				int cr;
928 
929 				cr = fgetc(fseed);
930 				if (cr == EOF)
931 					break;
932 
933 				rsd[sz++] = (char) cr;
934 
935 				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
936 					break;
937 			}
938 
939 			closefile(fseed, random_source);
940 
941 			MD5Init(&ctx);
942 			MD5Update(&ctx, rsd, sz);
943 
944 			random_seed = MD5End(&ctx, NULL);
945 			random_seed_size = strlen(random_seed);
946 
947 		} else {
948 			MD5_CTX ctx;
949 			char *b;
950 
951 			MD5Init(&ctx);
952 			b = MD5File(random_source, NULL);
953 			if (b == NULL)
954 				err(2, NULL);
955 
956 			random_seed = b;
957 			random_seed_size = strlen(b);
958 		}
959 
960 		MD5Init(&md5_ctx);
961 		if(random_seed_size>0) {
962 			MD5Update(&md5_ctx, random_seed, random_seed_size);
963 		}
964 	}
965 }
966 
967 /*
968  * Main function.
969  */
970 int
971 main(int argc, char **argv)
972 {
973 	char *outfile, *real_outfile;
974 	int c, result;
975 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
976 	    { false, false, false, false, false, false };
977 
978 	result = 0;
979 	outfile = sort_strdup("-");
980 	real_outfile = NULL;
981 
982 	struct sort_mods *sm = &default_sort_mods_object;
983 
984 	init_tmp_files();
985 
986 	set_signal_handler();
987 
988 	set_hw_params();
989 	set_locale();
990 	set_tmpdir();
991 	set_sort_opts();
992 
993 	fix_obsolete_keys(&argc, argv);
994 
995 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
996 	    != -1)) {
997 
998 		check_mutually_exclusive_flags(c, mef_flags);
999 
1000 		if (!set_sort_modifier(sm, c)) {
1001 
1002 			switch (c) {
1003 			case 'c':
1004 				sort_opts_vals.cflag = true;
1005 				if (optarg) {
1006 					if (!strcmp(optarg, "diagnose-first"))
1007 						;
1008 					else if (!strcmp(optarg, "silent") ||
1009 					    !strcmp(optarg, "quiet"))
1010 						sort_opts_vals.csilentflag = true;
1011 					else if (*optarg)
1012 						unknown(optarg);
1013 				}
1014 				break;
1015 			case 'C':
1016 				sort_opts_vals.cflag = true;
1017 				sort_opts_vals.csilentflag = true;
1018 				break;
1019 			case 'k':
1020 			{
1021 				sort_opts_vals.complex_sort = true;
1022 				sort_opts_vals.kflag = true;
1023 
1024 				keys_num++;
1025 				keys = sort_realloc(keys, keys_num *
1026 				    sizeof(struct key_specs));
1027 				memset(&(keys[keys_num - 1]), 0,
1028 				    sizeof(struct key_specs));
1029 
1030 				if (parse_k(optarg, &(keys[keys_num - 1]))
1031 				    < 0) {
1032 					errc(2, EINVAL, "-k %s", optarg);
1033 				}
1034 
1035 				break;
1036 			}
1037 			case 'm':
1038 				sort_opts_vals.mflag = true;
1039 				break;
1040 			case 'o':
1041 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1042 				strcpy(outfile, optarg);
1043 				break;
1044 			case 's':
1045 				sort_opts_vals.sflag = true;
1046 				break;
1047 			case 'S':
1048 				available_free_memory =
1049 				    parse_memory_buffer_value(optarg);
1050 				break;
1051 			case 'T':
1052 				tmpdir = sort_strdup(optarg);
1053 				break;
1054 			case 't':
1055 				while (strlen(optarg) > 1) {
1056 					if (optarg[0] != '\\') {
1057 						errc(2, EINVAL, "%s", optarg);
1058 					}
1059 					optarg += 1;
1060 					if (*optarg == '0') {
1061 						*optarg = 0;
1062 						break;
1063 					}
1064 				}
1065 				sort_opts_vals.tflag = true;
1066 				sort_opts_vals.field_sep = btowc(optarg[0]);
1067 				if (sort_opts_vals.field_sep == WEOF) {
1068 					errno = EINVAL;
1069 					err(2, NULL);
1070 				}
1071 				if (!gnusort_numeric_compatibility) {
1072 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1073 						symbol_decimal_point = WEOF;
1074 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1075 						symbol_thousands_sep = WEOF;
1076 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1077 						symbol_negative_sign = WEOF;
1078 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1079 						symbol_positive_sign = WEOF;
1080 				}
1081 				break;
1082 			case 'u':
1083 				sort_opts_vals.uflag = true;
1084 				/* stable sort for the correct unique val */
1085 				sort_opts_vals.sflag = true;
1086 				break;
1087 			case 'z':
1088 				sort_opts_vals.zflag = true;
1089 				break;
1090 			case SORT_OPT:
1091 				if (optarg) {
1092 					if (!strcmp(optarg, "general-numeric"))
1093 						set_sort_modifier(sm, 'g');
1094 					else if (!strcmp(optarg, "human-numeric"))
1095 						set_sort_modifier(sm, 'h');
1096 					else if (!strcmp(optarg, "numeric"))
1097 						set_sort_modifier(sm, 'n');
1098 					else if (!strcmp(optarg, "month"))
1099 						set_sort_modifier(sm, 'M');
1100 					else if (!strcmp(optarg, "random"))
1101 						set_sort_modifier(sm, 'R');
1102 					else
1103 						unknown(optarg);
1104 				}
1105 				break;
1106 #if defined(SORT_THREADS)
1107 			case PARALLEL_OPT:
1108 				nthreads = (size_t)(atoi(optarg));
1109 				if (nthreads < 1)
1110 					nthreads = 1;
1111 				if (nthreads > 1024)
1112 					nthreads = 1024;
1113 				break;
1114 #endif
1115 			case QSORT_OPT:
1116 				sort_opts_vals.sort_method = SORT_QSORT;
1117 				break;
1118 			case MERGESORT_OPT:
1119 				sort_opts_vals.sort_method = SORT_MERGESORT;
1120 				break;
1121 			case MMAP_OPT:
1122 				use_mmap = true;
1123 				break;
1124 			case HEAPSORT_OPT:
1125 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1126 				break;
1127 			case RADIXSORT_OPT:
1128 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1129 				break;
1130 			case RANDOMSOURCE_OPT:
1131 				random_source = strdup(optarg);
1132 				break;
1133 			case COMPRESSPROGRAM_OPT:
1134 				compress_program = strdup(optarg);
1135 				break;
1136 			case FF_OPT:
1137 				read_fns_from_file0(optarg);
1138 				break;
1139 			case BS_OPT:
1140 			{
1141 				errno = 0;
1142 				long mof = strtol(optarg, NULL, 10);
1143 				if (errno != 0)
1144 					err(2, "--batch-size");
1145 				if (mof >= 2)
1146 					max_open_files = (size_t) mof + 1;
1147 			}
1148 				break;
1149 			case VERSION_OPT:
1150 				printf("%s\n", VERSION);
1151 				exit(EXIT_SUCCESS);
1152 				/* NOTREACHED */
1153 				break;
1154 			case DEBUG_OPT:
1155 				debug_sort = true;
1156 				break;
1157 			case HELP_OPT:
1158 				usage(false);
1159 				/* NOTREACHED */
1160 				break;
1161 			default:
1162 				usage(true);
1163 				/* NOTREACHED */
1164 			}
1165 		}
1166 	}
1167 
1168 	argc -= optind;
1169 	argv += optind;
1170 
1171 	if (argv_from_file0) {
1172 		argc = argc_from_file0;
1173 		argv = argv_from_file0;
1174 	}
1175 
1176 #ifndef WITHOUT_NLS
1177 	catalog = catopen("sort", NL_CAT_LOCALE);
1178 #endif
1179 
1180 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1181 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1182 
1183 #ifndef WITHOUT_NLS
1184 	catclose(catalog);
1185 #endif
1186 
1187 	if (keys_num == 0) {
1188 		keys_num = 1;
1189 		keys = sort_realloc(keys, sizeof(struct key_specs));
1190 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1191 		keys[0].c1 = 1;
1192 		keys[0].pos1b = default_sort_mods->bflag;
1193 		keys[0].pos2b = default_sort_mods->bflag;
1194 		memcpy(&(keys[0].sm), default_sort_mods,
1195 		    sizeof(struct sort_mods));
1196 	}
1197 
1198 	for (size_t i = 0; i < keys_num; i++) {
1199 		struct key_specs *ks;
1200 
1201 		ks = &(keys[i]);
1202 
1203 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1204 		    !(ks->pos2b)) {
1205 			ks->pos1b = sm->bflag;
1206 			ks->pos2b = sm->bflag;
1207 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1208 		}
1209 
1210 		ks->sm.func = get_sort_func(&(ks->sm));
1211 	}
1212 
1213 	if (debug_sort) {
1214 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1215 #if defined(SORT_THREADS)
1216 		printf("Number of CPUs: %d\n",(int)ncpu);
1217 		nthreads = 1;
1218 #endif
1219 		printf("Using collate rules of %s locale\n",
1220 		    setlocale(LC_COLLATE, NULL));
1221 		if (byte_sort)
1222 			printf("Byte sort is used\n");
1223 		if (print_symbols_on_debug) {
1224 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1225 			if (symbol_thousands_sep)
1226 				printf("Thousands separator: <%lc>\n",
1227 				    symbol_thousands_sep);
1228 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1229 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1230 		}
1231 	}
1232 
1233 	set_random_seed();
1234 
1235 	/* Case when the outfile equals one of the input files: */
1236 	if (strcmp(outfile, "-")) {
1237 
1238 		for(int i = 0; i < argc; ++i) {
1239 			if (strcmp(argv[i], outfile) == 0) {
1240 				real_outfile = sort_strdup(outfile);
1241 				for(;;) {
1242 					char* tmp = sort_malloc(strlen(outfile) +
1243 					    strlen(".tmp") + 1);
1244 
1245 					strcpy(tmp, outfile);
1246 					strcpy(tmp + strlen(tmp), ".tmp");
1247 					sort_free(outfile);
1248 					outfile = tmp;
1249 					if (access(outfile, F_OK) < 0)
1250 						break;
1251 				}
1252 				tmp_file_atexit(outfile);
1253 			}
1254 		}
1255 	}
1256 
1257 #if defined(SORT_THREADS)
1258 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1259 		nthreads = 1;
1260 #endif
1261 
1262 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1263 		struct file_list fl;
1264 		struct sort_list list;
1265 
1266 		sort_list_init(&list);
1267 		file_list_init(&fl, true);
1268 
1269 		if (argc < 1)
1270 			procfile("-", &list, &fl);
1271 		else {
1272 			while (argc > 0) {
1273 				procfile(*argv, &list, &fl);
1274 				--argc;
1275 				++argv;
1276 			}
1277 		}
1278 
1279 		if (fl.count < 1)
1280 			sort_list_to_file(&list, outfile);
1281 		else {
1282 			if (list.count > 0) {
1283 				char *flast = new_tmp_file_name();
1284 
1285 				sort_list_to_file(&list, flast);
1286 				file_list_add(&fl, flast, false);
1287 			}
1288 			merge_files(&fl, outfile);
1289 		}
1290 
1291 		file_list_clean(&fl);
1292 
1293 		/*
1294 		 * We are about to exit the program, so we can ignore
1295 		 * the clean-up for speed
1296 		 *
1297 		 * sort_list_clean(&list);
1298 		 */
1299 
1300 	} else if (sort_opts_vals.cflag) {
1301 		result = (argc == 0) ? (check("-")) : (check(*argv));
1302 	} else if (sort_opts_vals.mflag) {
1303 		struct file_list fl;
1304 
1305 		file_list_init(&fl, false);
1306 		file_list_populate(&fl, argc, argv, true);
1307 		merge_files(&fl, outfile);
1308 		file_list_clean(&fl);
1309 	}
1310 
1311 	if (real_outfile) {
1312 		unlink(real_outfile);
1313 		if (rename(outfile, real_outfile) < 0)
1314 			err(2, NULL);
1315 		sort_free(real_outfile);
1316 	}
1317 
1318 	sort_free(outfile);
1319 
1320 	return (result);
1321 }
1322