xref: /freebsd/usr.bin/sort/sort.c (revision 50c9ba2c76d9f1b41c0f007beaebd3d8ce112a59)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54 
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
59 
60 #define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
61 
62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
64 
65 static bool need_random;
66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
67 static const void *random_seed;
68 static size_t random_seed_size;
69 
70 MD5_CTX md5_ctx;
71 
/*
 * Built-in message texts, used when NLS is disabled or no message
 * catalogue is found.  Slot 0 is an empty placeholder so that the
 * real messages start at index 1.
 */
const char *nlsstr[] = {
	/*  0 */ "",
	/*  1 */ "mutually exclusive flags",
	/*  2 */ "extra argument not allowed with -c",
	/*  3 */ "Unknown feature",
	/*  4 */ "Wrong memory buffer specification",
	/*  5 */ "0 field in key specs",
	/*  6 */ "0 column in key specs",
	/*  7 */ "Wrong file mode",
	/*  8 */ "Cannot open file for reading",
	/*  9 */ "Radix sort cannot be used with these sort options",
	/* 10 */ "The chosen sort method cannot be used with stable and/or unique sort",
	/* 11 */ "Invalid key position",
	/* 12: usage text; %s receives the program name */
	"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	"[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	"[-o outfile] [--batch-size size] [--files0-from file] "
	"[--heapsort] [--mergesort] [--radixsort] [--qsort] "
#ifdef SORT_THREADS
	"[--nthreads thread_no] "
#endif
	"[--human-numeric-sort] "
	"[--version-sort] [--random-sort [--random-source file]] "
	"[--compress-program program] [file ...]\n"
};
98 
99 struct sort_opts sort_opts_vals;
100 
101 bool debug_sort;
102 bool need_hint;
103 
104 #if defined(SORT_THREADS)
105 size_t ncpu = 1;
106 size_t nthreads = 1;
107 #endif
108 
109 static bool gnusort_numeric_compatibility;
110 
111 static struct sort_mods default_sort_mods_object;
112 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
113 
114 static bool print_symbols_on_debug;
115 
116 /*
117  * Arguments from file (when file0-from option is used:
118  */
119 static int argc_from_file0 = -1;
120 static char **argv_from_file0;
121 
/*
 * getopt_long() return codes for the options that have no
 * single-character form.  Numbering starts just above CHAR_MAX so
 * that a code can never coincide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#ifdef SORT_THREADS
	NTHREADS_OPT,			/* --nthreads */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT			/* --radixsort */
};
143 
/*
 * Sort-type option letters of which at most one may be selected; the
 * per-letter "seen" arrays used by the option parsers follow this order.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
146 
147 struct option long_options[] = {
148 				{ "batch-size", required_argument, NULL, BS_OPT },
149 				{ "buffer-size", required_argument, NULL, 'S' },
150 				{ "check", optional_argument, NULL, 'c' },
151 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
152 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
153 				{ "debug", no_argument, NULL, DEBUG_OPT },
154 				{ "dictionary-order", no_argument, NULL, 'd' },
155 				{ "field-separator", required_argument, NULL, 't' },
156 				{ "files0-from", required_argument, NULL, FF_OPT },
157 				{ "general-numeric-sort", no_argument, NULL, 'g' },
158 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
159 				{ "help",no_argument, NULL, HELP_OPT },
160 				{ "human-numeric-sort", no_argument, NULL, 'h' },
161 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
162 				{ "ignore-case", no_argument, NULL, 'f' },
163 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
164 				{ "key", required_argument, NULL, 'k' },
165 				{ "merge", no_argument, NULL, 'm' },
166 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
167 				{ "month-sort", no_argument, NULL, 'M' },
168 				{ "numeric-sort", no_argument, NULL, 'n' },
169 				{ "output", required_argument, NULL, 'o' },
170 #if defined(SORT_THREADS)
171 				{ "nthreads", required_argument, NULL, NTHREADS_OPT },
172 #endif
173 				{ "qsort", no_argument, NULL, QSORT_OPT },
174 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
175 				{ "random-sort", no_argument, NULL, 'R' },
176 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
177 				{ "reverse", no_argument, NULL, 'r' },
178 				{ "sort", required_argument, NULL, SORT_OPT },
179 				{ "stable", no_argument, NULL, 's' },
180 				{ "temporary-directory",required_argument, NULL, 'T' },
181 				{ "unique", no_argument, NULL, 'u' },
182 				{ "version", no_argument, NULL, VERSION_OPT },
183 				{ "version-sort",no_argument, NULL, 'V' },
184 				{ "zero-terminated", no_argument, NULL, 'z' },
185 				{ NULL, no_argument, NULL, 0 }
186 };
187 
188 void fix_obsolete_keys(int *argc, char **argv);
189 
190 /*
191  * Check where sort modifier is present
192  */
193 static bool
194 sort_modifier_empty(struct sort_mods *sm)
195 {
196 
197 	if (sm == NULL)
198 		return (true);
199 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
200 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
201 }
202 
/*
 * Print the usage text (message 12, formatted with the program name)
 * and terminate.
 *
 * opt_err: when true the text goes to stderr and the exit status is 2;
 *          otherwise it goes to stdout and the exit status is 0.
 *
 * Never returns.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;
	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
222 
223 /*
224  * Read input file names from a file (file0-from option).
225  */
226 static void
227 read_fns_from_file0(const char *fn)
228 {
229 	if (fn) {
230 		struct file0_reader f0r;
231 		FILE *f;
232 
233 		f = fopen(fn, "r");
234 		if (f == NULL)
235 			err(2, NULL);
236 
237 		memset(&f0r, 0, sizeof(f0r));
238 		f0r.f = f;
239 
240 		while (!feof(f)) {
241 			char *line = read_file0_line(&f0r);
242 
243 			if (line && *line) {
244 				++argc_from_file0;
245 				if (argc_from_file0 < 1)
246 					argc_from_file0 = 1;
247 				argv_from_file0 = sort_realloc(argv_from_file0,
248 				    argc_from_file0 * sizeof(char *));
249 				if (argv_from_file0 == NULL)
250 					err(2, NULL);
251 				argv_from_file0[argc_from_file0 - 1] =
252 				    sort_strdup(line);
253 			}
254 		}
255 		closefile(f, fn);
256 	}
257 }
258 
259 /*
260  * Check how much RAM is available for the sort.
261  */
262 static void
263 set_hw_params(void)
264 {
265 #if defined(SORT_THREADS)
266 	size_t ncpusz;
267 #endif
268 	size_t pages, psize, psz, pszsz;
269 
270 	pages = psize = 0;
271 #if defined(SORT_THREADS)
272 	ncpu = 1;
273 	ncpusz = sizeof(size_t);
274 #endif
275 	psz = pszsz = sizeof(size_t);
276 
277 	if (sysctlbyname("vm.stats.vm.v_free_count", &pages, &psz,
278 	    NULL, 0) < 0) {
279 		perror("vm.stats.vm.v_free_count");
280 		return;
281 	}
282 	if (sysctlbyname("vm.stats.vm.v_page_size", &psize, &pszsz,
283 	    NULL, 0) < 0) {
284 		perror("vm.stats.vm.v_page_size");
285 		return;
286 	}
287 #if defined(SORT_THREADS)
288 	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz,
289 	    NULL, 0) < 0)
290 		ncpu = 1;
291 	else if(ncpu > 32)
292 		ncpu = 32;
293 
294 	nthreads = ncpu;
295 #endif
296 
297 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
298 	available_free_memory = (free_memory * 9) / 10;
299 }
300 
/*
 * Convert the first multibyte character of c to a wide character.
 *
 * wc:  destination; left untouched when wc or c is NULL.
 * c:   multibyte string to convert.
 * def: value stored in *wc when mbtowc() returns less than 1, that is
 *      for an empty string or an invalid sequence.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
316 
317 /*
318  * Set current locale symbols.
319  */
320 static void
321 set_locale(void)
322 {
323 	struct lconv *lc;
324 	const char *locale;
325 
326 	setlocale(LC_ALL, "");
327 
328 	lc = localeconv();
329 
330 	if (lc) {
331 		/* obtain LC_NUMERIC info */
332 		/* Convert to wide char form */
333 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
334 		    symbol_decimal_point);
335 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
336 		    symbol_thousands_sep);
337 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
338 		    symbol_positive_sign);
339 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
340 		    symbol_negative_sign);
341 	}
342 
343 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
344 		gnusort_numeric_compatibility = true;
345 
346 	locale = setlocale(LC_COLLATE, NULL);
347 
348 	if (locale) {
349 		char *tmpl;
350 		const char *cclocale;
351 
352 		tmpl = sort_strdup(locale);
353 		cclocale = setlocale(LC_COLLATE, "C");
354 		if (cclocale && !strcmp(cclocale, tmpl))
355 			byte_sort = true;
356 		else {
357 			const char *pclocale;
358 
359 			pclocale = setlocale(LC_COLLATE, "POSIX");
360 			if (pclocale && !strcmp(pclocale, tmpl))
361 				byte_sort = true;
362 		}
363 		setlocale(LC_COLLATE, tmpl);
364 		sort_free(tmpl);
365 	}
366 }
367 
368 /*
369  * Set directory temporary files.
370  */
371 static void
372 set_tmpdir(void)
373 {
374 	char *td;
375 
376 	td = getenv("TMPDIR");
377 	if (td != NULL)
378 		tmpdir = sort_strdup(td);
379 }
380 
381 /*
382  * Parse -S option.
383  */
384 static unsigned long long
385 parse_memory_buffer_value(const char *value)
386 {
387 
388 	if (value == NULL)
389 		return (available_free_memory);
390 	else {
391 		char *endptr;
392 		unsigned long long membuf;
393 
394 		endptr = NULL;
395 		errno = 0;
396 		membuf = strtoll(value, &endptr, 10);
397 
398 		if (errno != 0) {
399 			warn("%s",getstr(4));
400 			membuf = available_free_memory;
401 		} else {
402 			switch (*endptr){
403 			case 'Y':
404 				membuf *= 1024;
405 				/* FALLTHROUGH */
406 			case 'Z':
407 				membuf *= 1024;
408 				/* FALLTHROUGH */
409 			case 'E':
410 				membuf *= 1024;
411 				/* FALLTHROUGH */
412 			case 'P':
413 				membuf *= 1024;
414 				/* FALLTHROUGH */
415 			case 'T':
416 				membuf *= 1024;
417 				/* FALLTHROUGH */
418 			case 'G':
419 				membuf *= 1024;
420 				/* FALLTHROUGH */
421 			case 'M':
422 				membuf *= 1024;
423 				/* FALLTHROUGH */
424 			case '\0':
425 			case 'K':
426 				membuf *= 1024;
427 				/* FALLTHROUGH */
428 			case 'b':
429 				break;
430 			case '%':
431 				membuf = (available_free_memory * membuf) /
432 				    100;
433 				break;
434 			default:
435 				fprintf(stderr, "%s: %s\n", strerror(EINVAL),
436 				   optarg);
437 				membuf = available_free_memory;
438 			}
439 		}
440 		return (membuf);
441 	}
442 }
443 
444 /*
445  * Signal handler that clears the temporary files.
446  */
447 static void
448 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
449     void *context __unused)
450 {
451 
452 	clear_tmp_files();
453 	exit(-1);
454 }
455 
456 /*
457  * Set signal handler on panic signals.
458  */
459 static void
460 set_signal_handler(void)
461 {
462 	struct sigaction sa;
463 
464 	memset(&sa, 0, sizeof(sa));
465 	sa.sa_sigaction = &sig_handler;
466 	sa.sa_flags = SA_SIGINFO;
467 
468 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
469 		perror("sigaction");
470 		return;
471 	}
472 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
473 		perror("sigaction");
474 		return;
475 	}
476 	if (sigaction(SIGINT, &sa, NULL) < 0) {
477 		perror("sigaction");
478 		return;
479 	}
480 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
481 		perror("sigaction");
482 		return;
483 	}
484 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
485 		perror("sigaction");
486 		return;
487 	}
488 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
489 		perror("sigaction");
490 		return;
491 	}
492 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
497 		perror("sigaction");
498 		return;
499 	}
500 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
501 		perror("sigaction");
502 		return;
503 	}
504 }
505 
/*
 * Report an unrecognised feature name (message 3 followed by the
 * offending text) and exit with status 2.  Never returns.
 */
static void
unknown(const char *what)
{
	const char *prefix;

	prefix = getstr(3);
	errx(2, "%s: %s", prefix, what);
}
515 
516 /*
517  * Check whether contradictory input options are used.
518  */
519 static void
520 check_mutually_exclusive_flags(char c, bool *mef_flags)
521 {
522 	int fo_index, mec;
523 	bool found_others, found_this;
524 
525 	found_others = found_this =false;
526 	fo_index = 0;
527 
528 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
529 		mec = mutually_exclusive_flags[i];
530 
531 		if (mec != c) {
532 			if (mef_flags[i]) {
533 				if (found_this)
534 					errx(1, "%c:%c: %s", c, mec, getstr(1));
535 				found_others = true;
536 				fo_index = i;
537 			}
538 		} else {
539 			if (found_others)
540 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
541 			mef_flags[i] = true;
542 			found_this = true;
543 		}
544 	}
545 }
546 
547 /*
548  * Initialise sort opts data.
549  */
550 static void
551 set_sort_opts(void)
552 {
553 
554 	memset(&default_sort_mods_object, 0,
555 	    sizeof(default_sort_mods_object));
556 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
557 	default_sort_mods_object.func =
558 	    get_sort_func(&default_sort_mods_object);
559 }
560 
561 /*
562  * Set a sort modifier on a sort modifiers object.
563  */
564 static bool
565 set_sort_modifier(struct sort_mods *sm, int c)
566 {
567 
568 	if (sm) {
569 		switch (c){
570 		case 'b':
571 			sm->bflag = true;
572 			break;
573 		case 'd':
574 			sm->dflag = true;
575 			break;
576 		case 'f':
577 			sm->fflag = true;
578 			break;
579 		case 'g':
580 			sm->gflag = true;
581 			need_hint = true;
582 			break;
583 		case 'i':
584 			sm->iflag = true;
585 			break;
586 		case 'R':
587 			sm->Rflag = true;
588 			need_random = true;
589 			break;
590 		case 'M':
591 			initialise_months();
592 			sm->Mflag = true;
593 			need_hint = true;
594 			break;
595 		case 'n':
596 			sm->nflag = true;
597 			need_hint = true;
598 			print_symbols_on_debug = true;
599 			break;
600 		case 'r':
601 			sm->rflag = true;
602 			break;
603 		case 'V':
604 			sm->Vflag = true;
605 			break;
606 		case 'h':
607 			sm->hflag = true;
608 			need_hint = true;
609 			print_symbols_on_debug = true;
610 			break;
611 		default:
612 			return false;
613 		}
614 		sort_opts_vals.complex_sort = true;
615 		sm->func = get_sort_func(sm);
616 	}
617 	return (true);
618 }
619 
620 /*
621  * Parse POS in -k option.
622  */
623 static int
624 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
625 {
626 	regmatch_t pmatch[4];
627 	regex_t re;
628 	char *c, *f;
629 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
630 	size_t len, nmatch;
631 	int ret;
632 
633 	ret = -1;
634 	nmatch = 4;
635 	c = f = NULL;
636 
637 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
638 		return (-1);
639 
640 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
641 		goto end;
642 
643 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
644 		goto end;
645 
646 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
647 		goto end;
648 
649 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
650 	f = sort_malloc((len + 1) * sizeof(char));
651 
652 	strncpy(f, s + pmatch[1].rm_so, len);
653 	f[len] = '\0';
654 
655 	if (second) {
656 		errno = 0;
657 		ks->f2 = (size_t) strtoul(f, NULL, 10);
658 		if (errno != 0)
659 			errx(2, "%s: -k", strerror(errno));
660 		if (ks->f2 == 0) {
661 			warn("%s",getstr(5));
662 			goto end;
663 		}
664 	} else {
665 		errno = 0;
666 		ks->f1 = (size_t) strtoul(f, NULL, 10);
667 		if (errno != 0)
668 			errx(2, "%s: -k", strerror(errno));
669 		if (ks->f1 == 0) {
670 			warn("%s",getstr(5));
671 			goto end;
672 		}
673 	}
674 
675 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
676 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
677 		c = sort_malloc((len + 1) * sizeof(char));
678 
679 		strncpy(c, s + pmatch[2].rm_so + 1, len);
680 		c[len] = '\0';
681 
682 		if (second) {
683 			errno = 0;
684 			ks->c2 = (size_t) strtoul(c, NULL, 10);
685 			if (errno != 0)
686 				errx(2, "%s: -k", strerror(errno));
687 		} else {
688 			errno = 0;
689 			ks->c1 = (size_t) strtoul(c, NULL, 10);
690 			if (errno != 0)
691 				errx(2, "%s: -k", strerror(errno));
692 			if (ks->c1 == 0) {
693 				warn("%s",getstr(6));
694 				goto end;
695 			}
696 		}
697 	} else {
698 		if (second)
699 			ks->c2 = 0;
700 		else
701 			ks->c1 = 1;
702 	}
703 
704 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
705 		regoff_t i = 0;
706 
707 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
708 			check_mutually_exclusive_flags(s[i], mef_flags);
709 			if (s[i] == 'b') {
710 				if (second)
711 					ks->pos2b = true;
712 				else
713 					ks->pos1b = true;
714 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
715 				goto end;
716 		}
717 	}
718 
719 	ret = 0;
720 
721 end:
722 
723 	if (c)
724 		sort_free(c);
725 	if (f)
726 		sort_free(f);
727 	regfree(&re);
728 
729 	return (ret);
730 }
731 
732 /*
733  * Parse -k option value.
734  */
735 static int
736 parse_k(const char *s, struct key_specs *ks)
737 {
738 	int ret = -1;
739 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
740 	    { false, false, false, false, false, false };
741 
742 	if (s && *s) {
743 		char *sptr;
744 
745 		sptr = strchr(s, ',');
746 		if (sptr) {
747 			size_t size1;
748 			char *pos1, *pos2;
749 
750 			size1 = sptr - s;
751 
752 			if (size1 < 1)
753 				return (-1);
754 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
755 
756 			strncpy(pos1, s, size1);
757 			pos1[size1] = '\0';
758 
759 			ret = parse_pos(pos1, ks, mef_flags, false);
760 
761 			sort_free(pos1);
762 			if (ret < 0)
763 				return (ret);
764 
765 			pos2 = sort_strdup(sptr + 1);
766 			ret = parse_pos(pos2, ks, mef_flags, true);
767 			sort_free(pos2);
768 		} else
769 			ret = parse_pos(s, ks, mef_flags, false);
770 	}
771 
772 	return (ret);
773 }
774 
/*
 * Parse POS in the obsolete +POS -POS syntax: FIELD[.COLUMN][LETTERS].
 *
 * s:     position text, without the leading '+' or '-'.
 * nf:    receives the field number (0 when parsing fails).
 * nc:    receives the column number (0 when absent or parsing fails).
 * sopts: receives the trailing letters as a NUL-terminated string; it
 *        is left untouched when there are none.  It must point to at
 *        least 128 bytes, which is what fix_obsolete_keys() supplies.
 *
 * Returns 0 on success and -1 when s does not have the expected form
 * or carries more letters than sopts can hold.  Exits with status 2
 * ("Invalid key position") when a number does not fit in an int.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	/* Size of the modifier buffers the caller passes in. */
	enum { SOPTS_BUF_SIZE = 128 };
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t pmatch[4];
	regex_t re;
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Refuse letter runs that would overflow the caller's buffer. */
	len = 0;
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;
		if (len >= SOPTS_BUF_SIZE)
			goto end;
	}

	/*
	 * The pattern guarantees that each digit run is followed by a
	 * non-digit or the end of the string, so strtoul() can read it
	 * in place without a temporary copy.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val > INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the '.' that starts the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val > INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (len > 0) {
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
848 
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2]
 * syntax.
 *
 * argc, argv: the program's argument vector, rewritten in place.  A
 *             "+POS1" argument becomes "-kF.C[letters]".  When it is
 *             directly followed by a "-POS2" argument the two are
 *             merged into one "-kF.C[letters],F.C[letters]" argument
 *             and *argc is decremented.
 *
 * Field and column numbers of POS1 are zero-based in the old syntax
 * and are incremented by one.  The POS2 field is incremented only
 * when POS2 has a non-zero column.  Scanning stops at "--", since
 * everything after it is an operand.  Arguments too long to be a key
 * position are left alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for four ints, punctuation and two 127-letter suffixes. */
	char sopt[512];

	for (int i = 1; i < *argc; i++) {
		char sopts1[128];
		char *arg1;
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0)
			break;

		if (arg1[0] != '+' || strlen(arg1) < 2)
			continue;

		/* Keeps the letters copied into sopts1 within bounds. */
		if (strlen(arg1 + 1) >= sizeof(sopts1))
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		if (f1 == INT_MAX || c1 == INT_MAX)
			errx(2, "%s", getstr(11));
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];
			char sopts2[128];
			int c2, f2;

			sopts2[0] = 0;
			c2 = f2 = 0;

			if (arg2[0] == '-' && strlen(arg2) > 1 &&
			    strlen(arg2 + 1) < sizeof(sopts2) &&
			    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0) {
					if (f2 == INT_MAX)
						errx(2, "%s", getstr(11));
					f2 += 1;
				}
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* Close the gap left by the merged -POS2. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				argv[*argc] = NULL;
				continue;
			}
		}

		/* +POS1 on its own: keep its modifier letters as well. */
		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
905 
906 /*
907  * Set random seed
908  */
909 static void
910 set_random_seed(void)
911 {
912 	if (need_random) {
913 
914 		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
915 			FILE* fseed;
916 			MD5_CTX ctx;
917 			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
918 			size_t sz = 0;
919 
920 			fseed = openfile(random_source, "r");
921 			while (!feof(fseed)) {
922 				int cr;
923 
924 				cr = fgetc(fseed);
925 				if (cr == EOF)
926 					break;
927 
928 				rsd[sz++] = (char) cr;
929 
930 				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
931 					break;
932 			}
933 
934 			closefile(fseed, random_source);
935 
936 			MD5Init(&ctx);
937 			MD5Update(&ctx, rsd, sz);
938 
939 			random_seed = MD5End(&ctx, NULL);
940 			random_seed_size = strlen(random_seed);
941 
942 		} else {
943 			MD5_CTX ctx;
944 			char *b;
945 
946 			MD5Init(&ctx);
947 			b = MD5File(random_source, NULL);
948 			if (b == NULL)
949 				err(2, NULL);
950 
951 			random_seed = b;
952 			random_seed_size = strlen(b);
953 		}
954 
955 		MD5Init(&md5_ctx);
956 		if(random_seed_size>0) {
957 			MD5Update(&md5_ctx, random_seed, random_seed_size);
958 		}
959 	}
960 }
961 
962 /*
963  * Main function.
964  */
965 int
966 main(int argc, char **argv)
967 {
968 	char *outfile, *real_outfile;
969 	int c, result;
970 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
971 	    { false, false, false, false, false, false };
972 
973 	result = 0;
974 	outfile = sort_strdup("-");
975 	real_outfile = NULL;
976 
977 	struct sort_mods *sm = &default_sort_mods_object;
978 
979 	init_tmp_files();
980 
981 	set_signal_handler();
982 
983 	set_hw_params();
984 	set_locale();
985 	set_tmpdir();
986 	set_sort_opts();
987 
988 #if 0
989 	{
990 		static int counter = 0;
991 		char fn[128];
992 		sprintf(fn, "/var/tmp/debug.sort.%d", counter++);
993 		FILE* f = fopen(fn, "w");
994 		fprintf(f, ">>sort>>");
995 		for (int i = 0; i < argc; i++) {
996 			fprintf(f, "<%s>", argv[i]);
997 		}
998 		fprintf(f, "<<sort<<\n");
999 		fclose(f);
1000 	}
1001 #endif
1002 
1003 	fix_obsolete_keys(&argc, argv);
1004 
1005 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1006 	    != -1)) {
1007 
1008 		check_mutually_exclusive_flags(c, mef_flags);
1009 
1010 		if (!set_sort_modifier(sm, c)) {
1011 
1012 			switch (c) {
1013 			case 'c':
1014 				sort_opts_vals.cflag = true;
1015 				if (optarg) {
1016 					if (!strcmp(optarg, "diagnose-first"))
1017 						;
1018 					else if (!strcmp(optarg, "silent") ||
1019 					    !strcmp(optarg, "quiet"))
1020 						sort_opts_vals.csilentflag = true;
1021 					else if (*optarg)
1022 						unknown(optarg);
1023 				}
1024 				break;
1025 			case 'C':
1026 				sort_opts_vals.cflag = true;
1027 				sort_opts_vals.csilentflag = true;
1028 				break;
1029 			case 'k':
1030 			{
1031 				sort_opts_vals.complex_sort = true;
1032 				sort_opts_vals.kflag = true;
1033 
1034 				keys_num++;
1035 				keys = sort_realloc(keys, keys_num *
1036 				    sizeof(struct key_specs));
1037 				memset(&(keys[keys_num - 1]), 0,
1038 				    sizeof(struct key_specs));
1039 
1040 				if (parse_k(optarg, &(keys[keys_num - 1]))
1041 				    < 0) {
1042 					errx(2, "%s: -k %s\n",
1043 					    strerror(EINVAL), optarg);
1044 				}
1045 
1046 				break;
1047 			}
1048 			case 'm':
1049 				sort_opts_vals.mflag = true;
1050 				break;
1051 			case 'o':
1052 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1053 				strcpy(outfile, optarg);
1054 				break;
1055 			case 's':
1056 				sort_opts_vals.sflag = true;
1057 				break;
1058 			case 'S':
1059 				available_free_memory =
1060 				    parse_memory_buffer_value(optarg);
1061 				break;
1062 			case 'T':
1063 				tmpdir = sort_strdup(optarg);
1064 				break;
1065 			case 't':
1066 				if (strlen(optarg) > 1) {
1067 					if (strcmp(optarg, "\\0")) {
1068 						errx(2, "%s: %s\n",
1069 						    strerror(EINVAL), optarg);
1070 					}
1071 					*optarg = 0;
1072 				}
1073 				sort_opts_vals.tflag = true;
1074 				sort_opts_vals.field_sep = btowc(optarg[0]);
1075 				if (sort_opts_vals.field_sep == WEOF) {
1076 					errno = EINVAL;
1077 					err(2, NULL);
1078 				}
1079 				if (!gnusort_numeric_compatibility) {
1080 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1081 						symbol_decimal_point = WEOF;
1082 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1083 						symbol_thousands_sep = WEOF;
1084 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1085 						symbol_negative_sign = WEOF;
1086 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1087 						symbol_positive_sign = WEOF;
1088 				}
1089 				break;
1090 			case 'u':
1091 				sort_opts_vals.uflag = true;
1092 				/* stable sort for the correct unique val */
1093 				sort_opts_vals.sflag = true;
1094 				break;
1095 			case 'z':
1096 				sort_opts_vals.zflag = true;
1097 				break;
1098 			case SORT_OPT:
1099 				if (optarg) {
1100 					if (!strcmp(optarg, "general-numeric"))
1101 						set_sort_modifier(sm, 'g');
1102 					else if (!strcmp(optarg, "human-numeric"))
1103 						set_sort_modifier(sm, 'h');
1104 					else if (!strcmp(optarg, "numeric"))
1105 						set_sort_modifier(sm, 'n');
1106 					else if (!strcmp(optarg, "month"))
1107 						set_sort_modifier(sm, 'M');
1108 					else if (!strcmp(optarg, "random"))
1109 						set_sort_modifier(sm, 'R');
1110 					else
1111 						unknown(optarg);
1112 				}
1113 				break;
1114 #if defined(SORT_THREADS)
1115 			case NTHREADS_OPT:
1116 				nthreads = (size_t)(atoi(optarg));
1117 				if (nthreads < 1)
1118 					nthreads = 1;
1119 				if (nthreads > 1024)
1120 					nthreads = 1024;
1121 				break;
1122 #endif
1123 			case QSORT_OPT:
1124 				sort_opts_vals.sort_method = SORT_QSORT;
1125 				break;
1126 			case MERGESORT_OPT:
1127 				sort_opts_vals.sort_method = SORT_MERGESORT;
1128 				break;
1129 			case HEAPSORT_OPT:
1130 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1131 				break;
1132 			case RADIXSORT_OPT:
1133 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1134 				break;
1135 			case RANDOMSOURCE_OPT:
1136 				random_source = strdup(optarg);
1137 				break;
1138 			case COMPRESSPROGRAM_OPT:
1139 				compress_program = strdup(optarg);
1140 				break;
1141 			case FF_OPT:
1142 				read_fns_from_file0(optarg);
1143 				break;
1144 			case BS_OPT:
1145 			{
1146 				errno = 0;
1147 				long mof = strtol(optarg, NULL, 10);
1148 				if (errno != 0)
1149 					errx(2, "--batch-size: %s",
1150 					    strerror(errno));
1151 				if (mof >= 2)
1152 					max_open_files = (size_t) mof + 1;
1153 			}
1154 				break;
1155 			case VERSION_OPT:
1156 				printf("%s\n", VERSION);
1157 				exit(EXIT_SUCCESS);
1158 				/* NOTREACHED */
1159 				break;
1160 			case DEBUG_OPT:
1161 				debug_sort = true;
1162 				break;
1163 			case HELP_OPT:
1164 				usage(false);
1165 				/* NOTREACHED */
1166 				break;
1167 			default:
1168 				usage(true);
1169 				/* NOTREACHED */
1170 			}
1171 		}
1172 	}
1173 
1174 	argc -= optind;
1175 	argv += optind;
1176 
1177 #ifndef WITHOUT_NLS
1178 	catalog = catopen("sort", NL_CAT_LOCALE);
1179 #endif
1180 
1181 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1182 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1183 
1184 #ifndef WITHOUT_NLS
1185 	catclose(catalog);
1186 #endif
1187 
1188 	if (keys_num == 0) {
1189 		keys_num = 1;
1190 		keys = sort_realloc(keys, sizeof(struct key_specs));
1191 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1192 		keys[0].c1 = 1;
1193 		keys[0].pos1b = default_sort_mods->bflag;
1194 		keys[0].pos2b = default_sort_mods->bflag;
1195 		memcpy(&(keys[0].sm), default_sort_mods,
1196 		    sizeof(struct sort_mods));
1197 	}
1198 
1199 	for (size_t i = 0; i < keys_num; i++) {
1200 		struct key_specs *ks;
1201 
1202 		ks = &(keys[i]);
1203 
1204 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1205 		    !(ks->pos2b)) {
1206 			ks->pos1b = sm->bflag;
1207 			ks->pos2b = sm->bflag;
1208 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1209 		}
1210 
1211 		ks->sm.func = get_sort_func(&(ks->sm));
1212 	}
1213 
1214 	if (argc_from_file0 >= 0) {
1215 		argc = argc_from_file0;
1216 		argv = argv_from_file0;
1217 	}
1218 
1219 	if (debug_sort) {
1220 #if defined(SORT_THREADS)
1221 		nthreads = 1;
1222 #endif
1223 		printf("Using collate rules of %s locale\n",
1224 		    setlocale(LC_COLLATE, NULL));
1225 		if (byte_sort)
1226 			printf("Byte sort is used\n");
1227 		if (print_symbols_on_debug) {
1228 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1229 			if (symbol_thousands_sep)
1230 				printf("Thousands separator: <%lc>\n",
1231 				    symbol_thousands_sep);
1232 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1233 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1234 		}
1235 	}
1236 
1237 	set_random_seed();
1238 
1239 	/* Case when the outfile equals one of the input files: */
1240 	if (strcmp(outfile, "-")) {
1241 
1242 		for(int i = 0; i < argc; ++i) {
1243 			if (strcmp(argv[i], outfile) == 0) {
1244 				real_outfile = sort_strdup(outfile);
1245 				for(;;) {
1246 					char* tmp = sort_malloc(strlen(outfile) +
1247 					    strlen(".tmp") + 1);
1248 
1249 					strcpy(tmp, outfile);
1250 					strcpy(tmp + strlen(tmp), ".tmp");
1251 					sort_free(outfile);
1252 					outfile = tmp;
1253 					if (access(outfile, F_OK) < 0)
1254 						break;
1255 				}
1256 				tmp_file_atexit(outfile);
1257 			}
1258 		}
1259 	}
1260 
1261 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1262 		struct file_list fl;
1263 		struct sort_list list;
1264 
1265 		sort_list_init(&list);
1266 		file_list_init(&fl, true);
1267 
1268 		if (argc < 1)
1269 			procfile("-", &list, &fl);
1270 		else {
1271 			while (argc > 0) {
1272 				procfile(*argv, &list, &fl);
1273 				--argc;
1274 				++argv;
1275 			}
1276 		}
1277 
1278 		if (fl.count < 1)
1279 			sort_list_to_file(&list, outfile);
1280 		else {
1281 			if (list.count > 0) {
1282 				char *flast = new_tmp_file_name();
1283 
1284 				sort_list_to_file(&list, flast);
1285 				file_list_add(&fl, flast, false);
1286 			}
1287 			merge_files(&fl, outfile);
1288 		}
1289 
1290 		file_list_clean(&fl);
1291 
1292 		/*
1293 		 * We are about to exit the program, so we can ignore
1294 		 * the clean-up for speed
1295 		 *
1296 		 * sort_list_clean(&list);
1297 		 */
1298 
1299 	} else if (sort_opts_vals.cflag) {
1300 		result = (argc == 0) ? (check("-")) : (check(*argv));
1301 	} else if (sort_opts_vals.mflag) {
1302 		struct file_list fl;
1303 
1304 		file_list_init(&fl, false);
1305 		file_list_populate(&fl, argc, argv, true);
1306 		merge_files(&fl, outfile);
1307 		file_list_clean(&fl);
1308 	}
1309 
1310 	if (real_outfile) {
1311 		unlink(real_outfile);
1312 		if (rename(outfile, real_outfile) < 0)
1313 			err(2, NULL);
1314 		sort_free(real_outfile);
1315 	}
1316 
1317 	sort_free(outfile);
1318 
1319 	return (result);
1320 }
1321