xref: /freebsd/usr.bin/sort/sort.c (revision 7d91d6b83e74edf278dde375e6049aca833cbebd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36 
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57 
#ifndef WITHOUT_NLS
#include <nl_types.h>
/*
 * Message catalogue descriptor.  It starts out as (nl_catd)-1; main()
 * assigns the result of catopen() to it and closes it again before
 * returning if it is no longer (nl_catd)-1.
 */
nl_catd catalog = (nl_catd)-1;
#endif
62 
63 #define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
64 
65 static bool need_random;
66 
67 MD5_CTX md5_ctx;
68 
/*
 * Built-in message texts, indexed by message number.  They are the
 * defaults used when NLS is disabled or no catalogue is found.
 * Slot 0 is an empty placeholder so that real messages start at 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* Usage text; the single %s receives the program name. */
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
96 
97 struct sort_opts sort_opts_vals;
98 
99 bool debug_sort;
100 bool need_hint;
101 
102 size_t mb_cur_max;
103 
104 #if defined(SORT_THREADS)
105 unsigned int ncpu = 1;
106 size_t nthreads = 1;
107 #endif
108 
109 static bool gnusort_numeric_compatibility;
110 
111 static struct sort_mods default_sort_mods_object;
112 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
113 
114 static bool print_symbols_on_debug;
115 
116 /*
117  * Arguments from file (when file0-from option is used:
118  */
119 static size_t argc_from_file0 = (size_t)-1;
120 static char **argv_from_file0;
121 
/*
 * getopt_long() return codes for the long options that have no
 * single-character form.  Numbering starts just above CHAR_MAX so
 * that none of them can collide with a short option character.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
144 
/*
 * Sort-type flags of which at most one may be given; the bool arrays
 * passed to check_mutually_exclusive_flags() use the same indices.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char
mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M',
	'n',
	'g',
	'R',
	'h',
	'V'
};
147 
148 static struct option long_options[] = {
149 				{ "batch-size", required_argument, NULL, BS_OPT },
150 				{ "buffer-size", required_argument, NULL, 'S' },
151 				{ "check", optional_argument, NULL, 'c' },
152 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
153 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
154 				{ "debug", no_argument, NULL, DEBUG_OPT },
155 				{ "dictionary-order", no_argument, NULL, 'd' },
156 				{ "field-separator", required_argument, NULL, 't' },
157 				{ "files0-from", required_argument, NULL, FF_OPT },
158 				{ "general-numeric-sort", no_argument, NULL, 'g' },
159 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
160 				{ "help",no_argument, NULL, HELP_OPT },
161 				{ "human-numeric-sort", no_argument, NULL, 'h' },
162 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
163 				{ "ignore-case", no_argument, NULL, 'f' },
164 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
165 				{ "key", required_argument, NULL, 'k' },
166 				{ "merge", no_argument, NULL, 'm' },
167 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
168 				{ "mmap", no_argument, NULL, MMAP_OPT },
169 				{ "month-sort", no_argument, NULL, 'M' },
170 				{ "numeric-sort", no_argument, NULL, 'n' },
171 				{ "output", required_argument, NULL, 'o' },
172 #if defined(SORT_THREADS)
173 				{ "parallel", required_argument, NULL, PARALLEL_OPT },
174 #endif
175 				{ "qsort", no_argument, NULL, QSORT_OPT },
176 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
177 				{ "random-sort", no_argument, NULL, 'R' },
178 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
179 				{ "reverse", no_argument, NULL, 'r' },
180 				{ "sort", required_argument, NULL, SORT_OPT },
181 				{ "stable", no_argument, NULL, 's' },
182 				{ "temporary-directory",required_argument, NULL, 'T' },
183 				{ "unique", no_argument, NULL, 'u' },
184 				{ "version", no_argument, NULL, VERSION_OPT },
185 				{ "version-sort",no_argument, NULL, 'V' },
186 				{ "zero-terminated", no_argument, NULL, 'z' },
187 				{ NULL, no_argument, NULL, 0 }
188 };
189 
/* Rewrites obsolete "+POS1 [-POS2]" arguments in argv as "-k" options. */
void fix_obsolete_keys(int *argc, char **argv);
191 
192 /*
193  * Check where sort modifier is present
194  */
195 static bool
196 sort_modifier_empty(struct sort_mods *sm)
197 {
198 
199 	if (sm == NULL)
200 		return (true);
201 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
202 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
203 }
204 
/*
 * Print the usage message (message 12, with the program name filled
 * in) and terminate.  For an option error the text goes to stderr and
 * the exit status is 2; otherwise it goes to stdout and the status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
220 
221 /*
222  * Read input file names from a file (file0-from option).
223  */
224 static void
225 read_fns_from_file0(const char *fn)
226 {
227 	FILE *f;
228 	char *line = NULL;
229 	size_t linesize = 0;
230 	ssize_t linelen;
231 
232 	if (fn == NULL)
233 		return;
234 
235 	f = fopen(fn, "r");
236 	if (f == NULL)
237 		err(2, "%s", fn);
238 
239 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
240 		if (*line != '\0') {
241 			if (argc_from_file0 == (size_t) - 1)
242 				argc_from_file0 = 0;
243 			++argc_from_file0;
244 			argv_from_file0 = sort_realloc(argv_from_file0,
245 			    argc_from_file0 * sizeof(char *));
246 			if (argv_from_file0 == NULL)
247 				err(2, NULL);
248 			argv_from_file0[argc_from_file0 - 1] = line;
249 		} else {
250 			free(line);
251 		}
252 		line = NULL;
253 		linesize = 0;
254 	}
255 	if (ferror(f))
256 		err(2, "%s: getdelim", fn);
257 
258 	closefile(f, fn);
259 }
260 
261 /*
262  * Check how much RAM is available for the sort.
263  */
264 static void
265 set_hw_params(void)
266 {
267 	long pages, psize;
268 
269 #if defined(SORT_THREADS)
270 	ncpu = 1;
271 #endif
272 
273 	pages = sysconf(_SC_PHYS_PAGES);
274 	if (pages < 1) {
275 		perror("sysconf pages");
276 		pages = 1;
277 	}
278 	psize = sysconf(_SC_PAGESIZE);
279 	if (psize < 1) {
280 		perror("sysconf psize");
281 		psize = 4096;
282 	}
283 #if defined(SORT_THREADS)
284 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
285 	if (ncpu < 1)
286 		ncpu = 1;
287 	else if(ncpu > 32)
288 		ncpu = 32;
289 
290 	nthreads = ncpu;
291 #endif
292 
293 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
294 	available_free_memory = free_memory / 2;
295 
296 	if (available_free_memory < 1024)
297 		available_free_memory = 1024;
298 }
299 
300 /*
301  * Convert "plain" symbol to wide symbol, with default value.
302  */
303 static void
304 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
305 {
306 
307 	if (wc && c) {
308 		int res;
309 
310 		res = mbtowc(wc, c, mb_cur_max);
311 		if (res < 1)
312 			*wc = def;
313 	}
314 }
315 
316 /*
317  * Set current locale symbols.
318  */
319 static void
320 set_locale(void)
321 {
322 	struct lconv *lc;
323 	const char *locale;
324 
325 	setlocale(LC_ALL, "");
326 
327 	mb_cur_max = MB_CUR_MAX;
328 
329 	lc = localeconv();
330 
331 	if (lc) {
332 		/* obtain LC_NUMERIC info */
333 		/* Convert to wide char form */
334 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
335 		    symbol_decimal_point);
336 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
337 		    symbol_thousands_sep);
338 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
339 		    symbol_positive_sign);
340 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
341 		    symbol_negative_sign);
342 	}
343 
344 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
345 		gnusort_numeric_compatibility = true;
346 
347 	locale = setlocale(LC_COLLATE, NULL);
348 
349 	if (locale) {
350 		char *tmpl;
351 		const char *cclocale;
352 
353 		tmpl = sort_strdup(locale);
354 		cclocale = setlocale(LC_COLLATE, "C");
355 		if (cclocale && !strcmp(cclocale, tmpl))
356 			byte_sort = true;
357 		else {
358 			const char *pclocale;
359 
360 			pclocale = setlocale(LC_COLLATE, "POSIX");
361 			if (pclocale && !strcmp(pclocale, tmpl))
362 				byte_sort = true;
363 		}
364 		setlocale(LC_COLLATE, tmpl);
365 		sort_free(tmpl);
366 	}
367 }
368 
369 /*
370  * Set directory temporary files.
371  */
372 static void
373 set_tmpdir(void)
374 {
375 	char *td;
376 
377 	td = getenv("TMPDIR");
378 	if (td != NULL)
379 		tmpdir = sort_strdup(td);
380 }
381 
382 /*
383  * Parse -S option.
384  */
385 static unsigned long long
386 parse_memory_buffer_value(const char *value)
387 {
388 
389 	if (value == NULL)
390 		return (available_free_memory);
391 	else {
392 		char *endptr;
393 		unsigned long long membuf;
394 
395 		endptr = NULL;
396 		errno = 0;
397 		membuf = strtoll(value, &endptr, 10);
398 
399 		if (errno != 0) {
400 			warn("%s",getstr(4));
401 			membuf = available_free_memory;
402 		} else {
403 			switch (*endptr){
404 			case 'Y':
405 				membuf *= 1024;
406 				/* FALLTHROUGH */
407 			case 'Z':
408 				membuf *= 1024;
409 				/* FALLTHROUGH */
410 			case 'E':
411 				membuf *= 1024;
412 				/* FALLTHROUGH */
413 			case 'P':
414 				membuf *= 1024;
415 				/* FALLTHROUGH */
416 			case 'T':
417 				membuf *= 1024;
418 				/* FALLTHROUGH */
419 			case 'G':
420 				membuf *= 1024;
421 				/* FALLTHROUGH */
422 			case 'M':
423 				membuf *= 1024;
424 				/* FALLTHROUGH */
425 			case '\0':
426 			case 'K':
427 				membuf *= 1024;
428 				/* FALLTHROUGH */
429 			case 'b':
430 				break;
431 			case '%':
432 				membuf = (available_free_memory * membuf) /
433 				    100;
434 				break;
435 			default:
436 				warnc(EINVAL, "%s", optarg);
437 				membuf = available_free_memory;
438 			}
439 		}
440 		return (membuf);
441 	}
442 }
443 
444 /*
445  * Signal handler that clears the temporary files.
446  */
447 static void
448 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
449     void *context __unused)
450 {
451 
452 	clear_tmp_files();
453 	exit(-1);
454 }
455 
456 /*
457  * Set signal handler on panic signals.
458  */
459 static void
460 set_signal_handler(void)
461 {
462 	struct sigaction sa;
463 
464 	memset(&sa, 0, sizeof(sa));
465 	sa.sa_sigaction = &sig_handler;
466 	sa.sa_flags = SA_SIGINFO;
467 
468 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
469 		perror("sigaction");
470 		return;
471 	}
472 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
473 		perror("sigaction");
474 		return;
475 	}
476 	if (sigaction(SIGINT, &sa, NULL) < 0) {
477 		perror("sigaction");
478 		return;
479 	}
480 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
481 		perror("sigaction");
482 		return;
483 	}
484 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
485 		perror("sigaction");
486 		return;
487 	}
488 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
489 		perror("sigaction");
490 		return;
491 	}
492 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
497 		perror("sigaction");
498 		return;
499 	}
500 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
501 		perror("sigaction");
502 		return;
503 	}
504 }
505 
/*
 * Report "<message 3>: <what>" through errx(), which ends the program
 * with exit status 2.
 */
static void
unknown(const char *what)
{
	const char *msg = getstr(3);

	errx(2, "%s: %s", msg, what);
}
515 
516 /*
517  * Check whether contradictory input options are used.
518  */
519 static void
520 check_mutually_exclusive_flags(char c, bool *mef_flags)
521 {
522 	int fo_index, mec;
523 	bool found_others, found_this;
524 
525 	found_others = found_this = false;
526 	fo_index = 0;
527 
528 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
529 		mec = mutually_exclusive_flags[i];
530 
531 		if (mec != c) {
532 			if (mef_flags[i]) {
533 				if (found_this)
534 					errx(1, "%c:%c: %s", c, mec, getstr(1));
535 				found_others = true;
536 				fo_index = i;
537 			}
538 		} else {
539 			if (found_others)
540 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
541 			mef_flags[i] = true;
542 			found_this = true;
543 		}
544 	}
545 }
546 
547 /*
548  * Initialise sort opts data.
549  */
550 static void
551 set_sort_opts(void)
552 {
553 
554 	memset(&default_sort_mods_object, 0,
555 	    sizeof(default_sort_mods_object));
556 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
557 	default_sort_mods_object.func =
558 	    get_sort_func(&default_sort_mods_object);
559 }
560 
561 /*
562  * Set a sort modifier on a sort modifiers object.
563  */
564 static bool
565 set_sort_modifier(struct sort_mods *sm, int c)
566 {
567 
568 	if (sm == NULL)
569 		return (true);
570 
571 	switch (c){
572 	case 'b':
573 		sm->bflag = true;
574 		break;
575 	case 'd':
576 		sm->dflag = true;
577 		break;
578 	case 'f':
579 		sm->fflag = true;
580 		break;
581 	case 'g':
582 		sm->gflag = true;
583 		need_hint = true;
584 		break;
585 	case 'i':
586 		sm->iflag = true;
587 		break;
588 	case 'R':
589 		sm->Rflag = true;
590 		need_hint = true;
591 		need_random = true;
592 		break;
593 	case 'M':
594 		initialise_months();
595 		sm->Mflag = true;
596 		need_hint = true;
597 		break;
598 	case 'n':
599 		sm->nflag = true;
600 		need_hint = true;
601 		print_symbols_on_debug = true;
602 		break;
603 	case 'r':
604 		sm->rflag = true;
605 		break;
606 	case 'V':
607 		sm->Vflag = true;
608 		break;
609 	case 'h':
610 		sm->hflag = true;
611 		need_hint = true;
612 		print_symbols_on_debug = true;
613 		break;
614 	default:
615 		return (false);
616 	}
617 
618 	sort_opts_vals.complex_sort = true;
619 	sm->func = get_sort_func(sm);
620 	return (true);
621 }
622 
623 /*
624  * Parse POS in -k option.
625  */
626 static int
627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
628 {
629 	regmatch_t pmatch[4];
630 	regex_t re;
631 	char *c, *f;
632 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
633 	size_t len, nmatch;
634 	int ret;
635 
636 	ret = -1;
637 	nmatch = 4;
638 	c = f = NULL;
639 
640 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
641 		return (-1);
642 
643 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
644 		goto end;
645 
646 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
647 		goto end;
648 
649 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
650 		goto end;
651 
652 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
653 	f = sort_malloc((len + 1) * sizeof(char));
654 
655 	strncpy(f, s + pmatch[1].rm_so, len);
656 	f[len] = '\0';
657 
658 	if (second) {
659 		errno = 0;
660 		ks->f2 = (size_t) strtoul(f, NULL, 10);
661 		if (errno != 0)
662 			err(2, "-k");
663 		if (ks->f2 == 0) {
664 			warn("%s",getstr(5));
665 			goto end;
666 		}
667 	} else {
668 		errno = 0;
669 		ks->f1 = (size_t) strtoul(f, NULL, 10);
670 		if (errno != 0)
671 			err(2, "-k");
672 		if (ks->f1 == 0) {
673 			warn("%s",getstr(5));
674 			goto end;
675 		}
676 	}
677 
678 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
679 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
680 		c = sort_malloc((len + 1) * sizeof(char));
681 
682 		strncpy(c, s + pmatch[2].rm_so + 1, len);
683 		c[len] = '\0';
684 
685 		if (second) {
686 			errno = 0;
687 			ks->c2 = (size_t) strtoul(c, NULL, 10);
688 			if (errno != 0)
689 				err(2, "-k");
690 		} else {
691 			errno = 0;
692 			ks->c1 = (size_t) strtoul(c, NULL, 10);
693 			if (errno != 0)
694 				err(2, "-k");
695 			if (ks->c1 == 0) {
696 				warn("%s",getstr(6));
697 				goto end;
698 			}
699 		}
700 	} else {
701 		if (second)
702 			ks->c2 = 0;
703 		else
704 			ks->c1 = 1;
705 	}
706 
707 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
708 		regoff_t i = 0;
709 
710 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
711 			check_mutually_exclusive_flags(s[i], mef_flags);
712 			if (s[i] == 'b') {
713 				if (second)
714 					ks->pos2b = true;
715 				else
716 					ks->pos1b = true;
717 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
718 				goto end;
719 		}
720 	}
721 
722 	ret = 0;
723 
724 end:
725 
726 	if (c)
727 		sort_free(c);
728 	if (f)
729 		sort_free(f);
730 	regfree(&re);
731 
732 	return (ret);
733 }
734 
735 /*
736  * Parse -k option value.
737  */
738 static int
739 parse_k(const char *s, struct key_specs *ks)
740 {
741 	int ret = -1;
742 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
743 	    { false, false, false, false, false, false };
744 
745 	if (s && *s) {
746 		char *sptr;
747 
748 		sptr = strchr(s, ',');
749 		if (sptr) {
750 			size_t size1;
751 			char *pos1, *pos2;
752 
753 			size1 = sptr - s;
754 
755 			if (size1 < 1)
756 				return (-1);
757 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
758 
759 			strncpy(pos1, s, size1);
760 			pos1[size1] = '\0';
761 
762 			ret = parse_pos(pos1, ks, mef_flags, false);
763 
764 			sort_free(pos1);
765 			if (ret < 0)
766 				return (ret);
767 
768 			pos2 = sort_strdup(sptr + 1);
769 			ret = parse_pos(pos2, ks, mef_flags, true);
770 			sort_free(pos2);
771 		} else
772 			ret = parse_pos(s, ks, mef_flags, false);
773 	}
774 
775 	return (ret);
776 }
777 
/*
 * Parse POS of the obsolete "+POS -POS" syntax:
 * FIELD[.COLUMN][LETTERS], without the leading '+' or '-'.
 *
 * s      the position string
 * nf     receives the field number (0 if the string does not match)
 * nc     receives the column number (0 if absent)
 * sopts  receives the trailing letters as a NUL-terminated string;
 *        it is left untouched when there are none
 *
 * Returns 0 on success and -1 when s does not have the expected form.
 * The program ends with "invalid key position" when a number cannot
 * be represented or when the letters would not fit into sopts.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	/*
	 * Size of the sopts buffers that fix_obsolete_keys() passes in;
	 * the two have to be kept in step.
	 */
	enum { SOPTS_SIZE = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long num;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Field number: first capture group. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/*
	 * The results are ints and fix_obsolete_keys() adds one to them,
	 * so anything from INT_MAX upwards is refused rather than being
	 * truncated.
	 */
	errno = 0;
	num = strtoul(f, NULL, 10);
	if (errno != 0 || num >= (unsigned long)INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	/* Column number: second capture group, which starts with '.'. */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		num = strtoul(c, NULL, 10);
		if (errno != 0 || num >= (unsigned long)INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	/* Trailing letters: third capture group. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the end of the caller's buffer. */
		if (len >= SOPTS_SIZE)
			errx(2, "%s", getstr(11));

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}
851 
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form "+POS1" that parse_pos_obs() accepts is
 * replaced by a newly allocated "-kF.C[letters]" string, with field
 * and column numbers increased by one.  If the next argument is a
 * valid "-POS2", it is folded into the same -k option and removed
 * from argv, and *argc is decremented.  For POS2 the column number is
 * used as given and the field number is increased only when the
 * column is greater than 0.  Scanning stops at "--".
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Size of the buffers that receive the modifier letters. */
	enum { SOPTS_SIZE = 128 };
	/* Room for "-k", four numbers, punctuation and two letter strings. */
	char sopt[2 * SOPTS_SIZE + 64];
	int n;

	for (int i = 1; i < *argc; i++) {
		char *arg1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) > 1 && arg1[0] == '+') {
			int c1, f1;
			char sopts1[SOPTS_SIZE];

			sopts1[0] = 0;
			c1 = f1 = 0;

			if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
				continue;
			else {
				f1 += 1;
				c1 += 1;
				if (i + 1 < *argc) {
					char *arg2 = argv[i + 1];

					if (strlen(arg2) > 1 &&
					    arg2[0] == '-') {
						int c2, f2;
						char sopts2[SOPTS_SIZE];

						sopts2[0] = 0;
						c2 = f2 = 0;

						if (parse_pos_obs(arg2 + 1,
						    &f2, &c2, sopts2) >= 0) {
							if (c2 > 0)
								f2 += 1;
							/* Bounded: two letter strings no longer fit 129 bytes. */
							n = snprintf(sopt, sizeof(sopt),
							    "-k%d.%d%s,%d.%d%s",
							    f1, c1, sopts1, f2, c2, sopts2);
							if (n < 0 || (size_t)n >= sizeof(sopt))
								errx(2, "%s", getstr(11));
							argv[i] = sort_strdup(sopt);
							/* Close the gap left by the consumed -POS2. */
							for (int j = i + 1; j + 1 < *argc; j++)
								argv[j] = argv[j + 1];
							*argc -= 1;
							continue;
						}
					}
				}
				n = snprintf(sopt, sizeof(sopt), "-k%d.%d%s",
				    f1, c1, sopts1);
				if (n < 0 || (size_t)n >= sizeof(sopt))
					errx(2, "%s", getstr(11));
				argv[i] = sort_strdup(sopt);
			}
		}
	}
}
913 
914 /*
915  * Seed random sort
916  */
917 static void
918 get_random_seed(const char *random_source)
919 {
920 	char randseed[32];
921 	struct stat fsb, rsb;
922 	ssize_t rd;
923 	int rsfd;
924 
925 	rsfd = -1;
926 	rd = sizeof(randseed);
927 
928 	if (random_source == NULL) {
929 		if (getentropy(randseed, sizeof(randseed)) < 0)
930 			err(EX_SOFTWARE, "getentropy");
931 		goto out;
932 	}
933 
934 	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
935 	if (rsfd < 0)
936 		err(EX_NOINPUT, "open: %s", random_source);
937 
938 	if (fstat(rsfd, &fsb) != 0)
939 		err(EX_SOFTWARE, "fstat");
940 
941 	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
942 		err(EX_USAGE,
943 		    "random seed isn't a regular file or /dev/random");
944 
945 	/*
946 	 * Regular files: read up to maximum seed size and explicitly
947 	 * reject longer files.
948 	 */
949 	if (S_ISREG(fsb.st_mode)) {
950 		if (fsb.st_size > (off_t)sizeof(randseed))
951 			errx(EX_USAGE, "random seed is too large (%jd >"
952 			    " %zu)!", (intmax_t)fsb.st_size,
953 			    sizeof(randseed));
954 		else if (fsb.st_size < 1)
955 			errx(EX_USAGE, "random seed is too small ("
956 			    "0 bytes)");
957 
958 		memset(randseed, 0, sizeof(randseed));
959 
960 		rd = read(rsfd, randseed, fsb.st_size);
961 		if (rd < 0)
962 			err(EX_SOFTWARE, "reading random seed file %s",
963 			    random_source);
964 		if (rd < (ssize_t)fsb.st_size)
965 			errx(EX_SOFTWARE, "short read from %s", random_source);
966 	} else if (S_ISCHR(fsb.st_mode)) {
967 		if (stat("/dev/random", &rsb) < 0)
968 			err(EX_SOFTWARE, "stat");
969 
970 		if (fsb.st_dev != rsb.st_dev ||
971 		    fsb.st_ino != rsb.st_ino)
972 			errx(EX_USAGE, "random seed is a character "
973 			    "device other than /dev/random");
974 
975 		if (getentropy(randseed, sizeof(randseed)) < 0)
976 			err(EX_SOFTWARE, "getentropy");
977 	}
978 
979 out:
980 	if (rsfd >= 0)
981 		close(rsfd);
982 
983 	MD5Init(&md5_ctx);
984 	MD5Update(&md5_ctx, randseed, rd);
985 }
986 
987 /*
988  * Main function.
989  */
990 int
991 main(int argc, char **argv)
992 {
993 	char *outfile, *real_outfile;
994 	char *random_source = NULL;
995 	int c, result;
996 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
997 	    { false, false, false, false, false, false };
998 
999 	result = 0;
1000 	outfile = sort_strdup("-");
1001 	real_outfile = NULL;
1002 
1003 	struct sort_mods *sm = &default_sort_mods_object;
1004 
1005 	init_tmp_files();
1006 
1007 	set_signal_handler();
1008 
1009 	set_hw_params();
1010 	set_locale();
1011 	set_tmpdir();
1012 	set_sort_opts();
1013 
1014 #ifndef WITHOUT_NLS
1015 	catalog = catopen("sort", NL_CAT_LOCALE);
1016 #endif
1017 
1018 	fix_obsolete_keys(&argc, argv);
1019 
1020 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1021 	    != -1)) {
1022 
1023 		check_mutually_exclusive_flags(c, mef_flags);
1024 
1025 		if (!set_sort_modifier(sm, c)) {
1026 
1027 			switch (c) {
1028 			case 'c':
1029 				sort_opts_vals.cflag = true;
1030 				if (optarg) {
1031 					if (!strcmp(optarg, "diagnose-first"))
1032 						;
1033 					else if (!strcmp(optarg, "silent") ||
1034 					    !strcmp(optarg, "quiet"))
1035 						sort_opts_vals.csilentflag = true;
1036 					else if (*optarg)
1037 						unknown(optarg);
1038 				}
1039 				break;
1040 			case 'C':
1041 				sort_opts_vals.cflag = true;
1042 				sort_opts_vals.csilentflag = true;
1043 				break;
1044 			case 'k':
1045 			{
1046 				sort_opts_vals.complex_sort = true;
1047 				sort_opts_vals.kflag = true;
1048 
1049 				keys_num++;
1050 				keys = sort_realloc(keys, keys_num *
1051 				    sizeof(struct key_specs));
1052 				memset(&(keys[keys_num - 1]), 0,
1053 				    sizeof(struct key_specs));
1054 
1055 				if (parse_k(optarg, &(keys[keys_num - 1]))
1056 				    < 0) {
1057 					errc(2, EINVAL, "-k %s", optarg);
1058 				}
1059 
1060 				break;
1061 			}
1062 			case 'm':
1063 				sort_opts_vals.mflag = true;
1064 				break;
1065 			case 'o':
1066 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1067 				strcpy(outfile, optarg);
1068 				break;
1069 			case 's':
1070 				sort_opts_vals.sflag = true;
1071 				break;
1072 			case 'S':
1073 				available_free_memory =
1074 				    parse_memory_buffer_value(optarg);
1075 				break;
1076 			case 'T':
1077 				tmpdir = sort_strdup(optarg);
1078 				break;
1079 			case 't':
1080 				while (strlen(optarg) > 1) {
1081 					if (optarg[0] != '\\') {
1082 						errc(2, EINVAL, "%s", optarg);
1083 					}
1084 					optarg += 1;
1085 					if (*optarg == '0') {
1086 						*optarg = 0;
1087 						break;
1088 					}
1089 				}
1090 				sort_opts_vals.tflag = true;
1091 				sort_opts_vals.field_sep = btowc(optarg[0]);
1092 				if (sort_opts_vals.field_sep == WEOF) {
1093 					errno = EINVAL;
1094 					err(2, NULL);
1095 				}
1096 				if (!gnusort_numeric_compatibility) {
1097 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1098 						symbol_decimal_point = WEOF;
1099 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1100 						symbol_thousands_sep = WEOF;
1101 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1102 						symbol_negative_sign = WEOF;
1103 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1104 						symbol_positive_sign = WEOF;
1105 				}
1106 				break;
1107 			case 'u':
1108 				sort_opts_vals.uflag = true;
1109 				/* stable sort for the correct unique val */
1110 				sort_opts_vals.sflag = true;
1111 				break;
1112 			case 'z':
1113 				sort_opts_vals.zflag = true;
1114 				break;
1115 			case SORT_OPT:
1116 				if (optarg) {
1117 					if (!strcmp(optarg, "general-numeric"))
1118 						set_sort_modifier(sm, 'g');
1119 					else if (!strcmp(optarg, "human-numeric"))
1120 						set_sort_modifier(sm, 'h');
1121 					else if (!strcmp(optarg, "numeric"))
1122 						set_sort_modifier(sm, 'n');
1123 					else if (!strcmp(optarg, "month"))
1124 						set_sort_modifier(sm, 'M');
1125 					else if (!strcmp(optarg, "random"))
1126 						set_sort_modifier(sm, 'R');
1127 					else
1128 						unknown(optarg);
1129 				}
1130 				break;
1131 #if defined(SORT_THREADS)
1132 			case PARALLEL_OPT:
1133 				nthreads = (size_t)(atoi(optarg));
1134 				if (nthreads < 1)
1135 					nthreads = 1;
1136 				if (nthreads > 1024)
1137 					nthreads = 1024;
1138 				break;
1139 #endif
1140 			case QSORT_OPT:
1141 				sort_opts_vals.sort_method = SORT_QSORT;
1142 				break;
1143 			case MERGESORT_OPT:
1144 				sort_opts_vals.sort_method = SORT_MERGESORT;
1145 				break;
1146 			case MMAP_OPT:
1147 				use_mmap = true;
1148 				break;
1149 			case HEAPSORT_OPT:
1150 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1151 				break;
1152 			case RADIXSORT_OPT:
1153 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1154 				break;
1155 			case RANDOMSOURCE_OPT:
1156 				random_source = strdup(optarg);
1157 				break;
1158 			case COMPRESSPROGRAM_OPT:
1159 				compress_program = strdup(optarg);
1160 				break;
1161 			case FF_OPT:
1162 				read_fns_from_file0(optarg);
1163 				break;
1164 			case BS_OPT:
1165 			{
1166 				errno = 0;
1167 				long mof = strtol(optarg, NULL, 10);
1168 				if (errno != 0)
1169 					err(2, "--batch-size");
1170 				if (mof >= 2)
1171 					max_open_files = (size_t) mof + 1;
1172 			}
1173 				break;
1174 			case VERSION_OPT:
1175 				printf("%s\n", VERSION);
1176 				exit(EXIT_SUCCESS);
1177 				/* NOTREACHED */
1178 				break;
1179 			case DEBUG_OPT:
1180 				debug_sort = true;
1181 				break;
1182 			case HELP_OPT:
1183 				usage(false);
1184 				/* NOTREACHED */
1185 				break;
1186 			default:
1187 				usage(true);
1188 				/* NOTREACHED */
1189 			}
1190 		}
1191 	}
1192 
1193 	argc -= optind;
1194 	argv += optind;
1195 
1196 	if (argv_from_file0) {
1197 		argc = argc_from_file0;
1198 		argv = argv_from_file0;
1199 	}
1200 
1201 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1202 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1203 
1204 	if (keys_num == 0) {
1205 		keys_num = 1;
1206 		keys = sort_realloc(keys, sizeof(struct key_specs));
1207 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1208 		keys[0].c1 = 1;
1209 		keys[0].pos1b = default_sort_mods->bflag;
1210 		keys[0].pos2b = default_sort_mods->bflag;
1211 		memcpy(&(keys[0].sm), default_sort_mods,
1212 		    sizeof(struct sort_mods));
1213 	}
1214 
1215 	for (size_t i = 0; i < keys_num; i++) {
1216 		struct key_specs *ks;
1217 
1218 		ks = &(keys[i]);
1219 
1220 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1221 		    !(ks->pos2b)) {
1222 			ks->pos1b = sm->bflag;
1223 			ks->pos2b = sm->bflag;
1224 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1225 		}
1226 
1227 		ks->sm.func = get_sort_func(&(ks->sm));
1228 	}
1229 
1230 	if (debug_sort) {
1231 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1232 #if defined(SORT_THREADS)
1233 		printf("Number of CPUs: %d\n",(int)ncpu);
1234 		nthreads = 1;
1235 #endif
1236 		printf("Using collate rules of %s locale\n",
1237 		    setlocale(LC_COLLATE, NULL));
1238 		if (byte_sort)
1239 			printf("Byte sort is used\n");
1240 		if (print_symbols_on_debug) {
1241 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1242 			if (symbol_thousands_sep)
1243 				printf("Thousands separator: <%lc>\n",
1244 				    symbol_thousands_sep);
1245 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1246 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1247 		}
1248 	}
1249 
1250 	if (need_random)
1251 		get_random_seed(random_source);
1252 
1253 	/* Case when the outfile equals one of the input files: */
1254 	if (strcmp(outfile, "-")) {
1255 
1256 		for(int i = 0; i < argc; ++i) {
1257 			if (strcmp(argv[i], outfile) == 0) {
1258 				real_outfile = sort_strdup(outfile);
1259 				for(;;) {
1260 					char* tmp = sort_malloc(strlen(outfile) +
1261 					    strlen(".tmp") + 1);
1262 
1263 					strcpy(tmp, outfile);
1264 					strcpy(tmp + strlen(tmp), ".tmp");
1265 					sort_free(outfile);
1266 					outfile = tmp;
1267 					if (access(outfile, F_OK) < 0)
1268 						break;
1269 				}
1270 				tmp_file_atexit(outfile);
1271 			}
1272 		}
1273 	}
1274 
1275 #if defined(SORT_THREADS)
1276 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1277 		nthreads = 1;
1278 #endif
1279 
1280 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1281 		struct file_list fl;
1282 		struct sort_list list;
1283 
1284 		sort_list_init(&list);
1285 		file_list_init(&fl, true);
1286 
1287 		if (argc < 1)
1288 			procfile("-", &list, &fl);
1289 		else {
1290 			while (argc > 0) {
1291 				procfile(*argv, &list, &fl);
1292 				--argc;
1293 				++argv;
1294 			}
1295 		}
1296 
1297 		if (fl.count < 1)
1298 			sort_list_to_file(&list, outfile);
1299 		else {
1300 			if (list.count > 0) {
1301 				char *flast = new_tmp_file_name();
1302 
1303 				sort_list_to_file(&list, flast);
1304 				file_list_add(&fl, flast, false);
1305 			}
1306 			merge_files(&fl, outfile);
1307 		}
1308 
1309 		file_list_clean(&fl);
1310 
1311 		/*
1312 		 * We are about to exit the program, so we can ignore
1313 		 * the clean-up for speed
1314 		 *
1315 		 * sort_list_clean(&list);
1316 		 */
1317 
1318 	} else if (sort_opts_vals.cflag) {
1319 		result = (argc == 0) ? (check("-")) : (check(*argv));
1320 	} else if (sort_opts_vals.mflag) {
1321 		struct file_list fl;
1322 
1323 		file_list_init(&fl, false);
1324 		/* No file arguments remaining means "read from stdin." */
1325 		if (argc == 0)
1326 			file_list_add(&fl, "-", true);
1327 		else
1328 			file_list_populate(&fl, argc, argv, true);
1329 		merge_files(&fl, outfile);
1330 		file_list_clean(&fl);
1331 	}
1332 
1333 	if (real_outfile) {
1334 		unlink(real_outfile);
1335 		if (rename(outfile, real_outfile) < 0)
1336 			err(2, NULL);
1337 		sort_free(real_outfile);
1338 	}
1339 
1340 	sort_free(outfile);
1341 
1342 #ifndef WITHOUT_NLS
1343 	if (catalog != (nl_catd)-1)
1344 		catclose(catalog);
1345 #endif
1346 
1347 	return (result);
1348 }
1349