xref: /freebsd/usr.bin/sort/sort.c (revision ce6a89e27cd190313be39bb479880aeda4778436)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36 
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57 
58 #ifndef WITHOUT_NLS
59 #include <nl_types.h>
60 nl_catd catalog;
61 #endif
62 
/* Short-option string handed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/*
 * Set when the random-sort modifier ('R') is selected; main() then calls
 * get_random_seed() before sorting.
 */
static bool need_random;

/* MD5 context that get_random_seed() initialises and feeds with the seed. */
MD5_CTX md5_ctx;
68 
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The array index is the message number (the callers in this file pass it
 * to getstr()).  Slot 0 is an empty placeholder so that the real messages
 * start at index 1, matching the numbered comments below.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
96 
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by the --debug option. */
bool debug_sort;
/* Set by the g, R, M, n and h sort modifiers (see set_sort_modifier()). */
bool need_hint;

#if defined(SORT_THREADS)
/* Number of online CPUs as determined (and capped) by set_hw_params(). */
unsigned int ncpu = 1;
/* Thread count: defaults to ncpu, can be overridden with --parallel. */
size_t nthreads = 1;
#endif

/* True when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as global options, i.e. not attached to a -k key. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/*
 * Set by the n and h modifiers; makes --debug print the numeric symbols
 * (decimal point, thousands separator, signs).
 */
static bool print_symbols_on_debug;
112 static bool print_symbols_on_debug;
113 
/*
 * File arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 keeps the sentinel value (size_t)-1 until the first
 * non-empty name has been read by read_fns_from_file0().
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
119 
/*
 * Placeholder symbols for options which have no single-character equivalent.
 *
 * The values start at CHAR_MAX + 1 so that they cannot collide with any
 * single-character option code; they are used as the `val' field of the
 * long_options[] entries and dispatched on in main().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
142 
/*
 * Sort-type flags of which at most one may be in effect at a time.  The
 * position of a flag in this table is also its index in the mef_flags[]
 * arrays handed to check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
145 
/*
 * Long option table for getopt_long().  Every entry has a NULL flag
 * pointer, so getopt_long() returns the last field: either the equivalent
 * short option character or one of the *_OPT placeholders.
 *
 * NOTE(review): the entry literally named "check=silent|quiet" looks like
 * documentation rather than a name a user could type -- confirm whether it
 * can ever match.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};
187 
188 void fix_obsolete_keys(int *argc, char **argv);
189 
190 /*
191  * Check where sort modifier is present
192  */
193 static bool
194 sort_modifier_empty(struct sort_mods *sm)
195 {
196 
197 	if (sm == NULL)
198 		return (true);
199 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
200 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
201 }
202 
/*
 * Print the usage text and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
218 
219 /*
220  * Read input file names from a file (file0-from option).
221  */
222 static void
223 read_fns_from_file0(const char *fn)
224 {
225 	FILE *f;
226 	char *line = NULL;
227 	size_t linesize = 0;
228 	ssize_t linelen;
229 
230 	if (fn == NULL)
231 		return;
232 
233 	f = fopen(fn, "r");
234 	if (f == NULL)
235 		err(2, "%s", fn);
236 
237 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
238 		if (*line != '\0') {
239 			if (argc_from_file0 == (size_t) - 1)
240 				argc_from_file0 = 0;
241 			++argc_from_file0;
242 			argv_from_file0 = sort_realloc(argv_from_file0,
243 			    argc_from_file0 * sizeof(char *));
244 			if (argv_from_file0 == NULL)
245 				err(2, NULL);
246 			argv_from_file0[argc_from_file0 - 1] = line;
247 		} else {
248 			free(line);
249 		}
250 		line = NULL;
251 		linesize = 0;
252 	}
253 	if (ferror(f))
254 		err(2, "%s: getdelim", fn);
255 
256 	closefile(f, fn);
257 }
258 
259 /*
260  * Check how much RAM is available for the sort.
261  */
262 static void
263 set_hw_params(void)
264 {
265 	long pages, psize;
266 
267 #if defined(SORT_THREADS)
268 	ncpu = 1;
269 #endif
270 
271 	pages = sysconf(_SC_PHYS_PAGES);
272 	if (pages < 1) {
273 		perror("sysconf pages");
274 		pages = 1;
275 	}
276 	psize = sysconf(_SC_PAGESIZE);
277 	if (psize < 1) {
278 		perror("sysconf psize");
279 		psize = 4096;
280 	}
281 #if defined(SORT_THREADS)
282 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
283 	if (ncpu < 1)
284 		ncpu = 1;
285 	else if(ncpu > 32)
286 		ncpu = 32;
287 
288 	nthreads = ncpu;
289 #endif
290 
291 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
292 	available_free_memory = free_memory / 2;
293 
294 	if (available_free_memory < 1024)
295 		available_free_memory = 1024;
296 }
297 
/*
 * Convert the first multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  When the conversion fails or c is an empty
 * string (mbtowc() returns less than 1), *wc is set to def instead.
 * Nothing happens when wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
313 
314 /*
315  * Set current locale symbols.
316  */
317 static void
318 set_locale(void)
319 {
320 	struct lconv *lc;
321 	const char *locale;
322 
323 	setlocale(LC_ALL, "");
324 
325 	lc = localeconv();
326 
327 	if (lc) {
328 		/* obtain LC_NUMERIC info */
329 		/* Convert to wide char form */
330 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
331 		    symbol_decimal_point);
332 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
333 		    symbol_thousands_sep);
334 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
335 		    symbol_positive_sign);
336 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
337 		    symbol_negative_sign);
338 	}
339 
340 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
341 		gnusort_numeric_compatibility = true;
342 
343 	locale = setlocale(LC_COLLATE, NULL);
344 
345 	if (locale) {
346 		char *tmpl;
347 		const char *cclocale;
348 
349 		tmpl = sort_strdup(locale);
350 		cclocale = setlocale(LC_COLLATE, "C");
351 		if (cclocale && !strcmp(cclocale, tmpl))
352 			byte_sort = true;
353 		else {
354 			const char *pclocale;
355 
356 			pclocale = setlocale(LC_COLLATE, "POSIX");
357 			if (pclocale && !strcmp(pclocale, tmpl))
358 				byte_sort = true;
359 		}
360 		setlocale(LC_COLLATE, tmpl);
361 		sort_free(tmpl);
362 	}
363 }
364 
365 /*
366  * Set directory temporary files.
367  */
368 static void
369 set_tmpdir(void)
370 {
371 	char *td;
372 
373 	td = getenv("TMPDIR");
374 	if (td != NULL)
375 		tmpdir = sort_strdup(td);
376 }
377 
378 /*
379  * Parse -S option.
380  */
381 static unsigned long long
382 parse_memory_buffer_value(const char *value)
383 {
384 
385 	if (value == NULL)
386 		return (available_free_memory);
387 	else {
388 		char *endptr;
389 		unsigned long long membuf;
390 
391 		endptr = NULL;
392 		errno = 0;
393 		membuf = strtoll(value, &endptr, 10);
394 
395 		if (errno != 0) {
396 			warn("%s",getstr(4));
397 			membuf = available_free_memory;
398 		} else {
399 			switch (*endptr){
400 			case 'Y':
401 				membuf *= 1024;
402 				/* FALLTHROUGH */
403 			case 'Z':
404 				membuf *= 1024;
405 				/* FALLTHROUGH */
406 			case 'E':
407 				membuf *= 1024;
408 				/* FALLTHROUGH */
409 			case 'P':
410 				membuf *= 1024;
411 				/* FALLTHROUGH */
412 			case 'T':
413 				membuf *= 1024;
414 				/* FALLTHROUGH */
415 			case 'G':
416 				membuf *= 1024;
417 				/* FALLTHROUGH */
418 			case 'M':
419 				membuf *= 1024;
420 				/* FALLTHROUGH */
421 			case '\0':
422 			case 'K':
423 				membuf *= 1024;
424 				/* FALLTHROUGH */
425 			case 'b':
426 				break;
427 			case '%':
428 				membuf = (available_free_memory * membuf) /
429 				    100;
430 				break;
431 			default:
432 				warnc(EINVAL, "%s", optarg);
433 				membuf = available_free_memory;
434 			}
435 		}
436 		return (membuf);
437 	}
438 }
439 
/*
 * Signal handler that clears the temporary files.
 *
 * Calls clear_tmp_files() and then terminates the process with
 * exit(-1).  None of the handler arguments is used.
 *
 * NOTE(review): exit() is not on the POSIX list of async-signal-safe
 * functions, and whether clear_tmp_files() is safe to call from a signal
 * handler cannot be told from this file -- confirm.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
451 
452 /*
453  * Set signal handler on panic signals.
454  */
455 static void
456 set_signal_handler(void)
457 {
458 	struct sigaction sa;
459 
460 	memset(&sa, 0, sizeof(sa));
461 	sa.sa_sigaction = &sig_handler;
462 	sa.sa_flags = SA_SIGINFO;
463 
464 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
465 		perror("sigaction");
466 		return;
467 	}
468 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
469 		perror("sigaction");
470 		return;
471 	}
472 	if (sigaction(SIGINT, &sa, NULL) < 0) {
473 		perror("sigaction");
474 		return;
475 	}
476 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
477 		perror("sigaction");
478 		return;
479 	}
480 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
481 		perror("sigaction");
482 		return;
483 	}
484 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
485 		perror("sigaction");
486 		return;
487 	}
488 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
489 		perror("sigaction");
490 		return;
491 	}
492 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
497 		perror("sigaction");
498 		return;
499 	}
500 }
501 
/*
 * Report an unknown feature and exit with status 2.
 *
 * what is the offending text; it is printed after message number 3.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
511 
512 /*
513  * Check whether contradictory input options are used.
514  */
515 static void
516 check_mutually_exclusive_flags(char c, bool *mef_flags)
517 {
518 	int fo_index, mec;
519 	bool found_others, found_this;
520 
521 	found_others = found_this = false;
522 	fo_index = 0;
523 
524 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
525 		mec = mutually_exclusive_flags[i];
526 
527 		if (mec != c) {
528 			if (mef_flags[i]) {
529 				if (found_this)
530 					errx(1, "%c:%c: %s", c, mec, getstr(1));
531 				found_others = true;
532 				fo_index = i;
533 			}
534 		} else {
535 			if (found_others)
536 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
537 			mef_flags[i] = true;
538 			found_this = true;
539 		}
540 	}
541 }
542 
543 /*
544  * Initialise sort opts data.
545  */
546 static void
547 set_sort_opts(void)
548 {
549 
550 	memset(&default_sort_mods_object, 0,
551 	    sizeof(default_sort_mods_object));
552 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
553 	default_sort_mods_object.func =
554 	    get_sort_func(&default_sort_mods_object);
555 }
556 
557 /*
558  * Set a sort modifier on a sort modifiers object.
559  */
560 static bool
561 set_sort_modifier(struct sort_mods *sm, int c)
562 {
563 
564 	if (sm == NULL)
565 		return (true);
566 
567 	switch (c){
568 	case 'b':
569 		sm->bflag = true;
570 		break;
571 	case 'd':
572 		sm->dflag = true;
573 		break;
574 	case 'f':
575 		sm->fflag = true;
576 		break;
577 	case 'g':
578 		sm->gflag = true;
579 		need_hint = true;
580 		break;
581 	case 'i':
582 		sm->iflag = true;
583 		break;
584 	case 'R':
585 		sm->Rflag = true;
586 		need_hint = true;
587 		need_random = true;
588 		break;
589 	case 'M':
590 		initialise_months();
591 		sm->Mflag = true;
592 		need_hint = true;
593 		break;
594 	case 'n':
595 		sm->nflag = true;
596 		need_hint = true;
597 		print_symbols_on_debug = true;
598 		break;
599 	case 'r':
600 		sm->rflag = true;
601 		break;
602 	case 'V':
603 		sm->Vflag = true;
604 		break;
605 	case 'h':
606 		sm->hflag = true;
607 		need_hint = true;
608 		print_symbols_on_debug = true;
609 		break;
610 	default:
611 		return (false);
612 	}
613 
614 	sort_opts_vals.complex_sort = true;
615 	sm->func = get_sort_func(sm);
616 	return (true);
617 }
618 
619 /*
620  * Parse POS in -k option.
621  */
622 static int
623 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
624 {
625 	regmatch_t pmatch[4];
626 	regex_t re;
627 	char *c, *f;
628 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
629 	size_t len, nmatch;
630 	int ret;
631 
632 	ret = -1;
633 	nmatch = 4;
634 	c = f = NULL;
635 
636 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
637 		return (-1);
638 
639 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
640 		goto end;
641 
642 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
643 		goto end;
644 
645 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
646 		goto end;
647 
648 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
649 	f = sort_malloc((len + 1) * sizeof(char));
650 
651 	strncpy(f, s + pmatch[1].rm_so, len);
652 	f[len] = '\0';
653 
654 	if (second) {
655 		errno = 0;
656 		ks->f2 = (size_t) strtoul(f, NULL, 10);
657 		if (errno != 0)
658 			err(2, "-k");
659 		if (ks->f2 == 0) {
660 			warn("%s",getstr(5));
661 			goto end;
662 		}
663 	} else {
664 		errno = 0;
665 		ks->f1 = (size_t) strtoul(f, NULL, 10);
666 		if (errno != 0)
667 			err(2, "-k");
668 		if (ks->f1 == 0) {
669 			warn("%s",getstr(5));
670 			goto end;
671 		}
672 	}
673 
674 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
675 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
676 		c = sort_malloc((len + 1) * sizeof(char));
677 
678 		strncpy(c, s + pmatch[2].rm_so + 1, len);
679 		c[len] = '\0';
680 
681 		if (second) {
682 			errno = 0;
683 			ks->c2 = (size_t) strtoul(c, NULL, 10);
684 			if (errno != 0)
685 				err(2, "-k");
686 		} else {
687 			errno = 0;
688 			ks->c1 = (size_t) strtoul(c, NULL, 10);
689 			if (errno != 0)
690 				err(2, "-k");
691 			if (ks->c1 == 0) {
692 				warn("%s",getstr(6));
693 				goto end;
694 			}
695 		}
696 	} else {
697 		if (second)
698 			ks->c2 = 0;
699 		else
700 			ks->c1 = 1;
701 	}
702 
703 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
704 		regoff_t i = 0;
705 
706 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
707 			check_mutually_exclusive_flags(s[i], mef_flags);
708 			if (s[i] == 'b') {
709 				if (second)
710 					ks->pos2b = true;
711 				else
712 					ks->pos1b = true;
713 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
714 				goto end;
715 		}
716 	}
717 
718 	ret = 0;
719 
720 end:
721 
722 	if (c)
723 		sort_free(c);
724 	if (f)
725 		sort_free(f);
726 	regfree(&re);
727 
728 	return (ret);
729 }
730 
731 /*
732  * Parse -k option value.
733  */
734 static int
735 parse_k(const char *s, struct key_specs *ks)
736 {
737 	int ret = -1;
738 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
739 	    { false, false, false, false, false, false };
740 
741 	if (s && *s) {
742 		char *sptr;
743 
744 		sptr = strchr(s, ',');
745 		if (sptr) {
746 			size_t size1;
747 			char *pos1, *pos2;
748 
749 			size1 = sptr - s;
750 
751 			if (size1 < 1)
752 				return (-1);
753 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
754 
755 			strncpy(pos1, s, size1);
756 			pos1[size1] = '\0';
757 
758 			ret = parse_pos(pos1, ks, mef_flags, false);
759 
760 			sort_free(pos1);
761 			if (ret < 0)
762 				return (ret);
763 
764 			pos2 = sort_strdup(sptr + 1);
765 			ret = parse_pos(pos2, ks, mef_flags, true);
766 			sort_free(pos2);
767 		} else
768 			ret = parse_pos(s, ks, mef_flags, false);
769 	}
770 
771 	return (ret);
772 }
773 
/*
 * Parse POS of the obsolete +POS / -POS syntax: FIELD[.COLUMN][LETTERS].
 *
 * s      position string (without the leading '+' or '-')
 * nf     receives the field number
 * nc     receives the column number (0 when absent)
 * sopts  receives the trailing letters as a NUL-terminated string; it is
 *        left untouched when there are none.  The buffer must have room
 *        for at least 128 bytes, which is what the callers in this file
 *        provide.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * when the letters would not fit into sopts.  A number that is out of
 * range terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_CAPACITY = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The regex guarantees the digits end at a non-digit, so they can
	 * be parsed in place.  The result must fit in an int with room for
	 * the "+ 1" the caller applies.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column sub-match. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's buffer. */
		if (len >= SOPTS_CAPACITY)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
847 
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-k..." string.  When the next argument has the form -POS2 it is folded
 * into the same key and removed from argv, and *argc is decremented.
 * Obsolete positions are zero-based, hence the "+ 1" adjustments.
 * Arguments that do not parse as a position are left as they are.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* room for two 127-character modifier strings plus four numbers */
	char sopt[2 * 128 + 64];
	int n;

	for (int i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char sopts1[128];
		int c1, f1;

		if (arg1[0] != '+' || arg1[1] == '\0')
			continue;

		/*
		 * The modifier letters are shorter than the argument; skip
		 * anything whose letters might not fit into sopts1.
		 */
		if (strlen(arg1 + 1) > sizeof(sopts1))
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Guard the increments below against signed overflow. */
		if (f1 < 0 || f1 == INT_MAX || c1 < 0 || c1 == INT_MAX)
			errx(2, "%s", getstr(11));
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];
			char sopts2[128];
			int c2, f2;

			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (arg2[0] == '-' && arg2[1] != '\0' &&
			    strlen(arg2 + 1) <= sizeof(sopts2) &&
			    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (f2 < 0 || f2 == INT_MAX || c2 < 0)
					errx(2, "%s", getstr(11));
				if (c2 > 0)
					f2 += 1;

				n = snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				if (n < 0 || (size_t)n >= sizeof(sopt))
					errx(2, "%s", getstr(11));
				argv[i] = sort_strdup(sopt);

				/* Drop argv[i + 1]; keep argv NULL-terminated. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				argv[*argc] = NULL;
				continue;
			}
		}

		n = snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "%s", getstr(11));
		argv[i] = sort_strdup(sopt);
	}
}
904 
905 /*
906  * Seed random sort
907  */
908 static void
909 get_random_seed(const char *random_source)
910 {
911 	char randseed[32];
912 	struct stat fsb, rsb;
913 	ssize_t rd;
914 	int rsfd;
915 
916 	rsfd = -1;
917 	rd = sizeof(randseed);
918 
919 	if (random_source == NULL) {
920 		if (getentropy(randseed, sizeof(randseed)) < 0)
921 			err(EX_SOFTWARE, "getentropy");
922 		goto out;
923 	}
924 
925 	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
926 	if (rsfd < 0)
927 		err(EX_NOINPUT, "open: %s", random_source);
928 
929 	if (fstat(rsfd, &fsb) != 0)
930 		err(EX_SOFTWARE, "fstat");
931 
932 	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
933 		err(EX_USAGE,
934 		    "random seed isn't a regular file or /dev/random");
935 
936 	/*
937 	 * Regular files: read up to maximum seed size and explicitly
938 	 * reject longer files.
939 	 */
940 	if (S_ISREG(fsb.st_mode)) {
941 		if (fsb.st_size > (off_t)sizeof(randseed))
942 			errx(EX_USAGE, "random seed is too large (%jd >"
943 			    " %zu)!", (intmax_t)fsb.st_size,
944 			    sizeof(randseed));
945 		else if (fsb.st_size < 1)
946 			errx(EX_USAGE, "random seed is too small ("
947 			    "0 bytes)");
948 
949 		memset(randseed, 0, sizeof(randseed));
950 
951 		rd = read(rsfd, randseed, fsb.st_size);
952 		if (rd < 0)
953 			err(EX_SOFTWARE, "reading random seed file %s",
954 			    random_source);
955 		if (rd < (ssize_t)fsb.st_size)
956 			errx(EX_SOFTWARE, "short read from %s", random_source);
957 	} else if (S_ISCHR(fsb.st_mode)) {
958 		if (stat("/dev/random", &rsb) < 0)
959 			err(EX_SOFTWARE, "stat");
960 
961 		if (fsb.st_dev != rsb.st_dev ||
962 		    fsb.st_ino != rsb.st_ino)
963 			errx(EX_USAGE, "random seed is a character "
964 			    "device other than /dev/random");
965 
966 		if (getentropy(randseed, sizeof(randseed)) < 0)
967 			err(EX_SOFTWARE, "getentropy");
968 	}
969 
970 out:
971 	if (rsfd >= 0)
972 		close(rsfd);
973 
974 	MD5Init(&md5_ctx);
975 	MD5Update(&md5_ctx, randseed, rd);
976 }
977 
978 /*
979  * Main function.
980  */
981 int
982 main(int argc, char **argv)
983 {
984 	char *outfile, *real_outfile;
985 	char *random_source = NULL;
986 	int c, result;
987 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
988 	    { false, false, false, false, false, false };
989 
990 	result = 0;
991 	outfile = sort_strdup("-");
992 	real_outfile = NULL;
993 
994 	struct sort_mods *sm = &default_sort_mods_object;
995 
996 	init_tmp_files();
997 
998 	set_signal_handler();
999 
1000 	set_hw_params();
1001 	set_locale();
1002 	set_tmpdir();
1003 	set_sort_opts();
1004 
1005 	fix_obsolete_keys(&argc, argv);
1006 
1007 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1008 	    != -1)) {
1009 
1010 		check_mutually_exclusive_flags(c, mef_flags);
1011 
1012 		if (!set_sort_modifier(sm, c)) {
1013 
1014 			switch (c) {
1015 			case 'c':
1016 				sort_opts_vals.cflag = true;
1017 				if (optarg) {
1018 					if (!strcmp(optarg, "diagnose-first"))
1019 						;
1020 					else if (!strcmp(optarg, "silent") ||
1021 					    !strcmp(optarg, "quiet"))
1022 						sort_opts_vals.csilentflag = true;
1023 					else if (*optarg)
1024 						unknown(optarg);
1025 				}
1026 				break;
1027 			case 'C':
1028 				sort_opts_vals.cflag = true;
1029 				sort_opts_vals.csilentflag = true;
1030 				break;
1031 			case 'k':
1032 			{
1033 				sort_opts_vals.complex_sort = true;
1034 				sort_opts_vals.kflag = true;
1035 
1036 				keys_num++;
1037 				keys = sort_realloc(keys, keys_num *
1038 				    sizeof(struct key_specs));
1039 				memset(&(keys[keys_num - 1]), 0,
1040 				    sizeof(struct key_specs));
1041 
1042 				if (parse_k(optarg, &(keys[keys_num - 1]))
1043 				    < 0) {
1044 					errc(2, EINVAL, "-k %s", optarg);
1045 				}
1046 
1047 				break;
1048 			}
1049 			case 'm':
1050 				sort_opts_vals.mflag = true;
1051 				break;
1052 			case 'o':
1053 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1054 				strcpy(outfile, optarg);
1055 				break;
1056 			case 's':
1057 				sort_opts_vals.sflag = true;
1058 				break;
1059 			case 'S':
1060 				available_free_memory =
1061 				    parse_memory_buffer_value(optarg);
1062 				break;
1063 			case 'T':
1064 				tmpdir = sort_strdup(optarg);
1065 				break;
1066 			case 't':
1067 				while (strlen(optarg) > 1) {
1068 					if (optarg[0] != '\\') {
1069 						errc(2, EINVAL, "%s", optarg);
1070 					}
1071 					optarg += 1;
1072 					if (*optarg == '0') {
1073 						*optarg = 0;
1074 						break;
1075 					}
1076 				}
1077 				sort_opts_vals.tflag = true;
1078 				sort_opts_vals.field_sep = btowc(optarg[0]);
1079 				if (sort_opts_vals.field_sep == WEOF) {
1080 					errno = EINVAL;
1081 					err(2, NULL);
1082 				}
1083 				if (!gnusort_numeric_compatibility) {
1084 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1085 						symbol_decimal_point = WEOF;
1086 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1087 						symbol_thousands_sep = WEOF;
1088 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1089 						symbol_negative_sign = WEOF;
1090 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1091 						symbol_positive_sign = WEOF;
1092 				}
1093 				break;
1094 			case 'u':
1095 				sort_opts_vals.uflag = true;
1096 				/* stable sort for the correct unique val */
1097 				sort_opts_vals.sflag = true;
1098 				break;
1099 			case 'z':
1100 				sort_opts_vals.zflag = true;
1101 				break;
1102 			case SORT_OPT:
1103 				if (optarg) {
1104 					if (!strcmp(optarg, "general-numeric"))
1105 						set_sort_modifier(sm, 'g');
1106 					else if (!strcmp(optarg, "human-numeric"))
1107 						set_sort_modifier(sm, 'h');
1108 					else if (!strcmp(optarg, "numeric"))
1109 						set_sort_modifier(sm, 'n');
1110 					else if (!strcmp(optarg, "month"))
1111 						set_sort_modifier(sm, 'M');
1112 					else if (!strcmp(optarg, "random"))
1113 						set_sort_modifier(sm, 'R');
1114 					else
1115 						unknown(optarg);
1116 				}
1117 				break;
1118 #if defined(SORT_THREADS)
1119 			case PARALLEL_OPT:
1120 				nthreads = (size_t)(atoi(optarg));
1121 				if (nthreads < 1)
1122 					nthreads = 1;
1123 				if (nthreads > 1024)
1124 					nthreads = 1024;
1125 				break;
1126 #endif
1127 			case QSORT_OPT:
1128 				sort_opts_vals.sort_method = SORT_QSORT;
1129 				break;
1130 			case MERGESORT_OPT:
1131 				sort_opts_vals.sort_method = SORT_MERGESORT;
1132 				break;
1133 			case MMAP_OPT:
1134 				use_mmap = true;
1135 				break;
1136 			case HEAPSORT_OPT:
1137 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1138 				break;
1139 			case RADIXSORT_OPT:
1140 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1141 				break;
1142 			case RANDOMSOURCE_OPT:
1143 				random_source = strdup(optarg);
1144 				break;
1145 			case COMPRESSPROGRAM_OPT:
1146 				compress_program = strdup(optarg);
1147 				break;
1148 			case FF_OPT:
1149 				read_fns_from_file0(optarg);
1150 				break;
1151 			case BS_OPT:
1152 			{
1153 				errno = 0;
1154 				long mof = strtol(optarg, NULL, 10);
1155 				if (errno != 0)
1156 					err(2, "--batch-size");
1157 				if (mof >= 2)
1158 					max_open_files = (size_t) mof + 1;
1159 			}
1160 				break;
1161 			case VERSION_OPT:
1162 				printf("%s\n", VERSION);
1163 				exit(EXIT_SUCCESS);
1164 				/* NOTREACHED */
1165 				break;
1166 			case DEBUG_OPT:
1167 				debug_sort = true;
1168 				break;
1169 			case HELP_OPT:
1170 				usage(false);
1171 				/* NOTREACHED */
1172 				break;
1173 			default:
1174 				usage(true);
1175 				/* NOTREACHED */
1176 			}
1177 		}
1178 	}
1179 
1180 	argc -= optind;
1181 	argv += optind;
1182 
1183 	if (argv_from_file0) {
1184 		argc = argc_from_file0;
1185 		argv = argv_from_file0;
1186 	}
1187 
1188 #ifndef WITHOUT_NLS
1189 	catalog = catopen("sort", NL_CAT_LOCALE);
1190 #endif
1191 
1192 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1193 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1194 
1195 #ifndef WITHOUT_NLS
1196 	catclose(catalog);
1197 #endif
1198 
1199 	if (keys_num == 0) {
1200 		keys_num = 1;
1201 		keys = sort_realloc(keys, sizeof(struct key_specs));
1202 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1203 		keys[0].c1 = 1;
1204 		keys[0].pos1b = default_sort_mods->bflag;
1205 		keys[0].pos2b = default_sort_mods->bflag;
1206 		memcpy(&(keys[0].sm), default_sort_mods,
1207 		    sizeof(struct sort_mods));
1208 	}
1209 
1210 	for (size_t i = 0; i < keys_num; i++) {
1211 		struct key_specs *ks;
1212 
1213 		ks = &(keys[i]);
1214 
1215 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1216 		    !(ks->pos2b)) {
1217 			ks->pos1b = sm->bflag;
1218 			ks->pos2b = sm->bflag;
1219 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1220 		}
1221 
1222 		ks->sm.func = get_sort_func(&(ks->sm));
1223 	}
1224 
1225 	if (debug_sort) {
1226 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1227 #if defined(SORT_THREADS)
1228 		printf("Number of CPUs: %d\n",(int)ncpu);
1229 		nthreads = 1;
1230 #endif
1231 		printf("Using collate rules of %s locale\n",
1232 		    setlocale(LC_COLLATE, NULL));
1233 		if (byte_sort)
1234 			printf("Byte sort is used\n");
1235 		if (print_symbols_on_debug) {
1236 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1237 			if (symbol_thousands_sep)
1238 				printf("Thousands separator: <%lc>\n",
1239 				    symbol_thousands_sep);
1240 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1241 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1242 		}
1243 	}
1244 
1245 	if (need_random)
1246 		get_random_seed(random_source);
1247 
1248 	/* Case when the outfile equals one of the input files: */
1249 	if (strcmp(outfile, "-")) {
1250 
1251 		for(int i = 0; i < argc; ++i) {
1252 			if (strcmp(argv[i], outfile) == 0) {
1253 				real_outfile = sort_strdup(outfile);
1254 				for(;;) {
1255 					char* tmp = sort_malloc(strlen(outfile) +
1256 					    strlen(".tmp") + 1);
1257 
1258 					strcpy(tmp, outfile);
1259 					strcpy(tmp + strlen(tmp), ".tmp");
1260 					sort_free(outfile);
1261 					outfile = tmp;
1262 					if (access(outfile, F_OK) < 0)
1263 						break;
1264 				}
1265 				tmp_file_atexit(outfile);
1266 			}
1267 		}
1268 	}
1269 
1270 #if defined(SORT_THREADS)
1271 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1272 		nthreads = 1;
1273 #endif
1274 
1275 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1276 		struct file_list fl;
1277 		struct sort_list list;
1278 
1279 		sort_list_init(&list);
1280 		file_list_init(&fl, true);
1281 
1282 		if (argc < 1)
1283 			procfile("-", &list, &fl);
1284 		else {
1285 			while (argc > 0) {
1286 				procfile(*argv, &list, &fl);
1287 				--argc;
1288 				++argv;
1289 			}
1290 		}
1291 
1292 		if (fl.count < 1)
1293 			sort_list_to_file(&list, outfile);
1294 		else {
1295 			if (list.count > 0) {
1296 				char *flast = new_tmp_file_name();
1297 
1298 				sort_list_to_file(&list, flast);
1299 				file_list_add(&fl, flast, false);
1300 			}
1301 			merge_files(&fl, outfile);
1302 		}
1303 
1304 		file_list_clean(&fl);
1305 
1306 		/*
1307 		 * We are about to exit the program, so we can ignore
1308 		 * the clean-up for speed
1309 		 *
1310 		 * sort_list_clean(&list);
1311 		 */
1312 
1313 	} else if (sort_opts_vals.cflag) {
1314 		result = (argc == 0) ? (check("-")) : (check(*argv));
1315 	} else if (sort_opts_vals.mflag) {
1316 		struct file_list fl;
1317 
1318 		file_list_init(&fl, false);
1319 		/* No file arguments remaining means "read from stdin." */
1320 		if (argc == 0)
1321 			file_list_add(&fl, "-", true);
1322 		else
1323 			file_list_populate(&fl, argc, argv, true);
1324 		merge_files(&fl, outfile);
1325 		file_list_clean(&fl);
1326 	}
1327 
1328 	if (real_outfile) {
1329 		unlink(real_outfile);
1330 		if (rename(outfile, real_outfile) < 0)
1331 			err(2, NULL);
1332 		sort_free(real_outfile);
1333 	}
1334 
1335 	sort_free(outfile);
1336 
1337 	return (result);
1338 }
1339