xref: /freebsd/usr.bin/sort/sort.c (revision 3e0efd2ec4fcb4cd68fb8ccf8aea6fc6151c454b)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54 
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
59 
/* Short-option specification handed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Default source of seed data for -R, and the cap on bytes read from it. */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set when an 'R' sort modifier is seen; gates set_random_seed(). */
static bool need_random;
/* Seed source; replaced by the --random-source argument when given. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* MD5 hex digest string used as the seed, and its length. */
static const void *random_seed;
static size_t random_seed_size;

/* Global MD5 context; set_random_seed() initialises it and feeds it the seed. */
MD5_CTX md5_ctx;
71 
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The array is indexed by message number: slot 0 is an empty placeholder
 * so that the numbers in the comments below (and the numbers passed to
 * getstr() throughout this file) match the array index directly.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/* Message 12 is a printf format: its single %s receives the program name. */
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--nthreads thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
99 
/* Global (non per-key) sort options collected from the command line. */
struct sort_opts sort_opts_vals;

/* Set by --debug. */
bool debug_sort;
/* Set when a g, M, n or h sort modifier is requested. */
bool need_hint;

#if defined(SORT_THREADS)
/* CPU count (capped in set_hw_params()) and the number of threads to use. */
size_t ncpu = 1;
size_t nthreads = 1;
#endif

/* Set when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain options; also exported through a const pointer. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for -n and -h; makes --debug print the numeric locale symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays at -1 until at least one name has been read;
 * main() only switches to this list when the count is >= 0.
 */
static int argc_from_file0 = -1;
static char **argv_from_file0;
122 
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start above CHAR_MAX so they cannot collide with any short option
 * character returned by getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	NTHREADS_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};

/*
 * Sort-type flags of which at most one may be given; the position of a
 * flag in this table is its index in the bool arrays passed to
 * check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
148 
/*
 * Long option table for getopt_long().  Each entry maps a long name either
 * to the equivalent short option character or to one of the *_OPT
 * placeholder values; the table ends with an all-zero sentinel entry.
 */
struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				/*
				 * NOTE(review): this name contains '=' and '|'
				 * literally; it looks like documentation text
				 * rather than a matchable option name - confirm
				 * whether the entry is intended.
				 */
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "nthreads", required_argument, NULL, NTHREADS_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};

/* Defined below; rewrites obsolete "+POS1 [-POS2]" arguments in place. */
void fix_obsolete_keys(int *argc, char **argv);
192 
193 /*
194  * Check where sort modifier is present
195  */
196 static bool
197 sort_modifier_empty(struct sort_mods *sm)
198 {
199 
200 	if (sm == NULL)
201 		return (true);
202 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204 }
205 
/*
 * Print out usage text and terminate the program.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.  Never returns.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;
	/* Message 12 is a format string taking the program name. */
	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
225 
226 /*
227  * Read input file names from a file (file0-from option).
228  */
229 static void
230 read_fns_from_file0(const char *fn)
231 {
232 	if (fn) {
233 		struct file0_reader f0r;
234 		FILE *f;
235 
236 		f = fopen(fn, "r");
237 		if (f == NULL)
238 			err(2, NULL);
239 
240 		memset(&f0r, 0, sizeof(f0r));
241 		f0r.f = f;
242 
243 		while (!feof(f)) {
244 			char *line = read_file0_line(&f0r);
245 
246 			if (line && *line) {
247 				++argc_from_file0;
248 				if (argc_from_file0 < 1)
249 					argc_from_file0 = 1;
250 				argv_from_file0 = sort_realloc(argv_from_file0,
251 				    argc_from_file0 * sizeof(char *));
252 				if (argv_from_file0 == NULL)
253 					err(2, NULL);
254 				argv_from_file0[argc_from_file0 - 1] =
255 				    sort_strdup(line);
256 			}
257 		}
258 		closefile(f, fn);
259 	}
260 }
261 
262 /*
263  * Check how much RAM is available for the sort.
264  */
265 static void
266 set_hw_params(void)
267 {
268 #if defined(SORT_THREADS)
269 	size_t ncpusz;
270 #endif
271 	size_t pages, psize, psz, pszsz;
272 
273 	pages = psize = 0;
274 #if defined(SORT_THREADS)
275 	ncpu = 1;
276 	ncpusz = sizeof(size_t);
277 #endif
278 	psz = pszsz = sizeof(size_t);
279 
280 	if (sysctlbyname("vm.stats.vm.v_free_count", &pages, &psz,
281 	    NULL, 0) < 0) {
282 		perror("vm.stats.vm.v_free_count");
283 		return;
284 	}
285 	if (sysctlbyname("vm.stats.vm.v_page_size", &psize, &pszsz,
286 	    NULL, 0) < 0) {
287 		perror("vm.stats.vm.v_page_size");
288 		return;
289 	}
290 #if defined(SORT_THREADS)
291 	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz,
292 	    NULL, 0) < 0)
293 		ncpu = 1;
294 	else if(ncpu > 32)
295 		ncpu = 32;
296 
297 	nthreads = ncpu;
298 #endif
299 
300 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
301 	available_free_memory = (free_memory * 9) / 10;
302 }
303 
/*
 * Convert the first multibyte character of 'c' into a wide character
 * stored in '*wc'.  If the conversion fails or yields the null character
 * (mbtowc() result below 1), '*wc' is set to 'def' instead.
 * Nothing happens when either pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
319 
320 /*
321  * Set current locale symbols.
322  */
323 static void
324 set_locale(void)
325 {
326 	struct lconv *lc;
327 	const char *locale;
328 
329 	setlocale(LC_ALL, "");
330 
331 	lc = localeconv();
332 
333 	if (lc) {
334 		/* obtain LC_NUMERIC info */
335 		/* Convert to wide char form */
336 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
337 		    symbol_decimal_point);
338 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
339 		    symbol_thousands_sep);
340 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
341 		    symbol_positive_sign);
342 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
343 		    symbol_negative_sign);
344 	}
345 
346 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
347 		gnusort_numeric_compatibility = true;
348 
349 	locale = setlocale(LC_COLLATE, NULL);
350 
351 	if (locale) {
352 		char *tmpl;
353 		const char *cclocale;
354 
355 		tmpl = sort_strdup(locale);
356 		cclocale = setlocale(LC_COLLATE, "C");
357 		if (cclocale && !strcmp(cclocale, tmpl))
358 			byte_sort = true;
359 		else {
360 			const char *pclocale;
361 
362 			pclocale = setlocale(LC_COLLATE, "POSIX");
363 			if (pclocale && !strcmp(pclocale, tmpl))
364 				byte_sort = true;
365 		}
366 		setlocale(LC_COLLATE, tmpl);
367 		sort_free(tmpl);
368 	}
369 }
370 
371 /*
372  * Set directory temporary files.
373  */
374 static void
375 set_tmpdir(void)
376 {
377 	char *td;
378 
379 	td = getenv("TMPDIR");
380 	if (td != NULL)
381 		tmpdir = sort_strdup(td);
382 }
383 
384 /*
385  * Parse -S option.
386  */
387 static unsigned long long
388 parse_memory_buffer_value(const char *value)
389 {
390 
391 	if (value == NULL)
392 		return (available_free_memory);
393 	else {
394 		char *endptr;
395 		unsigned long long membuf;
396 
397 		endptr = NULL;
398 		errno = 0;
399 		membuf = strtoll(value, &endptr, 10);
400 
401 		if (errno != 0) {
402 			warn("%s",getstr(4));
403 			membuf = available_free_memory;
404 		} else {
405 			switch (*endptr){
406 			case 'Y':
407 				membuf *= 1024;
408 				/* FALLTHROUGH */
409 			case 'Z':
410 				membuf *= 1024;
411 				/* FALLTHROUGH */
412 			case 'E':
413 				membuf *= 1024;
414 				/* FALLTHROUGH */
415 			case 'P':
416 				membuf *= 1024;
417 				/* FALLTHROUGH */
418 			case 'T':
419 				membuf *= 1024;
420 				/* FALLTHROUGH */
421 			case 'G':
422 				membuf *= 1024;
423 				/* FALLTHROUGH */
424 			case 'M':
425 				membuf *= 1024;
426 				/* FALLTHROUGH */
427 			case '\0':
428 			case 'K':
429 				membuf *= 1024;
430 				/* FALLTHROUGH */
431 			case 'b':
432 				break;
433 			case '%':
434 				membuf = (available_free_memory * membuf) /
435 				    100;
436 				break;
437 			default:
438 				fprintf(stderr, "%s: %s\n", strerror(EINVAL),
439 				   optarg);
440 				membuf = available_free_memory;
441 			}
442 		}
443 		return (membuf);
444 	}
445 }
446 
/*
 * Signal handler that clears the temporary files.
 *
 * Installed by set_signal_handler() with SA_SIGINFO, hence the
 * three-argument signature; none of the arguments is used.  The handler
 * calls clear_tmp_files() and then terminates the process with exit(-1).
 *
 * NOTE(review): exit() is not on the POSIX list of async-signal-safe
 * functions, and whether clear_tmp_files() is safe to call from a signal
 * handler cannot be told from this file - confirm.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
458 
459 /*
460  * Set signal handler on panic signals.
461  */
462 static void
463 set_signal_handler(void)
464 {
465 	struct sigaction sa;
466 
467 	memset(&sa, 0, sizeof(sa));
468 	sa.sa_sigaction = &sig_handler;
469 	sa.sa_flags = SA_SIGINFO;
470 
471 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
472 		perror("sigaction");
473 		return;
474 	}
475 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
476 		perror("sigaction");
477 		return;
478 	}
479 	if (sigaction(SIGINT, &sa, NULL) < 0) {
480 		perror("sigaction");
481 		return;
482 	}
483 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
484 		perror("sigaction");
485 		return;
486 	}
487 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
488 		perror("sigaction");
489 		return;
490 	}
491 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
492 		perror("sigaction");
493 		return;
494 	}
495 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
496 		perror("sigaction");
497 		return;
498 	}
499 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
500 		perror("sigaction");
501 		return;
502 	}
503 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
504 		perror("sigaction");
505 		return;
506 	}
507 }
508 
/*
 * Report "<message 3>: <what>" through errx() and exit with status 2.
 * Never returns.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
518 
519 /*
520  * Check whether contradictory input options are used.
521  */
522 static void
523 check_mutually_exclusive_flags(char c, bool *mef_flags)
524 {
525 	int fo_index, mec;
526 	bool found_others, found_this;
527 
528 	found_others = found_this =false;
529 	fo_index = 0;
530 
531 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
532 		mec = mutually_exclusive_flags[i];
533 
534 		if (mec != c) {
535 			if (mef_flags[i]) {
536 				if (found_this)
537 					errx(1, "%c:%c: %s", c, mec, getstr(1));
538 				found_others = true;
539 				fo_index = i;
540 			}
541 		} else {
542 			if (found_others)
543 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
544 			mef_flags[i] = true;
545 			found_this = true;
546 		}
547 	}
548 }
549 
550 /*
551  * Initialise sort opts data.
552  */
553 static void
554 set_sort_opts(void)
555 {
556 
557 	memset(&default_sort_mods_object, 0,
558 	    sizeof(default_sort_mods_object));
559 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
560 	default_sort_mods_object.func =
561 	    get_sort_func(&default_sort_mods_object);
562 }
563 
564 /*
565  * Set a sort modifier on a sort modifiers object.
566  */
567 static bool
568 set_sort_modifier(struct sort_mods *sm, int c)
569 {
570 
571 	if (sm) {
572 		switch (c){
573 		case 'b':
574 			sm->bflag = true;
575 			break;
576 		case 'd':
577 			sm->dflag = true;
578 			break;
579 		case 'f':
580 			sm->fflag = true;
581 			break;
582 		case 'g':
583 			sm->gflag = true;
584 			need_hint = true;
585 			break;
586 		case 'i':
587 			sm->iflag = true;
588 			break;
589 		case 'R':
590 			sm->Rflag = true;
591 			need_random = true;
592 			break;
593 		case 'M':
594 			initialise_months();
595 			sm->Mflag = true;
596 			need_hint = true;
597 			break;
598 		case 'n':
599 			sm->nflag = true;
600 			need_hint = true;
601 			print_symbols_on_debug = true;
602 			break;
603 		case 'r':
604 			sm->rflag = true;
605 			break;
606 		case 'V':
607 			sm->Vflag = true;
608 			break;
609 		case 'h':
610 			sm->hflag = true;
611 			need_hint = true;
612 			print_symbols_on_debug = true;
613 			break;
614 		default:
615 			return false;
616 		}
617 		sort_opts_vals.complex_sort = true;
618 		sm->func = get_sort_func(sm);
619 	}
620 	return (true);
621 }
622 
623 /*
624  * Parse POS in -k option.
625  */
626 static int
627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
628 {
629 	regmatch_t pmatch[4];
630 	regex_t re;
631 	char *c, *f;
632 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
633 	size_t len, nmatch;
634 	int ret;
635 
636 	ret = -1;
637 	nmatch = 4;
638 	c = f = NULL;
639 
640 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
641 		return (-1);
642 
643 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
644 		goto end;
645 
646 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
647 		goto end;
648 
649 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
650 		goto end;
651 
652 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
653 	f = sort_malloc((len + 1) * sizeof(char));
654 
655 	strncpy(f, s + pmatch[1].rm_so, len);
656 	f[len] = '\0';
657 
658 	if (second) {
659 		errno = 0;
660 		ks->f2 = (size_t) strtoul(f, NULL, 10);
661 		if (errno != 0)
662 			errx(2, "%s: -k", strerror(errno));
663 		if (ks->f2 == 0) {
664 			warn("%s",getstr(5));
665 			goto end;
666 		}
667 	} else {
668 		errno = 0;
669 		ks->f1 = (size_t) strtoul(f, NULL, 10);
670 		if (errno != 0)
671 			errx(2, "%s: -k", strerror(errno));
672 		if (ks->f1 == 0) {
673 			warn("%s",getstr(5));
674 			goto end;
675 		}
676 	}
677 
678 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
679 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
680 		c = sort_malloc((len + 1) * sizeof(char));
681 
682 		strncpy(c, s + pmatch[2].rm_so + 1, len);
683 		c[len] = '\0';
684 
685 		if (second) {
686 			errno = 0;
687 			ks->c2 = (size_t) strtoul(c, NULL, 10);
688 			if (errno != 0)
689 				errx(2, "%s: -k", strerror(errno));
690 		} else {
691 			errno = 0;
692 			ks->c1 = (size_t) strtoul(c, NULL, 10);
693 			if (errno != 0)
694 				errx(2, "%s: -k", strerror(errno));
695 			if (ks->c1 == 0) {
696 				warn("%s",getstr(6));
697 				goto end;
698 			}
699 		}
700 	} else {
701 		if (second)
702 			ks->c2 = 0;
703 		else
704 			ks->c1 = 1;
705 	}
706 
707 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
708 		regoff_t i = 0;
709 
710 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
711 			check_mutually_exclusive_flags(s[i], mef_flags);
712 			if (s[i] == 'b') {
713 				if (second)
714 					ks->pos2b = true;
715 				else
716 					ks->pos1b = true;
717 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
718 				goto end;
719 		}
720 	}
721 
722 	ret = 0;
723 
724 end:
725 
726 	if (c)
727 		sort_free(c);
728 	if (f)
729 		sort_free(f);
730 	regfree(&re);
731 
732 	return (ret);
733 }
734 
735 /*
736  * Parse -k option value.
737  */
738 static int
739 parse_k(const char *s, struct key_specs *ks)
740 {
741 	int ret = -1;
742 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
743 	    { false, false, false, false, false, false };
744 
745 	if (s && *s) {
746 		char *sptr;
747 
748 		sptr = strchr(s, ',');
749 		if (sptr) {
750 			size_t size1;
751 			char *pos1, *pos2;
752 
753 			size1 = sptr - s;
754 
755 			if (size1 < 1)
756 				return (-1);
757 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
758 
759 			strncpy(pos1, s, size1);
760 			pos1[size1] = '\0';
761 
762 			ret = parse_pos(pos1, ks, mef_flags, false);
763 
764 			sort_free(pos1);
765 			if (ret < 0)
766 				return (ret);
767 
768 			pos2 = sort_strdup(sptr + 1);
769 			ret = parse_pos(pos2, ks, mef_flags, true);
770 			sort_free(pos2);
771 		} else
772 			ret = parse_pos(s, ks, mef_flags, false);
773 	}
774 
775 	return (ret);
776 }
777 
/*
 * Parse POS in +POS -POS option.
 *
 * 's' must have the form FIELD[.COLUMN][LETTERS].  On success *nf and
 * *nc receive the field and column numbers (0 when the column is absent)
 * and any trailing letters are copied, NUL-terminated, into 'sopts'.
 * When there are no letters 'sopts' is left untouched.
 *
 * 'sopts' must provide room for at least 128 bytes; a letter run that
 * would not fit makes the function fail instead of overrunning it.
 *
 * Returns 0 on success, -1 if 's' does not match.  A number that does
 * not fit (it must stay below INT_MAX so that callers can add one to it)
 * terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_MAX_LEN = 127 };	/* 128-byte buffer minus the NUL */
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The regex guarantees a pure digit run terminated by a non-digit,
	 * so the numbers can be parsed in place.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's buffer. */
		if (len > SOPTS_MAX_LEN)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
851 
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument "+POS1" (optionally followed by "-POS2") that
 * parse_pos_obs() accepts is replaced in argv by a newly allocated
 * "-kF1.C1[mods][,F2.C2[mods]]" string; when a "-POS2" argument is
 * consumed the remaining arguments are shifted down and *argc is
 * decremented.  The obsolete form counts fields and columns from 0,
 * hence the +1 adjustments.  Scanning stops at "--" so that operands
 * after it are left alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Big enough for "-k" + four ints + two 127-character modifier
	 * strings + separators; snprintf() keeps us safe regardless.
	 */
	char sopt[512];
	int n;

	for (int i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char sopts1[128];
		int c1, f1;

		/* Everything after "--" is an operand, not a key. */
		if (strcmp(arg1, "--") == 0)
			break;

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Guard the increments below against signed overflow. */
		if (f1 < 0 || c1 < 0 || f1 == INT_MAX || c1 == INT_MAX)
			errx(2, "%s", getstr(11));
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[128];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1, &f2, &c2,
				    sopts2) >= 0) {
					if (f2 < 0 || c2 < 0 || f2 == INT_MAX)
						errx(2, "%s", getstr(11));
					if (c2 > 0)
						f2 += 1;
					n = snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					if (n < 0 || (size_t)n >= sizeof(sopt))
						errx(2, "%s", getstr(11));
					argv[i] = sort_strdup(sopt);

					/* Drop the consumed -POS2 argument. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		/* Lone +POS1: keep its modifiers in the translated key. */
		n = snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "%s", getstr(11));
		argv[i] = sort_strdup(sopt);
	}
}
908 
909 /*
910  * Set random seed
911  */
912 static void
913 set_random_seed(void)
914 {
915 	if (need_random) {
916 
917 		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
918 			FILE* fseed;
919 			MD5_CTX ctx;
920 			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
921 			size_t sz = 0;
922 
923 			fseed = openfile(random_source, "r");
924 			while (!feof(fseed)) {
925 				int cr;
926 
927 				cr = fgetc(fseed);
928 				if (cr == EOF)
929 					break;
930 
931 				rsd[sz++] = (char) cr;
932 
933 				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
934 					break;
935 			}
936 
937 			closefile(fseed, random_source);
938 
939 			MD5Init(&ctx);
940 			MD5Update(&ctx, rsd, sz);
941 
942 			random_seed = MD5End(&ctx, NULL);
943 			random_seed_size = strlen(random_seed);
944 
945 		} else {
946 			MD5_CTX ctx;
947 			char *b;
948 
949 			MD5Init(&ctx);
950 			b = MD5File(random_source, NULL);
951 			if (b == NULL)
952 				err(2, NULL);
953 
954 			random_seed = b;
955 			random_seed_size = strlen(b);
956 		}
957 
958 		MD5Init(&md5_ctx);
959 		if(random_seed_size>0) {
960 			MD5Update(&md5_ctx, random_seed, random_seed_size);
961 		}
962 	}
963 }
964 
965 /*
966  * Main function.
967  */
968 int
969 main(int argc, char **argv)
970 {
971 	char *outfile, *real_outfile;
972 	int c, result;
973 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
974 	    { false, false, false, false, false, false };
975 
976 	result = 0;
977 	outfile = sort_strdup("-");
978 	real_outfile = NULL;
979 
980 	struct sort_mods *sm = &default_sort_mods_object;
981 
982 	init_tmp_files();
983 
984 	set_signal_handler();
985 
986 	set_hw_params();
987 	set_locale();
988 	set_tmpdir();
989 	set_sort_opts();
990 
991 #if 0
992 	{
993 		static int counter = 0;
994 		char fn[128];
995 		sprintf(fn, "/var/tmp/debug.sort.%d", counter++);
996 		FILE* f = fopen(fn, "w");
997 		fprintf(f, ">>sort>>");
998 		for (int i = 0; i < argc; i++) {
999 			fprintf(f, "<%s>", argv[i]);
1000 		}
1001 		fprintf(f, "<<sort<<\n");
1002 		fclose(f);
1003 	}
1004 #endif
1005 
1006 	fix_obsolete_keys(&argc, argv);
1007 
1008 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1009 	    != -1)) {
1010 
1011 		check_mutually_exclusive_flags(c, mef_flags);
1012 
1013 		if (!set_sort_modifier(sm, c)) {
1014 
1015 			switch (c) {
1016 			case 'c':
1017 				sort_opts_vals.cflag = true;
1018 				if (optarg) {
1019 					if (!strcmp(optarg, "diagnose-first"))
1020 						;
1021 					else if (!strcmp(optarg, "silent") ||
1022 					    !strcmp(optarg, "quiet"))
1023 						sort_opts_vals.csilentflag = true;
1024 					else if (*optarg)
1025 						unknown(optarg);
1026 				}
1027 				break;
1028 			case 'C':
1029 				sort_opts_vals.cflag = true;
1030 				sort_opts_vals.csilentflag = true;
1031 				break;
1032 			case 'k':
1033 			{
1034 				sort_opts_vals.complex_sort = true;
1035 				sort_opts_vals.kflag = true;
1036 
1037 				keys_num++;
1038 				keys = sort_realloc(keys, keys_num *
1039 				    sizeof(struct key_specs));
1040 				memset(&(keys[keys_num - 1]), 0,
1041 				    sizeof(struct key_specs));
1042 
1043 				if (parse_k(optarg, &(keys[keys_num - 1]))
1044 				    < 0) {
1045 					errx(2, "%s: -k %s\n",
1046 					    strerror(EINVAL), optarg);
1047 				}
1048 
1049 				break;
1050 			}
1051 			case 'm':
1052 				sort_opts_vals.mflag = true;
1053 				break;
1054 			case 'o':
1055 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1056 				strcpy(outfile, optarg);
1057 				break;
1058 			case 's':
1059 				sort_opts_vals.sflag = true;
1060 				break;
1061 			case 'S':
1062 				available_free_memory =
1063 				    parse_memory_buffer_value(optarg);
1064 				break;
1065 			case 'T':
1066 				tmpdir = sort_strdup(optarg);
1067 				break;
1068 			case 't':
1069 				while (strlen(optarg) > 1) {
1070 					if (optarg[0] != '\\') {
1071 						errx(2, "%s: %s\n",
1072 						    strerror(EINVAL), optarg);
1073 					}
1074 					optarg += 1;
1075 					if (*optarg == '0') {
1076 						*optarg = 0;
1077 						break;
1078 					}
1079 				}
1080 				sort_opts_vals.tflag = true;
1081 				sort_opts_vals.field_sep = btowc(optarg[0]);
1082 				if (sort_opts_vals.field_sep == WEOF) {
1083 					errno = EINVAL;
1084 					err(2, NULL);
1085 				}
1086 				if (!gnusort_numeric_compatibility) {
1087 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1088 						symbol_decimal_point = WEOF;
1089 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1090 						symbol_thousands_sep = WEOF;
1091 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1092 						symbol_negative_sign = WEOF;
1093 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1094 						symbol_positive_sign = WEOF;
1095 				}
1096 				break;
1097 			case 'u':
1098 				sort_opts_vals.uflag = true;
1099 				/* stable sort for the correct unique val */
1100 				sort_opts_vals.sflag = true;
1101 				break;
1102 			case 'z':
1103 				sort_opts_vals.zflag = true;
1104 				break;
1105 			case SORT_OPT:
1106 				if (optarg) {
1107 					if (!strcmp(optarg, "general-numeric"))
1108 						set_sort_modifier(sm, 'g');
1109 					else if (!strcmp(optarg, "human-numeric"))
1110 						set_sort_modifier(sm, 'h');
1111 					else if (!strcmp(optarg, "numeric"))
1112 						set_sort_modifier(sm, 'n');
1113 					else if (!strcmp(optarg, "month"))
1114 						set_sort_modifier(sm, 'M');
1115 					else if (!strcmp(optarg, "random"))
1116 						set_sort_modifier(sm, 'R');
1117 					else
1118 						unknown(optarg);
1119 				}
1120 				break;
1121 #if defined(SORT_THREADS)
1122 			case NTHREADS_OPT:
1123 				nthreads = (size_t)(atoi(optarg));
1124 				if (nthreads < 1)
1125 					nthreads = 1;
1126 				if (nthreads > 1024)
1127 					nthreads = 1024;
1128 				break;
1129 #endif
1130 			case QSORT_OPT:
1131 				sort_opts_vals.sort_method = SORT_QSORT;
1132 				break;
1133 			case MERGESORT_OPT:
1134 				sort_opts_vals.sort_method = SORT_MERGESORT;
1135 				break;
1136 			case MMAP_OPT:
1137 				use_mmap = true;
1138 				break;
1139 			case HEAPSORT_OPT:
1140 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1141 				break;
1142 			case RADIXSORT_OPT:
1143 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1144 				break;
1145 			case RANDOMSOURCE_OPT:
1146 				random_source = strdup(optarg);
1147 				break;
1148 			case COMPRESSPROGRAM_OPT:
1149 				compress_program = strdup(optarg);
1150 				break;
1151 			case FF_OPT:
1152 				read_fns_from_file0(optarg);
1153 				break;
1154 			case BS_OPT:
1155 			{
1156 				errno = 0;
1157 				long mof = strtol(optarg, NULL, 10);
1158 				if (errno != 0)
1159 					errx(2, "--batch-size: %s",
1160 					    strerror(errno));
1161 				if (mof >= 2)
1162 					max_open_files = (size_t) mof + 1;
1163 			}
1164 				break;
1165 			case VERSION_OPT:
1166 				printf("%s\n", VERSION);
1167 				exit(EXIT_SUCCESS);
1168 				/* NOTREACHED */
1169 				break;
1170 			case DEBUG_OPT:
1171 				debug_sort = true;
1172 				break;
1173 			case HELP_OPT:
1174 				usage(false);
1175 				/* NOTREACHED */
1176 				break;
1177 			default:
1178 				usage(true);
1179 				/* NOTREACHED */
1180 			}
1181 		}
1182 	}
1183 
1184 	argc -= optind;
1185 	argv += optind;
1186 
1187 #ifndef WITHOUT_NLS
1188 	catalog = catopen("sort", NL_CAT_LOCALE);
1189 #endif
1190 
1191 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1192 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1193 
1194 #ifndef WITHOUT_NLS
1195 	catclose(catalog);
1196 #endif
1197 
1198 	if (keys_num == 0) {
1199 		keys_num = 1;
1200 		keys = sort_realloc(keys, sizeof(struct key_specs));
1201 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1202 		keys[0].c1 = 1;
1203 		keys[0].pos1b = default_sort_mods->bflag;
1204 		keys[0].pos2b = default_sort_mods->bflag;
1205 		memcpy(&(keys[0].sm), default_sort_mods,
1206 		    sizeof(struct sort_mods));
1207 	}
1208 
1209 	for (size_t i = 0; i < keys_num; i++) {
1210 		struct key_specs *ks;
1211 
1212 		ks = &(keys[i]);
1213 
1214 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1215 		    !(ks->pos2b)) {
1216 			ks->pos1b = sm->bflag;
1217 			ks->pos2b = sm->bflag;
1218 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1219 		}
1220 
1221 		ks->sm.func = get_sort_func(&(ks->sm));
1222 	}
1223 
1224 	if (argc_from_file0 >= 0) {
1225 		argc = argc_from_file0;
1226 		argv = argv_from_file0;
1227 	}
1228 
1229 	if (debug_sort) {
1230 #if defined(SORT_THREADS)
1231 		nthreads = 1;
1232 #endif
1233 		printf("Using collate rules of %s locale\n",
1234 		    setlocale(LC_COLLATE, NULL));
1235 		if (byte_sort)
1236 			printf("Byte sort is used\n");
1237 		if (print_symbols_on_debug) {
1238 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1239 			if (symbol_thousands_sep)
1240 				printf("Thousands separator: <%lc>\n",
1241 				    symbol_thousands_sep);
1242 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1243 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1244 		}
1245 	}
1246 
1247 	set_random_seed();
1248 
1249 	/* Case when the outfile equals one of the input files: */
1250 	if (strcmp(outfile, "-")) {
1251 
1252 		for(int i = 0; i < argc; ++i) {
1253 			if (strcmp(argv[i], outfile) == 0) {
1254 				real_outfile = sort_strdup(outfile);
1255 				for(;;) {
1256 					char* tmp = sort_malloc(strlen(outfile) +
1257 					    strlen(".tmp") + 1);
1258 
1259 					strcpy(tmp, outfile);
1260 					strcpy(tmp + strlen(tmp), ".tmp");
1261 					sort_free(outfile);
1262 					outfile = tmp;
1263 					if (access(outfile, F_OK) < 0)
1264 						break;
1265 				}
1266 				tmp_file_atexit(outfile);
1267 			}
1268 		}
1269 	}
1270 
1271 #if defined(SORT_THREADS)
1272 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1273 		nthreads = 1;
1274 #endif
1275 
1276 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1277 		struct file_list fl;
1278 		struct sort_list list;
1279 
1280 		sort_list_init(&list);
1281 		file_list_init(&fl, true);
1282 
1283 		if (argc < 1)
1284 			procfile("-", &list, &fl);
1285 		else {
1286 			while (argc > 0) {
1287 				procfile(*argv, &list, &fl);
1288 				--argc;
1289 				++argv;
1290 			}
1291 		}
1292 
1293 		if (fl.count < 1)
1294 			sort_list_to_file(&list, outfile);
1295 		else {
1296 			if (list.count > 0) {
1297 				char *flast = new_tmp_file_name();
1298 
1299 				sort_list_to_file(&list, flast);
1300 				file_list_add(&fl, flast, false);
1301 			}
1302 			merge_files(&fl, outfile);
1303 		}
1304 
1305 		file_list_clean(&fl);
1306 
1307 		/*
1308 		 * We are about to exit the program, so we can ignore
1309 		 * the clean-up for speed
1310 		 *
1311 		 * sort_list_clean(&list);
1312 		 */
1313 
1314 	} else if (sort_opts_vals.cflag) {
1315 		result = (argc == 0) ? (check("-")) : (check(*argv));
1316 	} else if (sort_opts_vals.mflag) {
1317 		struct file_list fl;
1318 
1319 		file_list_init(&fl, false);
1320 		file_list_populate(&fl, argc, argv, true);
1321 		merge_files(&fl, outfile);
1322 		file_list_clean(&fl);
1323 	}
1324 
1325 	if (real_outfile) {
1326 		unlink(real_outfile);
1327 		if (rename(outfile, real_outfile) < 0)
1328 			err(2, NULL);
1329 		sort_free(real_outfile);
1330 	}
1331 
1332 	sort_free(outfile);
1333 
1334 	return (result);
1335 }
1336