xref: /freebsd/usr.bin/sort/coll.c (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 
35 #include <errno.h>
36 #include <err.h>
37 #include <langinfo.h>
38 #include <limits.h>
39 #include <math.h>
40 #include <md5.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 #include <wctype.h>
45 
46 #include "coll.h"
47 #include "vsort.h"
48 
49 struct key_specs *keys;
50 size_t keys_num = 0;
51 
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57 
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
64 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
65 
66 /*
67  * Allocate keys array
68  */
69 struct keys_array *
70 keys_array_alloc(void)
71 {
72 	struct keys_array *ka;
73 	size_t sz;
74 
75 	sz = keys_array_size();
76 	ka = sort_calloc(1, sz);
77 
78 	return (ka);
79 }
80 
81 /*
82  * Calculate whether we need key hint space
83  */
84 static size_t
85 key_hint_size(void)
86 {
87 
88 	return (need_hint ? sizeof(struct key_hint) : 0);
89 }
90 
91 /*
92  * Calculate keys array size
93  */
94 size_t
95 keys_array_size(void)
96 {
97 
98 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
99 }
100 
101 /*
102  * Clean data of keys array
103  */
104 void
105 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
106 {
107 
108 	if (ka) {
109 		for (size_t i = 0; i < keys_num; ++i) {
110 			const struct key_value *kv;
111 
112 			kv = get_key_from_keys_array(ka, i);
113 			if (kv->k && kv->k != s)
114 				bwsfree(kv->k);
115 		}
116 		memset(ka, 0, keys_array_size());
117 	}
118 }
119 
120 /*
121  * Get pointer to a key value in the keys set
122  */
123 struct key_value *
124 get_key_from_keys_array(struct keys_array *ka, size_t ind)
125 {
126 
127 	return ((struct key_value *)((caddr_t)ka->key +
128 	    ind * (sizeof(struct key_value) + key_hint_size())));
129 }
130 
131 /*
132  * Set value of a key in the keys set
133  */
134 void
135 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
136 {
137 
138 	if (ka && keys_num > ind) {
139 		struct key_value *kv;
140 
141 		kv = get_key_from_keys_array(ka, ind);
142 
143 		if (kv->k && kv->k != s)
144 			bwsfree(kv->k);
145 		kv->k = s;
146 	}
147 }
148 
149 /*
150  * Initialize a sort list item
151  */
152 struct sort_list_item *
153 sort_list_item_alloc(void)
154 {
155 	struct sort_list_item *si;
156 	size_t sz;
157 
158 	sz = sizeof(struct sort_list_item) + keys_array_size();
159 	si = sort_calloc(1, sz);
160 
161 	return (si);
162 }
163 
164 size_t
165 sort_list_item_size(struct sort_list_item *si)
166 {
167 	size_t ret = 0;
168 
169 	if (si) {
170 		ret = sizeof(struct sort_list_item) + keys_array_size();
171 		if (si->str)
172 			ret += bws_memsize(si->str);
173 		for (size_t i = 0; i < keys_num; ++i) {
174 			const struct key_value *kv;
175 
176 			kv = get_key_from_keys_array(&si->ka, i);
177 
178 			if (kv->k != si->str)
179 				ret += bws_memsize(kv->k);
180 		}
181 	}
182 	return (ret);
183 }
184 
185 /*
186  * Calculate key for a sort list item
187  */
188 static void
189 sort_list_item_make_key(struct sort_list_item *si)
190 {
191 
192 	preproc(si->str, &(si->ka));
193 }
194 
195 /*
196  * Set value of a sort list item.
197  * Return combined string and keys memory size.
198  */
199 void
200 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
201 {
202 
203 	if (si) {
204 		clean_keys_array(si->str, &(si->ka));
205 		if (si->str) {
206 			if (si->str == str) {
207 				/* we are trying to reset the same string */
208 				return;
209 			} else {
210 				bwsfree(si->str);
211 				si->str = NULL;
212 			}
213 		}
214 		si->str = str;
215 		sort_list_item_make_key(si);
216 	}
217 }
218 
219 /*
220  * De-allocate a sort list item object memory
221  */
222 void
223 sort_list_item_clean(struct sort_list_item *si)
224 {
225 
226 	if (si) {
227 		clean_keys_array(si->str, &(si->ka));
228 		if (si->str) {
229 			bwsfree(si->str);
230 			si->str = NULL;
231 		}
232 	}
233 }
234 
235 /*
236  * Skip columns according to specs
237  */
238 static size_t
239 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
240     bool skip_blanks, bool *empty_key)
241 {
242 	if (cols < 1)
243 		return (BWSLEN(s) + 1);
244 
245 	if (skip_blanks)
246 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
247 			++start;
248 
249 	while (start < BWSLEN(s) && cols > 1) {
250 		--cols;
251 		++start;
252 	}
253 
254 	if (start >= BWSLEN(s))
255 		*empty_key = true;
256 
257 	return (start);
258 }
259 
260 /*
261  * Skip fields according to specs
262  */
263 static size_t
264 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
265 {
266 
267 	if (fields < 2) {
268 		if (BWSLEN(s) == 0)
269 			*empty_field = true;
270 		return (0);
271 	} else if (!(sort_opts_vals.tflag)) {
272 		size_t cpos = 0;
273 		bool pb = true;
274 
275 		while (cpos < BWSLEN(s)) {
276 			bool isblank;
277 
278 			isblank = iswblank(BWS_GET(s, cpos));
279 
280 			if (isblank && !pb) {
281 				--fields;
282 				if (fields <= 1)
283 					return (cpos);
284 			}
285 			pb = isblank;
286 			++cpos;
287 		}
288 		if (fields > 1)
289 			*empty_field = true;
290 		return (cpos);
291 	} else {
292 		size_t cpos = 0;
293 
294 		while (cpos < BWSLEN(s)) {
295 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
296 				--fields;
297 				if (fields <= 1)
298 					return (cpos + 1);
299 			}
300 			++cpos;
301 		}
302 		if (fields > 1)
303 			*empty_field = true;
304 		return (cpos);
305 	}
306 }
307 
308 /*
309  * Find fields start
310  */
311 static void
312 find_field_start(const struct bwstring *s, struct key_specs *ks,
313     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
314 {
315 
316 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
317 	if (!*empty_field)
318 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
319 		    ks->pos1b, empty_key);
320 	else
321 		*empty_key = true;
322 }
323 
324 /*
325  * Find end key position
326  */
327 static size_t
328 find_field_end(const struct bwstring *s, struct key_specs *ks)
329 {
330 	size_t f2, next_field_start, pos_end;
331 	bool empty_field, empty_key;
332 
333 	empty_field = false;
334 	empty_key = false;
335 	f2 = ks->f2;
336 
337 	if (f2 == 0)
338 		return (BWSLEN(s) + 1);
339 	else {
340 		if (ks->c2 == 0) {
341 			next_field_start = skip_fields_to_start(s, f2 + 1,
342 			    &empty_field);
343 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
344 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
345 			    next_field_start - 1)))
346 				--next_field_start;
347 		} else
348 			next_field_start = skip_fields_to_start(s, f2,
349 			    &empty_field);
350 	}
351 
352 	if (empty_field || (next_field_start >= BWSLEN(s)))
353 		return (BWSLEN(s) + 1);
354 
355 	if (ks->c2) {
356 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
357 		    ks->pos2b, &empty_key);
358 		if (pos_end < BWSLEN(s))
359 			++pos_end;
360 	} else
361 		pos_end = next_field_start;
362 
363 	return (pos_end);
364 }
365 
366 /*
367  * Cut a field according to the key specs
368  */
369 static struct bwstring *
370 cut_field(const struct bwstring *s, struct key_specs *ks)
371 {
372 	struct bwstring *ret = NULL;
373 
374 	if (s && ks) {
375 		size_t field_start, key_end, key_start, sz;
376 		bool empty_field, empty_key;
377 
378 		field_start = 0;
379 		key_start = 0;
380 		empty_field = false;
381 		empty_key = false;
382 
383 		find_field_start(s, ks, &field_start, &key_start,
384 		    &empty_field, &empty_key);
385 
386 		if (empty_key)
387 			sz = 0;
388 		else {
389 			key_end = find_field_end(s, ks);
390 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
391 		}
392 
393 		ret = bwsalloc(sz);
394 		if (sz)
395 			bwsnocpy(ret, s, key_start, sz);
396 	} else
397 		ret = bwsalloc(0);
398 
399 	return (ret);
400 }
401 
402 /*
403  * Preprocesses a line applying the necessary transformations
404  * specified by command line options and returns the preprocessed
405  * string, which can be used to compare.
406  */
407 int
408 preproc(struct bwstring *s, struct keys_array *ka)
409 {
410 
411 	if (sort_opts_vals.kflag)
412 		for (size_t i = 0; i < keys_num; i++) {
413 			struct bwstring *key;
414 			struct key_specs *kspecs;
415 			struct sort_mods *sm;
416 
417 			kspecs = &(keys[i]);
418 			key = cut_field(s, kspecs);
419 
420 			sm = &(kspecs->sm);
421 			if (sm->dflag)
422 				key = dictionary_order(key);
423 			else if (sm->iflag)
424 				key = ignore_nonprinting(key);
425 			if (sm->fflag || sm->Mflag)
426 				key = ignore_case(key);
427 
428 			set_key_on_keys_array(ka, key, i);
429 		}
430 	else {
431 		struct bwstring *ret = NULL;
432 		struct sort_mods *sm = default_sort_mods;
433 
434 		if (sm->bflag) {
435 			if (ret == NULL)
436 				ret = bwsdup(s);
437 			ret = ignore_leading_blanks(ret);
438 		}
439 		if (sm->dflag) {
440 			if (ret == NULL)
441 				ret = bwsdup(s);
442 			ret = dictionary_order(ret);
443 		} else if (sm->iflag) {
444 			if (ret == NULL)
445 				ret = bwsdup(s);
446 			ret = ignore_nonprinting(ret);
447 		}
448 		if (sm->fflag || sm->Mflag) {
449 			if (ret == NULL)
450 				ret = bwsdup(s);
451 			ret = ignore_case(ret);
452 		}
453 		if (ret == NULL)
454 			set_key_on_keys_array(ka, s, 0);
455 		else
456 			set_key_on_keys_array(ka, ret, 0);
457 	}
458 
459 	return 0;
460 }
461 
462 cmpcoll_t
463 get_sort_func(struct sort_mods *sm)
464 {
465 
466 	if (sm->nflag)
467 		return (numcoll);
468 	else if (sm->hflag)
469 		return (hnumcoll);
470 	else if (sm->gflag)
471 		return (gnumcoll);
472 	else if (sm->Mflag)
473 		return (monthcoll);
474 	else if (sm->Rflag)
475 		return (randomcoll);
476 	else if (sm->Vflag)
477 		return (versioncoll);
478 	else
479 		return (wstrcoll);
480 }
481 
482 /*
483  * Compares the given strings.  Returns a positive number if
484  * the first precedes the second, a negative number if the second is
485  * the preceding one, and zero if they are equal.  This function calls
486  * the underlying collate functions, which done the actual comparison.
487  */
488 int
489 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
490 {
491 	struct key_value *kv1, *kv2;
492 	struct sort_mods *sm;
493 	int res = 0;
494 
495 	for (size_t i = 0; i < keys_num; ++i) {
496 		kv1 = get_key_from_keys_array(ps1, i);
497 		kv2 = get_key_from_keys_array(ps2, i);
498 		sm = &(keys[i].sm);
499 
500 		if (sm->rflag)
501 			res = sm->func(kv2, kv1, offset);
502 		else
503 			res = sm->func(kv1, kv2, offset);
504 
505 		if (res)
506 			break;
507 
508 		/* offset applies to only the first key */
509 		offset = 0;
510 	}
511 	return (res);
512 }
513 
514 /*
515  * Compare two strings.
516  * Plain symbol-by-symbol comparison.
517  */
518 int
519 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
520 {
521 
522 	if (default_sort_mods->rflag) {
523 		const struct bwstring *tmp;
524 
525 		tmp = s1;
526 		s1 = s2;
527 		s2 = tmp;
528 	}
529 
530 	return (bwscoll(s1, s2, 0));
531 }
532 
533 /*
534  * Compare a string and a sort list item, according to the sort specs.
535  */
536 int
537 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
538 {
539 	struct keys_array *ka1;
540 	int ret = 0;
541 
542 	ka1 = keys_array_alloc();
543 
544 	preproc(str1, ka1);
545 
546 	sort_list_item_make_key(*ss2);
547 
548 	if (debug_sort) {
549 		bwsprintf(stdout, str1, "; s1=<", ">");
550 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
551 	}
552 
553 	ret = key_coll(ka1, &((*ss2)->ka), 0);
554 
555 	if (debug_sort)
556 		printf("; cmp1=%d", ret);
557 
558 	clean_keys_array(str1, ka1);
559 	sort_free(ka1);
560 
561 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
562 		ret = top_level_str_coll(str1, ((*ss2)->str));
563 		if (debug_sort)
564 			printf("; cmp2=%d", ret);
565 	}
566 
567 	if (debug_sort)
568 		printf("\n");
569 
570 	return (ret);
571 }
572 
573 /*
574  * Compare two sort list items, according to the sort specs.
575  */
576 int
577 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
578     size_t offset)
579 {
580 	int ret;
581 
582 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
583 
584 	if (debug_sort) {
585 		if (offset)
586 			printf("; offset=%d", (int) offset);
587 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
588 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
589 		printf("; cmp1=%d\n", ret);
590 	}
591 
592 	if (ret)
593 		return (ret);
594 
595 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
596 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
597 		if (debug_sort)
598 			printf("; cmp2=%d\n", ret);
599 	}
600 
601 	return (ret);
602 }
603 
604 /*
605  * Compare two sort list items, according to the sort specs.
606  */
607 int
608 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
609 {
610 
611 	return (list_coll_offset(ss1, ss2, 0));
612 }
613 
614 #define	LSCDEF(N)							\
615 static int 								\
616 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
617 {									\
618 									\
619 	return (list_coll_offset(ss1, ss2, N));				\
620 }
621 
622 LSCDEF(1)
623 LSCDEF(2)
624 LSCDEF(3)
625 LSCDEF(4)
626 LSCDEF(5)
627 LSCDEF(6)
628 LSCDEF(7)
629 LSCDEF(8)
630 LSCDEF(9)
631 LSCDEF(10)
632 LSCDEF(11)
633 LSCDEF(12)
634 LSCDEF(13)
635 LSCDEF(14)
636 LSCDEF(15)
637 LSCDEF(16)
638 LSCDEF(17)
639 LSCDEF(18)
640 LSCDEF(19)
641 LSCDEF(20)
642 
643 listcoll_t
644 get_list_call_func(size_t offset)
645 {
646 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
647 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
648 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
649 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
650 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
651 	    list_coll_18, list_coll_19, list_coll_20 };
652 
653 	if (offset <= 20)
654 		return (lsarray[offset]);
655 
656 	return (list_coll);
657 }
658 
659 /*
660  * Compare two sort list items, only by their original string.
661  */
662 int
663 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
664 {
665 
666 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
667 }
668 
669 /*
670  * Maximum size of a number in the string (before or after decimal point)
671  */
672 #define MAX_NUM_SIZE (128)
673 
674 /*
675  * Set suffix value
676  */
677 static void setsuffix(wchar_t c, unsigned char *si)
678 {
679 	switch (c){
680 	case L'k':
681 	case L'K':
682 		*si = 1;
683 		break;
684 	case L'M':
685 		*si = 2;
686 		break;
687 	case L'G':
688 		*si = 3;
689 		break;
690 	case L'T':
691 		*si = 4;
692 		break;
693 	case L'P':
694 		*si = 5;
695 		break;
696 	case L'E':
697 		*si = 6;
698 		break;
699 	case L'Z':
700 		*si = 7;
701 		break;
702 	case L'Y':
703 		*si = 8;
704 		break;
705 	default:
706 		*si = 0;
707 	}
708 }
709 
710 /*
711  * Read string s and parse the string into a fixed-decimal-point number.
712  * sign equals -1 if the number is negative (explicit plus is not allowed,
713  * according to GNU sort's "info sort".
714  * The number part before decimal point is in the smain, after the decimal
715  * point is in sfrac, tail is the pointer to the remainder of the string.
716  */
717 static int
718 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
719 {
720 	bwstring_iterator s;
721 
722 	s = bws_begin(s0);
723 
724 	/* always end the fraction with zero, even if we have no fraction */
725 	sfrac[0] = 0;
726 
727 	while (iswblank(bws_get_iter_value(s)))
728 		s = bws_iterator_inc(s, 1);
729 
730 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
731 		*sign = -1;
732 		s = bws_iterator_inc(s, 1);
733 	}
734 
735 	// This is '0', not '\0', do not change this
736 	while (iswdigit(bws_get_iter_value(s)) &&
737 	    (bws_get_iter_value(s) == L'0'))
738 		s = bws_iterator_inc(s, 1);
739 
740 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
741 		if (iswdigit(bws_get_iter_value(s))) {
742 			smain[*main_len] = bws_get_iter_value(s);
743 			s = bws_iterator_inc(s, 1);
744 			*main_len += 1;
745 		} else if (symbol_thousands_sep &&
746 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
747 			s = bws_iterator_inc(s, 1);
748 		else
749 			break;
750 	}
751 
752 	smain[*main_len] = 0;
753 
754 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
755 		s = bws_iterator_inc(s, 1);
756 		while (iswdigit(bws_get_iter_value(s)) &&
757 		    *frac_len < MAX_NUM_SIZE) {
758 			sfrac[*frac_len] = bws_get_iter_value(s);
759 			s = bws_iterator_inc(s, 1);
760 			*frac_len += 1;
761 		}
762 		sfrac[*frac_len] = 0;
763 
764 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
765 			--(*frac_len);
766 			sfrac[*frac_len] = L'\0';
767 		}
768 	}
769 
770 	setsuffix(bws_get_iter_value(s),si);
771 
772 	if ((*main_len + *frac_len) == 0)
773 		*sign = 0;
774 
775 	return (0);
776 }
777 
778 /*
779  * Implements string sort.
780  */
781 static int
782 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
783 {
784 
785 	if (debug_sort) {
786 		if (offset)
787 			printf("; offset=%d\n", (int) offset);
788 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
789 		printf("(%zu)", BWSLEN(kv1->k));
790 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
791 		printf("(%zu)", BWSLEN(kv2->k));
792 	}
793 
794 	return (bwscoll(kv1->k, kv2->k, offset));
795 }
796 
797 /*
798  * Compare two suffixes
799  */
800 static inline int
801 cmpsuffix(unsigned char si1, unsigned char si2)
802 {
803 
804 	return ((char)si1 - (char)si2);
805 }
806 
807 /*
808  * Implements numeric sort for -n and -h.
809  */
810 static int
811 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
812     size_t offset __unused, bool use_suffix)
813 {
814 	struct bwstring *s1, *s2;
815 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
816 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
817 	int cmp_res, sign1, sign2;
818 	size_t frac1, frac2, main1, main2;
819 	unsigned char SI1, SI2;
820 	bool e1, e2, key1_read, key2_read;
821 
822 	s1 = kv1->k;
823 	s2 = kv2->k;
824 	sign1 = sign2 = 0;
825 	main1 = main2 = 0;
826 	frac1 = frac2 = 0;
827 
828 	key1_read = key2_read = false;
829 
830 	if (debug_sort) {
831 		bwsprintf(stdout, s1, "; k1=<", ">");
832 		bwsprintf(stdout, s2, ", k2=<", ">");
833 	}
834 
835 	if (s1 == s2)
836 		return (0);
837 
838 	if (kv1->hint->status == HS_UNINITIALIZED) {
839 		/* read the number from the string */
840 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
841 		key1_read = true;
842 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
843 		if(main1 < 1 && frac1 < 1)
844 			kv1->hint->v.nh.empty=true;
845 		kv1->hint->v.nh.si = SI1;
846 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
847 		    HS_INITIALIZED : HS_ERROR;
848 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
849 	}
850 
851 	if (kv2->hint->status == HS_UNINITIALIZED) {
852 		/* read the number from the string */
853 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
854 		key2_read = true;
855 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
856 		if(main2 < 1 && frac2 < 1)
857 			kv2->hint->v.nh.empty=true;
858 		kv2->hint->v.nh.si = SI2;
859 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
860 		    HS_INITIALIZED : HS_ERROR;
861 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
862 	}
863 
864 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
865 	    HS_INITIALIZED) {
866 		unsigned long long n1, n2;
867 		bool neg1, neg2;
868 
869 		e1 = kv1->hint->v.nh.empty;
870 		e2 = kv2->hint->v.nh.empty;
871 
872 		if (e1 && e2)
873 			return (0);
874 
875 		neg1 = kv1->hint->v.nh.neg;
876 		neg2 = kv2->hint->v.nh.neg;
877 
878 		if (neg1 && !neg2)
879 			return (-1);
880 		if (neg2 && !neg1)
881 			return (+1);
882 
883 		if (e1)
884 			return (neg2 ? +1 : -1);
885 		else if (e2)
886 			return (neg1 ? -1 : +1);
887 
888 
889 		if (use_suffix) {
890 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
891 			if (cmp_res)
892 				return (neg1 ? -cmp_res : cmp_res);
893 		}
894 
895 		n1 = kv1->hint->v.nh.n1;
896 		n2 = kv2->hint->v.nh.n1;
897 		if (n1 < n2)
898 			return (neg1 ? +1 : -1);
899 		else if (n1 > n2)
900 			return (neg1 ? -1 : +1);
901 	}
902 
903 	/* read the numbers from the strings */
904 	if (!key1_read)
905 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
906 	if (!key2_read)
907 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
908 
909 	e1 = ((main1 + frac1) == 0);
910 	e2 = ((main2 + frac2) == 0);
911 
912 	if (e1 && e2)
913 		return (0);
914 
915 	/* we know the result if the signs are different */
916 	if (sign1 < 0 && sign2 >= 0)
917 		return (-1);
918 	if (sign1 >= 0 && sign2 < 0)
919 		return (+1);
920 
921 	if (e1)
922 		return ((sign2 < 0) ? +1 : -1);
923 	else if (e2)
924 		return ((sign1 < 0) ? -1 : +1);
925 
926 	if (use_suffix) {
927 		cmp_res = cmpsuffix(SI1, SI2);
928 		if (cmp_res)
929 			return ((sign1 < 0) ? -cmp_res : cmp_res);
930 	}
931 
932 	/* if both numbers are empty assume that the strings are equal */
933 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
934 		return (0);
935 
936 	/*
937 	 * if the main part is of different size, we know the result
938 	 * (because the leading zeros are removed)
939 	 */
940 	if (main1 < main2)
941 		cmp_res = -1;
942 	else if (main1 > main2)
943 		cmp_res = +1;
944 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
945 	else
946 		cmp_res = wcscmp(smain1, smain2);
947 
948 	/* check fraction */
949 	if (!cmp_res)
950 		cmp_res = wcscmp(sfrac1, sfrac2);
951 
952 	if (!cmp_res)
953 		return (0);
954 
955 	/* reverse result if the signs are negative */
956 	if (sign1 < 0 && sign2 < 0)
957 		cmp_res = -cmp_res;
958 
959 	return (cmp_res);
960 }
961 
962 /*
963  * Implements numeric sort (-n).
964  */
965 static int
966 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
967 {
968 
969 	return (numcoll_impl(kv1, kv2, offset, false));
970 }
971 
972 /*
973  * Implements 'human' numeric sort (-h).
974  */
975 static int
976 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
977 {
978 
979 	return (numcoll_impl(kv1, kv2, offset, true));
980 }
981 
982 /* Use hint space to memoize md5 computations, at least. */
983 static void
984 randomcoll_init_hint(struct key_value *kv, void *hash)
985 {
986 
987 	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
988 	kv->hint->status = HS_INITIALIZED;
989 }
990 
991 /*
992  * Implements random sort (-R).
993  */
994 static int
995 randomcoll(struct key_value *kv1, struct key_value *kv2,
996     size_t offset __unused)
997 {
998 	struct bwstring *s1, *s2;
999 	MD5_CTX ctx1, ctx2;
1000 	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
1001 	int cmp;
1002 
1003 	s1 = kv1->k;
1004 	s2 = kv2->k;
1005 
1006 	if (debug_sort) {
1007 		bwsprintf(stdout, s1, "; k1=<", ">");
1008 		bwsprintf(stdout, s2, ", k2=<", ">");
1009 	}
1010 
1011 	if (s1 == s2)
1012 		return (0);
1013 
1014 	if (kv1->hint->status == HS_INITIALIZED &&
1015 	    kv2->hint->status == HS_INITIALIZED) {
1016 		cmp = memcmp(kv1->hint->v.Rh.cached,
1017 		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1018 		if (cmp != 0)
1019 			return (cmp);
1020 	}
1021 
1022 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1023 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1024 
1025 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1026 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1027 
1028 	MD5Final(hash1, &ctx1);
1029 	MD5Final(hash2, &ctx2);
1030 
1031 	if (kv1->hint->status == HS_UNINITIALIZED)
1032 		randomcoll_init_hint(kv1, hash1);
1033 	if (kv2->hint->status == HS_UNINITIALIZED)
1034 		randomcoll_init_hint(kv2, hash2);
1035 
1036 	return (memcmp(hash1, hash2, sizeof(hash1)));
1037 }
1038 
1039 /*
1040  * Implements version sort (-V).
1041  */
1042 static int
1043 versioncoll(struct key_value *kv1, struct key_value *kv2,
1044     size_t offset __unused)
1045 {
1046 	struct bwstring *s1, *s2;
1047 
1048 	s1 = kv1->k;
1049 	s2 = kv2->k;
1050 
1051 	if (debug_sort) {
1052 		bwsprintf(stdout, s1, "; k1=<", ">");
1053 		bwsprintf(stdout, s2, ", k2=<", ">");
1054 	}
1055 
1056 	if (s1 == s2)
1057 		return (0);
1058 
1059 	return (vcmp(s1, s2));
1060 }
1061 
1062 /*
1063  * Check for minus infinity
1064  */
1065 static inline bool
1066 huge_minus(double d, int err1)
1067 {
1068 
1069 	if (err1 == ERANGE)
1070 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1071 			return (+1);
1072 
1073 	return (0);
1074 }
1075 
1076 /*
1077  * Check for plus infinity
1078  */
1079 static inline bool
1080 huge_plus(double d, int err1)
1081 {
1082 
1083 	if (err1 == ERANGE)
1084 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1085 			return (+1);
1086 
1087 	return (0);
1088 }
1089 
1090 /*
1091  * Check whether a function is a NAN
1092  */
1093 static bool
1094 is_nan(double d)
1095 {
1096 
1097 	return ((d == NAN) || (isnan(d)));
1098 }
1099 
1100 /*
1101  * Compare two NANs
1102  */
1103 static int
1104 cmp_nans(double d1, double d2)
1105 {
1106 
1107 	if (d1 < d2)
1108 		return (-1);
1109 	if (d1 > d2)
1110 		return (+1);
1111 	return (0);
1112 }
1113 
1114 /*
1115  * Implements general numeric sort (-g).
1116  */
1117 static int
1118 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1119     size_t offset __unused)
1120 {
1121 	double d1, d2;
1122 	int err1, err2;
1123 	bool empty1, empty2, key1_read, key2_read;
1124 
1125 	d1 = d2 = 0;
1126 	err1 = err2 = 0;
1127 	key1_read = key2_read = false;
1128 
1129 	if (debug_sort) {
1130 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1131 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1132 	}
1133 
1134 	if (kv1->hint->status == HS_UNINITIALIZED) {
1135 		errno = 0;
1136 		d1 = bwstod(kv1->k, &empty1);
1137 		err1 = errno;
1138 
1139 		if (empty1)
1140 			kv1->hint->v.gh.notnum = true;
1141 		else if (err1 == 0) {
1142 			kv1->hint->v.gh.d = d1;
1143 			kv1->hint->v.gh.nan = is_nan(d1);
1144 			kv1->hint->status = HS_INITIALIZED;
1145 		} else
1146 			kv1->hint->status = HS_ERROR;
1147 
1148 		key1_read = true;
1149 	}
1150 
1151 	if (kv2->hint->status == HS_UNINITIALIZED) {
1152 		errno = 0;
1153 		d2 = bwstod(kv2->k, &empty2);
1154 		err2 = errno;
1155 
1156 		if (empty2)
1157 			kv2->hint->v.gh.notnum = true;
1158 		else if (err2 == 0) {
1159 			kv2->hint->v.gh.d = d2;
1160 			kv2->hint->v.gh.nan = is_nan(d2);
1161 			kv2->hint->status = HS_INITIALIZED;
1162 		} else
1163 			kv2->hint->status = HS_ERROR;
1164 
1165 		key2_read = true;
1166 	}
1167 
1168 	if (kv1->hint->status == HS_INITIALIZED &&
1169 	    kv2->hint->status == HS_INITIALIZED) {
1170 		if (kv1->hint->v.gh.notnum)
1171 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1172 		else if (kv2->hint->v.gh.notnum)
1173 			return (+1);
1174 
1175 		if (kv1->hint->v.gh.nan)
1176 			return ((kv2->hint->v.gh.nan) ?
1177 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1178 			    -1);
1179 		else if (kv2->hint->v.gh.nan)
1180 			return (+1);
1181 
1182 		d1 = kv1->hint->v.gh.d;
1183 		d2 = kv2->hint->v.gh.d;
1184 
1185 		if (d1 < d2)
1186 			return (-1);
1187 		else if (d1 > d2)
1188 			return (+1);
1189 		else
1190 			return (0);
1191 	}
1192 
1193 	if (!key1_read) {
1194 		errno = 0;
1195 		d1 = bwstod(kv1->k, &empty1);
1196 		err1 = errno;
1197 	}
1198 
1199 	if (!key2_read) {
1200 		errno = 0;
1201 		d2 = bwstod(kv2->k, &empty2);
1202 		err2 = errno;
1203 	}
1204 
1205 	/* Non-value case: */
1206 	if (empty1)
1207 		return (empty2 ? 0 : -1);
1208 	else if (empty2)
1209 		return (+1);
1210 
1211 	/* NAN case */
1212 	if (is_nan(d1))
1213 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1214 	else if (is_nan(d2))
1215 		return (+1);
1216 
1217 	/* Infinities */
1218 	if (err1 == ERANGE || err2 == ERANGE) {
1219 		/* Minus infinity case */
1220 		if (huge_minus(d1, err1)) {
1221 			if (huge_minus(d2, err2)) {
1222 				if (d1 < d2)
1223 					return (-1);
1224 				if (d1 > d2)
1225 					return (+1);
1226 				return (0);
1227 			} else
1228 				return (-1);
1229 
1230 		} else if (huge_minus(d2, err2)) {
1231 			if (huge_minus(d1, err1)) {
1232 				if (d1 < d2)
1233 					return (-1);
1234 				if (d1 > d2)
1235 					return (+1);
1236 				return (0);
1237 			} else
1238 				return (+1);
1239 		}
1240 
1241 		/* Plus infinity case */
1242 		if (huge_plus(d1, err1)) {
1243 			if (huge_plus(d2, err2)) {
1244 				if (d1 < d2)
1245 					return (-1);
1246 				if (d1 > d2)
1247 					return (+1);
1248 				return (0);
1249 			} else
1250 				return (+1);
1251 		} else if (huge_plus(d2, err2)) {
1252 			if (huge_plus(d1, err1)) {
1253 				if (d1 < d2)
1254 					return (-1);
1255 				if (d1 > d2)
1256 					return (+1);
1257 				return (0);
1258 			} else
1259 				return (-1);
1260 		}
1261 	}
1262 
1263 	if (d1 < d2)
1264 		return (-1);
1265 	if (d1 > d2)
1266 		return (+1);
1267 
1268 	return (0);
1269 }
1270 
1271 /*
1272  * Implements month sort (-M).
1273  */
1274 static int
1275 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1276 {
1277 	int val1, val2;
1278 	bool key1_read, key2_read;
1279 
1280 	val1 = val2 = 0;
1281 	key1_read = key2_read = false;
1282 
1283 	if (debug_sort) {
1284 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1285 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1286 	}
1287 
1288 	if (kv1->hint->status == HS_UNINITIALIZED) {
1289 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1290 		key1_read = true;
1291 		kv1->hint->status = HS_INITIALIZED;
1292 	}
1293 
1294 	if (kv2->hint->status == HS_UNINITIALIZED) {
1295 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1296 		key2_read = true;
1297 		kv2->hint->status = HS_INITIALIZED;
1298 	}
1299 
1300 	if (kv1->hint->status == HS_INITIALIZED) {
1301 		val1 = kv1->hint->v.Mh.m;
1302 		key1_read = true;
1303 	}
1304 
1305 	if (kv2->hint->status == HS_INITIALIZED) {
1306 		val2 = kv2->hint->v.Mh.m;
1307 		key2_read = true;
1308 	}
1309 
1310 	if (!key1_read)
1311 		val1 = bws_month_score(kv1->k);
1312 	if (!key2_read)
1313 		val2 = bws_month_score(kv2->k);
1314 
1315 	if (val1 == val2) {
1316 		return (0);
1317 	}
1318 	if (val1 < val2)
1319 		return (-1);
1320 	return (+1);
1321 }
1322