xref: /freebsd/usr.bin/sort/coll.c (revision b1f92fa22938fe29ab7e53692ffe0ed7a0ecc4d0)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43 
44 #include "coll.h"
45 #include "vsort.h"
46 
47 struct key_specs *keys;
48 size_t keys_num = 0;
49 
50 wint_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wint_t symbol_thousands_sep = 0;
53 wint_t symbol_negative_sign = L'-';
54 wint_t symbol_positive_sign = L'+';
55 
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63 
64 /*
65  * Allocate keys array
66  */
67 struct keys_array *
68 keys_array_alloc(void)
69 {
70 	struct keys_array *ka;
71 	size_t sz;
72 
73 	sz = keys_array_size();
74 	ka = sort_malloc(sz);
75 	memset(ka, 0, sz);
76 
77 	return (ka);
78 }
79 
80 /*
81  * Calculate whether we need key hint space
82  */
83 static size_t
84 key_hint_size(void)
85 {
86 
87 	return (need_hint ? sizeof(struct key_hint) : 0);
88 }
89 
90 /*
91  * Calculate keys array size
92  */
93 size_t
94 keys_array_size(void)
95 {
96 
97 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98 }
99 
100 /*
101  * Clean data of keys array
102  */
103 void
104 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105 {
106 
107 	if (ka) {
108 		for (size_t i = 0; i < keys_num; ++i)
109 			if (ka->key[i].k && ka->key[i].k != s)
110 				bwsfree(ka->key[i].k);
111 		memset(ka, 0, keys_array_size());
112 	}
113 }
114 
115 /*
116  * Set value of a key in the keys set
117  */
118 void
119 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120 {
121 
122 	if (ka && keys_num > ind) {
123 		struct key_value *kv;
124 
125 		kv = &(ka->key[ind]);
126 
127 		if (kv->k && kv->k != s)
128 			bwsfree(kv->k);
129 		kv->k = s;
130 	}
131 }
132 
133 /*
134  * Initialize a sort list item
135  */
136 struct sort_list_item *
137 sort_list_item_alloc(void)
138 {
139 	struct sort_list_item *si;
140 	size_t sz;
141 
142 	sz = sizeof(struct sort_list_item) + keys_array_size();
143 	si = sort_malloc(sz);
144 	memset(si, 0, sz);
145 
146 	return (si);
147 }
148 
149 size_t
150 sort_list_item_size(struct sort_list_item *si)
151 {
152 	size_t ret = 0;
153 
154 	if (si) {
155 		ret = sizeof(struct sort_list_item) + keys_array_size();
156 		if (si->str)
157 			ret += bws_memsize(si->str);
158 		for (size_t i = 0; i < keys_num; ++i) {
159 			struct key_value *kv;
160 
161 			kv = &(si->ka.key[i]);
162 
163 			if (kv->k != si->str)
164 				ret += bws_memsize(kv->k);
165 		}
166 	}
167 	return (ret);
168 }
169 
170 /*
171  * Calculate key for a sort list item
172  */
173 static void
174 sort_list_item_make_key(struct sort_list_item *si)
175 {
176 
177 	preproc(si->str, &(si->ka));
178 }
179 
180 /*
181  * Set value of a sort list item.
182  * Return combined string and keys memory size.
183  */
184 void
185 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186 {
187 
188 	if (si) {
189 		clean_keys_array(si->str, &(si->ka));
190 		if (si->str) {
191 			if (si->str == str) {
192 				/* we are trying to reset the same string */
193 				return;
194 			} else {
195 				bwsfree(si->str);
196 				si->str = NULL;
197 			}
198 		}
199 		si->str = str;
200 		sort_list_item_make_key(si);
201 	}
202 }
203 
204 /*
205  * De-allocate a sort list item object memory
206  */
207 void
208 sort_list_item_clean(struct sort_list_item *si)
209 {
210 
211 	if (si) {
212 		clean_keys_array(si->str, &(si->ka));
213 		if (si->str) {
214 			bwsfree(si->str);
215 			si->str = NULL;
216 		}
217 	}
218 }
219 
220 /*
221  * Skip columns according to specs
222  */
223 static size_t
224 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225     bool skip_blanks, bool *empty_key)
226 {
227 	if (cols < 1)
228 		return (BWSLEN(s) + 1);
229 
230 	if (skip_blanks)
231 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232 			++start;
233 
234 	while (start < BWSLEN(s) && cols > 1) {
235 		--cols;
236 		++start;
237 	}
238 
239 	if (start >= BWSLEN(s))
240 		*empty_key = true;
241 
242 	return (start);
243 }
244 
245 /*
246  * Skip fields according to specs
247  */
248 static size_t
249 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250 {
251 
252 	if (fields < 2) {
253 		if (BWSLEN(s) == 0)
254 			*empty_field = true;
255 		return (0);
256 	} else if (!(sort_opts_vals.tflag)) {
257 		size_t cpos = 0;
258 		bool pb = true;
259 
260 		while (cpos < BWSLEN(s)) {
261 			bool isblank;
262 
263 			isblank = iswblank(BWS_GET(s, cpos));
264 
265 			if (isblank && !pb) {
266 				--fields;
267 				if (fields <= 1)
268 					return (cpos);
269 			}
270 			pb = isblank;
271 			++cpos;
272 		}
273 		if (fields > 1)
274 			*empty_field = true;
275 		return (cpos);
276 	} else {
277 		size_t cpos = 0;
278 
279 		while (cpos < BWSLEN(s)) {
280 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
281 				--fields;
282 				if (fields <= 1)
283 					return (cpos + 1);
284 			}
285 			++cpos;
286 		}
287 		if (fields > 1)
288 			*empty_field = true;
289 		return (cpos);
290 	}
291 }
292 
293 /*
294  * Find fields start
295  */
296 static void
297 find_field_start(const struct bwstring *s, struct key_specs *ks,
298     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299 {
300 
301 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
302 	if (!*empty_field)
303 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
304 		    ks->pos1b, empty_key);
305 	else
306 		*empty_key = true;
307 }
308 
309 /*
310  * Find end key position
311  */
312 static size_t
313 find_field_end(const struct bwstring *s, struct key_specs *ks)
314 {
315 	size_t f2, next_field_start, pos_end;
316 	bool empty_field, empty_key;
317 
318 	pos_end = 0;
319 	next_field_start = 0;
320 	empty_field = false;
321 	empty_key = false;
322 	f2 = ks->f2;
323 
324 	if (f2 == 0)
325 		return (BWSLEN(s) + 1);
326 	else {
327 		if (ks->c2 == 0) {
328 			next_field_start = skip_fields_to_start(s, f2 + 1,
329 			    &empty_field);
330 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
331 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
332 			    next_field_start - 1)))
333 				--next_field_start;
334 		} else
335 			next_field_start = skip_fields_to_start(s, f2,
336 			    &empty_field);
337 	}
338 
339 	if (empty_field || (next_field_start >= BWSLEN(s)))
340 		return (BWSLEN(s) + 1);
341 
342 	if (ks->c2) {
343 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344 		    ks->pos2b, &empty_key);
345 		if (pos_end < BWSLEN(s))
346 			++pos_end;
347 	} else
348 		pos_end = next_field_start;
349 
350 	return (pos_end);
351 }
352 
353 /*
354  * Cut a field according to the key specs
355  */
356 static struct bwstring *
357 cut_field(const struct bwstring *s, struct key_specs *ks)
358 {
359 	struct bwstring *ret = NULL;
360 
361 	if (s && ks) {
362 		size_t field_start, key_end, key_start, sz;
363 		bool empty_field, empty_key;
364 
365 		field_start = 0;
366 		key_start = 0;
367 		empty_field = false;
368 		empty_key = false;
369 
370 		find_field_start(s, ks, &field_start, &key_start,
371 		    &empty_field, &empty_key);
372 
373 		if (empty_key)
374 			sz = 0;
375 		else {
376 			key_end = find_field_end(s, ks);
377 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
378 		}
379 
380 		ret = bwsalloc(sz);
381 		if (sz)
382 			bwsnocpy(ret, s, key_start, sz);
383 	} else
384 		ret = bwsalloc(0);
385 
386 	return (ret);
387 }
388 
389 /*
390  * Preprocesses a line applying the necessary transformations
391  * specified by command line options and returns the preprocessed
392  * string, which can be used to compare.
393  */
394 int
395 preproc(struct bwstring *s, struct keys_array *ka)
396 {
397 
398 	if (sort_opts_vals.kflag)
399 		for (size_t i = 0; i < keys_num; i++) {
400 			struct bwstring *key;
401 			struct key_specs *kspecs;
402 			struct sort_mods *sm;
403 
404 			kspecs = &(keys[i]);
405 			key = cut_field(s, kspecs);
406 
407 			sm = &(kspecs->sm);
408 			if (sm->dflag)
409 				key = dictionary_order(key);
410 			else if (sm->iflag)
411 				key = ignore_nonprinting(key);
412 			if (sm->fflag || sm->Mflag)
413 				key = ignore_case(key);
414 
415 			set_key_on_keys_array(ka, key, i);
416 		}
417 	else {
418 		struct bwstring *ret = NULL;
419 		struct sort_mods *sm = default_sort_mods;
420 
421 		if (sm->bflag) {
422 			if (ret == NULL)
423 				ret = bwsdup(s);
424 			ret = ignore_leading_blanks(ret);
425 		}
426 		if (sm->dflag) {
427 			if (ret == NULL)
428 				ret = bwsdup(s);
429 			ret = dictionary_order(ret);
430 		} else if (sm->iflag) {
431 			if (ret == NULL)
432 				ret = bwsdup(s);
433 			ret = ignore_nonprinting(ret);
434 		}
435 		if (sm->fflag || sm->Mflag) {
436 			if (ret == NULL)
437 				ret = bwsdup(s);
438 			ret = ignore_case(ret);
439 		}
440 		if (ret == NULL)
441 			set_key_on_keys_array(ka, s, 0);
442 		else
443 			set_key_on_keys_array(ka, ret, 0);
444 	}
445 
446 	return 0;
447 }
448 
449 cmpcoll_t
450 get_sort_func(struct sort_mods *sm)
451 {
452 
453 	if (sm->nflag)
454 		return (numcoll);
455 	else if (sm->hflag)
456 		return (hnumcoll);
457 	else if (sm->gflag)
458 		return (gnumcoll);
459 	else if (sm->Mflag)
460 		return (monthcoll);
461 	else if (sm->Rflag)
462 		return (randomcoll);
463 	else if (sm->Vflag)
464 		return (versioncoll);
465 	else
466 		return (wstrcoll);
467 }
468 
469 /*
470  * Compares the given strings.  Returns a positive number if
471  * the first precedes the second, a negative number if the second is
472  * the preceding one, and zero if they are equal.  This function calls
473  * the underlying collate functions, which done the actual comparison.
474  */
475 int
476 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477 {
478 	struct sort_mods *sm;
479 	int res = 0;
480 
481 	for (size_t i = 0; i < keys_num; ++i) {
482 		sm = &(keys[i].sm);
483 
484 		if (sm->rflag)
485 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486 		else
487 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488 
489 		if (res)
490 			break;
491 
492 		/* offset applies to only the first key */
493 		offset = 0;
494 	}
495 	return (res);
496 }
497 
498 /*
499  * Compare two strings.
500  * Plain symbol-by-symbol comparison.
501  */
502 int
503 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504 {
505 
506 	if (default_sort_mods->rflag) {
507 		const struct bwstring *tmp;
508 
509 		tmp = s1;
510 		s1 = s2;
511 		s2 = tmp;
512 	}
513 
514 	return (bwscoll(s1, s2, 0));
515 }
516 
517 /*
518  * Compare a string and a sort list item, according to the sort specs.
519  */
520 int
521 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522 {
523 	struct keys_array *ka1;
524 	int ret = 0;
525 
526 	ka1 = keys_array_alloc();
527 
528 	preproc(str1, ka1);
529 
530 	sort_list_item_make_key(*ss2);
531 
532 	if (debug_sort) {
533 		bwsprintf(stdout, str1, "; s1=<", ">");
534 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535 	}
536 
537 	ret = key_coll(ka1, &((*ss2)->ka), 0);
538 
539 	if (debug_sort)
540 		printf("; cmp1=%d", ret);
541 
542 	clean_keys_array(str1, ka1);
543 	sort_free(ka1);
544 
545 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546 		ret = top_level_str_coll(str1, ((*ss2)->str));
547 		if (debug_sort)
548 			printf("; cmp2=%d", ret);
549 	}
550 
551 	if (debug_sort)
552 		printf("\n");
553 
554 	return (ret);
555 }
556 
557 /*
558  * Compare two sort list items, according to the sort specs.
559  */
560 int
561 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562     size_t offset)
563 {
564 	int ret;
565 
566 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567 
568 	if (debug_sort) {
569 		if (offset)
570 			printf("; offset=%d", (int) offset);
571 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573 		printf("; cmp1=%d\n", ret);
574 	}
575 
576 	if (ret)
577 		return (ret);
578 
579 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581 		if (debug_sort)
582 			printf("; cmp2=%d\n", ret);
583 	}
584 
585 	return (ret);
586 }
587 
588 /*
589  * Compare two sort list items, according to the sort specs.
590  */
591 int
592 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593 {
594 
595 	return (list_coll_offset(ss1, ss2, 0));
596 }
597 
598 #define	LSCDEF(N)							\
599 static int 								\
600 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
601 {									\
602 									\
603 	return (list_coll_offset(ss1, ss2, N));				\
604 }
605 
606 LSCDEF(1)
607 LSCDEF(2)
608 LSCDEF(3)
609 LSCDEF(4)
610 LSCDEF(5)
611 LSCDEF(6)
612 LSCDEF(7)
613 LSCDEF(8)
614 LSCDEF(9)
615 LSCDEF(10)
616 LSCDEF(11)
617 LSCDEF(12)
618 LSCDEF(13)
619 LSCDEF(14)
620 LSCDEF(15)
621 LSCDEF(16)
622 LSCDEF(17)
623 LSCDEF(18)
624 LSCDEF(19)
625 LSCDEF(20)
626 
627 listcoll_t
628 get_list_call_func(size_t offset)
629 {
630 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635 	    list_coll_18, list_coll_19, list_coll_20 };
636 
637 	if (offset <= 20)
638 		return (lsarray[offset]);
639 
640 	return (list_coll);
641 }
642 
643 /*
644  * Compare two sort list items, only by their original string.
645  */
646 int
647 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648 {
649 
650 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651 }
652 
653 /*
654  * Maximum size of a number in the string (before or after decimal point)
655  */
656 #define MAX_NUM_SIZE (128)
657 
658 /*
659  * Set suffix value
660  */
661 static void setsuffix(wchar_t c, unsigned char *si)
662 {
663 	switch (c){
664 	case L'k':
665 	case L'K':
666 		*si = 1;
667 		break;
668 	case L'M':
669 		*si = 2;
670 		break;
671 	case L'G':
672 		*si = 3;
673 		break;
674 	case L'T':
675 		*si = 4;
676 		break;
677 	case L'P':
678 		*si = 5;
679 		break;
680 	case L'E':
681 		*si = 6;
682 		break;
683 	case L'Z':
684 		*si = 7;
685 		break;
686 	case L'Y':
687 		*si = 8;
688 		break;
689 	default:
690 		*si = 0;
691 	};
692 }
693 
694 /*
695  * Read string s and parse the string into a fixed-decimal-point number.
696  * sign equals -1 if the number is negative (explicit plus is not allowed,
697  * according to GNU sort's "info sort".
698  * The number part before decimal point is in the smain, after the decimal
699  * point is in sfrac, tail is the pointer to the remainder of the string.
700  */
701 static int
702 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
703 {
704 	bwstring_iterator s;
705 
706 	s = bws_begin(s0);
707 
708 	/* always end the fraction with zero, even if we have no fraction */
709 	sfrac[0] = 0;
710 
711 	while (iswblank(bws_get_iter_value(s)))
712 		s = bws_iterator_inc(s, 1);
713 
714 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
715 		*sign = -1;
716 		s = bws_iterator_inc(s, 1);
717 	}
718 
719 	// This is '0', not '\0', do not change this
720 	while (iswdigit(bws_get_iter_value(s)) &&
721 	    (bws_get_iter_value(s) == L'0'))
722 		s = bws_iterator_inc(s, 1);
723 
724 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725 		if (iswdigit(bws_get_iter_value(s))) {
726 			smain[*main_len] = bws_get_iter_value(s);
727 			s = bws_iterator_inc(s, 1);
728 			*main_len += 1;
729 		} else if (symbol_thousands_sep &&
730 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
731 			s = bws_iterator_inc(s, 1);
732 		else
733 			break;
734 	}
735 
736 	smain[*main_len] = 0;
737 
738 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
739 		s = bws_iterator_inc(s, 1);
740 		while (iswdigit(bws_get_iter_value(s)) &&
741 		    *frac_len < MAX_NUM_SIZE) {
742 			sfrac[*frac_len] = bws_get_iter_value(s);
743 			s = bws_iterator_inc(s, 1);
744 			*frac_len += 1;
745 		}
746 		sfrac[*frac_len] = 0;
747 
748 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749 			--(*frac_len);
750 			sfrac[*frac_len] = L'\0';
751 		}
752 	}
753 
754 	setsuffix(bws_get_iter_value(s),si);
755 
756 	if ((*main_len + *frac_len) == 0)
757 		*sign = 0;
758 
759 	return (0);
760 }
761 
762 /*
763  * Implements string sort.
764  */
765 static int
766 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767 {
768 
769 	if (debug_sort) {
770 		if (offset)
771 			printf("; offset=%d\n", (int) offset);
772 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
773 		printf("(%zu)", BWSLEN(kv1->k));
774 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
775 		printf("(%zu)", BWSLEN(kv2->k));
776 	}
777 
778 	return (bwscoll(kv1->k, kv2->k, offset));
779 }
780 
781 /*
782  * Compare two suffixes
783  */
784 static inline int
785 cmpsuffix(unsigned char si1, unsigned char si2)
786 {
787 
788 	return ((char)si1 - (char)si2);
789 }
790 
791 /*
792  * Implements numeric sort for -n and -h.
793  */
794 static int
795 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
796     size_t offset __unused, bool use_suffix)
797 {
798 	struct bwstring *s1, *s2;
799 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
800 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
801 	int cmp_res, sign1, sign2;
802 	size_t frac1, frac2, main1, main2;
803 	unsigned char SI1, SI2;
804 	bool e1, e2, key1_read, key2_read;
805 
806 	s1 = kv1->k;
807 	s2 = kv2->k;
808 	sign1 = sign2 = 0;
809 	main1 = main2 = 0;
810 	frac1 = frac2 = 0;
811 
812 	cmp_res = 0;
813 	key1_read = key2_read = false;
814 
815 	if (debug_sort) {
816 		bwsprintf(stdout, s1, "; k1=<", ">");
817 		bwsprintf(stdout, s2, ", k2=<", ">");
818 	}
819 
820 	if (s1 == s2)
821 		return (0);
822 
823 	if (kv1->hint->status == HS_UNINITIALIZED) {
824 		/* read the number from the string */
825 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
826 		key1_read = true;
827 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
828 		if(main1 < 1 && frac1 < 1)
829 			kv1->hint->v.nh.empty=true;
830 		kv1->hint->v.nh.si = SI1;
831 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
832 		    HS_INITIALIZED : HS_ERROR;
833 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
834 	}
835 
836 	if (kv2->hint->status == HS_UNINITIALIZED) {
837 		/* read the number from the string */
838 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
839 		key2_read = true;
840 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
841 		if(main2 < 1 && frac2 < 1)
842 			kv2->hint->v.nh.empty=true;
843 		kv2->hint->v.nh.si = SI2;
844 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
845 		    HS_INITIALIZED : HS_ERROR;
846 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
847 	}
848 
849 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
850 	    HS_INITIALIZED) {
851 		unsigned long long n1, n2;
852 		bool neg1, neg2;
853 
854 		e1 = kv1->hint->v.nh.empty;
855 		e2 = kv2->hint->v.nh.empty;
856 
857 		if (e1 && e2)
858 			return (0);
859 
860 		neg1 = kv1->hint->v.nh.neg;
861 		neg2 = kv2->hint->v.nh.neg;
862 
863 		if (neg1 && !neg2)
864 			return (-1);
865 		if (neg2 && !neg1)
866 			return (+1);
867 
868 		if (e1)
869 			return (neg2 ? +1 : -1);
870 		else if (e2)
871 			return (neg1 ? -1 : +1);
872 
873 
874 		if (use_suffix) {
875 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
876 			if (cmp_res)
877 				return (neg1 ? -cmp_res : cmp_res);
878 		}
879 
880 		n1 = kv1->hint->v.nh.n1;
881 		n2 = kv2->hint->v.nh.n1;
882 		if (n1 < n2)
883 			return (neg1 ? +1 : -1);
884 		else if (n1 > n2)
885 			return (neg1 ? -1 : +1);
886 	}
887 
888 	/* read the numbers from the strings */
889 	if (!key1_read)
890 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
891 	if (!key2_read)
892 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
893 
894 	e1 = ((main1 + frac1) == 0);
895 	e2 = ((main2 + frac2) == 0);
896 
897 	if (e1 && e2)
898 		return (0);
899 
900 	/* we know the result if the signs are different */
901 	if (sign1 < 0 && sign2 >= 0)
902 		return (-1);
903 	if (sign1 >= 0 && sign2 < 0)
904 		return (+1);
905 
906 	if (e1)
907 		return ((sign2 < 0) ? +1 : -1);
908 	else if (e2)
909 		return ((sign1 < 0) ? -1 : +1);
910 
911 	if (use_suffix) {
912 		cmp_res = cmpsuffix(SI1, SI2);
913 		if (cmp_res)
914 			return ((sign1 < 0) ? -cmp_res : cmp_res);
915 	}
916 
917 	/* if both numbers are empty assume that the strings are equal */
918 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
919 		return (0);
920 
921 	/*
922 	 * if the main part is of different size, we know the result
923 	 * (because the leading zeros are removed)
924 	 */
925 	if (main1 < main2)
926 		cmp_res = -1;
927 	else if (main1 > main2)
928 		cmp_res = +1;
929 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
930 	else
931 		cmp_res = wcscmp(smain1, smain2);
932 
933 	/* check fraction */
934 	if (!cmp_res)
935 		cmp_res = wcscmp(sfrac1, sfrac2);
936 
937 	if (!cmp_res)
938 		return (0);
939 
940 	/* reverse result if the signs are negative */
941 	if (sign1 < 0 && sign2 < 0)
942 		cmp_res = -cmp_res;
943 
944 	return (cmp_res);
945 }
946 
947 /*
948  * Implements numeric sort (-n).
949  */
950 static int
951 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
952 {
953 
954 	return (numcoll_impl(kv1, kv2, offset, false));
955 }
956 
957 /*
958  * Implements 'human' numeric sort (-h).
959  */
960 static int
961 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
962 {
963 
964 	return (numcoll_impl(kv1, kv2, offset, true));
965 }
966 
967 /*
968  * Implements random sort (-R).
969  */
970 static int
971 randomcoll(struct key_value *kv1, struct key_value *kv2,
972     size_t offset __unused)
973 {
974 	struct bwstring *s1, *s2;
975 	MD5_CTX ctx1, ctx2;
976 	char *b1, *b2;
977 
978 	s1 = kv1->k;
979 	s2 = kv2->k;
980 
981 	if (debug_sort) {
982 		bwsprintf(stdout, s1, "; k1=<", ">");
983 		bwsprintf(stdout, s2, ", k2=<", ">");
984 	}
985 
986 	if (s1 == s2)
987 		return (0);
988 
989 	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
990 	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
991 
992 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
993 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
994 	b1 = MD5End(&ctx1, NULL);
995 	b2 = MD5End(&ctx2, NULL);
996 	if (b1 == NULL) {
997 		if (b2 == NULL)
998 			return (0);
999 		else {
1000 			sort_free(b2);
1001 			return (-1);
1002 		}
1003 	} else if (b2 == NULL) {
1004 		sort_free(b1);
1005 		return (+1);
1006 	} else {
1007 		int cmp_res;
1008 
1009 		cmp_res = strcmp(b1,b2);
1010 		sort_free(b1);
1011 		sort_free(b2);
1012 
1013 		if (!cmp_res)
1014 			cmp_res = bwscoll(s1, s2, 0);
1015 
1016 		return (cmp_res);
1017 	}
1018 }
1019 
1020 /*
1021  * Implements version sort (-V).
1022  */
1023 static int
1024 versioncoll(struct key_value *kv1, struct key_value *kv2,
1025     size_t offset __unused)
1026 {
1027 	struct bwstring *s1, *s2;
1028 
1029 	s1 = kv1->k;
1030 	s2 = kv2->k;
1031 
1032 	if (debug_sort) {
1033 		bwsprintf(stdout, s1, "; k1=<", ">");
1034 		bwsprintf(stdout, s2, ", k2=<", ">");
1035 	}
1036 
1037 	if (s1 == s2)
1038 		return (0);
1039 
1040 	return (vcmp(s1, s2));
1041 }
1042 
1043 /*
1044  * Check for minus infinity
1045  */
1046 static inline bool
1047 huge_minus(double d, int err1)
1048 {
1049 
1050 	if (err1 == ERANGE)
1051 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1052 			return (+1);
1053 
1054 	return (0);
1055 }
1056 
1057 /*
1058  * Check for plus infinity
1059  */
1060 static inline bool
1061 huge_plus(double d, int err1)
1062 {
1063 
1064 	if (err1 == ERANGE)
1065 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1066 			return (+1);
1067 
1068 	return (0);
1069 }
1070 
1071 /*
1072  * Check whether a function is a NAN
1073  */
1074 static bool
1075 is_nan(double d)
1076 {
1077 
1078 	return ((d == NAN) || (isnan(d)));
1079 }
1080 
1081 /*
1082  * Compare two NANs
1083  */
1084 static int
1085 cmp_nans(double d1, double d2)
1086 {
1087 
1088 	if (d1 < d2)
1089 		return (-1);
1090 	if (d2 > d2)
1091 		return (+1);
1092 	return (0);
1093 }
1094 
1095 /*
1096  * Implements general numeric sort (-g).
1097  */
1098 static int
1099 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1100     size_t offset __unused)
1101 {
1102 	double d1, d2;
1103 	int err1, err2;
1104 	bool empty1, empty2, key1_read, key2_read;
1105 
1106 	d1 = d2 = 0;
1107 	err1 = err2 = 0;
1108 	key1_read = key2_read = false;
1109 
1110 	if (debug_sort) {
1111 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1112 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1113 	}
1114 
1115 	if (kv1->hint->status == HS_UNINITIALIZED) {
1116 		errno = 0;
1117 		d1 = bwstod(kv1->k, &empty1);
1118 		err1 = errno;
1119 
1120 		if (empty1)
1121 			kv1->hint->v.gh.notnum = true;
1122 		else if (err1 == 0) {
1123 			kv1->hint->v.gh.d = d1;
1124 			kv1->hint->v.gh.nan = is_nan(d1);
1125 			kv1->hint->status = HS_INITIALIZED;
1126 		} else
1127 			kv1->hint->status = HS_ERROR;
1128 
1129 		key1_read = true;
1130 	}
1131 
1132 	if (kv2->hint->status == HS_UNINITIALIZED) {
1133 		errno = 0;
1134 		d2 = bwstod(kv2->k, &empty2);
1135 		err2 = errno;
1136 
1137 		if (empty2)
1138 			kv2->hint->v.gh.notnum = true;
1139 		else if (err2 == 0) {
1140 			kv2->hint->v.gh.d = d2;
1141 			kv2->hint->v.gh.nan = is_nan(d2);
1142 			kv2->hint->status = HS_INITIALIZED;
1143 		} else
1144 			kv2->hint->status = HS_ERROR;
1145 
1146 		key2_read = true;
1147 	}
1148 
1149 	if (kv1->hint->status == HS_INITIALIZED &&
1150 	    kv2->hint->status == HS_INITIALIZED) {
1151 		if (kv1->hint->v.gh.notnum)
1152 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1153 		else if (kv2->hint->v.gh.notnum)
1154 			return (+1);
1155 
1156 		if (kv1->hint->v.gh.nan)
1157 			return ((kv2->hint->v.gh.nan) ?
1158 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1159 			    -1);
1160 		else if (kv2->hint->v.gh.nan)
1161 			return (+1);
1162 
1163 		d1 = kv1->hint->v.gh.d;
1164 		d2 = kv2->hint->v.gh.d;
1165 
1166 		if (d1 < d2)
1167 			return (-1);
1168 		else if (d1 > d2)
1169 			return (+1);
1170 		else
1171 			return (0);
1172 	}
1173 
1174 	if (!key1_read) {
1175 		errno = 0;
1176 		d1 = bwstod(kv1->k, &empty1);
1177 		err1 = errno;
1178 	}
1179 
1180 	if (!key2_read) {
1181 		errno = 0;
1182 		d2 = bwstod(kv2->k, &empty2);
1183 		err2 = errno;
1184 	}
1185 
1186 	/* Non-value case: */
1187 	if (empty1)
1188 		return (empty2 ? 0 : -1);
1189 	else if (empty2)
1190 		return (+1);
1191 
1192 	/* NAN case */
1193 	if (is_nan(d1))
1194 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1195 	else if (is_nan(d2))
1196 		return (+1);
1197 
1198 	/* Infinities */
1199 	if (err1 == ERANGE || err2 == ERANGE) {
1200 		/* Minus infinity case */
1201 		if (huge_minus(d1, err1)) {
1202 			if (huge_minus(d2, err2)) {
1203 				if (d1 < d2)
1204 					return (-1);
1205 				if (d1 > d2)
1206 					return (+1);
1207 				return (0);
1208 			} else
1209 				return (-1);
1210 
1211 		} else if (huge_minus(d2, err2)) {
1212 			if (huge_minus(d1, err1)) {
1213 				if (d1 < d2)
1214 					return (-1);
1215 				if (d1 > d2)
1216 					return (+1);
1217 				return (0);
1218 			} else
1219 				return (+1);
1220 		}
1221 
1222 		/* Plus infinity case */
1223 		if (huge_plus(d1, err1)) {
1224 			if (huge_plus(d2, err2)) {
1225 				if (d1 < d2)
1226 					return (-1);
1227 				if (d1 > d2)
1228 					return (+1);
1229 				return (0);
1230 			} else
1231 				return (+1);
1232 		} else if (huge_plus(d2, err2)) {
1233 			if (huge_plus(d1, err1)) {
1234 				if (d1 < d2)
1235 					return (-1);
1236 				if (d1 > d2)
1237 					return (+1);
1238 				return (0);
1239 			} else
1240 				return (-1);
1241 		}
1242 	}
1243 
1244 	if (d1 < d2)
1245 		return (-1);
1246 	if (d1 > d2)
1247 		return (+1);
1248 
1249 	return (0);
1250 }
1251 
1252 /*
1253  * Implements month sort (-M).
1254  */
1255 static int
1256 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1257 {
1258 	int val1, val2;
1259 	bool key1_read, key2_read;
1260 
1261 	val1 = val2 = 0;
1262 	key1_read = key2_read = false;
1263 
1264 	if (debug_sort) {
1265 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1266 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1267 	}
1268 
1269 	if (kv1->hint->status == HS_UNINITIALIZED) {
1270 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1271 		key1_read = true;
1272 		kv1->hint->status = HS_INITIALIZED;
1273 	}
1274 
1275 	if (kv2->hint->status == HS_UNINITIALIZED) {
1276 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1277 		key2_read = true;
1278 		kv2->hint->status = HS_INITIALIZED;
1279 	}
1280 
1281 	if (kv1->hint->status == HS_INITIALIZED) {
1282 		val1 = kv1->hint->v.Mh.m;
1283 		key1_read = true;
1284 	}
1285 
1286 	if (kv2->hint->status == HS_INITIALIZED) {
1287 		val2 = kv2->hint->v.Mh.m;
1288 		key2_read = true;
1289 	}
1290 
1291 	if (!key1_read)
1292 		val1 = bws_month_score(kv1->k);
1293 	if (!key2_read)
1294 		val2 = bws_month_score(kv2->k);
1295 
1296 	if (val1 == val2) {
1297 		return (0);
1298 	}
1299 	if (val1 < val2)
1300 		return (-1);
1301 	return (+1);
1302 }
1303