xref: /freebsd/usr.bin/sort/coll.c (revision 6cf87ec8ec46bc472249ebb6a3b3346fe3c10141)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43 
44 #include "coll.h"
45 #include "vsort.h"
46 
47 struct key_specs *keys;
48 size_t keys_num = 0;
49 
50 wchar_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wchar_t symbol_thousands_sep = 0;
53 wchar_t symbol_negative_sign = L'-';
54 wchar_t symbol_positive_sign = L'+';
55 
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63 
64 /*
65  * Allocate keys array
66  */
67 struct keys_array *
68 keys_array_alloc(void)
69 {
70 	struct keys_array *ka;
71 	size_t sz;
72 
73 	sz = keys_array_size();
74 	ka = sort_malloc(sz);
75 	memset(ka, 0, sz);
76 
77 	return (ka);
78 }
79 
80 /*
81  * Calculate whether we need key hint space
82  */
83 static size_t
84 key_hint_size(void)
85 {
86 
87 	return (need_hint ? sizeof(struct key_hint) : 0);
88 }
89 
90 /*
91  * Calculate keys array size
92  */
93 size_t
94 keys_array_size(void)
95 {
96 
97 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98 }
99 
100 /*
101  * Clean data of keys array
102  */
103 void
104 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105 {
106 
107 	if (ka) {
108 		for (size_t i = 0; i < keys_num; ++i)
109 			if (ka->key[i].k && ka->key[i].k != s)
110 				bwsfree(ka->key[i].k);
111 		memset(ka, 0, keys_array_size());
112 	}
113 }
114 
115 /*
116  * Set value of a key in the keys set
117  */
118 void
119 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120 {
121 
122 	if (ka && keys_num > ind) {
123 		struct key_value *kv;
124 
125 		kv = &(ka->key[ind]);
126 
127 		if (kv->k && kv->k != s)
128 			bwsfree(kv->k);
129 		kv->k = s;
130 	}
131 }
132 
133 /*
134  * Initialize a sort list item
135  */
136 struct sort_list_item *
137 sort_list_item_alloc(void)
138 {
139 	struct sort_list_item *si;
140 	size_t sz;
141 
142 	sz = sizeof(struct sort_list_item) + keys_array_size();
143 	si = sort_malloc(sz);
144 	memset(si, 0, sz);
145 
146 	return (si);
147 }
148 
149 size_t
150 sort_list_item_size(struct sort_list_item *si)
151 {
152 	size_t ret = 0;
153 
154 	if (si) {
155 		ret = sizeof(struct sort_list_item) + keys_array_size();
156 		if (si->str)
157 			ret += bws_memsize(si->str);
158 		for (size_t i = 0; i < keys_num; ++i) {
159 			struct key_value *kv;
160 
161 			kv = &(si->ka.key[i]);
162 
163 			if (kv->k != si->str)
164 				ret += bws_memsize(kv->k);
165 		}
166 	}
167 	return (ret);
168 }
169 
170 /*
171  * Calculate key for a sort list item
172  */
173 static void
174 sort_list_item_make_key(struct sort_list_item *si)
175 {
176 
177 	preproc(si->str, &(si->ka));
178 }
179 
180 /*
181  * Set value of a sort list item.
182  * Return combined string and keys memory size.
183  */
184 void
185 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186 {
187 
188 	if (si) {
189 		clean_keys_array(si->str, &(si->ka));
190 		if (si->str) {
191 			if (si->str == str) {
192 				/* we are trying to reset the same string */
193 				return;
194 			} else {
195 				bwsfree(si->str);
196 				si->str = NULL;
197 			}
198 		}
199 		si->str = str;
200 		sort_list_item_make_key(si);
201 	}
202 }
203 
204 /*
205  * De-allocate a sort list item object memory
206  */
207 void
208 sort_list_item_clean(struct sort_list_item *si)
209 {
210 
211 	if (si) {
212 		clean_keys_array(si->str, &(si->ka));
213 		if (si->str) {
214 			bwsfree(si->str);
215 			si->str = NULL;
216 		}
217 	}
218 }
219 
220 /*
221  * Skip columns according to specs
222  */
223 static size_t
224 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225     bool skip_blanks, bool *empty_key)
226 {
227 	if (cols < 1)
228 		return (BWSLEN(s) + 1);
229 
230 	if (skip_blanks)
231 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232 			++start;
233 
234 	while (start < BWSLEN(s) && cols > 1) {
235 		--cols;
236 		++start;
237 	}
238 
239 	if (start >= BWSLEN(s))
240 		*empty_key = true;
241 
242 	return (start);
243 }
244 
245 /*
246  * Skip fields according to specs
247  */
248 static size_t
249 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250 {
251 
252 	if (fields < 2) {
253 		if (BWSLEN(s) == 0)
254 			*empty_field = true;
255 		return (0);
256 	} else if (!(sort_opts_vals.tflag)) {
257 		size_t cpos = 0;
258 		bool pb = true;
259 
260 		while (cpos < BWSLEN(s)) {
261 			bool isblank;
262 
263 			isblank = iswblank(BWS_GET(s,cpos));
264 
265 			if (isblank && !pb) {
266 				--fields;
267 				if (fields <= 1)
268 					return (cpos);
269 			}
270 			pb = isblank;
271 			++cpos;
272 		}
273 		if (fields > 1)
274 			*empty_field = true;
275 		return (cpos);
276 	} else {
277 		size_t cpos = 0;
278 
279 		while (cpos < BWSLEN(s)) {
280 			if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) {
281 				--fields;
282 				if (fields <= 1)
283 					return (cpos + 1);
284 			}
285 			++cpos;
286 		}
287 		if (fields > 1)
288 			*empty_field = true;
289 		return (cpos);
290 	}
291 }
292 
293 /*
294  * Find fields start
295  */
296 static void
297 find_field_start(const struct bwstring *s, struct key_specs *ks,
298     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299 {
300 
301 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
302 	if (!*empty_field)
303 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
304 		    ks->pos1b, empty_key);
305 	else
306 		*empty_key = true;
307 }
308 
309 /*
310  * Find end key position
311  */
312 static size_t
313 find_field_end(const struct bwstring *s, struct key_specs *ks)
314 {
315 	size_t f2, next_field_start, pos_end;
316 	bool empty_field, empty_key;
317 
318 	pos_end = 0;
319 	next_field_start = 0;
320 	empty_field = false;
321 	empty_key = false;
322 	f2 = ks->f2;
323 
324 	if (f2 == 0)
325 		return (BWSLEN(s) + 1);
326 	else {
327 		if (ks->c2 == 0) {
328 			next_field_start = skip_fields_to_start(s, f2 + 1,
329 			    &empty_field);
330 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
331 			    (sort_opts_vals.field_sep == BWS_GET(s,
332 			    next_field_start - 1)))
333 				--next_field_start;
334 		} else
335 			next_field_start = skip_fields_to_start(s, f2,
336 			    &empty_field);
337 	}
338 
339 	if (empty_field || (next_field_start >= BWSLEN(s)))
340 		return (BWSLEN(s) + 1);
341 
342 	if (ks->c2) {
343 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344 		    ks->pos2b, &empty_key);
345 		if (pos_end < BWSLEN(s))
346 			++pos_end;
347 	} else
348 		pos_end = next_field_start;
349 
350 	return (pos_end);
351 }
352 
353 /*
354  * Cut a field according to the key specs
355  */
356 static struct bwstring *
357 cut_field(const struct bwstring *s, struct key_specs *ks)
358 {
359 	struct bwstring *ret = NULL;
360 
361 	if (s && ks) {
362 		size_t field_start, key_end, key_start, sz;
363 		bool empty_field, empty_key;
364 
365 		field_start = 0;
366 		key_start = 0;
367 		empty_field = false;
368 		empty_key = false;
369 
370 		find_field_start(s, ks, &field_start, &key_start,
371 		    &empty_field, &empty_key);
372 
373 		if (empty_key)
374 			sz = 0;
375 		else {
376 			key_end = find_field_end(s, ks);
377 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
378 		}
379 
380 		ret = bwsalloc(sz);
381 		if (sz)
382 			bwsnocpy(ret, s, key_start, sz);
383 	} else
384 		ret = bwsalloc(0);
385 
386 	return (ret);
387 }
388 
389 /*
390  * Preprocesses a line applying the necessary transformations
391  * specified by command line options and returns the preprocessed
392  * string, which can be used to compare.
393  */
394 int
395 preproc(struct bwstring *s, struct keys_array *ka)
396 {
397 
398 	if (sort_opts_vals.kflag)
399 		for (size_t i = 0; i < keys_num; i++) {
400 			struct bwstring *key;
401 			struct key_specs *kspecs;
402 			struct sort_mods *sm;
403 
404 			kspecs = &(keys[i]);
405 			key = cut_field(s, kspecs);
406 
407 			sm = &(kspecs->sm);
408 			if (sm->dflag)
409 				key = dictionary_order(key);
410 			else if (sm->iflag)
411 				key = ignore_nonprinting(key);
412 			if (sm->fflag || sm->Mflag)
413 				key = ignore_case(key);
414 
415 			set_key_on_keys_array(ka, key, i);
416 		}
417 	else {
418 		struct bwstring *ret = NULL;
419 		struct sort_mods *sm = default_sort_mods;
420 
421 		if (sm->bflag) {
422 			if (ret == NULL)
423 				ret = bwsdup(s);
424 			ret = ignore_leading_blanks(ret);
425 		}
426 		if (sm->dflag) {
427 			if (ret == NULL)
428 				ret = bwsdup(s);
429 			ret = dictionary_order(ret);
430 		} else if (sm->iflag) {
431 			if (ret == NULL)
432 				ret = bwsdup(s);
433 			ret = ignore_nonprinting(ret);
434 		}
435 		if (sm->fflag || sm->Mflag) {
436 			if (ret == NULL)
437 				ret = bwsdup(s);
438 			ret = ignore_case(ret);
439 		}
440 		if (ret == NULL)
441 			set_key_on_keys_array(ka, s, 0);
442 		else
443 			set_key_on_keys_array(ka, ret, 0);
444 	}
445 
446 	return 0;
447 }
448 
449 cmpcoll_t
450 get_sort_func(struct sort_mods *sm)
451 {
452 
453 	if (sm->nflag)
454 		return (numcoll);
455 	else if (sm->hflag)
456 		return (hnumcoll);
457 	else if (sm->gflag)
458 		return (gnumcoll);
459 	else if (sm->Mflag)
460 		return (monthcoll);
461 	else if (sm->Rflag)
462 		return (randomcoll);
463 	else if (sm->Vflag)
464 		return (versioncoll);
465 	else
466 		return (wstrcoll);
467 }
468 
469 /*
470  * Compares the given strings.  Returns a positive number if
471  * the first precedes the second, a negative number if the second is
472  * the preceding one, and zero if they are equal.  This function calls
473  * the underlying collate functions, which done the actual comparison.
474  */
475 int
476 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477 {
478 	struct sort_mods *sm;
479 	int res = 0;
480 
481 	for (size_t i = 0; i < keys_num; ++i) {
482 		sm = &(keys[i].sm);
483 
484 		if (sm->rflag)
485 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486 		else
487 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488 
489 		if (res)
490 			break;
491 
492 		/* offset applies to only the first key */
493 		offset = 0;
494 	}
495 	return (res);
496 }
497 
498 /*
499  * Compare two strings.
500  * Plain symbol-by-symbol comparison.
501  */
502 int
503 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504 {
505 
506 	if (default_sort_mods->rflag) {
507 		const struct bwstring *tmp;
508 
509 		tmp = s1;
510 		s1 = s2;
511 		s2 = tmp;
512 	}
513 
514 	return (bwscoll(s1, s2, 0));
515 }
516 
517 /*
518  * Compare a string and a sort list item, according to the sort specs.
519  */
520 int
521 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522 {
523 	struct keys_array *ka1;
524 	int ret = 0;
525 
526 	ka1 = keys_array_alloc();
527 
528 	preproc(str1, ka1);
529 
530 	sort_list_item_make_key(*ss2);
531 
532 	if (debug_sort) {
533 		bwsprintf(stdout, str1, "; s1=<", ">");
534 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535 	}
536 
537 	ret = key_coll(ka1, &((*ss2)->ka), 0);
538 
539 	if (debug_sort)
540 		printf("; cmp1=%d", ret);
541 
542 	clean_keys_array(str1, ka1);
543 	sort_free(ka1);
544 
545 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546 		ret = top_level_str_coll(str1, ((*ss2)->str));
547 		if (debug_sort)
548 			printf("; cmp2=%d", ret);
549 	}
550 
551 	if (debug_sort)
552 		printf("\n");
553 
554 	return (ret);
555 }
556 
557 /*
558  * Compare two sort list items, according to the sort specs.
559  */
560 int
561 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562     size_t offset)
563 {
564 	int ret;
565 
566 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567 
568 	if (debug_sort) {
569 		if (offset)
570 			printf("; offset=%d", (int) offset);
571 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573 		printf("; cmp1=%d\n", ret);
574 	}
575 
576 	if (ret)
577 		return (ret);
578 
579 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581 		if (debug_sort)
582 			printf("; cmp2=%d\n", ret);
583 	}
584 
585 	return (ret);
586 }
587 
588 /*
589  * Compare two sort list items, according to the sort specs.
590  */
591 int
592 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593 {
594 
595 	return (list_coll_offset(ss1, ss2, 0));
596 }
597 
598 #define LSCDEF(N) 											\
599 static int 												\
600 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
601 {													\
602 													\
603 	return (list_coll_offset(ss1, ss2, N));								\
604 }
605 
606 LSCDEF(1)
607 LSCDEF(2)
608 LSCDEF(3)
609 LSCDEF(4)
610 LSCDEF(5)
611 LSCDEF(6)
612 LSCDEF(7)
613 LSCDEF(8)
614 LSCDEF(9)
615 LSCDEF(10)
616 LSCDEF(11)
617 LSCDEF(12)
618 LSCDEF(13)
619 LSCDEF(14)
620 LSCDEF(15)
621 LSCDEF(16)
622 LSCDEF(17)
623 LSCDEF(18)
624 LSCDEF(19)
625 LSCDEF(20)
626 
627 listcoll_t
628 get_list_call_func(size_t offset)
629 {
630 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635 	    list_coll_18, list_coll_19, list_coll_20 };
636 
637 	if (offset <= 20)
638 		return (lsarray[offset]);
639 
640 	return (list_coll);
641 }
642 
643 /*
644  * Compare two sort list items, only by their original string.
645  */
646 int
647 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648 {
649 
650 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651 }
652 
653 /*
654  * Maximum size of a number in the string (before or after decimal point)
655  */
656 #define MAX_NUM_SIZE (128)
657 
658 /*
659  * Set suffix value
660  */
661 static void setsuffix(wchar_t c, unsigned char *si)
662 {
663 	switch (c){
664 	case L'k':
665 	case L'K':
666 		*si = 1;
667 		break;
668 	case L'M':
669 		*si = 2;
670 		break;
671 	case L'G':
672 		*si = 3;
673 		break;
674 	case L'T':
675 		*si = 4;
676 		break;
677 	case L'P':
678 		*si = 5;
679 		break;
680 	case L'E':
681 		*si = 6;
682 		break;
683 	case L'Z':
684 		*si = 7;
685 		break;
686 	case L'Y':
687 		*si = 8;
688 		break;
689 	default:
690 		*si = 0;
691 	};
692 }
693 
694 /*
695  * Read string s and parse the string into a fixed-decimal-point number.
696  * sign equals -1 if the number is negative (explicit plus is not allowed,
697  * according to GNU sort's "info sort".
698  * The number part before decimal point is in the smain, after the decimal
699  * point is in sfrac, tail is the pointer to the remainder of the string.
700  */
701 static int
702 read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si)
703 {
704 	bwstring_iterator s;
705 
706 	s = bws_begin(s0);
707 
708 	/* always end the fraction with zero, even if we have no fraction */
709 	sfrac[0] = 0;
710 
711 	while (iswblank(bws_get_iter_value(s)))
712 		s = bws_iterator_inc(s, 1);
713 
714 	if (bws_get_iter_value(s) == symbol_negative_sign) {
715 		*sign = -1;
716 		s = bws_iterator_inc(s, 1);
717 	}
718 
719 	// This is '0', not '\0', do not change this
720 	while (iswdigit(bws_get_iter_value(s)) &&
721 	    (bws_get_iter_value(s) == L'0'))
722 		s = bws_iterator_inc(s, 1);
723 
724 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725 		if (iswdigit(bws_get_iter_value(s))) {
726 			smain[*main_len] = bws_get_iter_value(s);
727 			s = bws_iterator_inc(s, 1);
728 			*main_len += 1;
729 		} else if (symbol_thousands_sep &&
730 		    (bws_get_iter_value(s) == symbol_thousands_sep))
731 			s = bws_iterator_inc(s, 1);
732 		else
733 			break;
734 	}
735 
736 	smain[*main_len] = 0;
737 
738 	if (bws_get_iter_value(s) == symbol_decimal_point) {
739 		s = bws_iterator_inc(s, 1);
740 		while (iswdigit(bws_get_iter_value(s)) &&
741 		    *frac_len < MAX_NUM_SIZE) {
742 			sfrac[*frac_len] = bws_get_iter_value(s);
743 			s = bws_iterator_inc(s, 1);
744 			*frac_len += 1;
745 		}
746 		sfrac[*frac_len] = 0;
747 
748 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749 			--(*frac_len);
750 			sfrac[*frac_len] = L'\0';
751 		}
752 	}
753 
754 	setsuffix(bws_get_iter_value(s),si);
755 
756 	if ((*main_len + *frac_len) == 0)
757 		*sign = 0;
758 
759 	return (0);
760 }
761 
762 /*
763  * Implements string sort.
764  */
765 static int
766 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767 {
768 
769 	if (debug_sort) {
770 		if (offset)
771 			printf("; offset=%d\n", (int) offset);
772 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
773 		printf("(%zu)", BWSLEN(kv1->k));
774 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
775 		printf("(%zu)", BWSLEN(kv2->k));
776 	}
777 
778 	return (bwscoll(kv1->k, kv2->k, offset));
779 }
780 
781 /*
782  * Compare two suffixes
783  */
784 static inline int
785 cmpsuffix(unsigned char si1, unsigned char si2)
786 {
787 
788 	return ((char)si1 - (char)si2);
789 }
790 
791 /*
792  * Implements numeric sort for -n and -h.
793  */
794 static int
795 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
796     size_t offset __unused, bool use_suffix)
797 {
798 	struct bwstring *s1, *s2;
799 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
800 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
801 	int cmp_res, frac1, frac2, main1, main2, sign1, sign2;
802 	unsigned char SI1, SI2;
803 	bool e1, e2, key1_read, key2_read;
804 
805 	s1 = kv1->k;
806 	s2 = kv2->k;
807 	sign1 = sign2 = 0;
808 	main1 = main2 = 0;
809 	frac1 = frac2 = 0;
810 
811 	cmp_res = 0;
812 	key1_read = key2_read = false;
813 
814 	if (debug_sort) {
815 		bwsprintf(stdout, s1, "; k1=<", ">");
816 		bwsprintf(stdout, s2, ", k2=<", ">");
817 	}
818 
819 	if (s1 == s2)
820 		return (0);
821 
822 	if (kv1->hint->status == HS_UNINITIALIZED) {
823 		/* read the number from the string */
824 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
825 		key1_read = true;
826 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
827 		if(main1 < 1 && frac1 < 1)
828 			kv1->hint->v.nh.empty=true;
829 		kv1->hint->v.nh.si = SI1;
830 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
831 		    HS_INITIALIZED : HS_ERROR;
832 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
833 	}
834 
835 	if (kv2->hint->status == HS_UNINITIALIZED) {
836 		/* read the number from the string */
837 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
838 		key2_read = true;
839 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
840 		if(main2 < 1 && frac2 < 1)
841 			kv2->hint->v.nh.empty=true;
842 		kv2->hint->v.nh.si = SI2;
843 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
844 		    HS_INITIALIZED : HS_ERROR;
845 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
846 	}
847 
848 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
849 	    HS_INITIALIZED) {
850 		unsigned long long n1, n2;
851 		bool neg1, neg2;
852 
853 		e1 = kv1->hint->v.nh.empty;
854 		e2 = kv2->hint->v.nh.empty;
855 
856 		if (e1 && e2)
857 			return (0);
858 
859 		neg1 = kv1->hint->v.nh.neg;
860 		neg2 = kv2->hint->v.nh.neg;
861 
862 		if (neg1 && !neg2)
863 			return (-1);
864 		if (neg2 && !neg1)
865 			return (+1);
866 
867 		if (e1)
868 			return (neg2 ? +1 : -1);
869 		else if (e2)
870 			return (neg1 ? -1 : +1);
871 
872 
873 		if (use_suffix) {
874 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
875 			if (cmp_res)
876 				return (neg1 ? -cmp_res : cmp_res);
877 		}
878 
879 		n1 = kv1->hint->v.nh.n1;
880 		n2 = kv2->hint->v.nh.n1;
881 		if (n1 < n2)
882 			return (neg1 ? +1 : -1);
883 		else if (n1 > n2)
884 			return (neg1 ? -1 : +1);
885 	}
886 
887 	/* read the numbers from the strings */
888 	if (!key1_read)
889 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
890 	if (!key2_read)
891 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
892 
893 	e1 = ((main1 + frac1) == 0);
894 	e2 = ((main2 + frac2) == 0);
895 
896 	if (e1 && e2)
897 		return (0);
898 
899 	/* we know the result if the signs are different */
900 	if (sign1 < 0 && sign2 >= 0)
901 		return (-1);
902 	if (sign1 >= 0 && sign2 < 0)
903 		return (+1);
904 
905 	if (e1)
906 		return ((sign2 < 0) ? +1 : -1);
907 	else if (e2)
908 		return ((sign1 < 0) ? -1 : +1);
909 
910 	if (use_suffix) {
911 		cmp_res = cmpsuffix(SI1, SI2);
912 		if (cmp_res)
913 			return ((sign1 < 0) ? -cmp_res : cmp_res);
914 	}
915 
916 	/* if both numbers are empty assume that the strings are equal */
917 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
918 		return (0);
919 
920 	/*
921 	 * if the main part is of different size, we know the result
922 	 * (because the leading zeros are removed)
923 	 */
924 	if (main1 < main2)
925 		cmp_res = -1;
926 	else if (main1 > main2)
927 		cmp_res = +1;
928 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
929 	else
930 		cmp_res = wcscmp(smain1, smain2);
931 
932 	/* check fraction */
933 	if (!cmp_res)
934 		cmp_res = wcscmp(sfrac1, sfrac2);
935 
936 	if (!cmp_res)
937 		return (0);
938 
939 	/* reverse result if the signs are negative */
940 	if (sign1 < 0 && sign2 < 0)
941 		cmp_res = -cmp_res;
942 
943 	return (cmp_res);
944 }
945 
946 /*
947  * Implements numeric sort (-n).
948  */
949 static int
950 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
951 {
952 
953 	return (numcoll_impl(kv1, kv2, offset, false));
954 }
955 
956 /*
957  * Implements 'human' numeric sort (-h).
958  */
959 static int
960 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
961 {
962 
963 	return (numcoll_impl(kv1, kv2, offset, true));
964 }
965 
966 /*
967  * Implements random sort (-R).
968  */
969 static int
970 randomcoll(struct key_value *kv1, struct key_value *kv2,
971     size_t offset __unused)
972 {
973 	struct bwstring *s1, *s2;
974 	MD5_CTX ctx1, ctx2;
975 	char *b1, *b2;
976 
977 	s1 = kv1->k;
978 	s2 = kv2->k;
979 
980 	if (debug_sort) {
981 		bwsprintf(stdout, s1, "; k1=<", ">");
982 		bwsprintf(stdout, s2, ", k2=<", ">");
983 	}
984 
985 	if (s1 == s2)
986 		return (0);
987 
988 	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
989 	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
990 
991 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
992 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
993 	b1 = MD5End(&ctx1, NULL);
994 	b2 = MD5End(&ctx2, NULL);
995 	if (b1 == NULL) {
996 		if (b2 == NULL)
997 			return (0);
998 		else {
999 			sort_free(b2);
1000 			return (-1);
1001 		}
1002 	} else if (b2 == NULL) {
1003 		sort_free(b1);
1004 		return (+1);
1005 	} else {
1006 		int cmp_res;
1007 
1008 		cmp_res = strcmp(b1,b2);
1009 		sort_free(b1);
1010 		sort_free(b2);
1011 
1012 		if (!cmp_res)
1013 			cmp_res = bwscoll(s1, s2, 0);
1014 
1015 		return (cmp_res);
1016 	}
1017 }
1018 
1019 /*
1020  * Implements version sort (-V).
1021  */
1022 static int
1023 versioncoll(struct key_value *kv1, struct key_value *kv2,
1024     size_t offset __unused)
1025 {
1026 	struct bwstring *s1, *s2;
1027 
1028 	s1 = kv1->k;
1029 	s2 = kv2->k;
1030 
1031 	if (debug_sort) {
1032 		bwsprintf(stdout, s1, "; k1=<", ">");
1033 		bwsprintf(stdout, s2, ", k2=<", ">");
1034 	}
1035 
1036 	if (s1 == s2)
1037 		return (0);
1038 
1039 	return (vcmp(s1, s2));
1040 }
1041 
1042 /*
1043  * Check for minus infinity
1044  */
1045 static inline bool
1046 huge_minus(double d, int err1)
1047 {
1048 
1049 	if (err1 == ERANGE)
1050 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1051 			return (+1);
1052 
1053 	return (0);
1054 }
1055 
1056 /*
1057  * Check for plus infinity
1058  */
1059 static inline bool
1060 huge_plus(double d, int err1)
1061 {
1062 
1063 	if (err1 == ERANGE)
1064 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1065 			return (+1);
1066 
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Check whether a function is a NAN
1072  */
1073 static bool
1074 is_nan(double d)
1075 {
1076 
1077 	return ((d == NAN) || (isnan(d)));
1078 }
1079 
1080 /*
1081  * Compare two NANs
1082  */
1083 static int
1084 cmp_nans(double d1, double d2)
1085 {
1086 
1087 	if (d1 < d2)
1088 		return (-1);
1089 	if (d2 > d2)
1090 		return (+1);
1091 	return (0);
1092 }
1093 
1094 /*
1095  * Implements general numeric sort (-g).
1096  */
1097 static int
1098 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1099     size_t offset __unused)
1100 {
1101 	double d1, d2;
1102 	int err1, err2;
1103 	bool empty1, empty2, key1_read, key2_read;
1104 
1105 	d1 = d2 = 0;
1106 	err1 = err2 = 0;
1107 	key1_read = key2_read = false;
1108 
1109 	if (debug_sort) {
1110 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1111 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1112 	}
1113 
1114 	if (kv1->hint->status == HS_UNINITIALIZED) {
1115 		errno = 0;
1116 		d1 = bwstod(kv1->k, &empty1);
1117 		err1 = errno;
1118 
1119 		if (empty1)
1120 			kv1->hint->v.gh.notnum = true;
1121 		else if (err1 == 0) {
1122 			kv1->hint->v.gh.d = d1;
1123 			kv1->hint->v.gh.nan = is_nan(d1);
1124 			kv1->hint->status = HS_INITIALIZED;
1125 		} else
1126 			kv1->hint->status = HS_ERROR;
1127 
1128 		key1_read = true;
1129 	}
1130 
1131 	if (kv2->hint->status == HS_UNINITIALIZED) {
1132 		errno = 0;
1133 		d2 = bwstod(kv2->k, &empty2);
1134 		err2 = errno;
1135 
1136 		if (empty2)
1137 			kv2->hint->v.gh.notnum = true;
1138 		else if (err2 == 0) {
1139 			kv2->hint->v.gh.d = d2;
1140 			kv2->hint->v.gh.nan = is_nan(d2);
1141 			kv2->hint->status = HS_INITIALIZED;
1142 		} else
1143 			kv2->hint->status = HS_ERROR;
1144 
1145 		key2_read = true;
1146 	}
1147 
1148 	if (kv1->hint->status == HS_INITIALIZED &&
1149 	    kv2->hint->status == HS_INITIALIZED) {
1150 		if (kv1->hint->v.gh.notnum)
1151 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1152 		else if (kv2->hint->v.gh.notnum)
1153 			return (+1);
1154 
1155 		if (kv1->hint->v.gh.nan)
1156 			return ((kv2->hint->v.gh.nan) ?
1157 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1158 			    -1);
1159 		else if (kv2->hint->v.gh.nan)
1160 			return (+1);
1161 
1162 		d1 = kv1->hint->v.gh.d;
1163 		d2 = kv2->hint->v.gh.d;
1164 
1165 		if (d1 < d2)
1166 			return (-1);
1167 		else if (d1 > d2)
1168 			return (+1);
1169 		else
1170 			return (0);
1171 	}
1172 
1173 	if (!key1_read) {
1174 		errno = 0;
1175 		d1 = bwstod(kv1->k, &empty1);
1176 		err1 = errno;
1177 	}
1178 
1179 	if (!key2_read) {
1180 		errno = 0;
1181 		d2 = bwstod(kv2->k, &empty2);
1182 		err2 = errno;
1183 	}
1184 
1185 	/* Non-value case: */
1186 	if (empty1)
1187 		return (empty2 ? 0 : -1);
1188 	else if (empty2)
1189 		return (+1);
1190 
1191 	/* NAN case */
1192 	if (is_nan(d1))
1193 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1194 	else if (is_nan(d2))
1195 		return (+1);
1196 
1197 	/* Infinities */
1198 	if (err1 == ERANGE || err2 == ERANGE) {
1199 		/* Minus infinity case */
1200 		if (huge_minus(d1, err1)) {
1201 			if (huge_minus(d2, err2)) {
1202 				if (d1 < d2)
1203 					return (-1);
1204 				if (d1 > d2)
1205 					return (+1);
1206 				return (0);
1207 			} else
1208 				return (-1);
1209 
1210 		} else if (huge_minus(d2, err2)) {
1211 			if (huge_minus(d1, err1)) {
1212 				if (d1 < d2)
1213 					return (-1);
1214 				if (d1 > d2)
1215 					return (+1);
1216 				return (0);
1217 			} else
1218 				return (+1);
1219 		}
1220 
1221 		/* Plus infinity case */
1222 		if (huge_plus(d1, err1)) {
1223 			if (huge_plus(d2, err2)) {
1224 				if (d1 < d2)
1225 					return (-1);
1226 				if (d1 > d2)
1227 					return (+1);
1228 				return (0);
1229 			} else
1230 				return (+1);
1231 		} else if (huge_plus(d2, err2)) {
1232 			if (huge_plus(d1, err1)) {
1233 				if (d1 < d2)
1234 					return (-1);
1235 				if (d1 > d2)
1236 					return (+1);
1237 				return (0);
1238 			} else
1239 				return (-1);
1240 		}
1241 	}
1242 
1243 	if (d1 < d2)
1244 		return (-1);
1245 	if (d1 > d2)
1246 		return (+1);
1247 
1248 	return (0);
1249 }
1250 
1251 /*
1252  * Implements month sort (-M).
1253  */
1254 static int
1255 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1256 {
1257 	int val1, val2;
1258 	bool key1_read, key2_read;
1259 
1260 	val1 = val2 = 0;
1261 	key1_read = key2_read = false;
1262 
1263 	if (debug_sort) {
1264 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1265 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1266 	}
1267 
1268 	if (kv1->hint->status == HS_UNINITIALIZED) {
1269 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1270 		key1_read = true;
1271 		kv1->hint->status = HS_INITIALIZED;
1272 	}
1273 
1274 	if (kv2->hint->status == HS_UNINITIALIZED) {
1275 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1276 		key2_read = true;
1277 		kv2->hint->status = HS_INITIALIZED;
1278 	}
1279 
1280 	if (kv1->hint->status == HS_INITIALIZED) {
1281 		val1 = kv1->hint->v.Mh.m;
1282 		key1_read = true;
1283 	}
1284 
1285 	if (kv2->hint->status == HS_INITIALIZED) {
1286 		val2 = kv2->hint->v.Mh.m;
1287 		key2_read = true;
1288 	}
1289 
1290 	if (!key1_read)
1291 		val1 = bws_month_score(kv1->k);
1292 	if (!key2_read)
1293 		val2 = bws_month_score(kv2->k);
1294 
1295 	if (val1 == val2) {
1296 		return (0);
1297 	}
1298 	if (val1 < val2)
1299 		return (-1);
1300 	return (+1);
1301 }
1302