xref: /freebsd/usr.bin/sort/coll.c (revision c66bbc91434501b889413241e8f1bd1487a92c5b)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43 
44 #include "coll.h"
45 #include "vsort.h"
46 
47 struct key_specs *keys = NULL;
48 size_t keys_num = 0;
49 
50 wchar_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wchar_t symbol_thousands_sep = 0;
53 wchar_t symbol_negative_sign = L'-';
54 wchar_t symbol_positive_sign = L'+';
55 
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63 
64 /*
65  * Allocate keys array
66  */
67 struct keys_array *
68 keys_array_alloc(void)
69 {
70 	struct keys_array *ka;
71 	size_t sz;
72 
73 	sz = keys_array_size();
74 	ka = sort_malloc(sz);
75 	memset(ka, 0, sz);
76 
77 	return (ka);
78 }
79 
80 /*
81  * Calculate whether we need key hint space
82  */
83 static size_t
84 key_hint_size(void)
85 {
86 
87 	return (need_hint ? sizeof(struct key_hint) : 0);
88 }
89 
90 /*
91  * Calculate keys array size
92  */
93 size_t
94 keys_array_size(void)
95 {
96 
97 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98 }
99 
100 /*
101  * Clean data of keys array
102  */
103 void
104 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105 {
106 
107 	if (ka) {
108 		for (size_t i = 0; i < keys_num; ++i)
109 			if (ka->key[i].k && ka->key[i].k != s)
110 				bwsfree(ka->key[i].k);
111 		memset(ka, 0, keys_array_size());
112 	}
113 }
114 
115 /*
116  * Set value of a key in the keys set
117  */
118 void
119 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120 {
121 
122 	if (ka && keys_num > ind) {
123 		struct key_value *kv;
124 
125 		kv = &(ka->key[ind]);
126 
127 		if (kv->k && kv->k != s)
128 			bwsfree(kv->k);
129 		kv->k = s;
130 	}
131 }
132 
133 /*
134  * Initialize a sort list item
135  */
136 struct sort_list_item *
137 sort_list_item_alloc(void)
138 {
139 	struct sort_list_item *si;
140 	size_t sz;
141 
142 	sz = sizeof(struct sort_list_item) + keys_array_size();
143 	si = sort_malloc(sz);
144 	memset(si, 0, sz);
145 
146 	return (si);
147 }
148 
149 size_t
150 sort_list_item_size(struct sort_list_item *si)
151 {
152 	size_t ret = 0;
153 
154 	if (si) {
155 		ret = sizeof(struct sort_list_item) + keys_array_size();
156 		if (si->str)
157 			ret += bws_memsize(si->str);
158 		for (size_t i = 0; i < keys_num; ++i) {
159 			struct key_value *kv;
160 
161 			kv = &(si->ka.key[i]);
162 
163 			if (kv->k != si->str)
164 				ret += bws_memsize(kv->k);
165 		}
166 	}
167 	return (ret);
168 }
169 
170 /*
171  * Calculate key for a sort list item
172  */
173 static void
174 sort_list_item_make_key(struct sort_list_item *si)
175 {
176 
177 	preproc(si->str, &(si->ka));
178 }
179 
180 /*
181  * Set value of a sort list item.
182  * Return combined string and keys memory size.
183  */
184 void
185 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186 {
187 
188 	if (si) {
189 		clean_keys_array(si->str, &(si->ka));
190 		if (si->str) {
191 			if (si->str == str) {
192 				/* we are trying to reset the same string */
193 				return;
194 			} else {
195 				bwsfree(si->str);
196 				si->str = NULL;
197 			}
198 		}
199 		si->str = str;
200 		sort_list_item_make_key(si);
201 	}
202 }
203 
204 /*
205  * De-allocate a sort list item object memory
206  */
207 void
208 sort_list_item_clean(struct sort_list_item *si)
209 {
210 
211 	if (si) {
212 		clean_keys_array(si->str, &(si->ka));
213 		if (si->str) {
214 			bwsfree(si->str);
215 			si->str = NULL;
216 		}
217 	}
218 }
219 
220 /*
221  * Skip columns according to specs
222  */
223 static size_t
224 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225     bool skip_blanks, bool *empty_key)
226 {
227 	if (cols < 1)
228 		return (BWSLEN(s) + 1);
229 
230 	if (skip_blanks)
231 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232 			++start;
233 
234 	while (start < BWSLEN(s) && cols > 1) {
235 		--cols;
236 		++start;
237 	}
238 
239 	if (start >= BWSLEN(s))
240 		*empty_key = true;
241 
242 	return (start);
243 }
244 
245 /*
246  * Skip fields according to specs
247  */
248 static size_t
249 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250 {
251 
252 	if (fields < 2) {
253 		if (BWSLEN(s) == 0)
254 			*empty_field = true;
255 		return (0);
256 	} else if (!(sort_opts_vals.tflag)) {
257 		size_t cpos = 0;
258 		bool pb = true;
259 
260 		while (cpos < BWSLEN(s)) {
261 			bool isblank;
262 
263 			isblank = iswblank(BWS_GET(s,cpos));
264 
265 			if (isblank && !pb) {
266 				--fields;
267 				if (fields <= 1)
268 					return (cpos);
269 			}
270 			pb = isblank;
271 			++cpos;
272 		}
273 		if (fields > 1)
274 			*empty_field = true;
275 		return (cpos);
276 	} else {
277 		size_t cpos = 0;
278 
279 		while (cpos < BWSLEN(s)) {
280 			if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) {
281 				--fields;
282 				if (fields <= 1)
283 					return (cpos + 1);
284 			}
285 			++cpos;
286 		}
287 		if (fields > 1)
288 			*empty_field = true;
289 		return (cpos);
290 	}
291 }
292 
293 /*
294  * Find fields start
295  */
296 static void
297 find_field_start(const struct bwstring *s, struct key_specs *ks,
298     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299 {
300 
301 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
302 	if (!*empty_field)
303 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
304 		    ks->pos1b, empty_key);
305 	else
306 		*empty_key = true;
307 }
308 
309 /*
310  * Find end key position
311  */
312 static size_t
313 find_field_end(const struct bwstring *s, struct key_specs *ks)
314 {
315 	size_t f2, next_field_start, pos_end;
316 	bool empty_field, empty_key;
317 
318 	pos_end = 0;
319 	next_field_start = 0;
320 	empty_field = false;
321 	empty_key = false;
322 	f2 = ks->f2;
323 
324 	if (f2 == 0)
325 		return (BWSLEN(s) + 1);
326 	else {
327 		if (ks->c2 == 0) {
328 			next_field_start = skip_fields_to_start(s, f2 + 1,
329 			    &empty_field);
330 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
331 			    (sort_opts_vals.field_sep == BWS_GET(s,
332 			    next_field_start - 1)))
333 				--next_field_start;
334 		} else
335 			next_field_start = skip_fields_to_start(s, f2,
336 			    &empty_field);
337 	}
338 
339 	if (empty_field || (next_field_start >= BWSLEN(s)))
340 		return (BWSLEN(s) + 1);
341 
342 	if (ks->c2) {
343 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344 		    ks->pos2b, &empty_key);
345 		if (pos_end < BWSLEN(s))
346 			++pos_end;
347 	} else
348 		pos_end = next_field_start;
349 
350 	return (pos_end);
351 }
352 
353 /*
354  * Cut a field according to the key specs
355  */
356 static struct bwstring *
357 cut_field(const struct bwstring *s, struct key_specs *ks)
358 {
359 	struct bwstring *ret = NULL;
360 
361 	if (s && ks) {
362 		size_t field_start, key_end, key_start, sz;
363 		bool empty_field, empty_key;
364 
365 		field_start = 0;
366 		key_start = 0;
367 		empty_field = false;
368 		empty_key = false;
369 
370 		find_field_start(s, ks, &field_start, &key_start,
371 		    &empty_field, &empty_key);
372 
373 		if (empty_key)
374 			sz = 0;
375 		else {
376 			key_end = find_field_end(s, ks);
377 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
378 		}
379 
380 		ret = bwsalloc(sz);
381 		if (sz)
382 			bwsnocpy(ret, s, key_start, sz);
383 	} else
384 		ret = bwsalloc(0);
385 
386 	return (ret);
387 }
388 
389 /*
390  * Preprocesses a line applying the necessary transformations
391  * specified by command line options and returns the preprocessed
392  * string, which can be used to compare.
393  */
394 int
395 preproc(struct bwstring *s, struct keys_array *ka)
396 {
397 
398 	if (sort_opts_vals.kflag)
399 		for (size_t i = 0; i < keys_num; i++) {
400 			struct bwstring *key;
401 			struct key_specs *kspecs;
402 			struct sort_mods *sm;
403 
404 			kspecs = &(keys[i]);
405 			key = cut_field(s, kspecs);
406 
407 			sm = &(kspecs->sm);
408 			if (sm->dflag)
409 				key = dictionary_order(key);
410 			else if (sm->iflag)
411 				key = ignore_nonprinting(key);
412 			if (sm->fflag || sm->Mflag)
413 				key = ignore_case(key);
414 
415 			set_key_on_keys_array(ka, key, i);
416 		}
417 	else {
418 		struct bwstring *ret = NULL;
419 		struct sort_mods *sm = default_sort_mods;
420 
421 		if (sm->bflag) {
422 			if (ret == NULL)
423 				ret = bwsdup(s);
424 			ret = ignore_leading_blanks(ret);
425 		}
426 		if (sm->dflag) {
427 			if (ret == NULL)
428 				ret = bwsdup(s);
429 			ret = dictionary_order(ret);
430 		} else if (sm->iflag) {
431 			if (ret == NULL)
432 				ret = bwsdup(s);
433 			ret = ignore_nonprinting(ret);
434 		}
435 		if (sm->fflag || sm->Mflag) {
436 			if (ret == NULL)
437 				ret = bwsdup(s);
438 			ret = ignore_case(ret);
439 		}
440 		if (ret == NULL)
441 			set_key_on_keys_array(ka, s, 0);
442 		else
443 			set_key_on_keys_array(ka, ret, 0);
444 	}
445 
446 	return 0;
447 }
448 
449 cmpcoll_t
450 get_sort_func(struct sort_mods *sm)
451 {
452 
453 	if (sm->nflag)
454 		return (numcoll);
455 	else if (sm->hflag)
456 		return (hnumcoll);
457 	else if (sm->gflag)
458 		return (gnumcoll);
459 	else if (sm->Mflag)
460 		return (monthcoll);
461 	else if (sm->Rflag)
462 		return (randomcoll);
463 	else if (sm->Vflag)
464 		return (versioncoll);
465 	else
466 		return (wstrcoll);
467 }
468 
469 /*
470  * Compares the given strings.  Returns a positive number if
471  * the first precedes the second, a negative number if the second is
472  * the preceding one, and zero if they are equal.  This function calls
473  * the underlying collate functions, which done the actual comparison.
474  */
475 int
476 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477 {
478 	struct sort_mods *sm;
479 	int res = 0;
480 
481 	for (size_t i = 0; i < keys_num; ++i) {
482 		sm = &(keys[i].sm);
483 
484 		if (sm->rflag)
485 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486 		else
487 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488 
489 		if (res)
490 			break;
491 
492 		/* offset applies to only the first key */
493 		offset = 0;
494 	}
495 	return (res);
496 }
497 
498 /*
499  * Compare two strings.
500  * Plain symbol-by-symbol comparison.
501  */
502 int
503 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504 {
505 
506 	if (default_sort_mods->rflag) {
507 		const struct bwstring *tmp;
508 
509 		tmp = s1;
510 		s1 = s2;
511 		s2 = tmp;
512 	}
513 
514 	return (bwscoll(s1, s2, 0));
515 }
516 
517 /*
518  * Compare a string and a sort list item, according to the sort specs.
519  */
520 int
521 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522 {
523 	struct keys_array *ka1;
524 	int ret = 0;
525 
526 	ka1 = keys_array_alloc();
527 
528 	preproc(str1, ka1);
529 
530 	sort_list_item_make_key(*ss2);
531 
532 	if (debug_sort) {
533 		bwsprintf(stdout, str1, "; s1=<", ">");
534 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535 	}
536 
537 	ret = key_coll(ka1, &((*ss2)->ka), 0);
538 
539 	if (debug_sort)
540 		printf("; cmp1=%d", ret);
541 
542 	clean_keys_array(str1, ka1);
543 	sort_free(ka1);
544 
545 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546 		ret = top_level_str_coll(str1, ((*ss2)->str));
547 		if (debug_sort)
548 			printf("; cmp2=%d", ret);
549 	}
550 
551 	if (debug_sort)
552 		printf("\n");
553 
554 	return (ret);
555 }
556 
557 /*
558  * Compare two sort list items, according to the sort specs.
559  */
560 int
561 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562     size_t offset)
563 {
564 	int ret;
565 
566 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567 
568 	if (debug_sort) {
569 		if (offset)
570 			printf("; offset=%d", (int) offset);
571 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573 		printf("; cmp1=%d\n", ret);
574 	}
575 
576 	if (ret)
577 		return (ret);
578 
579 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581 		if (debug_sort)
582 			printf("; cmp2=%d\n", ret);
583 	}
584 
585 	return (ret);
586 }
587 
588 /*
589  * Compare two sort list items, according to the sort specs.
590  */
591 int
592 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593 {
594 
595 	return (list_coll_offset(ss1, ss2, 0));
596 }
597 
598 #define LSCDEF(N) 											\
599 static int 												\
600 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
601 {													\
602 													\
603 	return (list_coll_offset(ss1, ss2, N));								\
604 }
605 
606 LSCDEF(1)
607 LSCDEF(2)
608 LSCDEF(3)
609 LSCDEF(4)
610 LSCDEF(5)
611 LSCDEF(6)
612 LSCDEF(7)
613 LSCDEF(8)
614 LSCDEF(9)
615 LSCDEF(10)
616 LSCDEF(11)
617 LSCDEF(12)
618 LSCDEF(13)
619 LSCDEF(14)
620 LSCDEF(15)
621 LSCDEF(16)
622 LSCDEF(17)
623 LSCDEF(18)
624 LSCDEF(19)
625 LSCDEF(20)
626 
627 listcoll_t
628 get_list_call_func(size_t offset)
629 {
630 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635 	    list_coll_18, list_coll_19, list_coll_20 };
636 
637 	if (offset <= 20)
638 		return (lsarray[offset]);
639 
640 	return (list_coll);
641 }
642 
643 /*
644  * Compare two sort list items, only by their original string.
645  */
646 int
647 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648 {
649 
650 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651 }
652 
653 /*
654  * Maximum size of a number in the string (before or after decimal point)
655  */
656 #define MAX_NUM_SIZE (128)
657 
658 /*
659  * Set suffix value
660  */
661 static void setsuffix(wchar_t c, unsigned char *si)
662 {
663 	switch (c){
664 	case L'k':
665 	case L'K':
666 		*si = 1;
667 		break;
668 	case L'M':
669 		*si = 2;
670 		break;
671 	case L'G':
672 		*si = 3;
673 		break;
674 	case L'T':
675 		*si = 4;
676 		break;
677 	case L'P':
678 		*si = 5;
679 		break;
680 	case L'E':
681 		*si = 6;
682 		break;
683 	case L'Z':
684 		*si = 7;
685 		break;
686 	case L'Y':
687 		*si = 8;
688 		break;
689 	default:
690 		*si = 0;
691 	};
692 }
693 
694 /*
695  * Read string s and parse the string into a fixed-decimal-point number.
696  * sign equals -1 if the number is negative (explicit plus is not allowed,
697  * according to GNU sort's "info sort".
698  * The number part before decimal point is in the smain, after the decimal
699  * point is in sfrac, tail is the pointer to the remainder of the string.
700  */
701 static int
702 read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si)
703 {
704 	bwstring_iterator s;
705 
706 	s = bws_begin(s0);
707 
708 	/* always end the fraction with zero, even if we have no fraction */
709 	sfrac[0] = 0;
710 
711 	while (iswblank(bws_get_iter_value(s)))
712 		s = bws_iterator_inc(s, 1);
713 
714 	if (bws_get_iter_value(s) == symbol_negative_sign) {
715 		*sign = -1;
716 		s = bws_iterator_inc(s, 1);
717 	}
718 
719 	// This is '0', not '\0', do not change this
720 	while (iswdigit(bws_get_iter_value(s)) &&
721 	    (bws_get_iter_value(s) == L'0'))
722 		s = bws_iterator_inc(s, 1);
723 
724 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725 		if (iswdigit(bws_get_iter_value(s))) {
726 			smain[*main_len] = bws_get_iter_value(s);
727 			s = bws_iterator_inc(s, 1);
728 			*main_len += 1;
729 		} else if (symbol_thousands_sep &&
730 		    (bws_get_iter_value(s) == symbol_thousands_sep))
731 			s = bws_iterator_inc(s, 1);
732 		else
733 			break;
734 	}
735 
736 	smain[*main_len] = 0;
737 
738 	if (bws_get_iter_value(s) == symbol_decimal_point) {
739 		s = bws_iterator_inc(s, 1);
740 		while (iswdigit(bws_get_iter_value(s)) &&
741 		    *frac_len < MAX_NUM_SIZE) {
742 			sfrac[*frac_len] = bws_get_iter_value(s);
743 			s = bws_iterator_inc(s, 1);
744 			*frac_len += 1;
745 		}
746 		sfrac[*frac_len] = 0;
747 
748 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749 			--(*frac_len);
750 			sfrac[*frac_len] = L'\0';
751 		}
752 	}
753 
754 	setsuffix(bws_get_iter_value(s),si);
755 
756 	if ((*main_len + *frac_len) == 0)
757 		*sign = 0;
758 
759 	return (0);
760 }
761 
762 /*
763  * Implements string sort.
764  */
765 static int
766 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767 {
768 
769 	if (debug_sort) {
770 		if (offset)
771 			printf("; offset=%d\n", (int) offset);
772 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
773 		printf("(%zu)", BWSLEN(kv1->k));
774 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
775 		printf("(%zu)", BWSLEN(kv2->k));
776 	}
777 
778 	return (bwscoll(kv1->k, kv2->k, offset));
779 }
780 
781 /*
782  * Compare two suffixes
783  */
784 static inline int
785 cmpsuffix(unsigned char si1, unsigned char si2)
786 {
787 
788 	return ((char)si1 - (char)si2);
789 }
790 
791 /*
792  * Implements numeric sort for -n and -h.
793  */
794 static int
795 numcoll_impl(struct key_value *kv1, struct key_value *kv2, size_t offset, bool use_suffix)
796 {
797 	struct bwstring *s1, *s2;
798 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
799 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
800 	int cmp_res, frac1, frac2, main1, main2, sign1, sign2;
801 	unsigned char SI1, SI2;
802 	bool e1, e2, key1_read, key2_read;
803 
804 	s1 = kv1->k;
805 	s2 = kv2->k;
806 	sign1 = sign2 = 0;
807 	main1 = main2 = 0;
808 	frac1 = frac2 = 0;
809 
810 	cmp_res = 0;
811 	key1_read = key2_read = false;
812 
813 	UNUSED_ARG(offset);
814 
815 	if (debug_sort) {
816 		bwsprintf(stdout, s1, "; k1=<", ">");
817 		bwsprintf(stdout, s2, ", k2=<", ">");
818 	}
819 
820 	if (s1 == s2)
821 		return (0);
822 
823 	if (kv1->hint->status == HS_UNINITIALIZED) {
824 		/* read the number from the string */
825 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
826 		key1_read = true;
827 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
828 		if(main1 < 1 && frac1 < 1)
829 			kv1->hint->v.nh.empty=true;
830 		kv1->hint->v.nh.si = SI1;
831 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
832 		    HS_INITIALIZED : HS_ERROR;
833 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
834 	}
835 
836 	if (kv2->hint->status == HS_UNINITIALIZED) {
837 		/* read the number from the string */
838 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
839 		key2_read = true;
840 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
841 		if(main2 < 1 && frac2 < 1)
842 			kv2->hint->v.nh.empty=true;
843 		kv2->hint->v.nh.si = SI2;
844 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
845 		    HS_INITIALIZED : HS_ERROR;
846 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
847 	}
848 
849 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
850 	    HS_INITIALIZED) {
851 		unsigned long long n1, n2;
852 		bool neg1, neg2;
853 
854 		e1 = kv1->hint->v.nh.empty;
855 		e2 = kv2->hint->v.nh.empty;
856 
857 		if (e1 && e2)
858 			return (0);
859 
860 		neg1 = kv1->hint->v.nh.neg;
861 		neg2 = kv2->hint->v.nh.neg;
862 
863 		if (neg1 && !neg2)
864 			return (-1);
865 		if (neg2 && !neg1)
866 			return (+1);
867 
868 		if (e1)
869 			return (neg2 ? +1 : -1);
870 		else if (e2)
871 			return (neg1 ? -1 : +1);
872 
873 
874 		if (use_suffix) {
875 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
876 			if (cmp_res)
877 				return (neg1 ? -cmp_res : cmp_res);
878 		}
879 
880 		n1 = kv1->hint->v.nh.n1;
881 		n2 = kv2->hint->v.nh.n1;
882 		if (n1 < n2)
883 			return (neg1 ? +1 : -1);
884 		else if (n1 > n2)
885 			return (neg1 ? -1 : +1);
886 	}
887 
888 	/* read the numbers from the strings */
889 	if (!key1_read)
890 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
891 	if (!key2_read)
892 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
893 
894 	e1 = ((main1 + frac1) == 0);
895 	e2 = ((main2 + frac2) == 0);
896 
897 	if (e1 && e2)
898 		return (0);
899 
900 	/* we know the result if the signs are different */
901 	if (sign1 < 0 && sign2 >= 0)
902 		return (-1);
903 	if (sign1 >= 0 && sign2 < 0)
904 		return (+1);
905 
906 	if (e1)
907 		return ((sign2 < 0) ? +1 : -1);
908 	else if (e2)
909 		return ((sign1 < 0) ? -1 : +1);
910 
911 	if (use_suffix) {
912 		cmp_res = cmpsuffix(SI1, SI2);
913 		if (cmp_res)
914 			return ((sign1 < 0) ? -cmp_res : cmp_res);
915 	}
916 
917 	/* if both numbers are empty assume that the strings are equal */
918 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
919 		return (0);
920 
921 	/*
922 	 * if the main part is of different size, we know the result
923 	 * (because the leading zeros are removed)
924 	 */
925 	if (main1 < main2)
926 		cmp_res = -1;
927 	else if (main1 > main2)
928 		cmp_res = +1;
929 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
930 	else
931 		cmp_res = wcscmp(smain1, smain2);
932 
933 	/* check fraction */
934 	if (!cmp_res)
935 		cmp_res = wcscmp(sfrac1, sfrac2);
936 
937 	if (!cmp_res)
938 		return (0);
939 
940 	/* reverse result if the signs are negative */
941 	if (sign1 < 0 && sign2 < 0)
942 		cmp_res = -cmp_res;
943 
944 	return (cmp_res);
945 }
946 
947 /*
948  * Implements numeric sort (-n).
949  */
950 static int
951 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
952 {
953 
954 	return (numcoll_impl(kv1, kv2, offset, false));
955 }
956 
957 /*
958  * Implements 'human' numeric sort (-h).
959  */
960 static int
961 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
962 {
963 
964 	return (numcoll_impl(kv1, kv2, offset, true));
965 }
966 
967 /*
968  * Implements random sort (-R).
969  */
970 static int
971 randomcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
972 {
973 	struct bwstring *s1, *s2;
974 	MD5_CTX ctx1, ctx2;
975 	char *b1, *b2;
976 
977 	UNUSED_ARG(offset);
978 
979 	s1 = kv1->k;
980 	s2 = kv2->k;
981 
982 	if (debug_sort) {
983 		bwsprintf(stdout, s1, "; k1=<", ">");
984 		bwsprintf(stdout, s2, ", k2=<", ">");
985 	}
986 
987 	if (s1 == s2)
988 		return (0);
989 
990 	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
991 	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
992 
993 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
994 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
995 	b1 = MD5End(&ctx1, NULL);
996 	b2 = MD5End(&ctx2, NULL);
997 	if (b1 == NULL) {
998 		if (b2 == NULL)
999 			return (0);
1000 		else {
1001 			sort_free(b2);
1002 			return (-1);
1003 		}
1004 	} else if (b2 == NULL) {
1005 		sort_free(b1);
1006 		return (+1);
1007 	} else {
1008 		int cmp_res;
1009 
1010 		cmp_res = strcmp(b1,b2);
1011 		sort_free(b1);
1012 		sort_free(b2);
1013 
1014 		if (!cmp_res)
1015 			cmp_res = bwscoll(s1, s2, 0);
1016 
1017 		return (cmp_res);
1018 	}
1019 }
1020 
1021 /*
1022  * Implements version sort (-V).
1023  */
1024 static int
1025 versioncoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1026 {
1027 	struct bwstring *s1, *s2;
1028 
1029 	UNUSED_ARG(offset);
1030 
1031 	s1 = kv1->k;
1032 	s2 = kv2->k;
1033 
1034 	if (debug_sort) {
1035 		bwsprintf(stdout, s1, "; k1=<", ">");
1036 		bwsprintf(stdout, s2, ", k2=<", ">");
1037 	}
1038 
1039 	if (s1 == s2)
1040 		return (0);
1041 
1042 	return (vcmp(s1, s2));
1043 }
1044 
1045 /*
1046  * Check for minus infinity
1047  */
1048 static inline bool
1049 huge_minus(double d, int err1)
1050 {
1051 
1052 	if (err1 == ERANGE)
1053 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1054 			return (+1);
1055 
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Check for plus infinity
1061  */
1062 static inline bool
1063 huge_plus(double d, int err1)
1064 {
1065 
1066 	if (err1 == ERANGE)
1067 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1068 			return (+1);
1069 
1070 	return (0);
1071 }
1072 
1073 /*
1074  * Check whether a function is a NAN
1075  */
1076 static bool
1077 is_nan(double d)
1078 {
1079 
1080 	return ((d == NAN) || (isnan(d)));
1081 }
1082 
1083 /*
1084  * Compare two NANs
1085  */
1086 static int
1087 cmp_nans(double d1, double d2)
1088 {
1089 
1090 	if (d1 < d2)
1091 		return (-1);
1092 	if (d2 > d2)
1093 		return (+1);
1094 	return (0);
1095 }
1096 
1097 /*
1098  * Implements general numeric sort (-g).
1099  */
1100 static int
1101 gnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1102 {
1103 	double d1, d2;
1104 	int err1, err2;
1105 	bool empty1, empty2, key1_read, key2_read;
1106 
1107 	d1 = d2 = 0;
1108 	err1 = err2 = 0;
1109 	key1_read = key2_read = false;
1110 
1111 	UNUSED_ARG(offset);
1112 
1113 	if (debug_sort) {
1114 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1115 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1116 	}
1117 
1118 	if (kv1->hint->status == HS_UNINITIALIZED) {
1119 		errno = 0;
1120 		d1 = bwstod(kv1->k, &empty1);
1121 		err1 = errno;
1122 
1123 		if (empty1)
1124 			kv1->hint->v.gh.notnum = true;
1125 		else if (err1 == 0) {
1126 			kv1->hint->v.gh.d = d1;
1127 			kv1->hint->v.gh.nan = is_nan(d1);
1128 			kv1->hint->status = HS_INITIALIZED;
1129 		} else
1130 			kv1->hint->status = HS_ERROR;
1131 
1132 		key1_read = true;
1133 	}
1134 
1135 	if (kv2->hint->status == HS_UNINITIALIZED) {
1136 		errno = 0;
1137 		d2 = bwstod(kv2->k, &empty2);
1138 		err2 = errno;
1139 
1140 		if (empty2)
1141 			kv2->hint->v.gh.notnum = true;
1142 		else if (err2 == 0) {
1143 			kv2->hint->v.gh.d = d2;
1144 			kv2->hint->v.gh.nan = is_nan(d2);
1145 			kv2->hint->status = HS_INITIALIZED;
1146 		} else
1147 			kv2->hint->status = HS_ERROR;
1148 
1149 		key2_read = true;
1150 	}
1151 
1152 	if (kv1->hint->status == HS_INITIALIZED &&
1153 	    kv2->hint->status == HS_INITIALIZED) {
1154 		if (kv1->hint->v.gh.notnum)
1155 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1156 		else if (kv2->hint->v.gh.notnum)
1157 			return (+1);
1158 
1159 		if (kv1->hint->v.gh.nan)
1160 			return ((kv2->hint->v.gh.nan) ?
1161 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1162 			    -1);
1163 		else if (kv2->hint->v.gh.nan)
1164 			return (+1);
1165 
1166 		d1 = kv1->hint->v.gh.d;
1167 		d2 = kv2->hint->v.gh.d;
1168 
1169 		if (d1 < d2)
1170 			return (-1);
1171 		else if (d1 > d2)
1172 			return (+1);
1173 		else
1174 			return (0);
1175 	}
1176 
1177 	if (!key1_read) {
1178 		errno = 0;
1179 		d1 = bwstod(kv1->k, &empty1);
1180 		err1 = errno;
1181 	}
1182 
1183 	if (!key2_read) {
1184 		errno = 0;
1185 		d2 = bwstod(kv2->k, &empty2);
1186 		err2 = errno;
1187 	}
1188 
1189 	/* Non-value case: */
1190 	if (empty1)
1191 		return (empty2 ? 0 : -1);
1192 	else if (empty2)
1193 		return (+1);
1194 
1195 	/* NAN case */
1196 	if (is_nan(d1))
1197 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1198 	else if (is_nan(d2))
1199 		return (+1);
1200 
1201 	/* Infinities */
1202 	if (err1 == ERANGE || err2 == ERANGE) {
1203 		/* Minus infinity case */
1204 		if (huge_minus(d1, err1)) {
1205 			if (huge_minus(d2, err2)) {
1206 				if (d1 < d2)
1207 					return (-1);
1208 				if (d1 > d2)
1209 					return (+1);
1210 				return (0);
1211 			} else
1212 				return (-1);
1213 
1214 		} else if (huge_minus(d2, err2)) {
1215 			if (huge_minus(d1, err1)) {
1216 				if (d1 < d2)
1217 					return (-1);
1218 				if (d1 > d2)
1219 					return (+1);
1220 				return (0);
1221 			} else
1222 				return (+1);
1223 		}
1224 
1225 		/* Plus infinity case */
1226 		if (huge_plus(d1, err1)) {
1227 			if (huge_plus(d2, err2)) {
1228 				if (d1 < d2)
1229 					return (-1);
1230 				if (d1 > d2)
1231 					return (+1);
1232 				return (0);
1233 			} else
1234 				return (+1);
1235 		} else if (huge_plus(d2, err2)) {
1236 			if (huge_plus(d1, err1)) {
1237 				if (d1 < d2)
1238 					return (-1);
1239 				if (d1 > d2)
1240 					return (+1);
1241 				return (0);
1242 			} else
1243 				return (-1);
1244 		}
1245 	}
1246 
1247 	if (d1 < d2)
1248 		return (-1);
1249 	if (d1 > d2)
1250 		return (+1);
1251 
1252 	return (0);
1253 }
1254 
1255 /*
1256  * Implements month sort (-M).
1257  */
1258 static int
1259 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1260 {
1261 	int val1, val2;
1262 	bool key1_read, key2_read;
1263 
1264 	val1 = val2 = 0;
1265 	key1_read = key2_read = false;
1266 
1267 	UNUSED_ARG(offset);
1268 
1269 	if (debug_sort) {
1270 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1271 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1272 	}
1273 
1274 	if (kv1->hint->status == HS_UNINITIALIZED) {
1275 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1276 		key1_read = true;
1277 		kv1->hint->status = HS_INITIALIZED;
1278 	}
1279 
1280 	if (kv2->hint->status == HS_UNINITIALIZED) {
1281 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1282 		key2_read = true;
1283 		kv2->hint->status = HS_INITIALIZED;
1284 	}
1285 
1286 	if (kv1->hint->status == HS_INITIALIZED) {
1287 		val1 = kv1->hint->v.Mh.m;
1288 		key1_read = true;
1289 	}
1290 
1291 	if (kv2->hint->status == HS_INITIALIZED) {
1292 		val2 = kv2->hint->v.Mh.m;
1293 		key2_read = true;
1294 	}
1295 
1296 	if (!key1_read)
1297 		val1 = bws_month_score(kv1->k);
1298 	if (!key2_read)
1299 		val2 = bws_month_score(kv2->k);
1300 
1301 	if (val1 == val2) {
1302 		return (0);
1303 	}
1304 	if (val1 < val2)
1305 		return (-1);
1306 	return (+1);
1307 }
1308