xref: /freebsd/usr.bin/sort/coll.c (revision da5069e1f7daaef1e7157876d6044de6f3a08ce2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/types.h>
34 
35 #include <errno.h>
36 #include <err.h>
37 #include <langinfo.h>
38 #include <limits.h>
39 #include <math.h>
40 #include <md5.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 #include <wctype.h>
45 
46 #include "coll.h"
47 #include "vsort.h"
48 
49 struct key_specs *keys;
50 size_t keys_num = 0;
51 
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57 
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
64 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
65 
66 /*
67  * Allocate keys array
68  */
69 struct keys_array *
70 keys_array_alloc(void)
71 {
72 	struct keys_array *ka;
73 	size_t sz;
74 
75 	sz = keys_array_size();
76 	ka = sort_malloc(sz);
77 	memset(ka, 0, sz);
78 
79 	return (ka);
80 }
81 
82 /*
83  * Calculate whether we need key hint space
84  */
85 static size_t
86 key_hint_size(void)
87 {
88 
89 	return (need_hint ? sizeof(struct key_hint) : 0);
90 }
91 
92 /*
93  * Calculate keys array size
94  */
95 size_t
96 keys_array_size(void)
97 {
98 
99 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
100 }
101 
102 /*
103  * Clean data of keys array
104  */
105 void
106 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
107 {
108 
109 	if (ka) {
110 		for (size_t i = 0; i < keys_num; ++i) {
111 			const struct key_value *kv;
112 
113 			kv = get_key_from_keys_array(ka, i);
114 			if (kv->k && kv->k != s)
115 				bwsfree(kv->k);
116 		}
117 		memset(ka, 0, keys_array_size());
118 	}
119 }
120 
121 /*
122  * Get pointer to a key value in the keys set
123  */
124 struct key_value *
125 get_key_from_keys_array(struct keys_array *ka, size_t ind)
126 {
127 
128 	return ((struct key_value *)((caddr_t)ka->key +
129 	    ind * (sizeof(struct key_value) + key_hint_size())));
130 }
131 
132 /*
133  * Set value of a key in the keys set
134  */
135 void
136 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
137 {
138 
139 	if (ka && keys_num > ind) {
140 		struct key_value *kv;
141 
142 		kv = get_key_from_keys_array(ka, ind);
143 
144 		if (kv->k && kv->k != s)
145 			bwsfree(kv->k);
146 		kv->k = s;
147 	}
148 }
149 
150 /*
151  * Initialize a sort list item
152  */
153 struct sort_list_item *
154 sort_list_item_alloc(void)
155 {
156 	struct sort_list_item *si;
157 	size_t sz;
158 
159 	sz = sizeof(struct sort_list_item) + keys_array_size();
160 	si = sort_malloc(sz);
161 	memset(si, 0, sz);
162 
163 	return (si);
164 }
165 
166 size_t
167 sort_list_item_size(struct sort_list_item *si)
168 {
169 	size_t ret = 0;
170 
171 	if (si) {
172 		ret = sizeof(struct sort_list_item) + keys_array_size();
173 		if (si->str)
174 			ret += bws_memsize(si->str);
175 		for (size_t i = 0; i < keys_num; ++i) {
176 			const struct key_value *kv;
177 
178 			kv = get_key_from_keys_array(&si->ka, i);
179 
180 			if (kv->k != si->str)
181 				ret += bws_memsize(kv->k);
182 		}
183 	}
184 	return (ret);
185 }
186 
187 /*
188  * Calculate key for a sort list item
189  */
190 static void
191 sort_list_item_make_key(struct sort_list_item *si)
192 {
193 
194 	preproc(si->str, &(si->ka));
195 }
196 
197 /*
198  * Set value of a sort list item.
199  * Return combined string and keys memory size.
200  */
201 void
202 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
203 {
204 
205 	if (si) {
206 		clean_keys_array(si->str, &(si->ka));
207 		if (si->str) {
208 			if (si->str == str) {
209 				/* we are trying to reset the same string */
210 				return;
211 			} else {
212 				bwsfree(si->str);
213 				si->str = NULL;
214 			}
215 		}
216 		si->str = str;
217 		sort_list_item_make_key(si);
218 	}
219 }
220 
221 /*
222  * De-allocate a sort list item object memory
223  */
224 void
225 sort_list_item_clean(struct sort_list_item *si)
226 {
227 
228 	if (si) {
229 		clean_keys_array(si->str, &(si->ka));
230 		if (si->str) {
231 			bwsfree(si->str);
232 			si->str = NULL;
233 		}
234 	}
235 }
236 
237 /*
238  * Skip columns according to specs
239  */
240 static size_t
241 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
242     bool skip_blanks, bool *empty_key)
243 {
244 	if (cols < 1)
245 		return (BWSLEN(s) + 1);
246 
247 	if (skip_blanks)
248 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
249 			++start;
250 
251 	while (start < BWSLEN(s) && cols > 1) {
252 		--cols;
253 		++start;
254 	}
255 
256 	if (start >= BWSLEN(s))
257 		*empty_key = true;
258 
259 	return (start);
260 }
261 
262 /*
263  * Skip fields according to specs
264  */
265 static size_t
266 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
267 {
268 
269 	if (fields < 2) {
270 		if (BWSLEN(s) == 0)
271 			*empty_field = true;
272 		return (0);
273 	} else if (!(sort_opts_vals.tflag)) {
274 		size_t cpos = 0;
275 		bool pb = true;
276 
277 		while (cpos < BWSLEN(s)) {
278 			bool isblank;
279 
280 			isblank = iswblank(BWS_GET(s, cpos));
281 
282 			if (isblank && !pb) {
283 				--fields;
284 				if (fields <= 1)
285 					return (cpos);
286 			}
287 			pb = isblank;
288 			++cpos;
289 		}
290 		if (fields > 1)
291 			*empty_field = true;
292 		return (cpos);
293 	} else {
294 		size_t cpos = 0;
295 
296 		while (cpos < BWSLEN(s)) {
297 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
298 				--fields;
299 				if (fields <= 1)
300 					return (cpos + 1);
301 			}
302 			++cpos;
303 		}
304 		if (fields > 1)
305 			*empty_field = true;
306 		return (cpos);
307 	}
308 }
309 
310 /*
311  * Find fields start
312  */
313 static void
314 find_field_start(const struct bwstring *s, struct key_specs *ks,
315     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
316 {
317 
318 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
319 	if (!*empty_field)
320 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
321 		    ks->pos1b, empty_key);
322 	else
323 		*empty_key = true;
324 }
325 
326 /*
327  * Find end key position
328  */
329 static size_t
330 find_field_end(const struct bwstring *s, struct key_specs *ks)
331 {
332 	size_t f2, next_field_start, pos_end;
333 	bool empty_field, empty_key;
334 
335 	empty_field = false;
336 	empty_key = false;
337 	f2 = ks->f2;
338 
339 	if (f2 == 0)
340 		return (BWSLEN(s) + 1);
341 	else {
342 		if (ks->c2 == 0) {
343 			next_field_start = skip_fields_to_start(s, f2 + 1,
344 			    &empty_field);
345 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
346 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
347 			    next_field_start - 1)))
348 				--next_field_start;
349 		} else
350 			next_field_start = skip_fields_to_start(s, f2,
351 			    &empty_field);
352 	}
353 
354 	if (empty_field || (next_field_start >= BWSLEN(s)))
355 		return (BWSLEN(s) + 1);
356 
357 	if (ks->c2) {
358 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
359 		    ks->pos2b, &empty_key);
360 		if (pos_end < BWSLEN(s))
361 			++pos_end;
362 	} else
363 		pos_end = next_field_start;
364 
365 	return (pos_end);
366 }
367 
368 /*
369  * Cut a field according to the key specs
370  */
371 static struct bwstring *
372 cut_field(const struct bwstring *s, struct key_specs *ks)
373 {
374 	struct bwstring *ret = NULL;
375 
376 	if (s && ks) {
377 		size_t field_start, key_end, key_start, sz;
378 		bool empty_field, empty_key;
379 
380 		field_start = 0;
381 		key_start = 0;
382 		empty_field = false;
383 		empty_key = false;
384 
385 		find_field_start(s, ks, &field_start, &key_start,
386 		    &empty_field, &empty_key);
387 
388 		if (empty_key)
389 			sz = 0;
390 		else {
391 			key_end = find_field_end(s, ks);
392 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
393 		}
394 
395 		ret = bwsalloc(sz);
396 		if (sz)
397 			bwsnocpy(ret, s, key_start, sz);
398 	} else
399 		ret = bwsalloc(0);
400 
401 	return (ret);
402 }
403 
404 /*
405  * Preprocesses a line applying the necessary transformations
406  * specified by command line options and returns the preprocessed
407  * string, which can be used to compare.
408  */
409 int
410 preproc(struct bwstring *s, struct keys_array *ka)
411 {
412 
413 	if (sort_opts_vals.kflag)
414 		for (size_t i = 0; i < keys_num; i++) {
415 			struct bwstring *key;
416 			struct key_specs *kspecs;
417 			struct sort_mods *sm;
418 
419 			kspecs = &(keys[i]);
420 			key = cut_field(s, kspecs);
421 
422 			sm = &(kspecs->sm);
423 			if (sm->dflag)
424 				key = dictionary_order(key);
425 			else if (sm->iflag)
426 				key = ignore_nonprinting(key);
427 			if (sm->fflag || sm->Mflag)
428 				key = ignore_case(key);
429 
430 			set_key_on_keys_array(ka, key, i);
431 		}
432 	else {
433 		struct bwstring *ret = NULL;
434 		struct sort_mods *sm = default_sort_mods;
435 
436 		if (sm->bflag) {
437 			if (ret == NULL)
438 				ret = bwsdup(s);
439 			ret = ignore_leading_blanks(ret);
440 		}
441 		if (sm->dflag) {
442 			if (ret == NULL)
443 				ret = bwsdup(s);
444 			ret = dictionary_order(ret);
445 		} else if (sm->iflag) {
446 			if (ret == NULL)
447 				ret = bwsdup(s);
448 			ret = ignore_nonprinting(ret);
449 		}
450 		if (sm->fflag || sm->Mflag) {
451 			if (ret == NULL)
452 				ret = bwsdup(s);
453 			ret = ignore_case(ret);
454 		}
455 		if (ret == NULL)
456 			set_key_on_keys_array(ka, s, 0);
457 		else
458 			set_key_on_keys_array(ka, ret, 0);
459 	}
460 
461 	return 0;
462 }
463 
464 cmpcoll_t
465 get_sort_func(struct sort_mods *sm)
466 {
467 
468 	if (sm->nflag)
469 		return (numcoll);
470 	else if (sm->hflag)
471 		return (hnumcoll);
472 	else if (sm->gflag)
473 		return (gnumcoll);
474 	else if (sm->Mflag)
475 		return (monthcoll);
476 	else if (sm->Rflag)
477 		return (randomcoll);
478 	else if (sm->Vflag)
479 		return (versioncoll);
480 	else
481 		return (wstrcoll);
482 }
483 
484 /*
485  * Compares the given strings.  Returns a positive number if
486  * the first precedes the second, a negative number if the second is
487  * the preceding one, and zero if they are equal.  This function calls
488  * the underlying collate functions, which done the actual comparison.
489  */
490 int
491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
492 {
493 	struct key_value *kv1, *kv2;
494 	struct sort_mods *sm;
495 	int res = 0;
496 
497 	for (size_t i = 0; i < keys_num; ++i) {
498 		kv1 = get_key_from_keys_array(ps1, i);
499 		kv2 = get_key_from_keys_array(ps2, i);
500 		sm = &(keys[i].sm);
501 
502 		if (sm->rflag)
503 			res = sm->func(kv2, kv1, offset);
504 		else
505 			res = sm->func(kv1, kv2, offset);
506 
507 		if (res)
508 			break;
509 
510 		/* offset applies to only the first key */
511 		offset = 0;
512 	}
513 	return (res);
514 }
515 
516 /*
517  * Compare two strings.
518  * Plain symbol-by-symbol comparison.
519  */
520 int
521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
522 {
523 
524 	if (default_sort_mods->rflag) {
525 		const struct bwstring *tmp;
526 
527 		tmp = s1;
528 		s1 = s2;
529 		s2 = tmp;
530 	}
531 
532 	return (bwscoll(s1, s2, 0));
533 }
534 
535 /*
536  * Compare a string and a sort list item, according to the sort specs.
537  */
538 int
539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
540 {
541 	struct keys_array *ka1;
542 	int ret = 0;
543 
544 	ka1 = keys_array_alloc();
545 
546 	preproc(str1, ka1);
547 
548 	sort_list_item_make_key(*ss2);
549 
550 	if (debug_sort) {
551 		bwsprintf(stdout, str1, "; s1=<", ">");
552 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
553 	}
554 
555 	ret = key_coll(ka1, &((*ss2)->ka), 0);
556 
557 	if (debug_sort)
558 		printf("; cmp1=%d", ret);
559 
560 	clean_keys_array(str1, ka1);
561 	sort_free(ka1);
562 
563 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
564 		ret = top_level_str_coll(str1, ((*ss2)->str));
565 		if (debug_sort)
566 			printf("; cmp2=%d", ret);
567 	}
568 
569 	if (debug_sort)
570 		printf("\n");
571 
572 	return (ret);
573 }
574 
575 /*
576  * Compare two sort list items, according to the sort specs.
577  */
578 int
579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
580     size_t offset)
581 {
582 	int ret;
583 
584 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
585 
586 	if (debug_sort) {
587 		if (offset)
588 			printf("; offset=%d", (int) offset);
589 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
590 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
591 		printf("; cmp1=%d\n", ret);
592 	}
593 
594 	if (ret)
595 		return (ret);
596 
597 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
598 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
599 		if (debug_sort)
600 			printf("; cmp2=%d\n", ret);
601 	}
602 
603 	return (ret);
604 }
605 
606 /*
607  * Compare two sort list items, according to the sort specs.
608  */
609 int
610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
611 {
612 
613 	return (list_coll_offset(ss1, ss2, 0));
614 }
615 
616 #define	LSCDEF(N)							\
617 static int 								\
618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
619 {									\
620 									\
621 	return (list_coll_offset(ss1, ss2, N));				\
622 }
623 
624 LSCDEF(1)
625 LSCDEF(2)
626 LSCDEF(3)
627 LSCDEF(4)
628 LSCDEF(5)
629 LSCDEF(6)
630 LSCDEF(7)
631 LSCDEF(8)
632 LSCDEF(9)
633 LSCDEF(10)
634 LSCDEF(11)
635 LSCDEF(12)
636 LSCDEF(13)
637 LSCDEF(14)
638 LSCDEF(15)
639 LSCDEF(16)
640 LSCDEF(17)
641 LSCDEF(18)
642 LSCDEF(19)
643 LSCDEF(20)
644 
645 listcoll_t
646 get_list_call_func(size_t offset)
647 {
648 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
649 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
650 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
651 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
652 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
653 	    list_coll_18, list_coll_19, list_coll_20 };
654 
655 	if (offset <= 20)
656 		return (lsarray[offset]);
657 
658 	return (list_coll);
659 }
660 
661 /*
662  * Compare two sort list items, only by their original string.
663  */
664 int
665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
666 {
667 
668 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
669 }
670 
671 /*
672  * Maximum size of a number in the string (before or after decimal point)
673  */
674 #define MAX_NUM_SIZE (128)
675 
676 /*
677  * Set suffix value
678  */
679 static void setsuffix(wchar_t c, unsigned char *si)
680 {
681 	switch (c){
682 	case L'k':
683 	case L'K':
684 		*si = 1;
685 		break;
686 	case L'M':
687 		*si = 2;
688 		break;
689 	case L'G':
690 		*si = 3;
691 		break;
692 	case L'T':
693 		*si = 4;
694 		break;
695 	case L'P':
696 		*si = 5;
697 		break;
698 	case L'E':
699 		*si = 6;
700 		break;
701 	case L'Z':
702 		*si = 7;
703 		break;
704 	case L'Y':
705 		*si = 8;
706 		break;
707 	default:
708 		*si = 0;
709 	}
710 }
711 
712 /*
713  * Read string s and parse the string into a fixed-decimal-point number.
714  * sign equals -1 if the number is negative (explicit plus is not allowed,
715  * according to GNU sort's "info sort".
716  * The number part before decimal point is in the smain, after the decimal
717  * point is in sfrac, tail is the pointer to the remainder of the string.
718  */
719 static int
720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
721 {
722 	bwstring_iterator s;
723 
724 	s = bws_begin(s0);
725 
726 	/* always end the fraction with zero, even if we have no fraction */
727 	sfrac[0] = 0;
728 
729 	while (iswblank(bws_get_iter_value(s)))
730 		s = bws_iterator_inc(s, 1);
731 
732 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
733 		*sign = -1;
734 		s = bws_iterator_inc(s, 1);
735 	}
736 
737 	// This is '0', not '\0', do not change this
738 	while (iswdigit(bws_get_iter_value(s)) &&
739 	    (bws_get_iter_value(s) == L'0'))
740 		s = bws_iterator_inc(s, 1);
741 
742 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
743 		if (iswdigit(bws_get_iter_value(s))) {
744 			smain[*main_len] = bws_get_iter_value(s);
745 			s = bws_iterator_inc(s, 1);
746 			*main_len += 1;
747 		} else if (symbol_thousands_sep &&
748 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
749 			s = bws_iterator_inc(s, 1);
750 		else
751 			break;
752 	}
753 
754 	smain[*main_len] = 0;
755 
756 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
757 		s = bws_iterator_inc(s, 1);
758 		while (iswdigit(bws_get_iter_value(s)) &&
759 		    *frac_len < MAX_NUM_SIZE) {
760 			sfrac[*frac_len] = bws_get_iter_value(s);
761 			s = bws_iterator_inc(s, 1);
762 			*frac_len += 1;
763 		}
764 		sfrac[*frac_len] = 0;
765 
766 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
767 			--(*frac_len);
768 			sfrac[*frac_len] = L'\0';
769 		}
770 	}
771 
772 	setsuffix(bws_get_iter_value(s),si);
773 
774 	if ((*main_len + *frac_len) == 0)
775 		*sign = 0;
776 
777 	return (0);
778 }
779 
780 /*
781  * Implements string sort.
782  */
783 static int
784 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
785 {
786 
787 	if (debug_sort) {
788 		if (offset)
789 			printf("; offset=%d\n", (int) offset);
790 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
791 		printf("(%zu)", BWSLEN(kv1->k));
792 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
793 		printf("(%zu)", BWSLEN(kv2->k));
794 	}
795 
796 	return (bwscoll(kv1->k, kv2->k, offset));
797 }
798 
799 /*
800  * Compare two suffixes
801  */
802 static inline int
803 cmpsuffix(unsigned char si1, unsigned char si2)
804 {
805 
806 	return ((char)si1 - (char)si2);
807 }
808 
809 /*
810  * Implements numeric sort for -n and -h.
811  */
812 static int
813 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
814     size_t offset __unused, bool use_suffix)
815 {
816 	struct bwstring *s1, *s2;
817 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
818 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
819 	int cmp_res, sign1, sign2;
820 	size_t frac1, frac2, main1, main2;
821 	unsigned char SI1, SI2;
822 	bool e1, e2, key1_read, key2_read;
823 
824 	s1 = kv1->k;
825 	s2 = kv2->k;
826 	sign1 = sign2 = 0;
827 	main1 = main2 = 0;
828 	frac1 = frac2 = 0;
829 
830 	key1_read = key2_read = false;
831 
832 	if (debug_sort) {
833 		bwsprintf(stdout, s1, "; k1=<", ">");
834 		bwsprintf(stdout, s2, ", k2=<", ">");
835 	}
836 
837 	if (s1 == s2)
838 		return (0);
839 
840 	if (kv1->hint->status == HS_UNINITIALIZED) {
841 		/* read the number from the string */
842 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
843 		key1_read = true;
844 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
845 		if(main1 < 1 && frac1 < 1)
846 			kv1->hint->v.nh.empty=true;
847 		kv1->hint->v.nh.si = SI1;
848 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
849 		    HS_INITIALIZED : HS_ERROR;
850 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
851 	}
852 
853 	if (kv2->hint->status == HS_UNINITIALIZED) {
854 		/* read the number from the string */
855 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
856 		key2_read = true;
857 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
858 		if(main2 < 1 && frac2 < 1)
859 			kv2->hint->v.nh.empty=true;
860 		kv2->hint->v.nh.si = SI2;
861 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
862 		    HS_INITIALIZED : HS_ERROR;
863 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
864 	}
865 
866 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
867 	    HS_INITIALIZED) {
868 		unsigned long long n1, n2;
869 		bool neg1, neg2;
870 
871 		e1 = kv1->hint->v.nh.empty;
872 		e2 = kv2->hint->v.nh.empty;
873 
874 		if (e1 && e2)
875 			return (0);
876 
877 		neg1 = kv1->hint->v.nh.neg;
878 		neg2 = kv2->hint->v.nh.neg;
879 
880 		if (neg1 && !neg2)
881 			return (-1);
882 		if (neg2 && !neg1)
883 			return (+1);
884 
885 		if (e1)
886 			return (neg2 ? +1 : -1);
887 		else if (e2)
888 			return (neg1 ? -1 : +1);
889 
890 
891 		if (use_suffix) {
892 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
893 			if (cmp_res)
894 				return (neg1 ? -cmp_res : cmp_res);
895 		}
896 
897 		n1 = kv1->hint->v.nh.n1;
898 		n2 = kv2->hint->v.nh.n1;
899 		if (n1 < n2)
900 			return (neg1 ? +1 : -1);
901 		else if (n1 > n2)
902 			return (neg1 ? -1 : +1);
903 	}
904 
905 	/* read the numbers from the strings */
906 	if (!key1_read)
907 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
908 	if (!key2_read)
909 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
910 
911 	e1 = ((main1 + frac1) == 0);
912 	e2 = ((main2 + frac2) == 0);
913 
914 	if (e1 && e2)
915 		return (0);
916 
917 	/* we know the result if the signs are different */
918 	if (sign1 < 0 && sign2 >= 0)
919 		return (-1);
920 	if (sign1 >= 0 && sign2 < 0)
921 		return (+1);
922 
923 	if (e1)
924 		return ((sign2 < 0) ? +1 : -1);
925 	else if (e2)
926 		return ((sign1 < 0) ? -1 : +1);
927 
928 	if (use_suffix) {
929 		cmp_res = cmpsuffix(SI1, SI2);
930 		if (cmp_res)
931 			return ((sign1 < 0) ? -cmp_res : cmp_res);
932 	}
933 
934 	/* if both numbers are empty assume that the strings are equal */
935 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
936 		return (0);
937 
938 	/*
939 	 * if the main part is of different size, we know the result
940 	 * (because the leading zeros are removed)
941 	 */
942 	if (main1 < main2)
943 		cmp_res = -1;
944 	else if (main1 > main2)
945 		cmp_res = +1;
946 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
947 	else
948 		cmp_res = wcscmp(smain1, smain2);
949 
950 	/* check fraction */
951 	if (!cmp_res)
952 		cmp_res = wcscmp(sfrac1, sfrac2);
953 
954 	if (!cmp_res)
955 		return (0);
956 
957 	/* reverse result if the signs are negative */
958 	if (sign1 < 0 && sign2 < 0)
959 		cmp_res = -cmp_res;
960 
961 	return (cmp_res);
962 }
963 
964 /*
965  * Implements numeric sort (-n).
966  */
967 static int
968 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
969 {
970 
971 	return (numcoll_impl(kv1, kv2, offset, false));
972 }
973 
974 /*
975  * Implements 'human' numeric sort (-h).
976  */
977 static int
978 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
979 {
980 
981 	return (numcoll_impl(kv1, kv2, offset, true));
982 }
983 
984 /* Use hint space to memoize md5 computations, at least. */
985 static void
986 randomcoll_init_hint(struct key_value *kv, void *hash)
987 {
988 
989 	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
990 	kv->hint->status = HS_INITIALIZED;
991 }
992 
993 /*
994  * Implements random sort (-R).
995  */
996 static int
997 randomcoll(struct key_value *kv1, struct key_value *kv2,
998     size_t offset __unused)
999 {
1000 	struct bwstring *s1, *s2;
1001 	MD5_CTX ctx1, ctx2;
1002 	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
1003 	int cmp;
1004 
1005 	s1 = kv1->k;
1006 	s2 = kv2->k;
1007 
1008 	if (debug_sort) {
1009 		bwsprintf(stdout, s1, "; k1=<", ">");
1010 		bwsprintf(stdout, s2, ", k2=<", ">");
1011 	}
1012 
1013 	if (s1 == s2)
1014 		return (0);
1015 
1016 	if (kv1->hint->status == HS_INITIALIZED &&
1017 	    kv2->hint->status == HS_INITIALIZED) {
1018 		cmp = memcmp(kv1->hint->v.Rh.cached,
1019 		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1020 		if (cmp != 0)
1021 			return (cmp);
1022 	}
1023 
1024 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1025 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1026 
1027 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1028 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1029 
1030 	MD5Final(hash1, &ctx1);
1031 	MD5Final(hash2, &ctx2);
1032 
1033 	if (kv1->hint->status == HS_UNINITIALIZED)
1034 		randomcoll_init_hint(kv1, hash1);
1035 	if (kv2->hint->status == HS_UNINITIALIZED)
1036 		randomcoll_init_hint(kv2, hash2);
1037 
1038 	return (memcmp(hash1, hash2, sizeof(hash1)));
1039 }
1040 
1041 /*
1042  * Implements version sort (-V).
1043  */
1044 static int
1045 versioncoll(struct key_value *kv1, struct key_value *kv2,
1046     size_t offset __unused)
1047 {
1048 	struct bwstring *s1, *s2;
1049 
1050 	s1 = kv1->k;
1051 	s2 = kv2->k;
1052 
1053 	if (debug_sort) {
1054 		bwsprintf(stdout, s1, "; k1=<", ">");
1055 		bwsprintf(stdout, s2, ", k2=<", ">");
1056 	}
1057 
1058 	if (s1 == s2)
1059 		return (0);
1060 
1061 	return (vcmp(s1, s2));
1062 }
1063 
1064 /*
1065  * Check for minus infinity
1066  */
1067 static inline bool
1068 huge_minus(double d, int err1)
1069 {
1070 
1071 	if (err1 == ERANGE)
1072 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1073 			return (+1);
1074 
1075 	return (0);
1076 }
1077 
1078 /*
1079  * Check for plus infinity
1080  */
1081 static inline bool
1082 huge_plus(double d, int err1)
1083 {
1084 
1085 	if (err1 == ERANGE)
1086 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1087 			return (+1);
1088 
1089 	return (0);
1090 }
1091 
1092 /*
1093  * Check whether a function is a NAN
1094  */
1095 static bool
1096 is_nan(double d)
1097 {
1098 
1099 	return ((d == NAN) || (isnan(d)));
1100 }
1101 
1102 /*
1103  * Compare two NANs
1104  */
1105 static int
1106 cmp_nans(double d1, double d2)
1107 {
1108 
1109 	if (d1 < d2)
1110 		return (-1);
1111 	if (d1 > d2)
1112 		return (+1);
1113 	return (0);
1114 }
1115 
1116 /*
1117  * Implements general numeric sort (-g).
1118  */
1119 static int
1120 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1121     size_t offset __unused)
1122 {
1123 	double d1, d2;
1124 	int err1, err2;
1125 	bool empty1, empty2, key1_read, key2_read;
1126 
1127 	d1 = d2 = 0;
1128 	err1 = err2 = 0;
1129 	key1_read = key2_read = false;
1130 
1131 	if (debug_sort) {
1132 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1133 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1134 	}
1135 
1136 	if (kv1->hint->status == HS_UNINITIALIZED) {
1137 		errno = 0;
1138 		d1 = bwstod(kv1->k, &empty1);
1139 		err1 = errno;
1140 
1141 		if (empty1)
1142 			kv1->hint->v.gh.notnum = true;
1143 		else if (err1 == 0) {
1144 			kv1->hint->v.gh.d = d1;
1145 			kv1->hint->v.gh.nan = is_nan(d1);
1146 			kv1->hint->status = HS_INITIALIZED;
1147 		} else
1148 			kv1->hint->status = HS_ERROR;
1149 
1150 		key1_read = true;
1151 	}
1152 
1153 	if (kv2->hint->status == HS_UNINITIALIZED) {
1154 		errno = 0;
1155 		d2 = bwstod(kv2->k, &empty2);
1156 		err2 = errno;
1157 
1158 		if (empty2)
1159 			kv2->hint->v.gh.notnum = true;
1160 		else if (err2 == 0) {
1161 			kv2->hint->v.gh.d = d2;
1162 			kv2->hint->v.gh.nan = is_nan(d2);
1163 			kv2->hint->status = HS_INITIALIZED;
1164 		} else
1165 			kv2->hint->status = HS_ERROR;
1166 
1167 		key2_read = true;
1168 	}
1169 
1170 	if (kv1->hint->status == HS_INITIALIZED &&
1171 	    kv2->hint->status == HS_INITIALIZED) {
1172 		if (kv1->hint->v.gh.notnum)
1173 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1174 		else if (kv2->hint->v.gh.notnum)
1175 			return (+1);
1176 
1177 		if (kv1->hint->v.gh.nan)
1178 			return ((kv2->hint->v.gh.nan) ?
1179 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1180 			    -1);
1181 		else if (kv2->hint->v.gh.nan)
1182 			return (+1);
1183 
1184 		d1 = kv1->hint->v.gh.d;
1185 		d2 = kv2->hint->v.gh.d;
1186 
1187 		if (d1 < d2)
1188 			return (-1);
1189 		else if (d1 > d2)
1190 			return (+1);
1191 		else
1192 			return (0);
1193 	}
1194 
1195 	if (!key1_read) {
1196 		errno = 0;
1197 		d1 = bwstod(kv1->k, &empty1);
1198 		err1 = errno;
1199 	}
1200 
1201 	if (!key2_read) {
1202 		errno = 0;
1203 		d2 = bwstod(kv2->k, &empty2);
1204 		err2 = errno;
1205 	}
1206 
1207 	/* Non-value case: */
1208 	if (empty1)
1209 		return (empty2 ? 0 : -1);
1210 	else if (empty2)
1211 		return (+1);
1212 
1213 	/* NAN case */
1214 	if (is_nan(d1))
1215 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1216 	else if (is_nan(d2))
1217 		return (+1);
1218 
1219 	/* Infinities */
1220 	if (err1 == ERANGE || err2 == ERANGE) {
1221 		/* Minus infinity case */
1222 		if (huge_minus(d1, err1)) {
1223 			if (huge_minus(d2, err2)) {
1224 				if (d1 < d2)
1225 					return (-1);
1226 				if (d1 > d2)
1227 					return (+1);
1228 				return (0);
1229 			} else
1230 				return (-1);
1231 
1232 		} else if (huge_minus(d2, err2)) {
1233 			if (huge_minus(d1, err1)) {
1234 				if (d1 < d2)
1235 					return (-1);
1236 				if (d1 > d2)
1237 					return (+1);
1238 				return (0);
1239 			} else
1240 				return (+1);
1241 		}
1242 
1243 		/* Plus infinity case */
1244 		if (huge_plus(d1, err1)) {
1245 			if (huge_plus(d2, err2)) {
1246 				if (d1 < d2)
1247 					return (-1);
1248 				if (d1 > d2)
1249 					return (+1);
1250 				return (0);
1251 			} else
1252 				return (+1);
1253 		} else if (huge_plus(d2, err2)) {
1254 			if (huge_plus(d1, err1)) {
1255 				if (d1 < d2)
1256 					return (-1);
1257 				if (d1 > d2)
1258 					return (+1);
1259 				return (0);
1260 			} else
1261 				return (-1);
1262 		}
1263 	}
1264 
1265 	if (d1 < d2)
1266 		return (-1);
1267 	if (d1 > d2)
1268 		return (+1);
1269 
1270 	return (0);
1271 }
1272 
1273 /*
1274  * Implements month sort (-M).
1275  */
1276 static int
1277 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1278 {
1279 	int val1, val2;
1280 	bool key1_read, key2_read;
1281 
1282 	val1 = val2 = 0;
1283 	key1_read = key2_read = false;
1284 
1285 	if (debug_sort) {
1286 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1287 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1288 	}
1289 
1290 	if (kv1->hint->status == HS_UNINITIALIZED) {
1291 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1292 		key1_read = true;
1293 		kv1->hint->status = HS_INITIALIZED;
1294 	}
1295 
1296 	if (kv2->hint->status == HS_UNINITIALIZED) {
1297 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1298 		key2_read = true;
1299 		kv2->hint->status = HS_INITIALIZED;
1300 	}
1301 
1302 	if (kv1->hint->status == HS_INITIALIZED) {
1303 		val1 = kv1->hint->v.Mh.m;
1304 		key1_read = true;
1305 	}
1306 
1307 	if (kv2->hint->status == HS_INITIALIZED) {
1308 		val2 = kv2->hint->v.Mh.m;
1309 		key2_read = true;
1310 	}
1311 
1312 	if (!key1_read)
1313 		val1 = bws_month_score(kv1->k);
1314 	if (!key2_read)
1315 		val2 = bws_month_score(kv2->k);
1316 
1317 	if (val1 == val2) {
1318 		return (0);
1319 	}
1320 	if (val1 < val2)
1321 		return (-1);
1322 	return (+1);
1323 }
1324