xref: /freebsd/usr.bin/sort/coll.c (revision ec994981447e8a974426660b5071bc405280af73)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43 
44 #include "coll.h"
45 #include "vsort.h"
46 
47 struct key_specs *keys;
48 size_t keys_num = 0;
49 
50 wint_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wint_t symbol_thousands_sep = 0;
53 wint_t symbol_negative_sign = L'-';
54 wint_t symbol_positive_sign = L'+';
55 
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63 
64 /*
65  * Allocate keys array
66  */
67 struct keys_array *
68 keys_array_alloc(void)
69 {
70 	struct keys_array *ka;
71 	size_t sz;
72 
73 	sz = keys_array_size();
74 	ka = sort_calloc(1, sz);
75 
76 	return (ka);
77 }
78 
79 /*
80  * Calculate whether we need key hint space
81  */
82 static size_t
83 key_hint_size(void)
84 {
85 
86 	return (need_hint ? sizeof(struct key_hint) : 0);
87 }
88 
89 /*
90  * Calculate keys array size
91  */
92 size_t
93 keys_array_size(void)
94 {
95 
96 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
97 }
98 
99 /*
100  * Clean data of keys array
101  */
102 void
103 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
104 {
105 
106 	if (ka) {
107 		for (size_t i = 0; i < keys_num; ++i) {
108 			const struct key_value *kv;
109 
110 			kv = get_key_from_keys_array(ka, i);
111 			if (kv->k && kv->k != s)
112 				bwsfree(kv->k);
113 		}
114 		memset(ka, 0, keys_array_size());
115 	}
116 }
117 
118 /*
119  * Get pointer to a key value in the keys set
120  */
121 struct key_value *
122 get_key_from_keys_array(struct keys_array *ka, size_t ind)
123 {
124 
125 	return ((struct key_value *)((caddr_t)ka->key +
126 	    ind * (sizeof(struct key_value) + key_hint_size())));
127 }
128 
129 /*
130  * Set value of a key in the keys set
131  */
132 void
133 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
134 {
135 
136 	if (ka && keys_num > ind) {
137 		struct key_value *kv;
138 
139 		kv = get_key_from_keys_array(ka, ind);
140 
141 		if (kv->k && kv->k != s)
142 			bwsfree(kv->k);
143 		kv->k = s;
144 	}
145 }
146 
147 /*
148  * Initialize a sort list item
149  */
150 struct sort_list_item *
151 sort_list_item_alloc(void)
152 {
153 	struct sort_list_item *si;
154 	size_t sz;
155 
156 	sz = sizeof(struct sort_list_item) + keys_array_size();
157 	si = sort_calloc(1, sz);
158 
159 	return (si);
160 }
161 
162 size_t
163 sort_list_item_size(struct sort_list_item *si)
164 {
165 	size_t ret = 0;
166 
167 	if (si) {
168 		ret = sizeof(struct sort_list_item) + keys_array_size();
169 		if (si->str)
170 			ret += bws_memsize(si->str);
171 		for (size_t i = 0; i < keys_num; ++i) {
172 			const struct key_value *kv;
173 
174 			kv = get_key_from_keys_array(&si->ka, i);
175 
176 			if (kv->k != si->str)
177 				ret += bws_memsize(kv->k);
178 		}
179 	}
180 	return (ret);
181 }
182 
183 /*
184  * Calculate key for a sort list item
185  */
186 static void
187 sort_list_item_make_key(struct sort_list_item *si)
188 {
189 
190 	preproc(si->str, &(si->ka));
191 }
192 
193 /*
194  * Set value of a sort list item.
195  * Return combined string and keys memory size.
196  */
197 void
198 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
199 {
200 
201 	if (si) {
202 		clean_keys_array(si->str, &(si->ka));
203 		if (si->str) {
204 			if (si->str == str) {
205 				/* we are trying to reset the same string */
206 				return;
207 			} else {
208 				bwsfree(si->str);
209 				si->str = NULL;
210 			}
211 		}
212 		si->str = str;
213 		sort_list_item_make_key(si);
214 	}
215 }
216 
217 /*
218  * De-allocate a sort list item object memory
219  */
220 void
221 sort_list_item_clean(struct sort_list_item *si)
222 {
223 
224 	if (si) {
225 		clean_keys_array(si->str, &(si->ka));
226 		if (si->str) {
227 			bwsfree(si->str);
228 			si->str = NULL;
229 		}
230 	}
231 }
232 
233 /*
234  * Skip columns according to specs
235  */
236 static size_t
237 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
238     bool skip_blanks, bool *empty_key)
239 {
240 	if (cols < 1)
241 		return (BWSLEN(s) + 1);
242 
243 	if (skip_blanks)
244 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
245 			++start;
246 
247 	while (start < BWSLEN(s) && cols > 1) {
248 		--cols;
249 		++start;
250 	}
251 
252 	if (start >= BWSLEN(s))
253 		*empty_key = true;
254 
255 	return (start);
256 }
257 
258 /*
259  * Skip fields according to specs
260  */
261 static size_t
262 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
263 {
264 
265 	if (fields < 2) {
266 		if (BWSLEN(s) == 0)
267 			*empty_field = true;
268 		return (0);
269 	} else if (!(sort_opts_vals.tflag)) {
270 		size_t cpos = 0;
271 		bool pb = true;
272 
273 		while (cpos < BWSLEN(s)) {
274 			bool isblank;
275 
276 			isblank = iswblank(BWS_GET(s, cpos));
277 
278 			if (isblank && !pb) {
279 				--fields;
280 				if (fields <= 1)
281 					return (cpos);
282 			}
283 			pb = isblank;
284 			++cpos;
285 		}
286 		if (fields > 1)
287 			*empty_field = true;
288 		return (cpos);
289 	} else {
290 		size_t cpos = 0;
291 
292 		while (cpos < BWSLEN(s)) {
293 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
294 				--fields;
295 				if (fields <= 1)
296 					return (cpos + 1);
297 			}
298 			++cpos;
299 		}
300 		if (fields > 1)
301 			*empty_field = true;
302 		return (cpos);
303 	}
304 }
305 
306 /*
307  * Find fields start
308  */
309 static void
310 find_field_start(const struct bwstring *s, struct key_specs *ks,
311     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
312 {
313 
314 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
315 	if (!*empty_field)
316 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
317 		    ks->pos1b, empty_key);
318 	else
319 		*empty_key = true;
320 }
321 
322 /*
323  * Find end key position
324  */
325 static size_t
326 find_field_end(const struct bwstring *s, struct key_specs *ks)
327 {
328 	size_t f2, next_field_start, pos_end;
329 	bool empty_field, empty_key;
330 
331 	empty_field = false;
332 	empty_key = false;
333 	f2 = ks->f2;
334 
335 	if (f2 == 0)
336 		return (BWSLEN(s) + 1);
337 	else {
338 		if (ks->c2 == 0) {
339 			next_field_start = skip_fields_to_start(s, f2 + 1,
340 			    &empty_field);
341 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
342 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
343 			    next_field_start - 1)))
344 				--next_field_start;
345 		} else
346 			next_field_start = skip_fields_to_start(s, f2,
347 			    &empty_field);
348 	}
349 
350 	if (empty_field || (next_field_start >= BWSLEN(s)))
351 		return (BWSLEN(s) + 1);
352 
353 	if (ks->c2) {
354 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
355 		    ks->pos2b, &empty_key);
356 		if (pos_end < BWSLEN(s))
357 			++pos_end;
358 	} else
359 		pos_end = next_field_start;
360 
361 	return (pos_end);
362 }
363 
364 /*
365  * Cut a field according to the key specs
366  */
367 static struct bwstring *
368 cut_field(const struct bwstring *s, struct key_specs *ks)
369 {
370 	struct bwstring *ret = NULL;
371 
372 	if (s && ks) {
373 		size_t field_start, key_end, key_start, sz;
374 		bool empty_field, empty_key;
375 
376 		field_start = 0;
377 		key_start = 0;
378 		empty_field = false;
379 		empty_key = false;
380 
381 		find_field_start(s, ks, &field_start, &key_start,
382 		    &empty_field, &empty_key);
383 
384 		if (empty_key)
385 			sz = 0;
386 		else {
387 			key_end = find_field_end(s, ks);
388 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
389 		}
390 
391 		ret = bwsalloc(sz);
392 		if (sz)
393 			bwsnocpy(ret, s, key_start, sz);
394 	} else
395 		ret = bwsalloc(0);
396 
397 	return (ret);
398 }
399 
400 /*
401  * Preprocesses a line applying the necessary transformations
402  * specified by command line options and returns the preprocessed
403  * string, which can be used to compare.
404  */
405 int
406 preproc(struct bwstring *s, struct keys_array *ka)
407 {
408 
409 	if (sort_opts_vals.kflag)
410 		for (size_t i = 0; i < keys_num; i++) {
411 			struct bwstring *key;
412 			struct key_specs *kspecs;
413 			struct sort_mods *sm;
414 
415 			kspecs = &(keys[i]);
416 			key = cut_field(s, kspecs);
417 
418 			sm = &(kspecs->sm);
419 			if (sm->dflag)
420 				key = dictionary_order(key);
421 			else if (sm->iflag)
422 				key = ignore_nonprinting(key);
423 			if (sm->fflag || sm->Mflag)
424 				key = ignore_case(key);
425 
426 			set_key_on_keys_array(ka, key, i);
427 		}
428 	else {
429 		struct bwstring *ret = NULL;
430 		struct sort_mods *sm = default_sort_mods;
431 
432 		if (sm->bflag) {
433 			if (ret == NULL)
434 				ret = bwsdup(s);
435 			ret = ignore_leading_blanks(ret);
436 		}
437 		if (sm->dflag) {
438 			if (ret == NULL)
439 				ret = bwsdup(s);
440 			ret = dictionary_order(ret);
441 		} else if (sm->iflag) {
442 			if (ret == NULL)
443 				ret = bwsdup(s);
444 			ret = ignore_nonprinting(ret);
445 		}
446 		if (sm->fflag || sm->Mflag) {
447 			if (ret == NULL)
448 				ret = bwsdup(s);
449 			ret = ignore_case(ret);
450 		}
451 		if (ret == NULL)
452 			set_key_on_keys_array(ka, s, 0);
453 		else
454 			set_key_on_keys_array(ka, ret, 0);
455 	}
456 
457 	return 0;
458 }
459 
460 cmpcoll_t
461 get_sort_func(struct sort_mods *sm)
462 {
463 
464 	if (sm->nflag)
465 		return (numcoll);
466 	else if (sm->hflag)
467 		return (hnumcoll);
468 	else if (sm->gflag)
469 		return (gnumcoll);
470 	else if (sm->Mflag)
471 		return (monthcoll);
472 	else if (sm->Rflag)
473 		return (randomcoll);
474 	else if (sm->Vflag)
475 		return (versioncoll);
476 	else
477 		return (wstrcoll);
478 }
479 
480 /*
481  * Compares the given strings.  Returns a positive number if
482  * the first precedes the second, a negative number if the second is
483  * the preceding one, and zero if they are equal.  This function calls
484  * the underlying collate functions, which done the actual comparison.
485  */
486 int
487 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
488 {
489 	struct key_value *kv1, *kv2;
490 	struct sort_mods *sm;
491 	int res = 0;
492 
493 	for (size_t i = 0; i < keys_num; ++i) {
494 		kv1 = get_key_from_keys_array(ps1, i);
495 		kv2 = get_key_from_keys_array(ps2, i);
496 		sm = &(keys[i].sm);
497 
498 		if (sm->rflag)
499 			res = sm->func(kv2, kv1, offset);
500 		else
501 			res = sm->func(kv1, kv2, offset);
502 
503 		if (res)
504 			break;
505 
506 		/* offset applies to only the first key */
507 		offset = 0;
508 	}
509 	return (res);
510 }
511 
512 /*
513  * Compare two strings.
514  * Plain symbol-by-symbol comparison.
515  */
516 int
517 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
518 {
519 
520 	if (default_sort_mods->rflag) {
521 		const struct bwstring *tmp;
522 
523 		tmp = s1;
524 		s1 = s2;
525 		s2 = tmp;
526 	}
527 
528 	return (bwscoll(s1, s2, 0));
529 }
530 
531 /*
532  * Compare a string and a sort list item, according to the sort specs.
533  */
534 int
535 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
536 {
537 	struct keys_array *ka1;
538 	int ret = 0;
539 
540 	ka1 = keys_array_alloc();
541 
542 	preproc(str1, ka1);
543 
544 	sort_list_item_make_key(*ss2);
545 
546 	if (debug_sort) {
547 		bwsprintf(stdout, str1, "; s1=<", ">");
548 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
549 	}
550 
551 	ret = key_coll(ka1, &((*ss2)->ka), 0);
552 
553 	if (debug_sort)
554 		printf("; cmp1=%d", ret);
555 
556 	clean_keys_array(str1, ka1);
557 	sort_free(ka1);
558 
559 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
560 		ret = top_level_str_coll(str1, ((*ss2)->str));
561 		if (debug_sort)
562 			printf("; cmp2=%d", ret);
563 	}
564 
565 	if (debug_sort)
566 		printf("\n");
567 
568 	return (ret);
569 }
570 
571 /*
572  * Compare two sort list items, according to the sort specs.
573  */
574 int
575 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
576     size_t offset)
577 {
578 	int ret;
579 
580 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
581 
582 	if (debug_sort) {
583 		if (offset)
584 			printf("; offset=%d", (int) offset);
585 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
586 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
587 		printf("; cmp1=%d\n", ret);
588 	}
589 
590 	if (ret)
591 		return (ret);
592 
593 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
594 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
595 		if (debug_sort)
596 			printf("; cmp2=%d\n", ret);
597 	}
598 
599 	return (ret);
600 }
601 
602 /*
603  * Compare two sort list items, according to the sort specs.
604  */
605 int
606 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
607 {
608 
609 	return (list_coll_offset(ss1, ss2, 0));
610 }
611 
612 #define	LSCDEF(N)							\
613 static int 								\
614 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
615 {									\
616 									\
617 	return (list_coll_offset(ss1, ss2, N));				\
618 }
619 
620 LSCDEF(1)
621 LSCDEF(2)
622 LSCDEF(3)
623 LSCDEF(4)
624 LSCDEF(5)
625 LSCDEF(6)
626 LSCDEF(7)
627 LSCDEF(8)
628 LSCDEF(9)
629 LSCDEF(10)
630 LSCDEF(11)
631 LSCDEF(12)
632 LSCDEF(13)
633 LSCDEF(14)
634 LSCDEF(15)
635 LSCDEF(16)
636 LSCDEF(17)
637 LSCDEF(18)
638 LSCDEF(19)
639 LSCDEF(20)
640 
641 listcoll_t
642 get_list_call_func(size_t offset)
643 {
644 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
645 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
646 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
647 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
648 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
649 	    list_coll_18, list_coll_19, list_coll_20 };
650 
651 	if (offset <= 20)
652 		return (lsarray[offset]);
653 
654 	return (list_coll);
655 }
656 
657 /*
658  * Compare two sort list items, only by their original string.
659  */
660 int
661 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
662 {
663 
664 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
665 }
666 
667 /*
668  * Maximum size of a number in the string (before or after decimal point)
669  */
670 #define MAX_NUM_SIZE (128)
671 
672 /*
673  * Set suffix value
674  */
675 static void setsuffix(wchar_t c, unsigned char *si)
676 {
677 	switch (c){
678 	case L'k':
679 	case L'K':
680 		*si = 1;
681 		break;
682 	case L'M':
683 		*si = 2;
684 		break;
685 	case L'G':
686 		*si = 3;
687 		break;
688 	case L'T':
689 		*si = 4;
690 		break;
691 	case L'P':
692 		*si = 5;
693 		break;
694 	case L'E':
695 		*si = 6;
696 		break;
697 	case L'Z':
698 		*si = 7;
699 		break;
700 	case L'Y':
701 		*si = 8;
702 		break;
703 	default:
704 		*si = 0;
705 	}
706 }
707 
708 /*
709  * Read string s and parse the string into a fixed-decimal-point number.
710  * sign equals -1 if the number is negative (explicit plus is not allowed,
711  * according to GNU sort's "info sort".
712  * The number part before decimal point is in the smain, after the decimal
713  * point is in sfrac, tail is the pointer to the remainder of the string.
714  */
715 static int
716 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
717 {
718 	bwstring_iterator s;
719 
720 	s = bws_begin(s0);
721 
722 	/* always end the fraction with zero, even if we have no fraction */
723 	sfrac[0] = 0;
724 
725 	while (iswblank(bws_get_iter_value(s)))
726 		s = bws_iterator_inc(s, 1);
727 
728 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
729 		*sign = -1;
730 		s = bws_iterator_inc(s, 1);
731 	}
732 
733 	// This is '0', not '\0', do not change this
734 	while (iswdigit(bws_get_iter_value(s)) &&
735 	    (bws_get_iter_value(s) == L'0'))
736 		s = bws_iterator_inc(s, 1);
737 
738 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
739 		if (iswdigit(bws_get_iter_value(s))) {
740 			smain[*main_len] = bws_get_iter_value(s);
741 			s = bws_iterator_inc(s, 1);
742 			*main_len += 1;
743 		} else if (symbol_thousands_sep &&
744 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
745 			s = bws_iterator_inc(s, 1);
746 		else
747 			break;
748 	}
749 
750 	smain[*main_len] = 0;
751 
752 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
753 		s = bws_iterator_inc(s, 1);
754 		while (iswdigit(bws_get_iter_value(s)) &&
755 		    *frac_len < MAX_NUM_SIZE) {
756 			sfrac[*frac_len] = bws_get_iter_value(s);
757 			s = bws_iterator_inc(s, 1);
758 			*frac_len += 1;
759 		}
760 		sfrac[*frac_len] = 0;
761 
762 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
763 			--(*frac_len);
764 			sfrac[*frac_len] = L'\0';
765 		}
766 	}
767 
768 	setsuffix(bws_get_iter_value(s),si);
769 
770 	if ((*main_len + *frac_len) == 0)
771 		*sign = 0;
772 
773 	return (0);
774 }
775 
776 /*
777  * Implements string sort.
778  */
779 static int
780 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
781 {
782 
783 	if (debug_sort) {
784 		if (offset)
785 			printf("; offset=%d\n", (int) offset);
786 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
787 		printf("(%zu)", BWSLEN(kv1->k));
788 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
789 		printf("(%zu)", BWSLEN(kv2->k));
790 	}
791 
792 	return (bwscoll(kv1->k, kv2->k, offset));
793 }
794 
795 /*
796  * Compare two suffixes
797  */
798 static inline int
799 cmpsuffix(unsigned char si1, unsigned char si2)
800 {
801 
802 	return ((char)si1 - (char)si2);
803 }
804 
805 /*
806  * Implements numeric sort for -n and -h.
807  */
808 static int
809 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
810     size_t offset __unused, bool use_suffix)
811 {
812 	struct bwstring *s1, *s2;
813 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
814 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
815 	int cmp_res, sign1, sign2;
816 	size_t frac1, frac2, main1, main2;
817 	unsigned char SI1, SI2;
818 	bool e1, e2, key1_read, key2_read;
819 
820 	s1 = kv1->k;
821 	s2 = kv2->k;
822 	sign1 = sign2 = 0;
823 	main1 = main2 = 0;
824 	frac1 = frac2 = 0;
825 
826 	key1_read = key2_read = false;
827 
828 	if (debug_sort) {
829 		bwsprintf(stdout, s1, "; k1=<", ">");
830 		bwsprintf(stdout, s2, ", k2=<", ">");
831 	}
832 
833 	if (s1 == s2)
834 		return (0);
835 
836 	if (kv1->hint->status == HS_UNINITIALIZED) {
837 		/* read the number from the string */
838 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
839 		key1_read = true;
840 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
841 		if(main1 < 1 && frac1 < 1)
842 			kv1->hint->v.nh.empty=true;
843 		kv1->hint->v.nh.si = SI1;
844 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
845 		    HS_INITIALIZED : HS_ERROR;
846 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
847 	}
848 
849 	if (kv2->hint->status == HS_UNINITIALIZED) {
850 		/* read the number from the string */
851 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
852 		key2_read = true;
853 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
854 		if(main2 < 1 && frac2 < 1)
855 			kv2->hint->v.nh.empty=true;
856 		kv2->hint->v.nh.si = SI2;
857 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
858 		    HS_INITIALIZED : HS_ERROR;
859 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
860 	}
861 
862 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
863 	    HS_INITIALIZED) {
864 		unsigned long long n1, n2;
865 		bool neg1, neg2;
866 
867 		e1 = kv1->hint->v.nh.empty;
868 		e2 = kv2->hint->v.nh.empty;
869 
870 		if (e1 && e2)
871 			return (0);
872 
873 		neg1 = kv1->hint->v.nh.neg;
874 		neg2 = kv2->hint->v.nh.neg;
875 
876 		if (neg1 && !neg2)
877 			return (-1);
878 		if (neg2 && !neg1)
879 			return (+1);
880 
881 		if (e1)
882 			return (neg2 ? +1 : -1);
883 		else if (e2)
884 			return (neg1 ? -1 : +1);
885 
886 
887 		if (use_suffix) {
888 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
889 			if (cmp_res)
890 				return (neg1 ? -cmp_res : cmp_res);
891 		}
892 
893 		n1 = kv1->hint->v.nh.n1;
894 		n2 = kv2->hint->v.nh.n1;
895 		if (n1 < n2)
896 			return (neg1 ? +1 : -1);
897 		else if (n1 > n2)
898 			return (neg1 ? -1 : +1);
899 	}
900 
901 	/* read the numbers from the strings */
902 	if (!key1_read)
903 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
904 	if (!key2_read)
905 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
906 
907 	e1 = ((main1 + frac1) == 0);
908 	e2 = ((main2 + frac2) == 0);
909 
910 	if (e1 && e2)
911 		return (0);
912 
913 	/* we know the result if the signs are different */
914 	if (sign1 < 0 && sign2 >= 0)
915 		return (-1);
916 	if (sign1 >= 0 && sign2 < 0)
917 		return (+1);
918 
919 	if (e1)
920 		return ((sign2 < 0) ? +1 : -1);
921 	else if (e2)
922 		return ((sign1 < 0) ? -1 : +1);
923 
924 	if (use_suffix) {
925 		cmp_res = cmpsuffix(SI1, SI2);
926 		if (cmp_res)
927 			return ((sign1 < 0) ? -cmp_res : cmp_res);
928 	}
929 
930 	/* if both numbers are empty assume that the strings are equal */
931 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
932 		return (0);
933 
934 	/*
935 	 * if the main part is of different size, we know the result
936 	 * (because the leading zeros are removed)
937 	 */
938 	if (main1 < main2)
939 		cmp_res = -1;
940 	else if (main1 > main2)
941 		cmp_res = +1;
942 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
943 	else
944 		cmp_res = wcscmp(smain1, smain2);
945 
946 	/* check fraction */
947 	if (!cmp_res)
948 		cmp_res = wcscmp(sfrac1, sfrac2);
949 
950 	if (!cmp_res)
951 		return (0);
952 
953 	/* reverse result if the signs are negative */
954 	if (sign1 < 0 && sign2 < 0)
955 		cmp_res = -cmp_res;
956 
957 	return (cmp_res);
958 }
959 
960 /*
961  * Implements numeric sort (-n).
962  */
963 static int
964 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
965 {
966 
967 	return (numcoll_impl(kv1, kv2, offset, false));
968 }
969 
970 /*
971  * Implements 'human' numeric sort (-h).
972  */
973 static int
974 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
975 {
976 
977 	return (numcoll_impl(kv1, kv2, offset, true));
978 }
979 
980 /* Use hint space to memoize md5 computations, at least. */
981 static void
982 randomcoll_init_hint(struct key_value *kv, void *hash)
983 {
984 
985 	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
986 	kv->hint->status = HS_INITIALIZED;
987 }
988 
989 /*
990  * Implements random sort (-R).
991  */
992 static int
993 randomcoll(struct key_value *kv1, struct key_value *kv2,
994     size_t offset __unused)
995 {
996 	struct bwstring *s1, *s2;
997 	MD5_CTX ctx1, ctx2;
998 	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
999 	int cmp;
1000 
1001 	s1 = kv1->k;
1002 	s2 = kv2->k;
1003 
1004 	if (debug_sort) {
1005 		bwsprintf(stdout, s1, "; k1=<", ">");
1006 		bwsprintf(stdout, s2, ", k2=<", ">");
1007 	}
1008 
1009 	if (s1 == s2)
1010 		return (0);
1011 
1012 	if (kv1->hint->status == HS_INITIALIZED &&
1013 	    kv2->hint->status == HS_INITIALIZED) {
1014 		cmp = memcmp(kv1->hint->v.Rh.cached,
1015 		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1016 		if (cmp != 0)
1017 			return (cmp);
1018 	}
1019 
1020 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1021 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1022 
1023 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1024 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1025 
1026 	MD5Final(hash1, &ctx1);
1027 	MD5Final(hash2, &ctx2);
1028 
1029 	if (kv1->hint->status == HS_UNINITIALIZED)
1030 		randomcoll_init_hint(kv1, hash1);
1031 	if (kv2->hint->status == HS_UNINITIALIZED)
1032 		randomcoll_init_hint(kv2, hash2);
1033 
1034 	return (memcmp(hash1, hash2, sizeof(hash1)));
1035 }
1036 
1037 /*
1038  * Implements version sort (-V).
1039  */
1040 static int
1041 versioncoll(struct key_value *kv1, struct key_value *kv2,
1042     size_t offset __unused)
1043 {
1044 	struct bwstring *s1, *s2;
1045 
1046 	s1 = kv1->k;
1047 	s2 = kv2->k;
1048 
1049 	if (debug_sort) {
1050 		bwsprintf(stdout, s1, "; k1=<", ">");
1051 		bwsprintf(stdout, s2, ", k2=<", ">");
1052 	}
1053 
1054 	if (s1 == s2)
1055 		return (0);
1056 
1057 	return (vcmp(s1, s2));
1058 }
1059 
1060 /*
1061  * Check for minus infinity
1062  */
1063 static inline bool
1064 huge_minus(double d, int err1)
1065 {
1066 
1067 	if (err1 == ERANGE)
1068 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1069 			return (+1);
1070 
1071 	return (0);
1072 }
1073 
1074 /*
1075  * Check for plus infinity
1076  */
1077 static inline bool
1078 huge_plus(double d, int err1)
1079 {
1080 
1081 	if (err1 == ERANGE)
1082 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1083 			return (+1);
1084 
1085 	return (0);
1086 }
1087 
1088 /*
1089  * Check whether a function is a NAN
1090  */
1091 static bool
1092 is_nan(double d)
1093 {
1094 
1095 	return ((d == NAN) || (isnan(d)));
1096 }
1097 
1098 /*
1099  * Compare two NANs
1100  */
1101 static int
1102 cmp_nans(double d1, double d2)
1103 {
1104 
1105 	if (d1 < d2)
1106 		return (-1);
1107 	if (d1 > d2)
1108 		return (+1);
1109 	return (0);
1110 }
1111 
1112 /*
1113  * Implements general numeric sort (-g).
1114  */
1115 static int
1116 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1117     size_t offset __unused)
1118 {
1119 	double d1, d2;
1120 	int err1, err2;
1121 	bool empty1, empty2, key1_read, key2_read;
1122 
1123 	d1 = d2 = 0;
1124 	err1 = err2 = 0;
1125 	key1_read = key2_read = false;
1126 
1127 	if (debug_sort) {
1128 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1129 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1130 	}
1131 
1132 	if (kv1->hint->status == HS_UNINITIALIZED) {
1133 		errno = 0;
1134 		d1 = bwstod(kv1->k, &empty1);
1135 		err1 = errno;
1136 
1137 		if (empty1)
1138 			kv1->hint->v.gh.notnum = true;
1139 		else if (err1 == 0) {
1140 			kv1->hint->v.gh.d = d1;
1141 			kv1->hint->v.gh.nan = is_nan(d1);
1142 			kv1->hint->status = HS_INITIALIZED;
1143 		} else
1144 			kv1->hint->status = HS_ERROR;
1145 
1146 		key1_read = true;
1147 	}
1148 
1149 	if (kv2->hint->status == HS_UNINITIALIZED) {
1150 		errno = 0;
1151 		d2 = bwstod(kv2->k, &empty2);
1152 		err2 = errno;
1153 
1154 		if (empty2)
1155 			kv2->hint->v.gh.notnum = true;
1156 		else if (err2 == 0) {
1157 			kv2->hint->v.gh.d = d2;
1158 			kv2->hint->v.gh.nan = is_nan(d2);
1159 			kv2->hint->status = HS_INITIALIZED;
1160 		} else
1161 			kv2->hint->status = HS_ERROR;
1162 
1163 		key2_read = true;
1164 	}
1165 
1166 	if (kv1->hint->status == HS_INITIALIZED &&
1167 	    kv2->hint->status == HS_INITIALIZED) {
1168 		if (kv1->hint->v.gh.notnum)
1169 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1170 		else if (kv2->hint->v.gh.notnum)
1171 			return (+1);
1172 
1173 		if (kv1->hint->v.gh.nan)
1174 			return ((kv2->hint->v.gh.nan) ?
1175 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1176 			    -1);
1177 		else if (kv2->hint->v.gh.nan)
1178 			return (+1);
1179 
1180 		d1 = kv1->hint->v.gh.d;
1181 		d2 = kv2->hint->v.gh.d;
1182 
1183 		if (d1 < d2)
1184 			return (-1);
1185 		else if (d1 > d2)
1186 			return (+1);
1187 		else
1188 			return (0);
1189 	}
1190 
1191 	if (!key1_read) {
1192 		errno = 0;
1193 		d1 = bwstod(kv1->k, &empty1);
1194 		err1 = errno;
1195 	}
1196 
1197 	if (!key2_read) {
1198 		errno = 0;
1199 		d2 = bwstod(kv2->k, &empty2);
1200 		err2 = errno;
1201 	}
1202 
1203 	/* Non-value case: */
1204 	if (empty1)
1205 		return (empty2 ? 0 : -1);
1206 	else if (empty2)
1207 		return (+1);
1208 
1209 	/* NAN case */
1210 	if (is_nan(d1))
1211 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1212 	else if (is_nan(d2))
1213 		return (+1);
1214 
1215 	/* Infinities */
1216 	if (err1 == ERANGE || err2 == ERANGE) {
1217 		/* Minus infinity case */
1218 		if (huge_minus(d1, err1)) {
1219 			if (huge_minus(d2, err2)) {
1220 				if (d1 < d2)
1221 					return (-1);
1222 				if (d1 > d2)
1223 					return (+1);
1224 				return (0);
1225 			} else
1226 				return (-1);
1227 
1228 		} else if (huge_minus(d2, err2)) {
1229 			if (huge_minus(d1, err1)) {
1230 				if (d1 < d2)
1231 					return (-1);
1232 				if (d1 > d2)
1233 					return (+1);
1234 				return (0);
1235 			} else
1236 				return (+1);
1237 		}
1238 
1239 		/* Plus infinity case */
1240 		if (huge_plus(d1, err1)) {
1241 			if (huge_plus(d2, err2)) {
1242 				if (d1 < d2)
1243 					return (-1);
1244 				if (d1 > d2)
1245 					return (+1);
1246 				return (0);
1247 			} else
1248 				return (+1);
1249 		} else if (huge_plus(d2, err2)) {
1250 			if (huge_plus(d1, err1)) {
1251 				if (d1 < d2)
1252 					return (-1);
1253 				if (d1 > d2)
1254 					return (+1);
1255 				return (0);
1256 			} else
1257 				return (-1);
1258 		}
1259 	}
1260 
1261 	if (d1 < d2)
1262 		return (-1);
1263 	if (d1 > d2)
1264 		return (+1);
1265 
1266 	return (0);
1267 }
1268 
1269 /*
1270  * Implements month sort (-M).
1271  */
1272 static int
1273 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1274 {
1275 	int val1, val2;
1276 	bool key1_read, key2_read;
1277 
1278 	val1 = val2 = 0;
1279 	key1_read = key2_read = false;
1280 
1281 	if (debug_sort) {
1282 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1283 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1284 	}
1285 
1286 	if (kv1->hint->status == HS_UNINITIALIZED) {
1287 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1288 		key1_read = true;
1289 		kv1->hint->status = HS_INITIALIZED;
1290 	}
1291 
1292 	if (kv2->hint->status == HS_UNINITIALIZED) {
1293 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1294 		key2_read = true;
1295 		kv2->hint->status = HS_INITIALIZED;
1296 	}
1297 
1298 	if (kv1->hint->status == HS_INITIALIZED) {
1299 		val1 = kv1->hint->v.Mh.m;
1300 		key1_read = true;
1301 	}
1302 
1303 	if (kv2->hint->status == HS_INITIALIZED) {
1304 		val2 = kv2->hint->v.Mh.m;
1305 		key2_read = true;
1306 	}
1307 
1308 	if (!key1_read)
1309 		val1 = bws_month_score(kv1->k);
1310 	if (!key2_read)
1311 		val2 = bws_month_score(kv2->k);
1312 
1313 	if (val1 == val2) {
1314 		return (0);
1315 	}
1316 	if (val1 < val2)
1317 		return (-1);
1318 	return (+1);
1319 }
1320