xref: /freebsd/usr.bin/sort/coll.c (revision 5e3934b15a2741b2de6b217e77dc9d798d740804)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/types.h>
31 
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <md5.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 
43 #include "coll.h"
44 #include "vsort.h"
45 
46 struct key_specs *keys;
47 size_t keys_num = 0;
48 
49 wint_t symbol_decimal_point = L'.';
50 /* there is no default thousands separator in collate rules: */
51 wint_t symbol_thousands_sep = 0;
52 wint_t symbol_negative_sign = L'-';
53 wint_t symbol_positive_sign = L'+';
54 
55 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58 static int numcoll(struct key_value*, struct key_value *, size_t offset);
59 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62 
63 /*
64  * Allocate keys array
65  */
66 struct keys_array *
keys_array_alloc(void)67 keys_array_alloc(void)
68 {
69 	struct keys_array *ka;
70 	size_t sz;
71 
72 	sz = keys_array_size();
73 	ka = sort_calloc(1, sz);
74 
75 	return (ka);
76 }
77 
78 /*
79  * Calculate whether we need key hint space
80  */
81 static size_t
key_hint_size(void)82 key_hint_size(void)
83 {
84 
85 	return (need_hint ? sizeof(struct key_hint) : 0);
86 }
87 
88 /*
89  * Calculate keys array size
90  */
91 size_t
keys_array_size(void)92 keys_array_size(void)
93 {
94 
95 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
96 }
97 
98 /*
99  * Clean data of keys array
100  */
101 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)102 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
103 {
104 
105 	if (ka) {
106 		for (size_t i = 0; i < keys_num; ++i) {
107 			const struct key_value *kv;
108 
109 			kv = get_key_from_keys_array(ka, i);
110 			if (kv->k && kv->k != s)
111 				bwsfree(kv->k);
112 		}
113 		memset(ka, 0, keys_array_size());
114 	}
115 }
116 
117 /*
118  * Get pointer to a key value in the keys set
119  */
120 struct key_value *
get_key_from_keys_array(struct keys_array * ka,size_t ind)121 get_key_from_keys_array(struct keys_array *ka, size_t ind)
122 {
123 
124 	return ((struct key_value *)((caddr_t)ka->key +
125 	    ind * (sizeof(struct key_value) + key_hint_size())));
126 }
127 
128 /*
129  * Set value of a key in the keys set
130  */
131 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)132 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
133 {
134 
135 	if (ka && keys_num > ind) {
136 		struct key_value *kv;
137 
138 		kv = get_key_from_keys_array(ka, ind);
139 
140 		if (kv->k && kv->k != s)
141 			bwsfree(kv->k);
142 		kv->k = s;
143 	}
144 }
145 
146 /*
147  * Initialize a sort list item
148  */
149 struct sort_list_item *
sort_list_item_alloc(void)150 sort_list_item_alloc(void)
151 {
152 	struct sort_list_item *si;
153 	size_t sz;
154 
155 	sz = sizeof(struct sort_list_item) + keys_array_size();
156 	si = sort_calloc(1, sz);
157 
158 	return (si);
159 }
160 
161 size_t
sort_list_item_size(struct sort_list_item * si)162 sort_list_item_size(struct sort_list_item *si)
163 {
164 	size_t ret = 0;
165 
166 	if (si) {
167 		ret = sizeof(struct sort_list_item) + keys_array_size();
168 		if (si->str)
169 			ret += bws_memsize(si->str);
170 		for (size_t i = 0; i < keys_num; ++i) {
171 			const struct key_value *kv;
172 
173 			kv = get_key_from_keys_array(&si->ka, i);
174 
175 			if (kv->k != si->str)
176 				ret += bws_memsize(kv->k);
177 		}
178 	}
179 	return (ret);
180 }
181 
182 /*
183  * Calculate key for a sort list item
184  */
185 static void
sort_list_item_make_key(struct sort_list_item * si)186 sort_list_item_make_key(struct sort_list_item *si)
187 {
188 
189 	preproc(si->str, &(si->ka));
190 }
191 
192 /*
193  * Set value of a sort list item.
194  * Return combined string and keys memory size.
195  */
196 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)197 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
198 {
199 
200 	if (si) {
201 		clean_keys_array(si->str, &(si->ka));
202 		if (si->str) {
203 			if (si->str == str) {
204 				/* we are trying to reset the same string */
205 				return;
206 			} else {
207 				bwsfree(si->str);
208 				si->str = NULL;
209 			}
210 		}
211 		si->str = str;
212 		sort_list_item_make_key(si);
213 	}
214 }
215 
216 /*
217  * De-allocate a sort list item object memory
218  */
219 void
sort_list_item_clean(struct sort_list_item * si)220 sort_list_item_clean(struct sort_list_item *si)
221 {
222 
223 	if (si) {
224 		clean_keys_array(si->str, &(si->ka));
225 		if (si->str) {
226 			bwsfree(si->str);
227 			si->str = NULL;
228 		}
229 	}
230 }
231 
232 /*
233  * Skip columns according to specs
234  */
235 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)236 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
237     bool skip_blanks, bool *empty_key)
238 {
239 	if (cols < 1)
240 		return (BWSLEN(s) + 1);
241 
242 	if (skip_blanks)
243 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
244 			++start;
245 
246 	while (start < BWSLEN(s) && cols > 1) {
247 		--cols;
248 		++start;
249 	}
250 
251 	if (start >= BWSLEN(s))
252 		*empty_key = true;
253 
254 	return (start);
255 }
256 
257 /*
258  * Skip fields according to specs
259  */
260 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)261 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
262 {
263 
264 	if (fields < 2) {
265 		if (BWSLEN(s) == 0)
266 			*empty_field = true;
267 		return (0);
268 	} else if (!(sort_opts_vals.tflag)) {
269 		size_t cpos = 0;
270 		bool pb = true;
271 
272 		while (cpos < BWSLEN(s)) {
273 			bool isblank;
274 
275 			isblank = iswblank(BWS_GET(s, cpos));
276 
277 			if (isblank && !pb) {
278 				--fields;
279 				if (fields <= 1)
280 					return (cpos);
281 			}
282 			pb = isblank;
283 			++cpos;
284 		}
285 		if (fields > 1)
286 			*empty_field = true;
287 		return (cpos);
288 	} else {
289 		size_t cpos = 0;
290 
291 		while (cpos < BWSLEN(s)) {
292 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
293 				--fields;
294 				if (fields <= 1)
295 					return (cpos + 1);
296 			}
297 			++cpos;
298 		}
299 		if (fields > 1)
300 			*empty_field = true;
301 		return (cpos);
302 	}
303 }
304 
305 /*
306  * Find fields start
307  */
308 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)309 find_field_start(const struct bwstring *s, struct key_specs *ks,
310     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
311 {
312 
313 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
314 	if (!*empty_field)
315 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
316 		    ks->pos1b, empty_key);
317 	else
318 		*empty_key = true;
319 }
320 
321 /*
322  * Find end key position
323  */
324 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)325 find_field_end(const struct bwstring *s, struct key_specs *ks)
326 {
327 	size_t f2, next_field_start, pos_end;
328 	bool empty_field, empty_key;
329 
330 	empty_field = false;
331 	empty_key = false;
332 	f2 = ks->f2;
333 
334 	if (f2 == 0)
335 		return (BWSLEN(s) + 1);
336 	else {
337 		if (ks->c2 == 0) {
338 			next_field_start = skip_fields_to_start(s, f2 + 1,
339 			    &empty_field);
340 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
341 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
342 			    next_field_start - 1)))
343 				--next_field_start;
344 		} else
345 			next_field_start = skip_fields_to_start(s, f2,
346 			    &empty_field);
347 	}
348 
349 	if (empty_field || (next_field_start >= BWSLEN(s)))
350 		return (BWSLEN(s) + 1);
351 
352 	if (ks->c2) {
353 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
354 		    ks->pos2b, &empty_key);
355 		if (pos_end < BWSLEN(s))
356 			++pos_end;
357 	} else
358 		pos_end = next_field_start;
359 
360 	return (pos_end);
361 }
362 
363 /*
364  * Cut a field according to the key specs
365  */
366 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)367 cut_field(const struct bwstring *s, struct key_specs *ks)
368 {
369 	struct bwstring *ret = NULL;
370 
371 	if (s && ks) {
372 		size_t field_start, key_end, key_start, sz;
373 		bool empty_field, empty_key;
374 
375 		field_start = 0;
376 		key_start = 0;
377 		empty_field = false;
378 		empty_key = false;
379 
380 		find_field_start(s, ks, &field_start, &key_start,
381 		    &empty_field, &empty_key);
382 
383 		if (empty_key)
384 			sz = 0;
385 		else {
386 			key_end = find_field_end(s, ks);
387 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
388 		}
389 
390 		ret = bwsalloc(sz);
391 		if (sz)
392 			bwsnocpy(ret, s, key_start, sz);
393 	} else
394 		ret = bwsalloc(0);
395 
396 	return (ret);
397 }
398 
399 /*
400  * Preprocesses a line applying the necessary transformations
401  * specified by command line options and returns the preprocessed
402  * string, which can be used to compare.
403  */
404 int
preproc(struct bwstring * s,struct keys_array * ka)405 preproc(struct bwstring *s, struct keys_array *ka)
406 {
407 
408 	if (sort_opts_vals.kflag)
409 		for (size_t i = 0; i < keys_num; i++) {
410 			struct bwstring *key;
411 			struct key_specs *kspecs;
412 			struct sort_mods *sm;
413 
414 			kspecs = &(keys[i]);
415 			key = cut_field(s, kspecs);
416 
417 			sm = &(kspecs->sm);
418 			if (sm->dflag)
419 				key = dictionary_order(key);
420 			else if (sm->iflag)
421 				key = ignore_nonprinting(key);
422 			if (sm->fflag || sm->Mflag)
423 				key = ignore_case(key);
424 
425 			set_key_on_keys_array(ka, key, i);
426 		}
427 	else {
428 		struct bwstring *ret = NULL;
429 		struct sort_mods *sm = default_sort_mods;
430 
431 		if (sm->bflag) {
432 			if (ret == NULL)
433 				ret = bwsdup(s);
434 			ret = ignore_leading_blanks(ret);
435 		}
436 		if (sm->dflag) {
437 			if (ret == NULL)
438 				ret = bwsdup(s);
439 			ret = dictionary_order(ret);
440 		} else if (sm->iflag) {
441 			if (ret == NULL)
442 				ret = bwsdup(s);
443 			ret = ignore_nonprinting(ret);
444 		}
445 		if (sm->fflag || sm->Mflag) {
446 			if (ret == NULL)
447 				ret = bwsdup(s);
448 			ret = ignore_case(ret);
449 		}
450 		if (ret == NULL)
451 			set_key_on_keys_array(ka, s, 0);
452 		else
453 			set_key_on_keys_array(ka, ret, 0);
454 	}
455 
456 	return 0;
457 }
458 
459 cmpcoll_t
get_sort_func(struct sort_mods * sm)460 get_sort_func(struct sort_mods *sm)
461 {
462 
463 	if (sm->nflag)
464 		return (numcoll);
465 	else if (sm->hflag)
466 		return (hnumcoll);
467 	else if (sm->gflag)
468 		return (gnumcoll);
469 	else if (sm->Mflag)
470 		return (monthcoll);
471 	else if (sm->Rflag)
472 		return (randomcoll);
473 	else if (sm->Vflag)
474 		return (versioncoll);
475 	else
476 		return (wstrcoll);
477 }
478 
479 /*
480  * Compares the given strings.  Returns a positive number if
481  * the first precedes the second, a negative number if the second is
482  * the preceding one, and zero if they are equal.  This function calls
483  * the underlying collate functions, which done the actual comparison.
484  */
485 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)486 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
487 {
488 	struct key_value *kv1, *kv2;
489 	struct sort_mods *sm;
490 	int res = 0;
491 
492 	for (size_t i = 0; i < keys_num; ++i) {
493 		kv1 = get_key_from_keys_array(ps1, i);
494 		kv2 = get_key_from_keys_array(ps2, i);
495 		sm = &(keys[i].sm);
496 
497 		if (sm->rflag)
498 			res = sm->func(kv2, kv1, offset);
499 		else
500 			res = sm->func(kv1, kv2, offset);
501 
502 		if (res)
503 			break;
504 
505 		/* offset applies to only the first key */
506 		offset = 0;
507 	}
508 	return (res);
509 }
510 
511 /*
512  * Compare two strings.
513  * Plain symbol-by-symbol comparison.
514  */
515 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)516 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
517 {
518 
519 	if (default_sort_mods->rflag) {
520 		const struct bwstring *tmp;
521 
522 		tmp = s1;
523 		s1 = s2;
524 		s2 = tmp;
525 	}
526 
527 	return (bwscoll(s1, s2, 0));
528 }
529 
530 /*
531  * Compare a string and a sort list item, according to the sort specs.
532  */
533 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)534 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
535 {
536 	struct keys_array *ka1;
537 	int ret = 0;
538 
539 	ka1 = keys_array_alloc();
540 
541 	preproc(str1, ka1);
542 
543 	sort_list_item_make_key(*ss2);
544 
545 	if (debug_sort) {
546 		bwsprintf(stdout, str1, "; s1=<", ">");
547 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
548 	}
549 
550 	ret = key_coll(ka1, &((*ss2)->ka), 0);
551 
552 	if (debug_sort)
553 		printf("; cmp1=%d", ret);
554 
555 	clean_keys_array(str1, ka1);
556 	sort_free(ka1);
557 
558 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
559 		ret = top_level_str_coll(str1, ((*ss2)->str));
560 		if (debug_sort)
561 			printf("; cmp2=%d", ret);
562 	}
563 
564 	if (debug_sort)
565 		printf("\n");
566 
567 	return (ret);
568 }
569 
570 /*
571  * Compare two sort list items, according to the sort specs.
572  */
573 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)574 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
575     size_t offset)
576 {
577 	int ret;
578 
579 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
580 
581 	if (debug_sort) {
582 		if (offset)
583 			printf("; offset=%d", (int) offset);
584 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
585 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
586 		printf("; cmp1=%d\n", ret);
587 	}
588 
589 	if (ret)
590 		return (ret);
591 
592 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
593 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
594 		if (debug_sort)
595 			printf("; cmp2=%d\n", ret);
596 	}
597 
598 	return (ret);
599 }
600 
601 /*
602  * Compare two sort list items, according to the sort specs.
603  */
604 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)605 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
606 {
607 
608 	return (list_coll_offset(ss1, ss2, 0));
609 }
610 
611 #define	LSCDEF(N)							\
612 static int 								\
613 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
614 {									\
615 									\
616 	return (list_coll_offset(ss1, ss2, N));				\
617 }
618 
619 LSCDEF(1)
620 LSCDEF(2)
621 LSCDEF(3)
622 LSCDEF(4)
623 LSCDEF(5)
624 LSCDEF(6)
625 LSCDEF(7)
626 LSCDEF(8)
627 LSCDEF(9)
628 LSCDEF(10)
629 LSCDEF(11)
630 LSCDEF(12)
631 LSCDEF(13)
632 LSCDEF(14)
633 LSCDEF(15)
634 LSCDEF(16)
635 LSCDEF(17)
636 LSCDEF(18)
637 LSCDEF(19)
638 LSCDEF(20)
639 
640 listcoll_t
get_list_call_func(size_t offset)641 get_list_call_func(size_t offset)
642 {
643 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
644 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
645 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
646 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
647 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
648 	    list_coll_18, list_coll_19, list_coll_20 };
649 
650 	if (offset <= 20)
651 		return (lsarray[offset]);
652 
653 	return (list_coll);
654 }
655 
656 /*
657  * Compare two sort list items, only by their original string.
658  */
659 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)660 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
661 {
662 
663 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
664 }
665 
666 /*
667  * Maximum size of a number in the string (before or after decimal point)
668  */
669 #define MAX_NUM_SIZE (128)
670 
671 /*
672  * Set suffix value
673  */
setsuffix(wchar_t c,unsigned char * si)674 static void setsuffix(wchar_t c, unsigned char *si)
675 {
676 	switch (c){
677 	case L'k':
678 	case L'K':
679 		*si = 1;
680 		break;
681 	case L'M':
682 		*si = 2;
683 		break;
684 	case L'G':
685 		*si = 3;
686 		break;
687 	case L'T':
688 		*si = 4;
689 		break;
690 	case L'P':
691 		*si = 5;
692 		break;
693 	case L'E':
694 		*si = 6;
695 		break;
696 	case L'Z':
697 		*si = 7;
698 		break;
699 	case L'Y':
700 		*si = 8;
701 		break;
702 	default:
703 		*si = 0;
704 	}
705 }
706 
707 /*
708  * Read string s and parse the string into a fixed-decimal-point number.
709  * sign equals -1 if the number is negative (explicit plus is not allowed,
710  * according to GNU sort's "info sort".
711  * The number part before decimal point is in the smain, after the decimal
712  * point is in sfrac, tail is the pointer to the remainder of the string.
713  */
714 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)715 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
716 {
717 	bwstring_iterator s;
718 
719 	s = bws_begin(s0);
720 
721 	/* always end the fraction with zero, even if we have no fraction */
722 	sfrac[0] = 0;
723 
724 	while (iswblank(bws_get_iter_value(s)))
725 		s = bws_iterator_inc(s, 1);
726 
727 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
728 		*sign = -1;
729 		s = bws_iterator_inc(s, 1);
730 	}
731 
732 	// This is '0', not '\0', do not change this
733 	while (iswdigit(bws_get_iter_value(s)) &&
734 	    (bws_get_iter_value(s) == L'0'))
735 		s = bws_iterator_inc(s, 1);
736 
737 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
738 		if (iswdigit(bws_get_iter_value(s))) {
739 			smain[*main_len] = bws_get_iter_value(s);
740 			s = bws_iterator_inc(s, 1);
741 			*main_len += 1;
742 		} else if (symbol_thousands_sep &&
743 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
744 			s = bws_iterator_inc(s, 1);
745 		else
746 			break;
747 	}
748 
749 	smain[*main_len] = 0;
750 
751 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
752 		s = bws_iterator_inc(s, 1);
753 		while (iswdigit(bws_get_iter_value(s)) &&
754 		    *frac_len < MAX_NUM_SIZE) {
755 			sfrac[*frac_len] = bws_get_iter_value(s);
756 			s = bws_iterator_inc(s, 1);
757 			*frac_len += 1;
758 		}
759 		sfrac[*frac_len] = 0;
760 
761 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
762 			--(*frac_len);
763 			sfrac[*frac_len] = L'\0';
764 		}
765 	}
766 
767 	setsuffix(bws_get_iter_value(s),si);
768 
769 	if ((*main_len + *frac_len) == 0)
770 		*sign = 0;
771 
772 	return (0);
773 }
774 
775 /*
776  * Implements string sort.
777  */
778 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)779 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
780 {
781 
782 	if (debug_sort) {
783 		if (offset)
784 			printf("; offset=%d\n", (int) offset);
785 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
786 		printf("(%zu)", BWSLEN(kv1->k));
787 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
788 		printf("(%zu)", BWSLEN(kv2->k));
789 	}
790 
791 	return (bwscoll(kv1->k, kv2->k, offset));
792 }
793 
794 /*
795  * Compare two suffixes
796  */
797 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)798 cmpsuffix(unsigned char si1, unsigned char si2)
799 {
800 
801 	return ((char)si1 - (char)si2);
802 }
803 
804 /*
805  * Implements numeric sort for -n and -h.
806  */
807 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)808 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
809     size_t offset __unused, bool use_suffix)
810 {
811 	struct bwstring *s1, *s2;
812 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
813 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
814 	int cmp_res, sign1, sign2;
815 	size_t frac1, frac2, main1, main2;
816 	unsigned char SI1, SI2;
817 	bool e1, e2, key1_read, key2_read;
818 
819 	s1 = kv1->k;
820 	s2 = kv2->k;
821 	sign1 = sign2 = 0;
822 	main1 = main2 = 0;
823 	frac1 = frac2 = 0;
824 
825 	key1_read = key2_read = false;
826 
827 	if (debug_sort) {
828 		bwsprintf(stdout, s1, "; k1=<", ">");
829 		bwsprintf(stdout, s2, ", k2=<", ">");
830 	}
831 
832 	if (s1 == s2)
833 		return (0);
834 
835 	if (kv1->hint->status == HS_UNINITIALIZED) {
836 		/* read the number from the string */
837 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
838 		key1_read = true;
839 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
840 		if(main1 < 1 && frac1 < 1)
841 			kv1->hint->v.nh.empty=true;
842 		kv1->hint->v.nh.si = SI1;
843 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
844 		    HS_INITIALIZED : HS_ERROR;
845 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
846 	}
847 
848 	if (kv2->hint->status == HS_UNINITIALIZED) {
849 		/* read the number from the string */
850 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
851 		key2_read = true;
852 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
853 		if(main2 < 1 && frac2 < 1)
854 			kv2->hint->v.nh.empty=true;
855 		kv2->hint->v.nh.si = SI2;
856 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
857 		    HS_INITIALIZED : HS_ERROR;
858 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
859 	}
860 
861 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
862 	    HS_INITIALIZED) {
863 		unsigned long long n1, n2;
864 		bool neg1, neg2;
865 
866 		e1 = kv1->hint->v.nh.empty;
867 		e2 = kv2->hint->v.nh.empty;
868 
869 		if (e1 && e2)
870 			return (0);
871 
872 		neg1 = kv1->hint->v.nh.neg;
873 		neg2 = kv2->hint->v.nh.neg;
874 
875 		if (neg1 && !neg2)
876 			return (-1);
877 		if (neg2 && !neg1)
878 			return (+1);
879 
880 		if (e1)
881 			return (neg2 ? +1 : -1);
882 		else if (e2)
883 			return (neg1 ? -1 : +1);
884 
885 
886 		if (use_suffix) {
887 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
888 			if (cmp_res)
889 				return (neg1 ? -cmp_res : cmp_res);
890 		}
891 
892 		n1 = kv1->hint->v.nh.n1;
893 		n2 = kv2->hint->v.nh.n1;
894 		if (n1 < n2)
895 			return (neg1 ? +1 : -1);
896 		else if (n1 > n2)
897 			return (neg1 ? -1 : +1);
898 	}
899 
900 	/* read the numbers from the strings */
901 	if (!key1_read)
902 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
903 	if (!key2_read)
904 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
905 
906 	e1 = ((main1 + frac1) == 0);
907 	e2 = ((main2 + frac2) == 0);
908 
909 	if (e1 && e2)
910 		return (0);
911 
912 	/* we know the result if the signs are different */
913 	if (sign1 < 0 && sign2 >= 0)
914 		return (-1);
915 	if (sign1 >= 0 && sign2 < 0)
916 		return (+1);
917 
918 	if (e1)
919 		return ((sign2 < 0) ? +1 : -1);
920 	else if (e2)
921 		return ((sign1 < 0) ? -1 : +1);
922 
923 	if (use_suffix) {
924 		cmp_res = cmpsuffix(SI1, SI2);
925 		if (cmp_res)
926 			return ((sign1 < 0) ? -cmp_res : cmp_res);
927 	}
928 
929 	/* if both numbers are empty assume that the strings are equal */
930 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
931 		return (0);
932 
933 	/*
934 	 * if the main part is of different size, we know the result
935 	 * (because the leading zeros are removed)
936 	 */
937 	if (main1 < main2)
938 		cmp_res = -1;
939 	else if (main1 > main2)
940 		cmp_res = +1;
941 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
942 	else
943 		cmp_res = wcscmp(smain1, smain2);
944 
945 	/* check fraction */
946 	if (!cmp_res)
947 		cmp_res = wcscmp(sfrac1, sfrac2);
948 
949 	if (!cmp_res)
950 		return (0);
951 
952 	/* reverse result if the signs are negative */
953 	if (sign1 < 0 && sign2 < 0)
954 		cmp_res = -cmp_res;
955 
956 	return (cmp_res);
957 }
958 
959 /*
960  * Implements numeric sort (-n).
961  */
962 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)963 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
964 {
965 
966 	return (numcoll_impl(kv1, kv2, offset, false));
967 }
968 
969 /*
970  * Implements 'human' numeric sort (-h).
971  */
972 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)973 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
974 {
975 
976 	return (numcoll_impl(kv1, kv2, offset, true));
977 }
978 
979 /* Use hint space to memoize md5 computations, at least. */
980 static void
randomcoll_init_hint(struct key_value * kv,void * hash)981 randomcoll_init_hint(struct key_value *kv, void *hash)
982 {
983 
984 	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
985 	kv->hint->status = HS_INITIALIZED;
986 }
987 
988 /*
989  * Implements random sort (-R).
990  */
991 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)992 randomcoll(struct key_value *kv1, struct key_value *kv2,
993     size_t offset __unused)
994 {
995 	struct bwstring *s1, *s2;
996 	MD5_CTX ctx1, ctx2;
997 	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
998 	int cmp;
999 
1000 	s1 = kv1->k;
1001 	s2 = kv2->k;
1002 
1003 	if (debug_sort) {
1004 		bwsprintf(stdout, s1, "; k1=<", ">");
1005 		bwsprintf(stdout, s2, ", k2=<", ">");
1006 	}
1007 
1008 	if (s1 == s2)
1009 		return (0);
1010 
1011 	if (kv1->hint->status == HS_INITIALIZED &&
1012 	    kv2->hint->status == HS_INITIALIZED) {
1013 		cmp = memcmp(kv1->hint->v.Rh.cached,
1014 		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1015 		if (cmp != 0)
1016 			return (cmp);
1017 	}
1018 
1019 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1020 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1021 
1022 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1023 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1024 
1025 	MD5Final(hash1, &ctx1);
1026 	MD5Final(hash2, &ctx2);
1027 
1028 	if (kv1->hint->status == HS_UNINITIALIZED)
1029 		randomcoll_init_hint(kv1, hash1);
1030 	if (kv2->hint->status == HS_UNINITIALIZED)
1031 		randomcoll_init_hint(kv2, hash2);
1032 
1033 	return (memcmp(hash1, hash2, sizeof(hash1)));
1034 }
1035 
1036 /*
1037  * Implements version sort (-V).
1038  */
1039 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1040 versioncoll(struct key_value *kv1, struct key_value *kv2,
1041     size_t offset __unused)
1042 {
1043 	struct bwstring *s1, *s2;
1044 
1045 	s1 = kv1->k;
1046 	s2 = kv2->k;
1047 
1048 	if (debug_sort) {
1049 		bwsprintf(stdout, s1, "; k1=<", ">");
1050 		bwsprintf(stdout, s2, ", k2=<", ">");
1051 	}
1052 
1053 	if (s1 == s2)
1054 		return (0);
1055 
1056 	return (vcmp(s1, s2));
1057 }
1058 
1059 /*
1060  * Check for minus infinity
1061  */
1062 static inline bool
huge_minus(double d,int err1)1063 huge_minus(double d, int err1)
1064 {
1065 
1066 	if (err1 == ERANGE)
1067 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1068 			return (+1);
1069 
1070 	return (0);
1071 }
1072 
1073 /*
1074  * Check for plus infinity
1075  */
1076 static inline bool
huge_plus(double d,int err1)1077 huge_plus(double d, int err1)
1078 {
1079 
1080 	if (err1 == ERANGE)
1081 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1082 			return (+1);
1083 
1084 	return (0);
1085 }
1086 
1087 /*
1088  * Check whether a function is a NAN
1089  */
1090 static bool
is_nan(double d)1091 is_nan(double d)
1092 {
1093 
1094 	return ((d == NAN) || (isnan(d)));
1095 }
1096 
1097 /*
1098  * Compare two NANs
1099  */
1100 static int
cmp_nans(double d1,double d2)1101 cmp_nans(double d1, double d2)
1102 {
1103 
1104 	if (d1 < d2)
1105 		return (-1);
1106 	if (d1 > d2)
1107 		return (+1);
1108 	return (0);
1109 }
1110 
1111 /*
1112  * Implements general numeric sort (-g).
1113  */
1114 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1115 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1116     size_t offset __unused)
1117 {
1118 	double d1, d2;
1119 	int err1, err2;
1120 	bool empty1, empty2, key1_read, key2_read;
1121 
1122 	d1 = d2 = 0;
1123 	err1 = err2 = 0;
1124 	key1_read = key2_read = false;
1125 
1126 	if (debug_sort) {
1127 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1128 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1129 	}
1130 
1131 	if (kv1->hint->status == HS_UNINITIALIZED) {
1132 		errno = 0;
1133 		d1 = bwstod(kv1->k, &empty1);
1134 		err1 = errno;
1135 
1136 		if (empty1)
1137 			kv1->hint->v.gh.notnum = true;
1138 		else if (err1 == 0) {
1139 			kv1->hint->v.gh.d = d1;
1140 			kv1->hint->v.gh.nan = is_nan(d1);
1141 			kv1->hint->status = HS_INITIALIZED;
1142 		} else
1143 			kv1->hint->status = HS_ERROR;
1144 
1145 		key1_read = true;
1146 	}
1147 
1148 	if (kv2->hint->status == HS_UNINITIALIZED) {
1149 		errno = 0;
1150 		d2 = bwstod(kv2->k, &empty2);
1151 		err2 = errno;
1152 
1153 		if (empty2)
1154 			kv2->hint->v.gh.notnum = true;
1155 		else if (err2 == 0) {
1156 			kv2->hint->v.gh.d = d2;
1157 			kv2->hint->v.gh.nan = is_nan(d2);
1158 			kv2->hint->status = HS_INITIALIZED;
1159 		} else
1160 			kv2->hint->status = HS_ERROR;
1161 
1162 		key2_read = true;
1163 	}
1164 
1165 	if (kv1->hint->status == HS_INITIALIZED &&
1166 	    kv2->hint->status == HS_INITIALIZED) {
1167 		if (kv1->hint->v.gh.notnum)
1168 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1169 		else if (kv2->hint->v.gh.notnum)
1170 			return (+1);
1171 
1172 		if (kv1->hint->v.gh.nan)
1173 			return ((kv2->hint->v.gh.nan) ?
1174 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1175 			    -1);
1176 		else if (kv2->hint->v.gh.nan)
1177 			return (+1);
1178 
1179 		d1 = kv1->hint->v.gh.d;
1180 		d2 = kv2->hint->v.gh.d;
1181 
1182 		if (d1 < d2)
1183 			return (-1);
1184 		else if (d1 > d2)
1185 			return (+1);
1186 		else
1187 			return (0);
1188 	}
1189 
1190 	if (!key1_read) {
1191 		errno = 0;
1192 		d1 = bwstod(kv1->k, &empty1);
1193 		err1 = errno;
1194 	}
1195 
1196 	if (!key2_read) {
1197 		errno = 0;
1198 		d2 = bwstod(kv2->k, &empty2);
1199 		err2 = errno;
1200 	}
1201 
1202 	/* Non-value case: */
1203 	if (empty1)
1204 		return (empty2 ? 0 : -1);
1205 	else if (empty2)
1206 		return (+1);
1207 
1208 	/* NAN case */
1209 	if (is_nan(d1))
1210 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1211 	else if (is_nan(d2))
1212 		return (+1);
1213 
1214 	/* Infinities */
1215 	if (err1 == ERANGE || err2 == ERANGE) {
1216 		/* Minus infinity case */
1217 		if (huge_minus(d1, err1)) {
1218 			if (huge_minus(d2, err2)) {
1219 				if (d1 < d2)
1220 					return (-1);
1221 				if (d1 > d2)
1222 					return (+1);
1223 				return (0);
1224 			} else
1225 				return (-1);
1226 
1227 		} else if (huge_minus(d2, err2)) {
1228 			if (huge_minus(d1, err1)) {
1229 				if (d1 < d2)
1230 					return (-1);
1231 				if (d1 > d2)
1232 					return (+1);
1233 				return (0);
1234 			} else
1235 				return (+1);
1236 		}
1237 
1238 		/* Plus infinity case */
1239 		if (huge_plus(d1, err1)) {
1240 			if (huge_plus(d2, err2)) {
1241 				if (d1 < d2)
1242 					return (-1);
1243 				if (d1 > d2)
1244 					return (+1);
1245 				return (0);
1246 			} else
1247 				return (+1);
1248 		} else if (huge_plus(d2, err2)) {
1249 			if (huge_plus(d1, err1)) {
1250 				if (d1 < d2)
1251 					return (-1);
1252 				if (d1 > d2)
1253 					return (+1);
1254 				return (0);
1255 			} else
1256 				return (-1);
1257 		}
1258 	}
1259 
1260 	if (d1 < d2)
1261 		return (-1);
1262 	if (d1 > d2)
1263 		return (+1);
1264 
1265 	return (0);
1266 }
1267 
1268 /*
1269  * Implements month sort (-M).
1270  */
1271 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1272 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1273 {
1274 	int val1, val2;
1275 	bool key1_read, key2_read;
1276 
1277 	val1 = val2 = 0;
1278 	key1_read = key2_read = false;
1279 
1280 	if (debug_sort) {
1281 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1282 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1283 	}
1284 
1285 	if (kv1->hint->status == HS_UNINITIALIZED) {
1286 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1287 		key1_read = true;
1288 		kv1->hint->status = HS_INITIALIZED;
1289 	}
1290 
1291 	if (kv2->hint->status == HS_UNINITIALIZED) {
1292 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1293 		key2_read = true;
1294 		kv2->hint->status = HS_INITIALIZED;
1295 	}
1296 
1297 	if (kv1->hint->status == HS_INITIALIZED) {
1298 		val1 = kv1->hint->v.Mh.m;
1299 		key1_read = true;
1300 	}
1301 
1302 	if (kv2->hint->status == HS_INITIALIZED) {
1303 		val2 = kv2->hint->v.Mh.m;
1304 		key2_read = true;
1305 	}
1306 
1307 	if (!key1_read)
1308 		val1 = bws_month_score(kv1->k);
1309 	if (!key2_read)
1310 		val2 = bws_month_score(kv2->k);
1311 
1312 	if (val1 == val2) {
1313 		return (0);
1314 	}
1315 	if (val1 < val2)
1316 		return (-1);
1317 	return (+1);
1318 }
1319