1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/types.h>
31
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <md5.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "coll.h"
44 #include "vsort.h"
45
46 struct key_specs *keys;
47 size_t keys_num = 0;
48
49 wint_t symbol_decimal_point = L'.';
50 /* there is no default thousands separator in collate rules: */
51 wint_t symbol_thousands_sep = 0;
52 wint_t symbol_negative_sign = L'-';
53 wint_t symbol_positive_sign = L'+';
54
55 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58 static int numcoll(struct key_value*, struct key_value *, size_t offset);
59 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62
63 /*
64 * Allocate keys array
65 */
66 struct keys_array *
keys_array_alloc(void)67 keys_array_alloc(void)
68 {
69 struct keys_array *ka;
70 size_t sz;
71
72 sz = keys_array_size();
73 ka = sort_calloc(1, sz);
74
75 return (ka);
76 }
77
78 /*
79 * Calculate whether we need key hint space
80 */
81 static size_t
key_hint_size(void)82 key_hint_size(void)
83 {
84
85 return (need_hint ? sizeof(struct key_hint) : 0);
86 }
87
88 /*
89 * Calculate keys array size
90 */
91 size_t
keys_array_size(void)92 keys_array_size(void)
93 {
94
95 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
96 }
97
98 /*
99 * Clean data of keys array
100 */
101 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)102 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
103 {
104
105 if (ka) {
106 for (size_t i = 0; i < keys_num; ++i) {
107 const struct key_value *kv;
108
109 kv = get_key_from_keys_array(ka, i);
110 if (kv->k && kv->k != s)
111 bwsfree(kv->k);
112 }
113 memset(ka, 0, keys_array_size());
114 }
115 }
116
117 /*
118 * Get pointer to a key value in the keys set
119 */
120 struct key_value *
get_key_from_keys_array(struct keys_array * ka,size_t ind)121 get_key_from_keys_array(struct keys_array *ka, size_t ind)
122 {
123
124 return ((struct key_value *)((caddr_t)ka->key +
125 ind * (sizeof(struct key_value) + key_hint_size())));
126 }
127
128 /*
129 * Set value of a key in the keys set
130 */
131 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)132 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
133 {
134
135 if (ka && keys_num > ind) {
136 struct key_value *kv;
137
138 kv = get_key_from_keys_array(ka, ind);
139
140 if (kv->k && kv->k != s)
141 bwsfree(kv->k);
142 kv->k = s;
143 }
144 }
145
146 /*
147 * Initialize a sort list item
148 */
149 struct sort_list_item *
sort_list_item_alloc(void)150 sort_list_item_alloc(void)
151 {
152 struct sort_list_item *si;
153 size_t sz;
154
155 sz = sizeof(struct sort_list_item) + keys_array_size();
156 si = sort_calloc(1, sz);
157
158 return (si);
159 }
160
161 size_t
sort_list_item_size(struct sort_list_item * si)162 sort_list_item_size(struct sort_list_item *si)
163 {
164 size_t ret = 0;
165
166 if (si) {
167 ret = sizeof(struct sort_list_item) + keys_array_size();
168 if (si->str)
169 ret += bws_memsize(si->str);
170 for (size_t i = 0; i < keys_num; ++i) {
171 const struct key_value *kv;
172
173 kv = get_key_from_keys_array(&si->ka, i);
174
175 if (kv->k != si->str)
176 ret += bws_memsize(kv->k);
177 }
178 }
179 return (ret);
180 }
181
182 /*
183 * Calculate key for a sort list item
184 */
185 static void
sort_list_item_make_key(struct sort_list_item * si)186 sort_list_item_make_key(struct sort_list_item *si)
187 {
188
189 preproc(si->str, &(si->ka));
190 }
191
192 /*
193 * Set value of a sort list item.
194 * Return combined string and keys memory size.
195 */
196 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)197 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
198 {
199
200 if (si) {
201 clean_keys_array(si->str, &(si->ka));
202 if (si->str) {
203 if (si->str == str) {
204 /* we are trying to reset the same string */
205 return;
206 } else {
207 bwsfree(si->str);
208 si->str = NULL;
209 }
210 }
211 si->str = str;
212 sort_list_item_make_key(si);
213 }
214 }
215
216 /*
217 * De-allocate a sort list item object memory
218 */
219 void
sort_list_item_clean(struct sort_list_item * si)220 sort_list_item_clean(struct sort_list_item *si)
221 {
222
223 if (si) {
224 clean_keys_array(si->str, &(si->ka));
225 if (si->str) {
226 bwsfree(si->str);
227 si->str = NULL;
228 }
229 }
230 }
231
232 /*
233 * Skip columns according to specs
234 */
235 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)236 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
237 bool skip_blanks, bool *empty_key)
238 {
239 if (cols < 1)
240 return (BWSLEN(s) + 1);
241
242 if (skip_blanks)
243 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
244 ++start;
245
246 while (start < BWSLEN(s) && cols > 1) {
247 --cols;
248 ++start;
249 }
250
251 if (start >= BWSLEN(s))
252 *empty_key = true;
253
254 return (start);
255 }
256
257 /*
258 * Skip fields according to specs
259 */
260 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)261 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
262 {
263
264 if (fields < 2) {
265 if (BWSLEN(s) == 0)
266 *empty_field = true;
267 return (0);
268 } else if (!(sort_opts_vals.tflag)) {
269 size_t cpos = 0;
270 bool pb = true;
271
272 while (cpos < BWSLEN(s)) {
273 bool isblank;
274
275 isblank = iswblank(BWS_GET(s, cpos));
276
277 if (isblank && !pb) {
278 --fields;
279 if (fields <= 1)
280 return (cpos);
281 }
282 pb = isblank;
283 ++cpos;
284 }
285 if (fields > 1)
286 *empty_field = true;
287 return (cpos);
288 } else {
289 size_t cpos = 0;
290
291 while (cpos < BWSLEN(s)) {
292 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
293 --fields;
294 if (fields <= 1)
295 return (cpos + 1);
296 }
297 ++cpos;
298 }
299 if (fields > 1)
300 *empty_field = true;
301 return (cpos);
302 }
303 }
304
305 /*
306 * Find fields start
307 */
308 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)309 find_field_start(const struct bwstring *s, struct key_specs *ks,
310 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
311 {
312
313 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
314 if (!*empty_field)
315 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
316 ks->pos1b, empty_key);
317 else
318 *empty_key = true;
319 }
320
321 /*
322 * Find end key position
323 */
324 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)325 find_field_end(const struct bwstring *s, struct key_specs *ks)
326 {
327 size_t f2, next_field_start, pos_end;
328 bool empty_field, empty_key;
329
330 empty_field = false;
331 empty_key = false;
332 f2 = ks->f2;
333
334 if (f2 == 0)
335 return (BWSLEN(s) + 1);
336 else {
337 if (ks->c2 == 0) {
338 next_field_start = skip_fields_to_start(s, f2 + 1,
339 &empty_field);
340 if ((next_field_start > 0) && sort_opts_vals.tflag &&
341 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
342 next_field_start - 1)))
343 --next_field_start;
344 } else
345 next_field_start = skip_fields_to_start(s, f2,
346 &empty_field);
347 }
348
349 if (empty_field || (next_field_start >= BWSLEN(s)))
350 return (BWSLEN(s) + 1);
351
352 if (ks->c2) {
353 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
354 ks->pos2b, &empty_key);
355 if (pos_end < BWSLEN(s))
356 ++pos_end;
357 } else
358 pos_end = next_field_start;
359
360 return (pos_end);
361 }
362
363 /*
364 * Cut a field according to the key specs
365 */
366 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)367 cut_field(const struct bwstring *s, struct key_specs *ks)
368 {
369 struct bwstring *ret = NULL;
370
371 if (s && ks) {
372 size_t field_start, key_end, key_start, sz;
373 bool empty_field, empty_key;
374
375 field_start = 0;
376 key_start = 0;
377 empty_field = false;
378 empty_key = false;
379
380 find_field_start(s, ks, &field_start, &key_start,
381 &empty_field, &empty_key);
382
383 if (empty_key)
384 sz = 0;
385 else {
386 key_end = find_field_end(s, ks);
387 sz = (key_end < key_start) ? 0 : (key_end - key_start);
388 }
389
390 ret = bwsalloc(sz);
391 if (sz)
392 bwsnocpy(ret, s, key_start, sz);
393 } else
394 ret = bwsalloc(0);
395
396 return (ret);
397 }
398
399 /*
400 * Preprocesses a line applying the necessary transformations
401 * specified by command line options and returns the preprocessed
402 * string, which can be used to compare.
403 */
404 int
preproc(struct bwstring * s,struct keys_array * ka)405 preproc(struct bwstring *s, struct keys_array *ka)
406 {
407
408 if (sort_opts_vals.kflag)
409 for (size_t i = 0; i < keys_num; i++) {
410 struct bwstring *key;
411 struct key_specs *kspecs;
412 struct sort_mods *sm;
413
414 kspecs = &(keys[i]);
415 key = cut_field(s, kspecs);
416
417 sm = &(kspecs->sm);
418 if (sm->dflag)
419 key = dictionary_order(key);
420 else if (sm->iflag)
421 key = ignore_nonprinting(key);
422 if (sm->fflag || sm->Mflag)
423 key = ignore_case(key);
424
425 set_key_on_keys_array(ka, key, i);
426 }
427 else {
428 struct bwstring *ret = NULL;
429 struct sort_mods *sm = default_sort_mods;
430
431 if (sm->bflag) {
432 if (ret == NULL)
433 ret = bwsdup(s);
434 ret = ignore_leading_blanks(ret);
435 }
436 if (sm->dflag) {
437 if (ret == NULL)
438 ret = bwsdup(s);
439 ret = dictionary_order(ret);
440 } else if (sm->iflag) {
441 if (ret == NULL)
442 ret = bwsdup(s);
443 ret = ignore_nonprinting(ret);
444 }
445 if (sm->fflag || sm->Mflag) {
446 if (ret == NULL)
447 ret = bwsdup(s);
448 ret = ignore_case(ret);
449 }
450 if (ret == NULL)
451 set_key_on_keys_array(ka, s, 0);
452 else
453 set_key_on_keys_array(ka, ret, 0);
454 }
455
456 return 0;
457 }
458
459 cmpcoll_t
get_sort_func(struct sort_mods * sm)460 get_sort_func(struct sort_mods *sm)
461 {
462
463 if (sm->nflag)
464 return (numcoll);
465 else if (sm->hflag)
466 return (hnumcoll);
467 else if (sm->gflag)
468 return (gnumcoll);
469 else if (sm->Mflag)
470 return (monthcoll);
471 else if (sm->Rflag)
472 return (randomcoll);
473 else if (sm->Vflag)
474 return (versioncoll);
475 else
476 return (wstrcoll);
477 }
478
479 /*
480 * Compares the given strings. Returns a positive number if
481 * the first precedes the second, a negative number if the second is
482 * the preceding one, and zero if they are equal. This function calls
483 * the underlying collate functions, which done the actual comparison.
484 */
485 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)486 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
487 {
488 struct key_value *kv1, *kv2;
489 struct sort_mods *sm;
490 int res = 0;
491
492 for (size_t i = 0; i < keys_num; ++i) {
493 kv1 = get_key_from_keys_array(ps1, i);
494 kv2 = get_key_from_keys_array(ps2, i);
495 sm = &(keys[i].sm);
496
497 if (sm->rflag)
498 res = sm->func(kv2, kv1, offset);
499 else
500 res = sm->func(kv1, kv2, offset);
501
502 if (res)
503 break;
504
505 /* offset applies to only the first key */
506 offset = 0;
507 }
508 return (res);
509 }
510
511 /*
512 * Compare two strings.
513 * Plain symbol-by-symbol comparison.
514 */
515 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)516 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
517 {
518
519 if (default_sort_mods->rflag) {
520 const struct bwstring *tmp;
521
522 tmp = s1;
523 s1 = s2;
524 s2 = tmp;
525 }
526
527 return (bwscoll(s1, s2, 0));
528 }
529
530 /*
531 * Compare a string and a sort list item, according to the sort specs.
532 */
533 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)534 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
535 {
536 struct keys_array *ka1;
537 int ret = 0;
538
539 ka1 = keys_array_alloc();
540
541 preproc(str1, ka1);
542
543 sort_list_item_make_key(*ss2);
544
545 if (debug_sort) {
546 bwsprintf(stdout, str1, "; s1=<", ">");
547 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
548 }
549
550 ret = key_coll(ka1, &((*ss2)->ka), 0);
551
552 if (debug_sort)
553 printf("; cmp1=%d", ret);
554
555 clean_keys_array(str1, ka1);
556 sort_free(ka1);
557
558 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
559 ret = top_level_str_coll(str1, ((*ss2)->str));
560 if (debug_sort)
561 printf("; cmp2=%d", ret);
562 }
563
564 if (debug_sort)
565 printf("\n");
566
567 return (ret);
568 }
569
570 /*
571 * Compare two sort list items, according to the sort specs.
572 */
573 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)574 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
575 size_t offset)
576 {
577 int ret;
578
579 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
580
581 if (debug_sort) {
582 if (offset)
583 printf("; offset=%d", (int) offset);
584 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
585 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
586 printf("; cmp1=%d\n", ret);
587 }
588
589 if (ret)
590 return (ret);
591
592 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
593 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
594 if (debug_sort)
595 printf("; cmp2=%d\n", ret);
596 }
597
598 return (ret);
599 }
600
601 /*
602 * Compare two sort list items, according to the sort specs.
603 */
604 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)605 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
606 {
607
608 return (list_coll_offset(ss1, ss2, 0));
609 }
610
611 #define LSCDEF(N) \
612 static int \
613 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
614 { \
615 \
616 return (list_coll_offset(ss1, ss2, N)); \
617 }
618
619 LSCDEF(1)
620 LSCDEF(2)
621 LSCDEF(3)
622 LSCDEF(4)
623 LSCDEF(5)
624 LSCDEF(6)
625 LSCDEF(7)
626 LSCDEF(8)
627 LSCDEF(9)
628 LSCDEF(10)
629 LSCDEF(11)
630 LSCDEF(12)
631 LSCDEF(13)
632 LSCDEF(14)
633 LSCDEF(15)
634 LSCDEF(16)
635 LSCDEF(17)
636 LSCDEF(18)
637 LSCDEF(19)
638 LSCDEF(20)
639
640 listcoll_t
get_list_call_func(size_t offset)641 get_list_call_func(size_t offset)
642 {
643 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
644 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
645 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
646 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
647 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
648 list_coll_18, list_coll_19, list_coll_20 };
649
650 if (offset <= 20)
651 return (lsarray[offset]);
652
653 return (list_coll);
654 }
655
656 /*
657 * Compare two sort list items, only by their original string.
658 */
659 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)660 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
661 {
662
663 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
664 }
665
666 /*
667 * Maximum size of a number in the string (before or after decimal point)
668 */
669 #define MAX_NUM_SIZE (128)
670
671 /*
672 * Set suffix value
673 */
setsuffix(wchar_t c,unsigned char * si)674 static void setsuffix(wchar_t c, unsigned char *si)
675 {
676 switch (c){
677 case L'k':
678 case L'K':
679 *si = 1;
680 break;
681 case L'M':
682 *si = 2;
683 break;
684 case L'G':
685 *si = 3;
686 break;
687 case L'T':
688 *si = 4;
689 break;
690 case L'P':
691 *si = 5;
692 break;
693 case L'E':
694 *si = 6;
695 break;
696 case L'Z':
697 *si = 7;
698 break;
699 case L'Y':
700 *si = 8;
701 break;
702 default:
703 *si = 0;
704 }
705 }
706
707 /*
708 * Read string s and parse the string into a fixed-decimal-point number.
709 * sign equals -1 if the number is negative (explicit plus is not allowed,
710 * according to GNU sort's "info sort".
711 * The number part before decimal point is in the smain, after the decimal
712 * point is in sfrac, tail is the pointer to the remainder of the string.
713 */
714 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)715 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
716 {
717 bwstring_iterator s;
718
719 s = bws_begin(s0);
720
721 /* always end the fraction with zero, even if we have no fraction */
722 sfrac[0] = 0;
723
724 while (iswblank(bws_get_iter_value(s)))
725 s = bws_iterator_inc(s, 1);
726
727 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
728 *sign = -1;
729 s = bws_iterator_inc(s, 1);
730 }
731
732 // This is '0', not '\0', do not change this
733 while (iswdigit(bws_get_iter_value(s)) &&
734 (bws_get_iter_value(s) == L'0'))
735 s = bws_iterator_inc(s, 1);
736
737 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
738 if (iswdigit(bws_get_iter_value(s))) {
739 smain[*main_len] = bws_get_iter_value(s);
740 s = bws_iterator_inc(s, 1);
741 *main_len += 1;
742 } else if (symbol_thousands_sep &&
743 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
744 s = bws_iterator_inc(s, 1);
745 else
746 break;
747 }
748
749 smain[*main_len] = 0;
750
751 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
752 s = bws_iterator_inc(s, 1);
753 while (iswdigit(bws_get_iter_value(s)) &&
754 *frac_len < MAX_NUM_SIZE) {
755 sfrac[*frac_len] = bws_get_iter_value(s);
756 s = bws_iterator_inc(s, 1);
757 *frac_len += 1;
758 }
759 sfrac[*frac_len] = 0;
760
761 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
762 --(*frac_len);
763 sfrac[*frac_len] = L'\0';
764 }
765 }
766
767 setsuffix(bws_get_iter_value(s),si);
768
769 if ((*main_len + *frac_len) == 0)
770 *sign = 0;
771
772 return (0);
773 }
774
775 /*
776 * Implements string sort.
777 */
778 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)779 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
780 {
781
782 if (debug_sort) {
783 if (offset)
784 printf("; offset=%d\n", (int) offset);
785 bwsprintf(stdout, kv1->k, "; k1=<", ">");
786 printf("(%zu)", BWSLEN(kv1->k));
787 bwsprintf(stdout, kv2->k, ", k2=<", ">");
788 printf("(%zu)", BWSLEN(kv2->k));
789 }
790
791 return (bwscoll(kv1->k, kv2->k, offset));
792 }
793
794 /*
795 * Compare two suffixes
796 */
797 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)798 cmpsuffix(unsigned char si1, unsigned char si2)
799 {
800
801 return ((char)si1 - (char)si2);
802 }
803
804 /*
805 * Implements numeric sort for -n and -h.
806 */
807 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)808 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
809 size_t offset __unused, bool use_suffix)
810 {
811 struct bwstring *s1, *s2;
812 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
813 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
814 int cmp_res, sign1, sign2;
815 size_t frac1, frac2, main1, main2;
816 unsigned char SI1, SI2;
817 bool e1, e2, key1_read, key2_read;
818
819 s1 = kv1->k;
820 s2 = kv2->k;
821 sign1 = sign2 = 0;
822 main1 = main2 = 0;
823 frac1 = frac2 = 0;
824
825 key1_read = key2_read = false;
826
827 if (debug_sort) {
828 bwsprintf(stdout, s1, "; k1=<", ">");
829 bwsprintf(stdout, s2, ", k2=<", ">");
830 }
831
832 if (s1 == s2)
833 return (0);
834
835 if (kv1->hint->status == HS_UNINITIALIZED) {
836 /* read the number from the string */
837 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
838 key1_read = true;
839 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
840 if(main1 < 1 && frac1 < 1)
841 kv1->hint->v.nh.empty=true;
842 kv1->hint->v.nh.si = SI1;
843 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
844 HS_INITIALIZED : HS_ERROR;
845 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
846 }
847
848 if (kv2->hint->status == HS_UNINITIALIZED) {
849 /* read the number from the string */
850 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
851 key2_read = true;
852 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
853 if(main2 < 1 && frac2 < 1)
854 kv2->hint->v.nh.empty=true;
855 kv2->hint->v.nh.si = SI2;
856 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
857 HS_INITIALIZED : HS_ERROR;
858 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
859 }
860
861 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
862 HS_INITIALIZED) {
863 unsigned long long n1, n2;
864 bool neg1, neg2;
865
866 e1 = kv1->hint->v.nh.empty;
867 e2 = kv2->hint->v.nh.empty;
868
869 if (e1 && e2)
870 return (0);
871
872 neg1 = kv1->hint->v.nh.neg;
873 neg2 = kv2->hint->v.nh.neg;
874
875 if (neg1 && !neg2)
876 return (-1);
877 if (neg2 && !neg1)
878 return (+1);
879
880 if (e1)
881 return (neg2 ? +1 : -1);
882 else if (e2)
883 return (neg1 ? -1 : +1);
884
885
886 if (use_suffix) {
887 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
888 if (cmp_res)
889 return (neg1 ? -cmp_res : cmp_res);
890 }
891
892 n1 = kv1->hint->v.nh.n1;
893 n2 = kv2->hint->v.nh.n1;
894 if (n1 < n2)
895 return (neg1 ? +1 : -1);
896 else if (n1 > n2)
897 return (neg1 ? -1 : +1);
898 }
899
900 /* read the numbers from the strings */
901 if (!key1_read)
902 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
903 if (!key2_read)
904 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
905
906 e1 = ((main1 + frac1) == 0);
907 e2 = ((main2 + frac2) == 0);
908
909 if (e1 && e2)
910 return (0);
911
912 /* we know the result if the signs are different */
913 if (sign1 < 0 && sign2 >= 0)
914 return (-1);
915 if (sign1 >= 0 && sign2 < 0)
916 return (+1);
917
918 if (e1)
919 return ((sign2 < 0) ? +1 : -1);
920 else if (e2)
921 return ((sign1 < 0) ? -1 : +1);
922
923 if (use_suffix) {
924 cmp_res = cmpsuffix(SI1, SI2);
925 if (cmp_res)
926 return ((sign1 < 0) ? -cmp_res : cmp_res);
927 }
928
929 /* if both numbers are empty assume that the strings are equal */
930 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
931 return (0);
932
933 /*
934 * if the main part is of different size, we know the result
935 * (because the leading zeros are removed)
936 */
937 if (main1 < main2)
938 cmp_res = -1;
939 else if (main1 > main2)
940 cmp_res = +1;
941 /* if the sizes are equal then simple non-collate string compare gives the correct result */
942 else
943 cmp_res = wcscmp(smain1, smain2);
944
945 /* check fraction */
946 if (!cmp_res)
947 cmp_res = wcscmp(sfrac1, sfrac2);
948
949 if (!cmp_res)
950 return (0);
951
952 /* reverse result if the signs are negative */
953 if (sign1 < 0 && sign2 < 0)
954 cmp_res = -cmp_res;
955
956 return (cmp_res);
957 }
958
959 /*
960 * Implements numeric sort (-n).
961 */
962 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)963 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
964 {
965
966 return (numcoll_impl(kv1, kv2, offset, false));
967 }
968
969 /*
970 * Implements 'human' numeric sort (-h).
971 */
972 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)973 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
974 {
975
976 return (numcoll_impl(kv1, kv2, offset, true));
977 }
978
979 /* Use hint space to memoize md5 computations, at least. */
980 static void
randomcoll_init_hint(struct key_value * kv,void * hash)981 randomcoll_init_hint(struct key_value *kv, void *hash)
982 {
983
984 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
985 kv->hint->status = HS_INITIALIZED;
986 }
987
988 /*
989 * Implements random sort (-R).
990 */
991 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)992 randomcoll(struct key_value *kv1, struct key_value *kv2,
993 size_t offset __unused)
994 {
995 struct bwstring *s1, *s2;
996 MD5_CTX ctx1, ctx2;
997 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
998 int cmp;
999
1000 s1 = kv1->k;
1001 s2 = kv2->k;
1002
1003 if (debug_sort) {
1004 bwsprintf(stdout, s1, "; k1=<", ">");
1005 bwsprintf(stdout, s2, ", k2=<", ">");
1006 }
1007
1008 if (s1 == s2)
1009 return (0);
1010
1011 if (kv1->hint->status == HS_INITIALIZED &&
1012 kv2->hint->status == HS_INITIALIZED) {
1013 cmp = memcmp(kv1->hint->v.Rh.cached,
1014 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1015 if (cmp != 0)
1016 return (cmp);
1017 }
1018
1019 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1020 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1021
1022 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1023 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1024
1025 MD5Final(hash1, &ctx1);
1026 MD5Final(hash2, &ctx2);
1027
1028 if (kv1->hint->status == HS_UNINITIALIZED)
1029 randomcoll_init_hint(kv1, hash1);
1030 if (kv2->hint->status == HS_UNINITIALIZED)
1031 randomcoll_init_hint(kv2, hash2);
1032
1033 return (memcmp(hash1, hash2, sizeof(hash1)));
1034 }
1035
1036 /*
1037 * Implements version sort (-V).
1038 */
1039 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1040 versioncoll(struct key_value *kv1, struct key_value *kv2,
1041 size_t offset __unused)
1042 {
1043 struct bwstring *s1, *s2;
1044
1045 s1 = kv1->k;
1046 s2 = kv2->k;
1047
1048 if (debug_sort) {
1049 bwsprintf(stdout, s1, "; k1=<", ">");
1050 bwsprintf(stdout, s2, ", k2=<", ">");
1051 }
1052
1053 if (s1 == s2)
1054 return (0);
1055
1056 return (vcmp(s1, s2));
1057 }
1058
1059 /*
1060 * Check for minus infinity
1061 */
1062 static inline bool
huge_minus(double d,int err1)1063 huge_minus(double d, int err1)
1064 {
1065
1066 if (err1 == ERANGE)
1067 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1068 return (+1);
1069
1070 return (0);
1071 }
1072
1073 /*
1074 * Check for plus infinity
1075 */
1076 static inline bool
huge_plus(double d,int err1)1077 huge_plus(double d, int err1)
1078 {
1079
1080 if (err1 == ERANGE)
1081 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1082 return (+1);
1083
1084 return (0);
1085 }
1086
1087 /*
1088 * Check whether a function is a NAN
1089 */
1090 static bool
is_nan(double d)1091 is_nan(double d)
1092 {
1093
1094 return ((d == NAN) || (isnan(d)));
1095 }
1096
1097 /*
1098 * Compare two NANs
1099 */
1100 static int
cmp_nans(double d1,double d2)1101 cmp_nans(double d1, double d2)
1102 {
1103
1104 if (d1 < d2)
1105 return (-1);
1106 if (d1 > d2)
1107 return (+1);
1108 return (0);
1109 }
1110
1111 /*
1112 * Implements general numeric sort (-g).
1113 */
1114 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1115 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1116 size_t offset __unused)
1117 {
1118 double d1, d2;
1119 int err1, err2;
1120 bool empty1, empty2, key1_read, key2_read;
1121
1122 d1 = d2 = 0;
1123 err1 = err2 = 0;
1124 key1_read = key2_read = false;
1125
1126 if (debug_sort) {
1127 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1128 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1129 }
1130
1131 if (kv1->hint->status == HS_UNINITIALIZED) {
1132 errno = 0;
1133 d1 = bwstod(kv1->k, &empty1);
1134 err1 = errno;
1135
1136 if (empty1)
1137 kv1->hint->v.gh.notnum = true;
1138 else if (err1 == 0) {
1139 kv1->hint->v.gh.d = d1;
1140 kv1->hint->v.gh.nan = is_nan(d1);
1141 kv1->hint->status = HS_INITIALIZED;
1142 } else
1143 kv1->hint->status = HS_ERROR;
1144
1145 key1_read = true;
1146 }
1147
1148 if (kv2->hint->status == HS_UNINITIALIZED) {
1149 errno = 0;
1150 d2 = bwstod(kv2->k, &empty2);
1151 err2 = errno;
1152
1153 if (empty2)
1154 kv2->hint->v.gh.notnum = true;
1155 else if (err2 == 0) {
1156 kv2->hint->v.gh.d = d2;
1157 kv2->hint->v.gh.nan = is_nan(d2);
1158 kv2->hint->status = HS_INITIALIZED;
1159 } else
1160 kv2->hint->status = HS_ERROR;
1161
1162 key2_read = true;
1163 }
1164
1165 if (kv1->hint->status == HS_INITIALIZED &&
1166 kv2->hint->status == HS_INITIALIZED) {
1167 if (kv1->hint->v.gh.notnum)
1168 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1169 else if (kv2->hint->v.gh.notnum)
1170 return (+1);
1171
1172 if (kv1->hint->v.gh.nan)
1173 return ((kv2->hint->v.gh.nan) ?
1174 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1175 -1);
1176 else if (kv2->hint->v.gh.nan)
1177 return (+1);
1178
1179 d1 = kv1->hint->v.gh.d;
1180 d2 = kv2->hint->v.gh.d;
1181
1182 if (d1 < d2)
1183 return (-1);
1184 else if (d1 > d2)
1185 return (+1);
1186 else
1187 return (0);
1188 }
1189
1190 if (!key1_read) {
1191 errno = 0;
1192 d1 = bwstod(kv1->k, &empty1);
1193 err1 = errno;
1194 }
1195
1196 if (!key2_read) {
1197 errno = 0;
1198 d2 = bwstod(kv2->k, &empty2);
1199 err2 = errno;
1200 }
1201
1202 /* Non-value case: */
1203 if (empty1)
1204 return (empty2 ? 0 : -1);
1205 else if (empty2)
1206 return (+1);
1207
1208 /* NAN case */
1209 if (is_nan(d1))
1210 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1211 else if (is_nan(d2))
1212 return (+1);
1213
1214 /* Infinities */
1215 if (err1 == ERANGE || err2 == ERANGE) {
1216 /* Minus infinity case */
1217 if (huge_minus(d1, err1)) {
1218 if (huge_minus(d2, err2)) {
1219 if (d1 < d2)
1220 return (-1);
1221 if (d1 > d2)
1222 return (+1);
1223 return (0);
1224 } else
1225 return (-1);
1226
1227 } else if (huge_minus(d2, err2)) {
1228 if (huge_minus(d1, err1)) {
1229 if (d1 < d2)
1230 return (-1);
1231 if (d1 > d2)
1232 return (+1);
1233 return (0);
1234 } else
1235 return (+1);
1236 }
1237
1238 /* Plus infinity case */
1239 if (huge_plus(d1, err1)) {
1240 if (huge_plus(d2, err2)) {
1241 if (d1 < d2)
1242 return (-1);
1243 if (d1 > d2)
1244 return (+1);
1245 return (0);
1246 } else
1247 return (+1);
1248 } else if (huge_plus(d2, err2)) {
1249 if (huge_plus(d1, err1)) {
1250 if (d1 < d2)
1251 return (-1);
1252 if (d1 > d2)
1253 return (+1);
1254 return (0);
1255 } else
1256 return (-1);
1257 }
1258 }
1259
1260 if (d1 < d2)
1261 return (-1);
1262 if (d1 > d2)
1263 return (+1);
1264
1265 return (0);
1266 }
1267
1268 /*
1269 * Implements month sort (-M).
1270 */
1271 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1272 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1273 {
1274 int val1, val2;
1275 bool key1_read, key2_read;
1276
1277 val1 = val2 = 0;
1278 key1_read = key2_read = false;
1279
1280 if (debug_sort) {
1281 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1282 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1283 }
1284
1285 if (kv1->hint->status == HS_UNINITIALIZED) {
1286 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1287 key1_read = true;
1288 kv1->hint->status = HS_INITIALIZED;
1289 }
1290
1291 if (kv2->hint->status == HS_UNINITIALIZED) {
1292 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1293 key2_read = true;
1294 kv2->hint->status = HS_INITIALIZED;
1295 }
1296
1297 if (kv1->hint->status == HS_INITIALIZED) {
1298 val1 = kv1->hint->v.Mh.m;
1299 key1_read = true;
1300 }
1301
1302 if (kv2->hint->status == HS_INITIALIZED) {
1303 val2 = kv2->hint->v.Mh.m;
1304 key2_read = true;
1305 }
1306
1307 if (!key1_read)
1308 val1 = bws_month_score(kv1->k);
1309 if (!key2_read)
1310 val2 = bws_month_score(kv2->k);
1311
1312 if (val1 == val2) {
1313 return (0);
1314 }
1315 if (val1 < val2)
1316 return (-1);
1317 return (+1);
1318 }
1319