1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 35 #include <errno.h> 36 #include <err.h> 37 #include <langinfo.h> 38 #include <limits.h> 39 #include <math.h> 40 #include <md5.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 #include <wctype.h> 45 46 #include "coll.h" 47 #include "vsort.h" 48 49 struct key_specs *keys; 50 size_t keys_num = 0; 51 52 wint_t symbol_decimal_point = L'.'; 53 /* there is no default thousands separator in collate rules: */ 54 wint_t symbol_thousands_sep = 0; 55 wint_t symbol_negative_sign = L'-'; 56 wint_t symbol_positive_sign = L'+'; 57 58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61 static int numcoll(struct key_value*, struct key_value *, size_t offset); 62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 64 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 65 66 /* 67 * Allocate keys array 68 */ 69 struct keys_array * 70 keys_array_alloc(void) 71 { 72 struct keys_array *ka; 73 size_t sz; 74 75 sz = keys_array_size(); 76 ka = sort_malloc(sz); 77 memset(ka, 0, sz); 78 79 return (ka); 80 } 81 82 /* 83 * Calculate whether we need key hint space 84 */ 85 static size_t 86 key_hint_size(void) 87 { 88 89 return (need_hint ? sizeof(struct key_hint) : 0); 90 } 91 92 /* 93 * Calculate keys array size 94 */ 95 size_t 96 keys_array_size(void) 97 { 98 99 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 100 } 101 102 /* 103 * Clean data of keys array 104 */ 105 void 106 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 107 { 108 109 if (ka) { 110 for (size_t i = 0; i < keys_num; ++i) { 111 const struct key_value *kv; 112 113 kv = get_key_from_keys_array(ka, i); 114 if (kv->k && kv->k != s) 115 bwsfree(kv->k); 116 } 117 memset(ka, 0, keys_array_size()); 118 } 119 } 120 121 /* 122 * Get pointer to a key value in the keys set 123 */ 124 struct key_value * 125 get_key_from_keys_array(struct keys_array *ka, size_t ind) 126 { 127 128 return ((struct key_value *)((caddr_t)ka->key + 129 ind * (sizeof(struct key_value) + key_hint_size()))); 130 } 131 132 /* 133 * Set value of a key in the keys set 134 */ 135 void 136 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 137 { 138 139 if (ka && keys_num > ind) { 140 struct key_value *kv; 141 142 kv = get_key_from_keys_array(ka, ind); 143 144 if (kv->k && kv->k != s) 145 bwsfree(kv->k); 146 kv->k = s; 147 } 148 } 149 150 /* 151 * Initialize a sort list item 152 */ 153 struct sort_list_item * 154 sort_list_item_alloc(void) 155 { 156 struct sort_list_item *si; 157 size_t sz; 158 159 sz = sizeof(struct sort_list_item) + keys_array_size(); 160 si = sort_malloc(sz); 161 memset(si, 0, sz); 162 163 return (si); 164 } 165 166 size_t 167 sort_list_item_size(struct sort_list_item *si) 168 { 169 size_t ret = 0; 170 171 if (si) { 172 ret = sizeof(struct sort_list_item) + keys_array_size(); 173 if (si->str) 174 ret += bws_memsize(si->str); 175 for (size_t i = 0; i < keys_num; ++i) { 176 const struct key_value *kv; 177 178 kv = get_key_from_keys_array(&si->ka, i); 179 180 if (kv->k != si->str) 181 ret += bws_memsize(kv->k); 182 } 183 } 184 return (ret); 185 } 186 187 /* 188 * Calculate key for a sort list item 189 */ 190 static void 191 sort_list_item_make_key(struct sort_list_item *si) 192 { 193 194 preproc(si->str, &(si->ka)); 195 } 196 197 /* 198 * Set value of a sort list item. 199 * Return combined string and keys memory size. 200 */ 201 void 202 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 203 { 204 205 if (si) { 206 clean_keys_array(si->str, &(si->ka)); 207 if (si->str) { 208 if (si->str == str) { 209 /* we are trying to reset the same string */ 210 return; 211 } else { 212 bwsfree(si->str); 213 si->str = NULL; 214 } 215 } 216 si->str = str; 217 sort_list_item_make_key(si); 218 } 219 } 220 221 /* 222 * De-allocate a sort list item object memory 223 */ 224 void 225 sort_list_item_clean(struct sort_list_item *si) 226 { 227 228 if (si) { 229 clean_keys_array(si->str, &(si->ka)); 230 if (si->str) { 231 bwsfree(si->str); 232 si->str = NULL; 233 } 234 } 235 } 236 237 /* 238 * Skip columns according to specs 239 */ 240 static size_t 241 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 242 bool skip_blanks, bool *empty_key) 243 { 244 if (cols < 1) 245 return (BWSLEN(s) + 1); 246 247 if (skip_blanks) 248 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 249 ++start; 250 251 while (start < BWSLEN(s) && cols > 1) { 252 --cols; 253 ++start; 254 } 255 256 if (start >= BWSLEN(s)) 257 *empty_key = true; 258 259 return (start); 260 } 261 262 /* 263 * Skip fields according to specs 264 */ 265 static size_t 266 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 267 { 268 269 if (fields < 2) { 270 if (BWSLEN(s) == 0) 271 *empty_field = true; 272 return (0); 273 } else if (!(sort_opts_vals.tflag)) { 274 size_t cpos = 0; 275 bool pb = true; 276 277 while (cpos < BWSLEN(s)) { 278 bool isblank; 279 280 isblank = iswblank(BWS_GET(s, cpos)); 281 282 if (isblank && !pb) { 283 --fields; 284 if (fields <= 1) 285 return (cpos); 286 } 287 pb = isblank; 288 ++cpos; 289 } 290 if (fields > 1) 291 *empty_field = true; 292 return (cpos); 293 } else { 294 size_t cpos = 0; 295 296 while (cpos < BWSLEN(s)) { 297 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 298 --fields; 299 if (fields <= 1) 300 return (cpos + 1); 301 } 302 ++cpos; 303 } 304 if (fields > 1) 305 *empty_field = true; 306 return (cpos); 307 } 308 } 309 310 /* 311 * Find fields start 312 */ 313 static void 314 find_field_start(const struct bwstring *s, struct key_specs *ks, 315 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 316 { 317 318 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 319 if (!*empty_field) 320 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 321 ks->pos1b, empty_key); 322 else 323 *empty_key = true; 324 } 325 326 /* 327 * Find end key position 328 */ 329 static size_t 330 find_field_end(const struct bwstring *s, struct key_specs *ks) 331 { 332 size_t f2, next_field_start, pos_end; 333 bool empty_field, empty_key; 334 335 empty_field = false; 336 empty_key = false; 337 f2 = ks->f2; 338 339 if (f2 == 0) 340 return (BWSLEN(s) + 1); 341 else { 342 if (ks->c2 == 0) { 343 next_field_start = skip_fields_to_start(s, f2 + 1, 344 &empty_field); 345 if ((next_field_start > 0) && sort_opts_vals.tflag && 346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 347 next_field_start - 1))) 348 --next_field_start; 349 } else 350 next_field_start = skip_fields_to_start(s, f2, 351 &empty_field); 352 } 353 354 if (empty_field || (next_field_start >= BWSLEN(s))) 355 return (BWSLEN(s) + 1); 356 357 if (ks->c2) { 358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 359 ks->pos2b, &empty_key); 360 if (pos_end < BWSLEN(s)) 361 ++pos_end; 362 } else 363 pos_end = next_field_start; 364 365 return (pos_end); 366 } 367 368 /* 369 * Cut a field according to the key specs 370 */ 371 static struct bwstring * 372 cut_field(const struct bwstring *s, struct key_specs *ks) 373 { 374 struct bwstring *ret = NULL; 375 376 if (s && ks) { 377 size_t field_start, key_end, key_start, sz; 378 bool empty_field, empty_key; 379 380 field_start = 0; 381 key_start = 0; 382 empty_field = false; 383 empty_key = false; 384 385 find_field_start(s, ks, &field_start, &key_start, 386 &empty_field, &empty_key); 387 388 if (empty_key) 389 sz = 0; 390 else { 391 key_end = find_field_end(s, ks); 392 sz = (key_end < key_start) ? 0 : (key_end - key_start); 393 } 394 395 ret = bwsalloc(sz); 396 if (sz) 397 bwsnocpy(ret, s, key_start, sz); 398 } else 399 ret = bwsalloc(0); 400 401 return (ret); 402 } 403 404 /* 405 * Preprocesses a line applying the necessary transformations 406 * specified by command line options and returns the preprocessed 407 * string, which can be used to compare. 408 */ 409 int 410 preproc(struct bwstring *s, struct keys_array *ka) 411 { 412 413 if (sort_opts_vals.kflag) 414 for (size_t i = 0; i < keys_num; i++) { 415 struct bwstring *key; 416 struct key_specs *kspecs; 417 struct sort_mods *sm; 418 419 kspecs = &(keys[i]); 420 key = cut_field(s, kspecs); 421 422 sm = &(kspecs->sm); 423 if (sm->dflag) 424 key = dictionary_order(key); 425 else if (sm->iflag) 426 key = ignore_nonprinting(key); 427 if (sm->fflag || sm->Mflag) 428 key = ignore_case(key); 429 430 set_key_on_keys_array(ka, key, i); 431 } 432 else { 433 struct bwstring *ret = NULL; 434 struct sort_mods *sm = default_sort_mods; 435 436 if (sm->bflag) { 437 if (ret == NULL) 438 ret = bwsdup(s); 439 ret = ignore_leading_blanks(ret); 440 } 441 if (sm->dflag) { 442 if (ret == NULL) 443 ret = bwsdup(s); 444 ret = dictionary_order(ret); 445 } else if (sm->iflag) { 446 if (ret == NULL) 447 ret = bwsdup(s); 448 ret = ignore_nonprinting(ret); 449 } 450 if (sm->fflag || sm->Mflag) { 451 if (ret == NULL) 452 ret = bwsdup(s); 453 ret = ignore_case(ret); 454 } 455 if (ret == NULL) 456 set_key_on_keys_array(ka, s, 0); 457 else 458 set_key_on_keys_array(ka, ret, 0); 459 } 460 461 return 0; 462 } 463 464 cmpcoll_t 465 get_sort_func(struct sort_mods *sm) 466 { 467 468 if (sm->nflag) 469 return (numcoll); 470 else if (sm->hflag) 471 return (hnumcoll); 472 else if (sm->gflag) 473 return (gnumcoll); 474 else if (sm->Mflag) 475 return (monthcoll); 476 else if (sm->Rflag) 477 return (randomcoll); 478 else if (sm->Vflag) 479 return (versioncoll); 480 else 481 return (wstrcoll); 482 } 483 484 /* 485 * Compares the given strings. Returns a positive number if 486 * the first precedes the second, a negative number if the second is 487 * the preceding one, and zero if they are equal. This function calls 488 * the underlying collate functions, which done the actual comparison. 489 */ 490 int 491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 492 { 493 struct key_value *kv1, *kv2; 494 struct sort_mods *sm; 495 int res = 0; 496 497 for (size_t i = 0; i < keys_num; ++i) { 498 kv1 = get_key_from_keys_array(ps1, i); 499 kv2 = get_key_from_keys_array(ps2, i); 500 sm = &(keys[i].sm); 501 502 if (sm->rflag) 503 res = sm->func(kv2, kv1, offset); 504 else 505 res = sm->func(kv1, kv2, offset); 506 507 if (res) 508 break; 509 510 /* offset applies to only the first key */ 511 offset = 0; 512 } 513 return (res); 514 } 515 516 /* 517 * Compare two strings. 518 * Plain symbol-by-symbol comparison. 519 */ 520 int 521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 522 { 523 524 if (default_sort_mods->rflag) { 525 const struct bwstring *tmp; 526 527 tmp = s1; 528 s1 = s2; 529 s2 = tmp; 530 } 531 532 return (bwscoll(s1, s2, 0)); 533 } 534 535 /* 536 * Compare a string and a sort list item, according to the sort specs. 537 */ 538 int 539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 540 { 541 struct keys_array *ka1; 542 int ret = 0; 543 544 ka1 = keys_array_alloc(); 545 546 preproc(str1, ka1); 547 548 sort_list_item_make_key(*ss2); 549 550 if (debug_sort) { 551 bwsprintf(stdout, str1, "; s1=<", ">"); 552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 553 } 554 555 ret = key_coll(ka1, &((*ss2)->ka), 0); 556 557 if (debug_sort) 558 printf("; cmp1=%d", ret); 559 560 clean_keys_array(str1, ka1); 561 sort_free(ka1); 562 563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 564 ret = top_level_str_coll(str1, ((*ss2)->str)); 565 if (debug_sort) 566 printf("; cmp2=%d", ret); 567 } 568 569 if (debug_sort) 570 printf("\n"); 571 572 return (ret); 573 } 574 575 /* 576 * Compare two sort list items, according to the sort specs. 577 */ 578 int 579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 580 size_t offset) 581 { 582 int ret; 583 584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 585 586 if (debug_sort) { 587 if (offset) 588 printf("; offset=%d", (int) offset); 589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 591 printf("; cmp1=%d\n", ret); 592 } 593 594 if (ret) 595 return (ret); 596 597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 599 if (debug_sort) 600 printf("; cmp2=%d\n", ret); 601 } 602 603 return (ret); 604 } 605 606 /* 607 * Compare two sort list items, according to the sort specs. 608 */ 609 int 610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 611 { 612 613 return (list_coll_offset(ss1, ss2, 0)); 614 } 615 616 #define LSCDEF(N) \ 617 static int \ 618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 619 { \ 620 \ 621 return (list_coll_offset(ss1, ss2, N)); \ 622 } 623 624 LSCDEF(1) 625 LSCDEF(2) 626 LSCDEF(3) 627 LSCDEF(4) 628 LSCDEF(5) 629 LSCDEF(6) 630 LSCDEF(7) 631 LSCDEF(8) 632 LSCDEF(9) 633 LSCDEF(10) 634 LSCDEF(11) 635 LSCDEF(12) 636 LSCDEF(13) 637 LSCDEF(14) 638 LSCDEF(15) 639 LSCDEF(16) 640 LSCDEF(17) 641 LSCDEF(18) 642 LSCDEF(19) 643 LSCDEF(20) 644 645 listcoll_t 646 get_list_call_func(size_t offset) 647 { 648 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 649 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 650 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 651 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 652 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 653 list_coll_18, list_coll_19, list_coll_20 }; 654 655 if (offset <= 20) 656 return (lsarray[offset]); 657 658 return (list_coll); 659 } 660 661 /* 662 * Compare two sort list items, only by their original string. 663 */ 664 int 665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 666 { 667 668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 669 } 670 671 /* 672 * Maximum size of a number in the string (before or after decimal point) 673 */ 674 #define MAX_NUM_SIZE (128) 675 676 /* 677 * Set suffix value 678 */ 679 static void setsuffix(wchar_t c, unsigned char *si) 680 { 681 switch (c){ 682 case L'k': 683 case L'K': 684 *si = 1; 685 break; 686 case L'M': 687 *si = 2; 688 break; 689 case L'G': 690 *si = 3; 691 break; 692 case L'T': 693 *si = 4; 694 break; 695 case L'P': 696 *si = 5; 697 break; 698 case L'E': 699 *si = 6; 700 break; 701 case L'Z': 702 *si = 7; 703 break; 704 case L'Y': 705 *si = 8; 706 break; 707 default: 708 *si = 0; 709 } 710 } 711 712 /* 713 * Read string s and parse the string into a fixed-decimal-point number. 714 * sign equals -1 if the number is negative (explicit plus is not allowed, 715 * according to GNU sort's "info sort". 716 * The number part before decimal point is in the smain, after the decimal 717 * point is in sfrac, tail is the pointer to the remainder of the string. 718 */ 719 static int 720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 721 { 722 bwstring_iterator s; 723 724 s = bws_begin(s0); 725 726 /* always end the fraction with zero, even if we have no fraction */ 727 sfrac[0] = 0; 728 729 while (iswblank(bws_get_iter_value(s))) 730 s = bws_iterator_inc(s, 1); 731 732 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 733 *sign = -1; 734 s = bws_iterator_inc(s, 1); 735 } 736 737 // This is '0', not '\0', do not change this 738 while (iswdigit(bws_get_iter_value(s)) && 739 (bws_get_iter_value(s) == L'0')) 740 s = bws_iterator_inc(s, 1); 741 742 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 743 if (iswdigit(bws_get_iter_value(s))) { 744 smain[*main_len] = bws_get_iter_value(s); 745 s = bws_iterator_inc(s, 1); 746 *main_len += 1; 747 } else if (symbol_thousands_sep && 748 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 749 s = bws_iterator_inc(s, 1); 750 else 751 break; 752 } 753 754 smain[*main_len] = 0; 755 756 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 757 s = bws_iterator_inc(s, 1); 758 while (iswdigit(bws_get_iter_value(s)) && 759 *frac_len < MAX_NUM_SIZE) { 760 sfrac[*frac_len] = bws_get_iter_value(s); 761 s = bws_iterator_inc(s, 1); 762 *frac_len += 1; 763 } 764 sfrac[*frac_len] = 0; 765 766 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 767 --(*frac_len); 768 sfrac[*frac_len] = L'\0'; 769 } 770 } 771 772 setsuffix(bws_get_iter_value(s),si); 773 774 if ((*main_len + *frac_len) == 0) 775 *sign = 0; 776 777 return (0); 778 } 779 780 /* 781 * Implements string sort. 782 */ 783 static int 784 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 785 { 786 787 if (debug_sort) { 788 if (offset) 789 printf("; offset=%d\n", (int) offset); 790 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 791 printf("(%zu)", BWSLEN(kv1->k)); 792 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 793 printf("(%zu)", BWSLEN(kv2->k)); 794 } 795 796 return (bwscoll(kv1->k, kv2->k, offset)); 797 } 798 799 /* 800 * Compare two suffixes 801 */ 802 static inline int 803 cmpsuffix(unsigned char si1, unsigned char si2) 804 { 805 806 return ((char)si1 - (char)si2); 807 } 808 809 /* 810 * Implements numeric sort for -n and -h. 811 */ 812 static int 813 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 814 size_t offset __unused, bool use_suffix) 815 { 816 struct bwstring *s1, *s2; 817 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 818 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 819 int cmp_res, sign1, sign2; 820 size_t frac1, frac2, main1, main2; 821 unsigned char SI1, SI2; 822 bool e1, e2, key1_read, key2_read; 823 824 s1 = kv1->k; 825 s2 = kv2->k; 826 sign1 = sign2 = 0; 827 main1 = main2 = 0; 828 frac1 = frac2 = 0; 829 830 key1_read = key2_read = false; 831 832 if (debug_sort) { 833 bwsprintf(stdout, s1, "; k1=<", ">"); 834 bwsprintf(stdout, s2, ", k2=<", ">"); 835 } 836 837 if (s1 == s2) 838 return (0); 839 840 if (kv1->hint->status == HS_UNINITIALIZED) { 841 /* read the number from the string */ 842 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 843 key1_read = true; 844 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 845 if(main1 < 1 && frac1 < 1) 846 kv1->hint->v.nh.empty=true; 847 kv1->hint->v.nh.si = SI1; 848 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 849 HS_INITIALIZED : HS_ERROR; 850 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 851 } 852 853 if (kv2->hint->status == HS_UNINITIALIZED) { 854 /* read the number from the string */ 855 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 856 key2_read = true; 857 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 858 if(main2 < 1 && frac2 < 1) 859 kv2->hint->v.nh.empty=true; 860 kv2->hint->v.nh.si = SI2; 861 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 862 HS_INITIALIZED : HS_ERROR; 863 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 864 } 865 866 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 867 HS_INITIALIZED) { 868 unsigned long long n1, n2; 869 bool neg1, neg2; 870 871 e1 = kv1->hint->v.nh.empty; 872 e2 = kv2->hint->v.nh.empty; 873 874 if (e1 && e2) 875 return (0); 876 877 neg1 = kv1->hint->v.nh.neg; 878 neg2 = kv2->hint->v.nh.neg; 879 880 if (neg1 && !neg2) 881 return (-1); 882 if (neg2 && !neg1) 883 return (+1); 884 885 if (e1) 886 return (neg2 ? +1 : -1); 887 else if (e2) 888 return (neg1 ? -1 : +1); 889 890 891 if (use_suffix) { 892 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 893 if (cmp_res) 894 return (neg1 ? -cmp_res : cmp_res); 895 } 896 897 n1 = kv1->hint->v.nh.n1; 898 n2 = kv2->hint->v.nh.n1; 899 if (n1 < n2) 900 return (neg1 ? +1 : -1); 901 else if (n1 > n2) 902 return (neg1 ? -1 : +1); 903 } 904 905 /* read the numbers from the strings */ 906 if (!key1_read) 907 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 908 if (!key2_read) 909 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 910 911 e1 = ((main1 + frac1) == 0); 912 e2 = ((main2 + frac2) == 0); 913 914 if (e1 && e2) 915 return (0); 916 917 /* we know the result if the signs are different */ 918 if (sign1 < 0 && sign2 >= 0) 919 return (-1); 920 if (sign1 >= 0 && sign2 < 0) 921 return (+1); 922 923 if (e1) 924 return ((sign2 < 0) ? +1 : -1); 925 else if (e2) 926 return ((sign1 < 0) ? -1 : +1); 927 928 if (use_suffix) { 929 cmp_res = cmpsuffix(SI1, SI2); 930 if (cmp_res) 931 return ((sign1 < 0) ? -cmp_res : cmp_res); 932 } 933 934 /* if both numbers are empty assume that the strings are equal */ 935 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 936 return (0); 937 938 /* 939 * if the main part is of different size, we know the result 940 * (because the leading zeros are removed) 941 */ 942 if (main1 < main2) 943 cmp_res = -1; 944 else if (main1 > main2) 945 cmp_res = +1; 946 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 947 else 948 cmp_res = wcscmp(smain1, smain2); 949 950 /* check fraction */ 951 if (!cmp_res) 952 cmp_res = wcscmp(sfrac1, sfrac2); 953 954 if (!cmp_res) 955 return (0); 956 957 /* reverse result if the signs are negative */ 958 if (sign1 < 0 && sign2 < 0) 959 cmp_res = -cmp_res; 960 961 return (cmp_res); 962 } 963 964 /* 965 * Implements numeric sort (-n). 966 */ 967 static int 968 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 969 { 970 971 return (numcoll_impl(kv1, kv2, offset, false)); 972 } 973 974 /* 975 * Implements 'human' numeric sort (-h). 976 */ 977 static int 978 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 979 { 980 981 return (numcoll_impl(kv1, kv2, offset, true)); 982 } 983 984 /* 985 * Implements random sort (-R). 986 */ 987 static int 988 randomcoll(struct key_value *kv1, struct key_value *kv2, 989 size_t offset __unused) 990 { 991 struct bwstring *s1, *s2; 992 MD5_CTX ctx1, ctx2; 993 char *b1, *b2; 994 995 s1 = kv1->k; 996 s2 = kv2->k; 997 998 if (debug_sort) { 999 bwsprintf(stdout, s1, "; k1=<", ">"); 1000 bwsprintf(stdout, s2, ", k2=<", ">"); 1001 } 1002 1003 if (s1 == s2) 1004 return (0); 1005 1006 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 1007 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 1008 1009 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1010 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1011 b1 = MD5End(&ctx1, NULL); 1012 b2 = MD5End(&ctx2, NULL); 1013 if (b1 == NULL) { 1014 if (b2 == NULL) 1015 return (0); 1016 else { 1017 sort_free(b2); 1018 return (-1); 1019 } 1020 } else if (b2 == NULL) { 1021 sort_free(b1); 1022 return (+1); 1023 } else { 1024 int cmp_res; 1025 1026 cmp_res = strcmp(b1,b2); 1027 sort_free(b1); 1028 sort_free(b2); 1029 1030 if (!cmp_res) 1031 cmp_res = bwscoll(s1, s2, 0); 1032 1033 return (cmp_res); 1034 } 1035 } 1036 1037 /* 1038 * Implements version sort (-V). 1039 */ 1040 static int 1041 versioncoll(struct key_value *kv1, struct key_value *kv2, 1042 size_t offset __unused) 1043 { 1044 struct bwstring *s1, *s2; 1045 1046 s1 = kv1->k; 1047 s2 = kv2->k; 1048 1049 if (debug_sort) { 1050 bwsprintf(stdout, s1, "; k1=<", ">"); 1051 bwsprintf(stdout, s2, ", k2=<", ">"); 1052 } 1053 1054 if (s1 == s2) 1055 return (0); 1056 1057 return (vcmp(s1, s2)); 1058 } 1059 1060 /* 1061 * Check for minus infinity 1062 */ 1063 static inline bool 1064 huge_minus(double d, int err1) 1065 { 1066 1067 if (err1 == ERANGE) 1068 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1069 return (+1); 1070 1071 return (0); 1072 } 1073 1074 /* 1075 * Check for plus infinity 1076 */ 1077 static inline bool 1078 huge_plus(double d, int err1) 1079 { 1080 1081 if (err1 == ERANGE) 1082 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1083 return (+1); 1084 1085 return (0); 1086 } 1087 1088 /* 1089 * Check whether a function is a NAN 1090 */ 1091 static bool 1092 is_nan(double d) 1093 { 1094 1095 return ((d == NAN) || (isnan(d))); 1096 } 1097 1098 /* 1099 * Compare two NANs 1100 */ 1101 static int 1102 cmp_nans(double d1, double d2) 1103 { 1104 1105 if (d1 < d2) 1106 return (-1); 1107 if (d1 > d2) 1108 return (+1); 1109 return (0); 1110 } 1111 1112 /* 1113 * Implements general numeric sort (-g). 1114 */ 1115 static int 1116 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1117 size_t offset __unused) 1118 { 1119 double d1, d2; 1120 int err1, err2; 1121 bool empty1, empty2, key1_read, key2_read; 1122 1123 d1 = d2 = 0; 1124 err1 = err2 = 0; 1125 key1_read = key2_read = false; 1126 1127 if (debug_sort) { 1128 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1129 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1130 } 1131 1132 if (kv1->hint->status == HS_UNINITIALIZED) { 1133 errno = 0; 1134 d1 = bwstod(kv1->k, &empty1); 1135 err1 = errno; 1136 1137 if (empty1) 1138 kv1->hint->v.gh.notnum = true; 1139 else if (err1 == 0) { 1140 kv1->hint->v.gh.d = d1; 1141 kv1->hint->v.gh.nan = is_nan(d1); 1142 kv1->hint->status = HS_INITIALIZED; 1143 } else 1144 kv1->hint->status = HS_ERROR; 1145 1146 key1_read = true; 1147 } 1148 1149 if (kv2->hint->status == HS_UNINITIALIZED) { 1150 errno = 0; 1151 d2 = bwstod(kv2->k, &empty2); 1152 err2 = errno; 1153 1154 if (empty2) 1155 kv2->hint->v.gh.notnum = true; 1156 else if (err2 == 0) { 1157 kv2->hint->v.gh.d = d2; 1158 kv2->hint->v.gh.nan = is_nan(d2); 1159 kv2->hint->status = HS_INITIALIZED; 1160 } else 1161 kv2->hint->status = HS_ERROR; 1162 1163 key2_read = true; 1164 } 1165 1166 if (kv1->hint->status == HS_INITIALIZED && 1167 kv2->hint->status == HS_INITIALIZED) { 1168 if (kv1->hint->v.gh.notnum) 1169 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1170 else if (kv2->hint->v.gh.notnum) 1171 return (+1); 1172 1173 if (kv1->hint->v.gh.nan) 1174 return ((kv2->hint->v.gh.nan) ? 1175 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1176 -1); 1177 else if (kv2->hint->v.gh.nan) 1178 return (+1); 1179 1180 d1 = kv1->hint->v.gh.d; 1181 d2 = kv2->hint->v.gh.d; 1182 1183 if (d1 < d2) 1184 return (-1); 1185 else if (d1 > d2) 1186 return (+1); 1187 else 1188 return (0); 1189 } 1190 1191 if (!key1_read) { 1192 errno = 0; 1193 d1 = bwstod(kv1->k, &empty1); 1194 err1 = errno; 1195 } 1196 1197 if (!key2_read) { 1198 errno = 0; 1199 d2 = bwstod(kv2->k, &empty2); 1200 err2 = errno; 1201 } 1202 1203 /* Non-value case: */ 1204 if (empty1) 1205 return (empty2 ? 0 : -1); 1206 else if (empty2) 1207 return (+1); 1208 1209 /* NAN case */ 1210 if (is_nan(d1)) 1211 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1212 else if (is_nan(d2)) 1213 return (+1); 1214 1215 /* Infinities */ 1216 if (err1 == ERANGE || err2 == ERANGE) { 1217 /* Minus infinity case */ 1218 if (huge_minus(d1, err1)) { 1219 if (huge_minus(d2, err2)) { 1220 if (d1 < d2) 1221 return (-1); 1222 if (d1 > d2) 1223 return (+1); 1224 return (0); 1225 } else 1226 return (-1); 1227 1228 } else if (huge_minus(d2, err2)) { 1229 if (huge_minus(d1, err1)) { 1230 if (d1 < d2) 1231 return (-1); 1232 if (d1 > d2) 1233 return (+1); 1234 return (0); 1235 } else 1236 return (+1); 1237 } 1238 1239 /* Plus infinity case */ 1240 if (huge_plus(d1, err1)) { 1241 if (huge_plus(d2, err2)) { 1242 if (d1 < d2) 1243 return (-1); 1244 if (d1 > d2) 1245 return (+1); 1246 return (0); 1247 } else 1248 return (+1); 1249 } else if (huge_plus(d2, err2)) { 1250 if (huge_plus(d1, err1)) { 1251 if (d1 < d2) 1252 return (-1); 1253 if (d1 > d2) 1254 return (+1); 1255 return (0); 1256 } else 1257 return (-1); 1258 } 1259 } 1260 1261 if (d1 < d2) 1262 return (-1); 1263 if (d1 > d2) 1264 return (+1); 1265 1266 return (0); 1267 } 1268 1269 /* 1270 * Implements month sort (-M). 1271 */ 1272 static int 1273 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1274 { 1275 int val1, val2; 1276 bool key1_read, key2_read; 1277 1278 val1 = val2 = 0; 1279 key1_read = key2_read = false; 1280 1281 if (debug_sort) { 1282 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1283 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1284 } 1285 1286 if (kv1->hint->status == HS_UNINITIALIZED) { 1287 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1288 key1_read = true; 1289 kv1->hint->status = HS_INITIALIZED; 1290 } 1291 1292 if (kv2->hint->status == HS_UNINITIALIZED) { 1293 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1294 key2_read = true; 1295 kv2->hint->status = HS_INITIALIZED; 1296 } 1297 1298 if (kv1->hint->status == HS_INITIALIZED) { 1299 val1 = kv1->hint->v.Mh.m; 1300 key1_read = true; 1301 } 1302 1303 if (kv2->hint->status == HS_INITIALIZED) { 1304 val2 = kv2->hint->v.Mh.m; 1305 key2_read = true; 1306 } 1307 1308 if (!key1_read) 1309 val1 = bws_month_score(kv1->k); 1310 if (!key2_read) 1311 val2 = bws_month_score(kv2->k); 1312 1313 if (val1 == val2) { 1314 return (0); 1315 } 1316 if (val1 < val2) 1317 return (-1); 1318 return (+1); 1319 } 1320