1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 32 #include <errno.h> 33 #include <err.h> 34 #include <langinfo.h> 35 #include <limits.h> 36 #include <math.h> 37 #include <md5.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "coll.h" 44 #include "vsort.h" 45 46 struct key_specs *keys; 47 size_t keys_num = 0; 48 49 wint_t symbol_decimal_point = L'.'; 50 /* there is no default thousands separator in collate rules: */ 51 wint_t symbol_thousands_sep = 0; 52 wint_t symbol_negative_sign = L'-'; 53 wint_t symbol_positive_sign = L'+'; 54 55 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 56 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 57 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 58 static int numcoll(struct key_value*, struct key_value *, size_t offset); 59 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 61 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 62 63 /* 64 * Allocate keys array 65 */ 66 struct keys_array * 67 keys_array_alloc(void) 68 { 69 struct keys_array *ka; 70 size_t sz; 71 72 sz = keys_array_size(); 73 ka = sort_calloc(1, sz); 74 75 return (ka); 76 } 77 78 /* 79 * Calculate whether we need key hint space 80 */ 81 static size_t 82 key_hint_size(void) 83 { 84 85 return (need_hint ? sizeof(struct key_hint) : 0); 86 } 87 88 /* 89 * Calculate keys array size 90 */ 91 size_t 92 keys_array_size(void) 93 { 94 95 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 96 } 97 98 /* 99 * Clean data of keys array 100 */ 101 void 102 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 103 { 104 105 if (ka) { 106 for (size_t i = 0; i < keys_num; ++i) { 107 const struct key_value *kv; 108 109 kv = get_key_from_keys_array(ka, i); 110 if (kv->k && kv->k != s) 111 bwsfree(kv->k); 112 } 113 memset(ka, 0, keys_array_size()); 114 } 115 } 116 117 /* 118 * Get pointer to a key value in the keys set 119 */ 120 struct key_value * 121 get_key_from_keys_array(struct keys_array *ka, size_t ind) 122 { 123 124 return ((struct key_value *)((caddr_t)ka->key + 125 ind * (sizeof(struct key_value) + key_hint_size()))); 126 } 127 128 /* 129 * Set value of a key in the keys set 130 */ 131 void 132 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 133 { 134 135 if (ka && keys_num > ind) { 136 struct key_value *kv; 137 138 kv = get_key_from_keys_array(ka, ind); 139 140 if (kv->k && kv->k != s) 141 bwsfree(kv->k); 142 kv->k = s; 143 } 144 } 145 146 /* 147 * Initialize a sort list item 148 */ 149 struct sort_list_item * 150 sort_list_item_alloc(void) 151 { 152 struct sort_list_item *si; 153 size_t sz; 154 155 sz = sizeof(struct sort_list_item) + keys_array_size(); 156 si = sort_calloc(1, sz); 157 158 return (si); 159 } 160 161 size_t 162 sort_list_item_size(struct sort_list_item *si) 163 { 164 size_t ret = 0; 165 166 if (si) { 167 ret = sizeof(struct sort_list_item) + keys_array_size(); 168 if (si->str) 169 ret += bws_memsize(si->str); 170 for (size_t i = 0; i < keys_num; ++i) { 171 const struct key_value *kv; 172 173 kv = get_key_from_keys_array(&si->ka, i); 174 175 if (kv->k != si->str) 176 ret += bws_memsize(kv->k); 177 } 178 } 179 return (ret); 180 } 181 182 /* 183 * Calculate key for a sort list item 184 */ 185 static void 186 sort_list_item_make_key(struct sort_list_item *si) 187 { 188 189 preproc(si->str, &(si->ka)); 190 } 191 192 /* 193 * Set value of a sort list item. 194 * Return combined string and keys memory size. 195 */ 196 void 197 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 198 { 199 200 if (si) { 201 clean_keys_array(si->str, &(si->ka)); 202 if (si->str) { 203 if (si->str == str) { 204 /* we are trying to reset the same string */ 205 return; 206 } else { 207 bwsfree(si->str); 208 si->str = NULL; 209 } 210 } 211 si->str = str; 212 sort_list_item_make_key(si); 213 } 214 } 215 216 /* 217 * De-allocate a sort list item object memory 218 */ 219 void 220 sort_list_item_clean(struct sort_list_item *si) 221 { 222 223 if (si) { 224 clean_keys_array(si->str, &(si->ka)); 225 if (si->str) { 226 bwsfree(si->str); 227 si->str = NULL; 228 } 229 } 230 } 231 232 /* 233 * Skip columns according to specs 234 */ 235 static size_t 236 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 237 bool skip_blanks, bool *empty_key) 238 { 239 if (cols < 1) 240 return (BWSLEN(s) + 1); 241 242 if (skip_blanks) 243 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 244 ++start; 245 246 while (start < BWSLEN(s) && cols > 1) { 247 --cols; 248 ++start; 249 } 250 251 if (start >= BWSLEN(s)) 252 *empty_key = true; 253 254 return (start); 255 } 256 257 /* 258 * Skip fields according to specs 259 */ 260 static size_t 261 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 262 { 263 264 if (fields < 2) { 265 if (BWSLEN(s) == 0) 266 *empty_field = true; 267 return (0); 268 } else if (!(sort_opts_vals.tflag)) { 269 size_t cpos = 0; 270 bool pb = true; 271 272 while (cpos < BWSLEN(s)) { 273 bool isblank; 274 275 isblank = iswblank(BWS_GET(s, cpos)); 276 277 if (isblank && !pb) { 278 --fields; 279 if (fields <= 1) 280 return (cpos); 281 } 282 pb = isblank; 283 ++cpos; 284 } 285 if (fields > 1) 286 *empty_field = true; 287 return (cpos); 288 } else { 289 size_t cpos = 0; 290 291 while (cpos < BWSLEN(s)) { 292 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 293 --fields; 294 if (fields <= 1) 295 return (cpos + 1); 296 } 297 ++cpos; 298 } 299 if (fields > 1) 300 *empty_field = true; 301 return (cpos); 302 } 303 } 304 305 /* 306 * Find fields start 307 */ 308 static void 309 find_field_start(const struct bwstring *s, struct key_specs *ks, 310 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 311 { 312 313 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 314 if (!*empty_field) 315 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 316 ks->pos1b, empty_key); 317 else 318 *empty_key = true; 319 } 320 321 /* 322 * Find end key position 323 */ 324 static size_t 325 find_field_end(const struct bwstring *s, struct key_specs *ks) 326 { 327 size_t f2, next_field_start, pos_end; 328 bool empty_field, empty_key; 329 330 empty_field = false; 331 empty_key = false; 332 f2 = ks->f2; 333 334 if (f2 == 0) 335 return (BWSLEN(s) + 1); 336 else { 337 if (ks->c2 == 0) { 338 next_field_start = skip_fields_to_start(s, f2 + 1, 339 &empty_field); 340 if ((next_field_start > 0) && sort_opts_vals.tflag && 341 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 342 next_field_start - 1))) 343 --next_field_start; 344 } else 345 next_field_start = skip_fields_to_start(s, f2, 346 &empty_field); 347 } 348 349 if (empty_field || (next_field_start >= BWSLEN(s))) 350 return (BWSLEN(s) + 1); 351 352 if (ks->c2) { 353 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 354 ks->pos2b, &empty_key); 355 if (pos_end < BWSLEN(s)) 356 ++pos_end; 357 } else 358 pos_end = next_field_start; 359 360 return (pos_end); 361 } 362 363 /* 364 * Cut a field according to the key specs 365 */ 366 static struct bwstring * 367 cut_field(const struct bwstring *s, struct key_specs *ks) 368 { 369 struct bwstring *ret = NULL; 370 371 if (s && ks) { 372 size_t field_start, key_end, key_start, sz; 373 bool empty_field, empty_key; 374 375 field_start = 0; 376 key_start = 0; 377 empty_field = false; 378 empty_key = false; 379 380 find_field_start(s, ks, &field_start, &key_start, 381 &empty_field, &empty_key); 382 383 if (empty_key) 384 sz = 0; 385 else { 386 key_end = find_field_end(s, ks); 387 sz = (key_end < key_start) ? 0 : (key_end - key_start); 388 } 389 390 ret = bwsalloc(sz); 391 if (sz) 392 bwsnocpy(ret, s, key_start, sz); 393 } else 394 ret = bwsalloc(0); 395 396 return (ret); 397 } 398 399 /* 400 * Preprocesses a line applying the necessary transformations 401 * specified by command line options and returns the preprocessed 402 * string, which can be used to compare. 403 */ 404 int 405 preproc(struct bwstring *s, struct keys_array *ka) 406 { 407 408 if (sort_opts_vals.kflag) 409 for (size_t i = 0; i < keys_num; i++) { 410 struct bwstring *key; 411 struct key_specs *kspecs; 412 struct sort_mods *sm; 413 414 kspecs = &(keys[i]); 415 key = cut_field(s, kspecs); 416 417 sm = &(kspecs->sm); 418 if (sm->dflag) 419 key = dictionary_order(key); 420 else if (sm->iflag) 421 key = ignore_nonprinting(key); 422 if (sm->fflag || sm->Mflag) 423 key = ignore_case(key); 424 425 set_key_on_keys_array(ka, key, i); 426 } 427 else { 428 struct bwstring *ret = NULL; 429 struct sort_mods *sm = default_sort_mods; 430 431 if (sm->bflag) { 432 if (ret == NULL) 433 ret = bwsdup(s); 434 ret = ignore_leading_blanks(ret); 435 } 436 if (sm->dflag) { 437 if (ret == NULL) 438 ret = bwsdup(s); 439 ret = dictionary_order(ret); 440 } else if (sm->iflag) { 441 if (ret == NULL) 442 ret = bwsdup(s); 443 ret = ignore_nonprinting(ret); 444 } 445 if (sm->fflag || sm->Mflag) { 446 if (ret == NULL) 447 ret = bwsdup(s); 448 ret = ignore_case(ret); 449 } 450 if (ret == NULL) 451 set_key_on_keys_array(ka, s, 0); 452 else 453 set_key_on_keys_array(ka, ret, 0); 454 } 455 456 return 0; 457 } 458 459 cmpcoll_t 460 get_sort_func(struct sort_mods *sm) 461 { 462 463 if (sm->nflag) 464 return (numcoll); 465 else if (sm->hflag) 466 return (hnumcoll); 467 else if (sm->gflag) 468 return (gnumcoll); 469 else if (sm->Mflag) 470 return (monthcoll); 471 else if (sm->Rflag) 472 return (randomcoll); 473 else if (sm->Vflag) 474 return (versioncoll); 475 else 476 return (wstrcoll); 477 } 478 479 /* 480 * Compares the given strings. Returns a positive number if 481 * the first precedes the second, a negative number if the second is 482 * the preceding one, and zero if they are equal. This function calls 483 * the underlying collate functions, which done the actual comparison. 484 */ 485 int 486 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 487 { 488 struct key_value *kv1, *kv2; 489 struct sort_mods *sm; 490 int res = 0; 491 492 for (size_t i = 0; i < keys_num; ++i) { 493 kv1 = get_key_from_keys_array(ps1, i); 494 kv2 = get_key_from_keys_array(ps2, i); 495 sm = &(keys[i].sm); 496 497 if (sm->rflag) 498 res = sm->func(kv2, kv1, offset); 499 else 500 res = sm->func(kv1, kv2, offset); 501 502 if (res) 503 break; 504 505 /* offset applies to only the first key */ 506 offset = 0; 507 } 508 return (res); 509 } 510 511 /* 512 * Compare two strings. 513 * Plain symbol-by-symbol comparison. 514 */ 515 int 516 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 517 { 518 519 if (default_sort_mods->rflag) { 520 const struct bwstring *tmp; 521 522 tmp = s1; 523 s1 = s2; 524 s2 = tmp; 525 } 526 527 return (bwscoll(s1, s2, 0)); 528 } 529 530 /* 531 * Compare a string and a sort list item, according to the sort specs. 532 */ 533 int 534 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 535 { 536 struct keys_array *ka1; 537 int ret = 0; 538 539 ka1 = keys_array_alloc(); 540 541 preproc(str1, ka1); 542 543 sort_list_item_make_key(*ss2); 544 545 if (debug_sort) { 546 bwsprintf(stdout, str1, "; s1=<", ">"); 547 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 548 } 549 550 ret = key_coll(ka1, &((*ss2)->ka), 0); 551 552 if (debug_sort) 553 printf("; cmp1=%d", ret); 554 555 clean_keys_array(str1, ka1); 556 sort_free(ka1); 557 558 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 559 ret = top_level_str_coll(str1, ((*ss2)->str)); 560 if (debug_sort) 561 printf("; cmp2=%d", ret); 562 } 563 564 if (debug_sort) 565 printf("\n"); 566 567 return (ret); 568 } 569 570 /* 571 * Compare two sort list items, according to the sort specs. 572 */ 573 int 574 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 575 size_t offset) 576 { 577 int ret; 578 579 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 580 581 if (debug_sort) { 582 if (offset) 583 printf("; offset=%d", (int) offset); 584 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 585 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 586 printf("; cmp1=%d\n", ret); 587 } 588 589 if (ret) 590 return (ret); 591 592 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 593 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 594 if (debug_sort) 595 printf("; cmp2=%d\n", ret); 596 } 597 598 return (ret); 599 } 600 601 /* 602 * Compare two sort list items, according to the sort specs. 603 */ 604 int 605 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 606 { 607 608 return (list_coll_offset(ss1, ss2, 0)); 609 } 610 611 #define LSCDEF(N) \ 612 static int \ 613 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 614 { \ 615 \ 616 return (list_coll_offset(ss1, ss2, N)); \ 617 } 618 619 LSCDEF(1) 620 LSCDEF(2) 621 LSCDEF(3) 622 LSCDEF(4) 623 LSCDEF(5) 624 LSCDEF(6) 625 LSCDEF(7) 626 LSCDEF(8) 627 LSCDEF(9) 628 LSCDEF(10) 629 LSCDEF(11) 630 LSCDEF(12) 631 LSCDEF(13) 632 LSCDEF(14) 633 LSCDEF(15) 634 LSCDEF(16) 635 LSCDEF(17) 636 LSCDEF(18) 637 LSCDEF(19) 638 LSCDEF(20) 639 640 listcoll_t 641 get_list_call_func(size_t offset) 642 { 643 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 644 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 645 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 646 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 647 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 648 list_coll_18, list_coll_19, list_coll_20 }; 649 650 if (offset <= 20) 651 return (lsarray[offset]); 652 653 return (list_coll); 654 } 655 656 /* 657 * Compare two sort list items, only by their original string. 658 */ 659 int 660 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 661 { 662 663 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 664 } 665 666 /* 667 * Maximum size of a number in the string (before or after decimal point) 668 */ 669 #define MAX_NUM_SIZE (128) 670 671 /* 672 * Set suffix value 673 */ 674 static void setsuffix(wchar_t c, unsigned char *si) 675 { 676 switch (c){ 677 case L'k': 678 case L'K': 679 *si = 1; 680 break; 681 case L'M': 682 *si = 2; 683 break; 684 case L'G': 685 *si = 3; 686 break; 687 case L'T': 688 *si = 4; 689 break; 690 case L'P': 691 *si = 5; 692 break; 693 case L'E': 694 *si = 6; 695 break; 696 case L'Z': 697 *si = 7; 698 break; 699 case L'Y': 700 *si = 8; 701 break; 702 default: 703 *si = 0; 704 } 705 } 706 707 /* 708 * Read string s and parse the string into a fixed-decimal-point number. 709 * sign equals -1 if the number is negative (explicit plus is not allowed, 710 * according to GNU sort's "info sort". 711 * The number part before decimal point is in the smain, after the decimal 712 * point is in sfrac, tail is the pointer to the remainder of the string. 713 */ 714 static int 715 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 716 { 717 bwstring_iterator s; 718 719 s = bws_begin(s0); 720 721 /* always end the fraction with zero, even if we have no fraction */ 722 sfrac[0] = 0; 723 724 while (iswblank(bws_get_iter_value(s))) 725 s = bws_iterator_inc(s, 1); 726 727 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 728 *sign = -1; 729 s = bws_iterator_inc(s, 1); 730 } 731 732 // This is '0', not '\0', do not change this 733 while (iswdigit(bws_get_iter_value(s)) && 734 (bws_get_iter_value(s) == L'0')) 735 s = bws_iterator_inc(s, 1); 736 737 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 738 if (iswdigit(bws_get_iter_value(s))) { 739 smain[*main_len] = bws_get_iter_value(s); 740 s = bws_iterator_inc(s, 1); 741 *main_len += 1; 742 } else if (symbol_thousands_sep && 743 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 744 s = bws_iterator_inc(s, 1); 745 else 746 break; 747 } 748 749 smain[*main_len] = 0; 750 751 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 752 s = bws_iterator_inc(s, 1); 753 while (iswdigit(bws_get_iter_value(s)) && 754 *frac_len < MAX_NUM_SIZE) { 755 sfrac[*frac_len] = bws_get_iter_value(s); 756 s = bws_iterator_inc(s, 1); 757 *frac_len += 1; 758 } 759 sfrac[*frac_len] = 0; 760 761 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 762 --(*frac_len); 763 sfrac[*frac_len] = L'\0'; 764 } 765 } 766 767 setsuffix(bws_get_iter_value(s),si); 768 769 if ((*main_len + *frac_len) == 0) 770 *sign = 0; 771 772 return (0); 773 } 774 775 /* 776 * Implements string sort. 777 */ 778 static int 779 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 780 { 781 782 if (debug_sort) { 783 if (offset) 784 printf("; offset=%d\n", (int) offset); 785 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 786 printf("(%zu)", BWSLEN(kv1->k)); 787 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 788 printf("(%zu)", BWSLEN(kv2->k)); 789 } 790 791 return (bwscoll(kv1->k, kv2->k, offset)); 792 } 793 794 /* 795 * Compare two suffixes 796 */ 797 static inline int 798 cmpsuffix(unsigned char si1, unsigned char si2) 799 { 800 801 return ((char)si1 - (char)si2); 802 } 803 804 /* 805 * Implements numeric sort for -n and -h. 806 */ 807 static int 808 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 809 size_t offset __unused, bool use_suffix) 810 { 811 struct bwstring *s1, *s2; 812 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 813 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 814 int cmp_res, sign1, sign2; 815 size_t frac1, frac2, main1, main2; 816 unsigned char SI1, SI2; 817 bool e1, e2, key1_read, key2_read; 818 819 s1 = kv1->k; 820 s2 = kv2->k; 821 sign1 = sign2 = 0; 822 main1 = main2 = 0; 823 frac1 = frac2 = 0; 824 825 key1_read = key2_read = false; 826 827 if (debug_sort) { 828 bwsprintf(stdout, s1, "; k1=<", ">"); 829 bwsprintf(stdout, s2, ", k2=<", ">"); 830 } 831 832 if (s1 == s2) 833 return (0); 834 835 if (kv1->hint->status == HS_UNINITIALIZED) { 836 /* read the number from the string */ 837 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 838 key1_read = true; 839 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 840 if(main1 < 1 && frac1 < 1) 841 kv1->hint->v.nh.empty=true; 842 kv1->hint->v.nh.si = SI1; 843 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 844 HS_INITIALIZED : HS_ERROR; 845 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 846 } 847 848 if (kv2->hint->status == HS_UNINITIALIZED) { 849 /* read the number from the string */ 850 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 851 key2_read = true; 852 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 853 if(main2 < 1 && frac2 < 1) 854 kv2->hint->v.nh.empty=true; 855 kv2->hint->v.nh.si = SI2; 856 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 857 HS_INITIALIZED : HS_ERROR; 858 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 859 } 860 861 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 862 HS_INITIALIZED) { 863 unsigned long long n1, n2; 864 bool neg1, neg2; 865 866 e1 = kv1->hint->v.nh.empty; 867 e2 = kv2->hint->v.nh.empty; 868 869 if (e1 && e2) 870 return (0); 871 872 neg1 = kv1->hint->v.nh.neg; 873 neg2 = kv2->hint->v.nh.neg; 874 875 if (neg1 && !neg2) 876 return (-1); 877 if (neg2 && !neg1) 878 return (+1); 879 880 if (e1) 881 return (neg2 ? +1 : -1); 882 else if (e2) 883 return (neg1 ? -1 : +1); 884 885 886 if (use_suffix) { 887 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 888 if (cmp_res) 889 return (neg1 ? -cmp_res : cmp_res); 890 } 891 892 n1 = kv1->hint->v.nh.n1; 893 n2 = kv2->hint->v.nh.n1; 894 if (n1 < n2) 895 return (neg1 ? +1 : -1); 896 else if (n1 > n2) 897 return (neg1 ? -1 : +1); 898 } 899 900 /* read the numbers from the strings */ 901 if (!key1_read) 902 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 903 if (!key2_read) 904 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 905 906 e1 = ((main1 + frac1) == 0); 907 e2 = ((main2 + frac2) == 0); 908 909 if (e1 && e2) 910 return (0); 911 912 /* we know the result if the signs are different */ 913 if (sign1 < 0 && sign2 >= 0) 914 return (-1); 915 if (sign1 >= 0 && sign2 < 0) 916 return (+1); 917 918 if (e1) 919 return ((sign2 < 0) ? +1 : -1); 920 else if (e2) 921 return ((sign1 < 0) ? -1 : +1); 922 923 if (use_suffix) { 924 cmp_res = cmpsuffix(SI1, SI2); 925 if (cmp_res) 926 return ((sign1 < 0) ? -cmp_res : cmp_res); 927 } 928 929 /* if both numbers are empty assume that the strings are equal */ 930 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 931 return (0); 932 933 /* 934 * if the main part is of different size, we know the result 935 * (because the leading zeros are removed) 936 */ 937 if (main1 < main2) 938 cmp_res = -1; 939 else if (main1 > main2) 940 cmp_res = +1; 941 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 942 else 943 cmp_res = wcscmp(smain1, smain2); 944 945 /* check fraction */ 946 if (!cmp_res) 947 cmp_res = wcscmp(sfrac1, sfrac2); 948 949 if (!cmp_res) 950 return (0); 951 952 /* reverse result if the signs are negative */ 953 if (sign1 < 0 && sign2 < 0) 954 cmp_res = -cmp_res; 955 956 return (cmp_res); 957 } 958 959 /* 960 * Implements numeric sort (-n). 961 */ 962 static int 963 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 964 { 965 966 return (numcoll_impl(kv1, kv2, offset, false)); 967 } 968 969 /* 970 * Implements 'human' numeric sort (-h). 971 */ 972 static int 973 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 974 { 975 976 return (numcoll_impl(kv1, kv2, offset, true)); 977 } 978 979 /* Use hint space to memoize md5 computations, at least. */ 980 static void 981 randomcoll_init_hint(struct key_value *kv, void *hash) 982 { 983 984 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached)); 985 kv->hint->status = HS_INITIALIZED; 986 } 987 988 /* 989 * Implements random sort (-R). 990 */ 991 static int 992 randomcoll(struct key_value *kv1, struct key_value *kv2, 993 size_t offset __unused) 994 { 995 struct bwstring *s1, *s2; 996 MD5_CTX ctx1, ctx2; 997 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH]; 998 int cmp; 999 1000 s1 = kv1->k; 1001 s2 = kv2->k; 1002 1003 if (debug_sort) { 1004 bwsprintf(stdout, s1, "; k1=<", ">"); 1005 bwsprintf(stdout, s2, ", k2=<", ">"); 1006 } 1007 1008 if (s1 == s2) 1009 return (0); 1010 1011 if (kv1->hint->status == HS_INITIALIZED && 1012 kv2->hint->status == HS_INITIALIZED) { 1013 cmp = memcmp(kv1->hint->v.Rh.cached, 1014 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached)); 1015 if (cmp != 0) 1016 return (cmp); 1017 } 1018 1019 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 1020 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 1021 1022 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1023 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1024 1025 MD5Final(hash1, &ctx1); 1026 MD5Final(hash2, &ctx2); 1027 1028 if (kv1->hint->status == HS_UNINITIALIZED) 1029 randomcoll_init_hint(kv1, hash1); 1030 if (kv2->hint->status == HS_UNINITIALIZED) 1031 randomcoll_init_hint(kv2, hash2); 1032 1033 return (memcmp(hash1, hash2, sizeof(hash1))); 1034 } 1035 1036 /* 1037 * Implements version sort (-V). 1038 */ 1039 static int 1040 versioncoll(struct key_value *kv1, struct key_value *kv2, 1041 size_t offset __unused) 1042 { 1043 struct bwstring *s1, *s2; 1044 1045 s1 = kv1->k; 1046 s2 = kv2->k; 1047 1048 if (debug_sort) { 1049 bwsprintf(stdout, s1, "; k1=<", ">"); 1050 bwsprintf(stdout, s2, ", k2=<", ">"); 1051 } 1052 1053 if (s1 == s2) 1054 return (0); 1055 1056 return (vcmp(s1, s2)); 1057 } 1058 1059 /* 1060 * Check for minus infinity 1061 */ 1062 static inline bool 1063 huge_minus(double d, int err1) 1064 { 1065 1066 if (err1 == ERANGE) 1067 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1068 return (+1); 1069 1070 return (0); 1071 } 1072 1073 /* 1074 * Check for plus infinity 1075 */ 1076 static inline bool 1077 huge_plus(double d, int err1) 1078 { 1079 1080 if (err1 == ERANGE) 1081 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1082 return (+1); 1083 1084 return (0); 1085 } 1086 1087 /* 1088 * Check whether a function is a NAN 1089 */ 1090 static bool 1091 is_nan(double d) 1092 { 1093 1094 return ((d == NAN) || (isnan(d))); 1095 } 1096 1097 /* 1098 * Compare two NANs 1099 */ 1100 static int 1101 cmp_nans(double d1, double d2) 1102 { 1103 1104 if (d1 < d2) 1105 return (-1); 1106 if (d1 > d2) 1107 return (+1); 1108 return (0); 1109 } 1110 1111 /* 1112 * Implements general numeric sort (-g). 1113 */ 1114 static int 1115 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1116 size_t offset __unused) 1117 { 1118 double d1, d2; 1119 int err1, err2; 1120 bool empty1, empty2, key1_read, key2_read; 1121 1122 d1 = d2 = 0; 1123 err1 = err2 = 0; 1124 key1_read = key2_read = false; 1125 1126 if (debug_sort) { 1127 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1128 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1129 } 1130 1131 if (kv1->hint->status == HS_UNINITIALIZED) { 1132 errno = 0; 1133 d1 = bwstod(kv1->k, &empty1); 1134 err1 = errno; 1135 1136 if (empty1) 1137 kv1->hint->v.gh.notnum = true; 1138 else if (err1 == 0) { 1139 kv1->hint->v.gh.d = d1; 1140 kv1->hint->v.gh.nan = is_nan(d1); 1141 kv1->hint->status = HS_INITIALIZED; 1142 } else 1143 kv1->hint->status = HS_ERROR; 1144 1145 key1_read = true; 1146 } 1147 1148 if (kv2->hint->status == HS_UNINITIALIZED) { 1149 errno = 0; 1150 d2 = bwstod(kv2->k, &empty2); 1151 err2 = errno; 1152 1153 if (empty2) 1154 kv2->hint->v.gh.notnum = true; 1155 else if (err2 == 0) { 1156 kv2->hint->v.gh.d = d2; 1157 kv2->hint->v.gh.nan = is_nan(d2); 1158 kv2->hint->status = HS_INITIALIZED; 1159 } else 1160 kv2->hint->status = HS_ERROR; 1161 1162 key2_read = true; 1163 } 1164 1165 if (kv1->hint->status == HS_INITIALIZED && 1166 kv2->hint->status == HS_INITIALIZED) { 1167 if (kv1->hint->v.gh.notnum) 1168 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1169 else if (kv2->hint->v.gh.notnum) 1170 return (+1); 1171 1172 if (kv1->hint->v.gh.nan) 1173 return ((kv2->hint->v.gh.nan) ? 1174 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1175 -1); 1176 else if (kv2->hint->v.gh.nan) 1177 return (+1); 1178 1179 d1 = kv1->hint->v.gh.d; 1180 d2 = kv2->hint->v.gh.d; 1181 1182 if (d1 < d2) 1183 return (-1); 1184 else if (d1 > d2) 1185 return (+1); 1186 else 1187 return (0); 1188 } 1189 1190 if (!key1_read) { 1191 errno = 0; 1192 d1 = bwstod(kv1->k, &empty1); 1193 err1 = errno; 1194 } 1195 1196 if (!key2_read) { 1197 errno = 0; 1198 d2 = bwstod(kv2->k, &empty2); 1199 err2 = errno; 1200 } 1201 1202 /* Non-value case: */ 1203 if (empty1) 1204 return (empty2 ? 0 : -1); 1205 else if (empty2) 1206 return (+1); 1207 1208 /* NAN case */ 1209 if (is_nan(d1)) 1210 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1211 else if (is_nan(d2)) 1212 return (+1); 1213 1214 /* Infinities */ 1215 if (err1 == ERANGE || err2 == ERANGE) { 1216 /* Minus infinity case */ 1217 if (huge_minus(d1, err1)) { 1218 if (huge_minus(d2, err2)) { 1219 if (d1 < d2) 1220 return (-1); 1221 if (d1 > d2) 1222 return (+1); 1223 return (0); 1224 } else 1225 return (-1); 1226 1227 } else if (huge_minus(d2, err2)) { 1228 if (huge_minus(d1, err1)) { 1229 if (d1 < d2) 1230 return (-1); 1231 if (d1 > d2) 1232 return (+1); 1233 return (0); 1234 } else 1235 return (+1); 1236 } 1237 1238 /* Plus infinity case */ 1239 if (huge_plus(d1, err1)) { 1240 if (huge_plus(d2, err2)) { 1241 if (d1 < d2) 1242 return (-1); 1243 if (d1 > d2) 1244 return (+1); 1245 return (0); 1246 } else 1247 return (+1); 1248 } else if (huge_plus(d2, err2)) { 1249 if (huge_plus(d1, err1)) { 1250 if (d1 < d2) 1251 return (-1); 1252 if (d1 > d2) 1253 return (+1); 1254 return (0); 1255 } else 1256 return (-1); 1257 } 1258 } 1259 1260 if (d1 < d2) 1261 return (-1); 1262 if (d1 > d2) 1263 return (+1); 1264 1265 return (0); 1266 } 1267 1268 /* 1269 * Implements month sort (-M). 1270 */ 1271 static int 1272 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1273 { 1274 int val1, val2; 1275 bool key1_read, key2_read; 1276 1277 val1 = val2 = 0; 1278 key1_read = key2_read = false; 1279 1280 if (debug_sort) { 1281 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1282 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1283 } 1284 1285 if (kv1->hint->status == HS_UNINITIALIZED) { 1286 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1287 key1_read = true; 1288 kv1->hint->status = HS_INITIALIZED; 1289 } 1290 1291 if (kv2->hint->status == HS_UNINITIALIZED) { 1292 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1293 key2_read = true; 1294 kv2->hint->status = HS_INITIALIZED; 1295 } 1296 1297 if (kv1->hint->status == HS_INITIALIZED) { 1298 val1 = kv1->hint->v.Mh.m; 1299 key1_read = true; 1300 } 1301 1302 if (kv2->hint->status == HS_INITIALIZED) { 1303 val2 = kv2->hint->v.Mh.m; 1304 key2_read = true; 1305 } 1306 1307 if (!key1_read) 1308 val1 = bws_month_score(kv1->k); 1309 if (!key2_read) 1310 val2 = bws_month_score(kv2->k); 1311 1312 if (val1 == val2) { 1313 return (0); 1314 } 1315 if (val1 < val2) 1316 return (-1); 1317 return (+1); 1318 } 1319