1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 33 #include <errno.h> 34 #include <err.h> 35 #include <langinfo.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <md5.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 #include <wctype.h> 43 44 #include "coll.h" 45 #include "vsort.h" 46 47 struct key_specs *keys; 48 size_t keys_num = 0; 49 50 wchar_t symbol_decimal_point = L'.'; 51 /* there is no default thousands separator in collate rules: */ 52 wchar_t symbol_thousands_sep = 0; 53 wchar_t symbol_negative_sign = L'-'; 54 wchar_t symbol_positive_sign = L'+'; 55 56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59 static int numcoll(struct key_value*, struct key_value *, size_t offset); 60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64 /* 65 * Allocate keys array 66 */ 67 struct keys_array * 68 keys_array_alloc(void) 69 { 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_malloc(sz); 75 memset(ka, 0, sz); 76 77 return (ka); 78 } 79 80 /* 81 * Calculate whether we need key hint space 82 */ 83 static size_t 84 key_hint_size(void) 85 { 86 87 return (need_hint ? sizeof(struct key_hint) : 0); 88 } 89 90 /* 91 * Calculate keys array size 92 */ 93 size_t 94 keys_array_size(void) 95 { 96 97 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 98 } 99 100 /* 101 * Clean data of keys array 102 */ 103 void 104 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 105 { 106 107 if (ka) { 108 for (size_t i = 0; i < keys_num; ++i) 109 if (ka->key[i].k && ka->key[i].k != s) 110 bwsfree(ka->key[i].k); 111 memset(ka, 0, keys_array_size()); 112 } 113 } 114 115 /* 116 * Set value of a key in the keys set 117 */ 118 void 119 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 120 { 121 122 if (ka && keys_num > ind) { 123 struct key_value *kv; 124 125 kv = &(ka->key[ind]); 126 127 if (kv->k && kv->k != s) 128 bwsfree(kv->k); 129 kv->k = s; 130 } 131 } 132 133 /* 134 * Initialize a sort list item 135 */ 136 struct sort_list_item * 137 sort_list_item_alloc(void) 138 { 139 struct sort_list_item *si; 140 size_t sz; 141 142 sz = sizeof(struct sort_list_item) + keys_array_size(); 143 si = sort_malloc(sz); 144 memset(si, 0, sz); 145 146 return (si); 147 } 148 149 size_t 150 sort_list_item_size(struct sort_list_item *si) 151 { 152 size_t ret = 0; 153 154 if (si) { 155 ret = sizeof(struct sort_list_item) + keys_array_size(); 156 if (si->str) 157 ret += bws_memsize(si->str); 158 for (size_t i = 0; i < keys_num; ++i) { 159 struct key_value *kv; 160 161 kv = &(si->ka.key[i]); 162 163 if (kv->k != si->str) 164 ret += bws_memsize(kv->k); 165 } 166 } 167 return (ret); 168 } 169 170 /* 171 * Calculate key for a sort list item 172 */ 173 static void 174 sort_list_item_make_key(struct sort_list_item *si) 175 { 176 177 preproc(si->str, &(si->ka)); 178 } 179 180 /* 181 * Set value of a sort list item. 182 * Return combined string and keys memory size. 183 */ 184 void 185 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 186 { 187 188 if (si) { 189 clean_keys_array(si->str, &(si->ka)); 190 if (si->str) { 191 if (si->str == str) { 192 /* we are trying to reset the same string */ 193 return; 194 } else { 195 bwsfree(si->str); 196 si->str = NULL; 197 } 198 } 199 si->str = str; 200 sort_list_item_make_key(si); 201 } 202 } 203 204 /* 205 * De-allocate a sort list item object memory 206 */ 207 void 208 sort_list_item_clean(struct sort_list_item *si) 209 { 210 211 if (si) { 212 clean_keys_array(si->str, &(si->ka)); 213 if (si->str) { 214 bwsfree(si->str); 215 si->str = NULL; 216 } 217 } 218 } 219 220 /* 221 * Skip columns according to specs 222 */ 223 static size_t 224 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 225 bool skip_blanks, bool *empty_key) 226 { 227 if (cols < 1) 228 return (BWSLEN(s) + 1); 229 230 if (skip_blanks) 231 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 232 ++start; 233 234 while (start < BWSLEN(s) && cols > 1) { 235 --cols; 236 ++start; 237 } 238 239 if (start >= BWSLEN(s)) 240 *empty_key = true; 241 242 return (start); 243 } 244 245 /* 246 * Skip fields according to specs 247 */ 248 static size_t 249 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 250 { 251 252 if (fields < 2) { 253 if (BWSLEN(s) == 0) 254 *empty_field = true; 255 return (0); 256 } else if (!(sort_opts_vals.tflag)) { 257 size_t cpos = 0; 258 bool pb = true; 259 260 while (cpos < BWSLEN(s)) { 261 bool isblank; 262 263 isblank = iswblank(BWS_GET(s,cpos)); 264 265 if (isblank && !pb) { 266 --fields; 267 if (fields <= 1) 268 return (cpos); 269 } 270 pb = isblank; 271 ++cpos; 272 } 273 if (fields > 1) 274 *empty_field = true; 275 return (cpos); 276 } else { 277 size_t cpos = 0; 278 279 while (cpos < BWSLEN(s)) { 280 if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) { 281 --fields; 282 if (fields <= 1) 283 return (cpos + 1); 284 } 285 ++cpos; 286 } 287 if (fields > 1) 288 *empty_field = true; 289 return (cpos); 290 } 291 } 292 293 /* 294 * Find fields start 295 */ 296 static void 297 find_field_start(const struct bwstring *s, struct key_specs *ks, 298 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 299 { 300 301 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 302 if (!*empty_field) 303 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 304 ks->pos1b, empty_key); 305 else 306 *empty_key = true; 307 } 308 309 /* 310 * Find end key position 311 */ 312 static size_t 313 find_field_end(const struct bwstring *s, struct key_specs *ks) 314 { 315 size_t f2, next_field_start, pos_end; 316 bool empty_field, empty_key; 317 318 pos_end = 0; 319 next_field_start = 0; 320 empty_field = false; 321 empty_key = false; 322 f2 = ks->f2; 323 324 if (f2 == 0) 325 return (BWSLEN(s) + 1); 326 else { 327 if (ks->c2 == 0) { 328 next_field_start = skip_fields_to_start(s, f2 + 1, 329 &empty_field); 330 if ((next_field_start > 0) && sort_opts_vals.tflag && 331 (sort_opts_vals.field_sep == BWS_GET(s, 332 next_field_start - 1))) 333 --next_field_start; 334 } else 335 next_field_start = skip_fields_to_start(s, f2, 336 &empty_field); 337 } 338 339 if (empty_field || (next_field_start >= BWSLEN(s))) 340 return (BWSLEN(s) + 1); 341 342 if (ks->c2) { 343 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 344 ks->pos2b, &empty_key); 345 if (pos_end < BWSLEN(s)) 346 ++pos_end; 347 } else 348 pos_end = next_field_start; 349 350 return (pos_end); 351 } 352 353 /* 354 * Cut a field according to the key specs 355 */ 356 static struct bwstring * 357 cut_field(const struct bwstring *s, struct key_specs *ks) 358 { 359 struct bwstring *ret = NULL; 360 361 if (s && ks) { 362 size_t field_start, key_end, key_start, sz; 363 bool empty_field, empty_key; 364 365 field_start = 0; 366 key_start = 0; 367 empty_field = false; 368 empty_key = false; 369 370 find_field_start(s, ks, &field_start, &key_start, 371 &empty_field, &empty_key); 372 373 if (empty_key) 374 sz = 0; 375 else { 376 key_end = find_field_end(s, ks); 377 sz = (key_end < key_start) ? 0 : (key_end - key_start); 378 } 379 380 ret = bwsalloc(sz); 381 if (sz) 382 bwsnocpy(ret, s, key_start, sz); 383 } else 384 ret = bwsalloc(0); 385 386 return (ret); 387 } 388 389 /* 390 * Preprocesses a line applying the necessary transformations 391 * specified by command line options and returns the preprocessed 392 * string, which can be used to compare. 393 */ 394 int 395 preproc(struct bwstring *s, struct keys_array *ka) 396 { 397 398 if (sort_opts_vals.kflag) 399 for (size_t i = 0; i < keys_num; i++) { 400 struct bwstring *key; 401 struct key_specs *kspecs; 402 struct sort_mods *sm; 403 404 kspecs = &(keys[i]); 405 key = cut_field(s, kspecs); 406 407 sm = &(kspecs->sm); 408 if (sm->dflag) 409 key = dictionary_order(key); 410 else if (sm->iflag) 411 key = ignore_nonprinting(key); 412 if (sm->fflag || sm->Mflag) 413 key = ignore_case(key); 414 415 set_key_on_keys_array(ka, key, i); 416 } 417 else { 418 struct bwstring *ret = NULL; 419 struct sort_mods *sm = default_sort_mods; 420 421 if (sm->bflag) { 422 if (ret == NULL) 423 ret = bwsdup(s); 424 ret = ignore_leading_blanks(ret); 425 } 426 if (sm->dflag) { 427 if (ret == NULL) 428 ret = bwsdup(s); 429 ret = dictionary_order(ret); 430 } else if (sm->iflag) { 431 if (ret == NULL) 432 ret = bwsdup(s); 433 ret = ignore_nonprinting(ret); 434 } 435 if (sm->fflag || sm->Mflag) { 436 if (ret == NULL) 437 ret = bwsdup(s); 438 ret = ignore_case(ret); 439 } 440 if (ret == NULL) 441 set_key_on_keys_array(ka, s, 0); 442 else 443 set_key_on_keys_array(ka, ret, 0); 444 } 445 446 return 0; 447 } 448 449 cmpcoll_t 450 get_sort_func(struct sort_mods *sm) 451 { 452 453 if (sm->nflag) 454 return (numcoll); 455 else if (sm->hflag) 456 return (hnumcoll); 457 else if (sm->gflag) 458 return (gnumcoll); 459 else if (sm->Mflag) 460 return (monthcoll); 461 else if (sm->Rflag) 462 return (randomcoll); 463 else if (sm->Vflag) 464 return (versioncoll); 465 else 466 return (wstrcoll); 467 } 468 469 /* 470 * Compares the given strings. Returns a positive number if 471 * the first precedes the second, a negative number if the second is 472 * the preceding one, and zero if they are equal. This function calls 473 * the underlying collate functions, which done the actual comparison. 474 */ 475 int 476 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 477 { 478 struct sort_mods *sm; 479 int res = 0; 480 481 for (size_t i = 0; i < keys_num; ++i) { 482 sm = &(keys[i].sm); 483 484 if (sm->rflag) 485 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 486 else 487 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 488 489 if (res) 490 break; 491 492 /* offset applies to only the first key */ 493 offset = 0; 494 } 495 return (res); 496 } 497 498 /* 499 * Compare two strings. 500 * Plain symbol-by-symbol comparison. 501 */ 502 int 503 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 504 { 505 506 if (default_sort_mods->rflag) { 507 const struct bwstring *tmp; 508 509 tmp = s1; 510 s1 = s2; 511 s2 = tmp; 512 } 513 514 return (bwscoll(s1, s2, 0)); 515 } 516 517 /* 518 * Compare a string and a sort list item, according to the sort specs. 519 */ 520 int 521 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 522 { 523 struct keys_array *ka1; 524 int ret = 0; 525 526 ka1 = keys_array_alloc(); 527 528 preproc(str1, ka1); 529 530 sort_list_item_make_key(*ss2); 531 532 if (debug_sort) { 533 bwsprintf(stdout, str1, "; s1=<", ">"); 534 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 535 } 536 537 ret = key_coll(ka1, &((*ss2)->ka), 0); 538 539 if (debug_sort) 540 printf("; cmp1=%d", ret); 541 542 clean_keys_array(str1, ka1); 543 sort_free(ka1); 544 545 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 546 ret = top_level_str_coll(str1, ((*ss2)->str)); 547 if (debug_sort) 548 printf("; cmp2=%d", ret); 549 } 550 551 if (debug_sort) 552 printf("\n"); 553 554 return (ret); 555 } 556 557 /* 558 * Compare two sort list items, according to the sort specs. 559 */ 560 int 561 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 562 size_t offset) 563 { 564 int ret; 565 566 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 567 568 if (debug_sort) { 569 if (offset) 570 printf("; offset=%d", (int) offset); 571 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 572 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 573 printf("; cmp1=%d\n", ret); 574 } 575 576 if (ret) 577 return (ret); 578 579 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 580 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 581 if (debug_sort) 582 printf("; cmp2=%d\n", ret); 583 } 584 585 return (ret); 586 } 587 588 /* 589 * Compare two sort list items, according to the sort specs. 590 */ 591 int 592 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 593 { 594 595 return (list_coll_offset(ss1, ss2, 0)); 596 } 597 598 #define LSCDEF(N) \ 599 static int \ 600 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 601 { \ 602 \ 603 return (list_coll_offset(ss1, ss2, N)); \ 604 } 605 606 LSCDEF(1) 607 LSCDEF(2) 608 LSCDEF(3) 609 LSCDEF(4) 610 LSCDEF(5) 611 LSCDEF(6) 612 LSCDEF(7) 613 LSCDEF(8) 614 LSCDEF(9) 615 LSCDEF(10) 616 LSCDEF(11) 617 LSCDEF(12) 618 LSCDEF(13) 619 LSCDEF(14) 620 LSCDEF(15) 621 LSCDEF(16) 622 LSCDEF(17) 623 LSCDEF(18) 624 LSCDEF(19) 625 LSCDEF(20) 626 627 listcoll_t 628 get_list_call_func(size_t offset) 629 { 630 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 631 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 632 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 633 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 634 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 635 list_coll_18, list_coll_19, list_coll_20 }; 636 637 if (offset <= 20) 638 return (lsarray[offset]); 639 640 return (list_coll); 641 } 642 643 /* 644 * Compare two sort list items, only by their original string. 645 */ 646 int 647 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 648 { 649 650 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 651 } 652 653 /* 654 * Maximum size of a number in the string (before or after decimal point) 655 */ 656 #define MAX_NUM_SIZE (128) 657 658 /* 659 * Set suffix value 660 */ 661 static void setsuffix(wchar_t c, unsigned char *si) 662 { 663 switch (c){ 664 case L'k': 665 case L'K': 666 *si = 1; 667 break; 668 case L'M': 669 *si = 2; 670 break; 671 case L'G': 672 *si = 3; 673 break; 674 case L'T': 675 *si = 4; 676 break; 677 case L'P': 678 *si = 5; 679 break; 680 case L'E': 681 *si = 6; 682 break; 683 case L'Z': 684 *si = 7; 685 break; 686 case L'Y': 687 *si = 8; 688 break; 689 default: 690 *si = 0; 691 }; 692 } 693 694 /* 695 * Read string s and parse the string into a fixed-decimal-point number. 696 * sign equals -1 if the number is negative (explicit plus is not allowed, 697 * according to GNU sort's "info sort". 698 * The number part before decimal point is in the smain, after the decimal 699 * point is in sfrac, tail is the pointer to the remainder of the string. 700 */ 701 static int 702 read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si) 703 { 704 bwstring_iterator s; 705 706 s = bws_begin(s0); 707 708 /* always end the fraction with zero, even if we have no fraction */ 709 sfrac[0] = 0; 710 711 while (iswblank(bws_get_iter_value(s))) 712 s = bws_iterator_inc(s, 1); 713 714 if (bws_get_iter_value(s) == symbol_negative_sign) { 715 *sign = -1; 716 s = bws_iterator_inc(s, 1); 717 } 718 719 // This is '0', not '\0', do not change this 720 while (iswdigit(bws_get_iter_value(s)) && 721 (bws_get_iter_value(s) == L'0')) 722 s = bws_iterator_inc(s, 1); 723 724 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 725 if (iswdigit(bws_get_iter_value(s))) { 726 smain[*main_len] = bws_get_iter_value(s); 727 s = bws_iterator_inc(s, 1); 728 *main_len += 1; 729 } else if (symbol_thousands_sep && 730 (bws_get_iter_value(s) == symbol_thousands_sep)) 731 s = bws_iterator_inc(s, 1); 732 else 733 break; 734 } 735 736 smain[*main_len] = 0; 737 738 if (bws_get_iter_value(s) == symbol_decimal_point) { 739 s = bws_iterator_inc(s, 1); 740 while (iswdigit(bws_get_iter_value(s)) && 741 *frac_len < MAX_NUM_SIZE) { 742 sfrac[*frac_len] = bws_get_iter_value(s); 743 s = bws_iterator_inc(s, 1); 744 *frac_len += 1; 745 } 746 sfrac[*frac_len] = 0; 747 748 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 749 --(*frac_len); 750 sfrac[*frac_len] = L'\0'; 751 } 752 } 753 754 setsuffix(bws_get_iter_value(s),si); 755 756 if ((*main_len + *frac_len) == 0) 757 *sign = 0; 758 759 return (0); 760 } 761 762 /* 763 * Implements string sort. 764 */ 765 static int 766 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 767 { 768 769 if (debug_sort) { 770 if (offset) 771 printf("; offset=%d\n", (int) offset); 772 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 773 printf("(%zu)", BWSLEN(kv1->k)); 774 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 775 printf("(%zu)", BWSLEN(kv2->k)); 776 } 777 778 return (bwscoll(kv1->k, kv2->k, offset)); 779 } 780 781 /* 782 * Compare two suffixes 783 */ 784 static inline int 785 cmpsuffix(unsigned char si1, unsigned char si2) 786 { 787 788 return ((char)si1 - (char)si2); 789 } 790 791 /* 792 * Implements numeric sort for -n and -h. 793 */ 794 static int 795 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 796 size_t offset __unused, bool use_suffix) 797 { 798 struct bwstring *s1, *s2; 799 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 800 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 801 int cmp_res, frac1, frac2, main1, main2, sign1, sign2; 802 unsigned char SI1, SI2; 803 bool e1, e2, key1_read, key2_read; 804 805 s1 = kv1->k; 806 s2 = kv2->k; 807 sign1 = sign2 = 0; 808 main1 = main2 = 0; 809 frac1 = frac2 = 0; 810 811 cmp_res = 0; 812 key1_read = key2_read = false; 813 814 if (debug_sort) { 815 bwsprintf(stdout, s1, "; k1=<", ">"); 816 bwsprintf(stdout, s2, ", k2=<", ">"); 817 } 818 819 if (s1 == s2) 820 return (0); 821 822 if (kv1->hint->status == HS_UNINITIALIZED) { 823 /* read the number from the string */ 824 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 825 key1_read = true; 826 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 827 if(main1 < 1 && frac1 < 1) 828 kv1->hint->v.nh.empty=true; 829 kv1->hint->v.nh.si = SI1; 830 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 831 HS_INITIALIZED : HS_ERROR; 832 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 833 } 834 835 if (kv2->hint->status == HS_UNINITIALIZED) { 836 /* read the number from the string */ 837 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 838 key2_read = true; 839 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 840 if(main2 < 1 && frac2 < 1) 841 kv2->hint->v.nh.empty=true; 842 kv2->hint->v.nh.si = SI2; 843 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 844 HS_INITIALIZED : HS_ERROR; 845 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 846 } 847 848 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 849 HS_INITIALIZED) { 850 unsigned long long n1, n2; 851 bool neg1, neg2; 852 853 e1 = kv1->hint->v.nh.empty; 854 e2 = kv2->hint->v.nh.empty; 855 856 if (e1 && e2) 857 return (0); 858 859 neg1 = kv1->hint->v.nh.neg; 860 neg2 = kv2->hint->v.nh.neg; 861 862 if (neg1 && !neg2) 863 return (-1); 864 if (neg2 && !neg1) 865 return (+1); 866 867 if (e1) 868 return (neg2 ? +1 : -1); 869 else if (e2) 870 return (neg1 ? -1 : +1); 871 872 873 if (use_suffix) { 874 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 875 if (cmp_res) 876 return (neg1 ? -cmp_res : cmp_res); 877 } 878 879 n1 = kv1->hint->v.nh.n1; 880 n2 = kv2->hint->v.nh.n1; 881 if (n1 < n2) 882 return (neg1 ? +1 : -1); 883 else if (n1 > n2) 884 return (neg1 ? -1 : +1); 885 } 886 887 /* read the numbers from the strings */ 888 if (!key1_read) 889 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 890 if (!key2_read) 891 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 892 893 e1 = ((main1 + frac1) == 0); 894 e2 = ((main2 + frac2) == 0); 895 896 if (e1 && e2) 897 return (0); 898 899 /* we know the result if the signs are different */ 900 if (sign1 < 0 && sign2 >= 0) 901 return (-1); 902 if (sign1 >= 0 && sign2 < 0) 903 return (+1); 904 905 if (e1) 906 return ((sign2 < 0) ? +1 : -1); 907 else if (e2) 908 return ((sign1 < 0) ? -1 : +1); 909 910 if (use_suffix) { 911 cmp_res = cmpsuffix(SI1, SI2); 912 if (cmp_res) 913 return ((sign1 < 0) ? -cmp_res : cmp_res); 914 } 915 916 /* if both numbers are empty assume that the strings are equal */ 917 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 918 return (0); 919 920 /* 921 * if the main part is of different size, we know the result 922 * (because the leading zeros are removed) 923 */ 924 if (main1 < main2) 925 cmp_res = -1; 926 else if (main1 > main2) 927 cmp_res = +1; 928 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 929 else 930 cmp_res = wcscmp(smain1, smain2); 931 932 /* check fraction */ 933 if (!cmp_res) 934 cmp_res = wcscmp(sfrac1, sfrac2); 935 936 if (!cmp_res) 937 return (0); 938 939 /* reverse result if the signs are negative */ 940 if (sign1 < 0 && sign2 < 0) 941 cmp_res = -cmp_res; 942 943 return (cmp_res); 944 } 945 946 /* 947 * Implements numeric sort (-n). 948 */ 949 static int 950 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 951 { 952 953 return (numcoll_impl(kv1, kv2, offset, false)); 954 } 955 956 /* 957 * Implements 'human' numeric sort (-h). 958 */ 959 static int 960 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 961 { 962 963 return (numcoll_impl(kv1, kv2, offset, true)); 964 } 965 966 /* 967 * Implements random sort (-R). 968 */ 969 static int 970 randomcoll(struct key_value *kv1, struct key_value *kv2, 971 size_t offset __unused) 972 { 973 struct bwstring *s1, *s2; 974 MD5_CTX ctx1, ctx2; 975 char *b1, *b2; 976 977 s1 = kv1->k; 978 s2 = kv2->k; 979 980 if (debug_sort) { 981 bwsprintf(stdout, s1, "; k1=<", ">"); 982 bwsprintf(stdout, s2, ", k2=<", ">"); 983 } 984 985 if (s1 == s2) 986 return (0); 987 988 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 989 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 990 991 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 992 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 993 b1 = MD5End(&ctx1, NULL); 994 b2 = MD5End(&ctx2, NULL); 995 if (b1 == NULL) { 996 if (b2 == NULL) 997 return (0); 998 else { 999 sort_free(b2); 1000 return (-1); 1001 } 1002 } else if (b2 == NULL) { 1003 sort_free(b1); 1004 return (+1); 1005 } else { 1006 int cmp_res; 1007 1008 cmp_res = strcmp(b1,b2); 1009 sort_free(b1); 1010 sort_free(b2); 1011 1012 if (!cmp_res) 1013 cmp_res = bwscoll(s1, s2, 0); 1014 1015 return (cmp_res); 1016 } 1017 } 1018 1019 /* 1020 * Implements version sort (-V). 1021 */ 1022 static int 1023 versioncoll(struct key_value *kv1, struct key_value *kv2, 1024 size_t offset __unused) 1025 { 1026 struct bwstring *s1, *s2; 1027 1028 s1 = kv1->k; 1029 s2 = kv2->k; 1030 1031 if (debug_sort) { 1032 bwsprintf(stdout, s1, "; k1=<", ">"); 1033 bwsprintf(stdout, s2, ", k2=<", ">"); 1034 } 1035 1036 if (s1 == s2) 1037 return (0); 1038 1039 return (vcmp(s1, s2)); 1040 } 1041 1042 /* 1043 * Check for minus infinity 1044 */ 1045 static inline bool 1046 huge_minus(double d, int err1) 1047 { 1048 1049 if (err1 == ERANGE) 1050 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1051 return (+1); 1052 1053 return (0); 1054 } 1055 1056 /* 1057 * Check for plus infinity 1058 */ 1059 static inline bool 1060 huge_plus(double d, int err1) 1061 { 1062 1063 if (err1 == ERANGE) 1064 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1065 return (+1); 1066 1067 return (0); 1068 } 1069 1070 /* 1071 * Check whether a function is a NAN 1072 */ 1073 static bool 1074 is_nan(double d) 1075 { 1076 1077 return ((d == NAN) || (isnan(d))); 1078 } 1079 1080 /* 1081 * Compare two NANs 1082 */ 1083 static int 1084 cmp_nans(double d1, double d2) 1085 { 1086 1087 if (d1 < d2) 1088 return (-1); 1089 if (d2 > d2) 1090 return (+1); 1091 return (0); 1092 } 1093 1094 /* 1095 * Implements general numeric sort (-g). 1096 */ 1097 static int 1098 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1099 size_t offset __unused) 1100 { 1101 double d1, d2; 1102 int err1, err2; 1103 bool empty1, empty2, key1_read, key2_read; 1104 1105 d1 = d2 = 0; 1106 err1 = err2 = 0; 1107 key1_read = key2_read = false; 1108 1109 if (debug_sort) { 1110 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1111 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1112 } 1113 1114 if (kv1->hint->status == HS_UNINITIALIZED) { 1115 errno = 0; 1116 d1 = bwstod(kv1->k, &empty1); 1117 err1 = errno; 1118 1119 if (empty1) 1120 kv1->hint->v.gh.notnum = true; 1121 else if (err1 == 0) { 1122 kv1->hint->v.gh.d = d1; 1123 kv1->hint->v.gh.nan = is_nan(d1); 1124 kv1->hint->status = HS_INITIALIZED; 1125 } else 1126 kv1->hint->status = HS_ERROR; 1127 1128 key1_read = true; 1129 } 1130 1131 if (kv2->hint->status == HS_UNINITIALIZED) { 1132 errno = 0; 1133 d2 = bwstod(kv2->k, &empty2); 1134 err2 = errno; 1135 1136 if (empty2) 1137 kv2->hint->v.gh.notnum = true; 1138 else if (err2 == 0) { 1139 kv2->hint->v.gh.d = d2; 1140 kv2->hint->v.gh.nan = is_nan(d2); 1141 kv2->hint->status = HS_INITIALIZED; 1142 } else 1143 kv2->hint->status = HS_ERROR; 1144 1145 key2_read = true; 1146 } 1147 1148 if (kv1->hint->status == HS_INITIALIZED && 1149 kv2->hint->status == HS_INITIALIZED) { 1150 if (kv1->hint->v.gh.notnum) 1151 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1152 else if (kv2->hint->v.gh.notnum) 1153 return (+1); 1154 1155 if (kv1->hint->v.gh.nan) 1156 return ((kv2->hint->v.gh.nan) ? 1157 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1158 -1); 1159 else if (kv2->hint->v.gh.nan) 1160 return (+1); 1161 1162 d1 = kv1->hint->v.gh.d; 1163 d2 = kv2->hint->v.gh.d; 1164 1165 if (d1 < d2) 1166 return (-1); 1167 else if (d1 > d2) 1168 return (+1); 1169 else 1170 return (0); 1171 } 1172 1173 if (!key1_read) { 1174 errno = 0; 1175 d1 = bwstod(kv1->k, &empty1); 1176 err1 = errno; 1177 } 1178 1179 if (!key2_read) { 1180 errno = 0; 1181 d2 = bwstod(kv2->k, &empty2); 1182 err2 = errno; 1183 } 1184 1185 /* Non-value case: */ 1186 if (empty1) 1187 return (empty2 ? 0 : -1); 1188 else if (empty2) 1189 return (+1); 1190 1191 /* NAN case */ 1192 if (is_nan(d1)) 1193 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1194 else if (is_nan(d2)) 1195 return (+1); 1196 1197 /* Infinities */ 1198 if (err1 == ERANGE || err2 == ERANGE) { 1199 /* Minus infinity case */ 1200 if (huge_minus(d1, err1)) { 1201 if (huge_minus(d2, err2)) { 1202 if (d1 < d2) 1203 return (-1); 1204 if (d1 > d2) 1205 return (+1); 1206 return (0); 1207 } else 1208 return (-1); 1209 1210 } else if (huge_minus(d2, err2)) { 1211 if (huge_minus(d1, err1)) { 1212 if (d1 < d2) 1213 return (-1); 1214 if (d1 > d2) 1215 return (+1); 1216 return (0); 1217 } else 1218 return (+1); 1219 } 1220 1221 /* Plus infinity case */ 1222 if (huge_plus(d1, err1)) { 1223 if (huge_plus(d2, err2)) { 1224 if (d1 < d2) 1225 return (-1); 1226 if (d1 > d2) 1227 return (+1); 1228 return (0); 1229 } else 1230 return (+1); 1231 } else if (huge_plus(d2, err2)) { 1232 if (huge_plus(d1, err1)) { 1233 if (d1 < d2) 1234 return (-1); 1235 if (d1 > d2) 1236 return (+1); 1237 return (0); 1238 } else 1239 return (-1); 1240 } 1241 } 1242 1243 if (d1 < d2) 1244 return (-1); 1245 if (d1 > d2) 1246 return (+1); 1247 1248 return (0); 1249 } 1250 1251 /* 1252 * Implements month sort (-M). 1253 */ 1254 static int 1255 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1256 { 1257 int val1, val2; 1258 bool key1_read, key2_read; 1259 1260 val1 = val2 = 0; 1261 key1_read = key2_read = false; 1262 1263 if (debug_sort) { 1264 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1265 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1266 } 1267 1268 if (kv1->hint->status == HS_UNINITIALIZED) { 1269 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1270 key1_read = true; 1271 kv1->hint->status = HS_INITIALIZED; 1272 } 1273 1274 if (kv2->hint->status == HS_UNINITIALIZED) { 1275 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1276 key2_read = true; 1277 kv2->hint->status = HS_INITIALIZED; 1278 } 1279 1280 if (kv1->hint->status == HS_INITIALIZED) { 1281 val1 = kv1->hint->v.Mh.m; 1282 key1_read = true; 1283 } 1284 1285 if (kv2->hint->status == HS_INITIALIZED) { 1286 val2 = kv2->hint->v.Mh.m; 1287 key2_read = true; 1288 } 1289 1290 if (!key1_read) 1291 val1 = bws_month_score(kv1->k); 1292 if (!key2_read) 1293 val2 = bws_month_score(kv2->k); 1294 1295 if (val1 == val2) { 1296 return (0); 1297 } 1298 if (val1 < val2) 1299 return (-1); 1300 return (+1); 1301 } 1302