1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 33 #include <errno.h> 34 #include <err.h> 35 #include <langinfo.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <md5.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 #include <wctype.h> 43 44 #include "coll.h" 45 #include "vsort.h" 46 47 struct key_specs *keys; 48 size_t keys_num = 0; 49 50 wint_t symbol_decimal_point = L'.'; 51 /* there is no default thousands separator in collate rules: */ 52 wint_t symbol_thousands_sep = 0; 53 wint_t symbol_negative_sign = L'-'; 54 wint_t symbol_positive_sign = L'+'; 55 56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59 static int numcoll(struct key_value*, struct key_value *, size_t offset); 60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64 /* 65 * Allocate keys array 66 */ 67 struct keys_array * 68 keys_array_alloc(void) 69 { 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_malloc(sz); 75 memset(ka, 0, sz); 76 77 return (ka); 78 } 79 80 /* 81 * Calculate whether we need key hint space 82 */ 83 static size_t 84 key_hint_size(void) 85 { 86 87 return (need_hint ? sizeof(struct key_hint) : 0); 88 } 89 90 /* 91 * Calculate keys array size 92 */ 93 size_t 94 keys_array_size(void) 95 { 96 97 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 98 } 99 100 /* 101 * Clean data of keys array 102 */ 103 void 104 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 105 { 106 107 if (ka) { 108 for (size_t i = 0; i < keys_num; ++i) { 109 const struct key_value *kv; 110 111 kv = get_key_from_keys_array(ka, i); 112 if (kv->k && kv->k != s) 113 bwsfree(kv->k); 114 } 115 memset(ka, 0, keys_array_size()); 116 } 117 } 118 119 /* 120 * Get pointer to a key value in the keys set 121 */ 122 struct key_value * 123 get_key_from_keys_array(struct keys_array *ka, size_t ind) 124 { 125 126 return ((struct key_value *)((caddr_t)ka->key + 127 ind * (sizeof(struct key_value) + key_hint_size()))); 128 } 129 130 /* 131 * Set value of a key in the keys set 132 */ 133 void 134 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 135 { 136 137 if (ka && keys_num > ind) { 138 struct key_value *kv; 139 140 kv = get_key_from_keys_array(ka, ind); 141 142 if (kv->k && kv->k != s) 143 bwsfree(kv->k); 144 kv->k = s; 145 } 146 } 147 148 /* 149 * Initialize a sort list item 150 */ 151 struct sort_list_item * 152 sort_list_item_alloc(void) 153 { 154 struct sort_list_item *si; 155 size_t sz; 156 157 sz = sizeof(struct sort_list_item) + keys_array_size(); 158 si = sort_malloc(sz); 159 memset(si, 0, sz); 160 161 return (si); 162 } 163 164 size_t 165 sort_list_item_size(struct sort_list_item *si) 166 { 167 size_t ret = 0; 168 169 if (si) { 170 ret = sizeof(struct sort_list_item) + keys_array_size(); 171 if (si->str) 172 ret += bws_memsize(si->str); 173 for (size_t i = 0; i < keys_num; ++i) { 174 const struct key_value *kv; 175 176 kv = get_key_from_keys_array(&si->ka, i); 177 178 if (kv->k != si->str) 179 ret += bws_memsize(kv->k); 180 } 181 } 182 return (ret); 183 } 184 185 /* 186 * Calculate key for a sort list item 187 */ 188 static void 189 sort_list_item_make_key(struct sort_list_item *si) 190 { 191 192 preproc(si->str, &(si->ka)); 193 } 194 195 /* 196 * Set value of a sort list item. 197 * Return combined string and keys memory size. 198 */ 199 void 200 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 201 { 202 203 if (si) { 204 clean_keys_array(si->str, &(si->ka)); 205 if (si->str) { 206 if (si->str == str) { 207 /* we are trying to reset the same string */ 208 return; 209 } else { 210 bwsfree(si->str); 211 si->str = NULL; 212 } 213 } 214 si->str = str; 215 sort_list_item_make_key(si); 216 } 217 } 218 219 /* 220 * De-allocate a sort list item object memory 221 */ 222 void 223 sort_list_item_clean(struct sort_list_item *si) 224 { 225 226 if (si) { 227 clean_keys_array(si->str, &(si->ka)); 228 if (si->str) { 229 bwsfree(si->str); 230 si->str = NULL; 231 } 232 } 233 } 234 235 /* 236 * Skip columns according to specs 237 */ 238 static size_t 239 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 240 bool skip_blanks, bool *empty_key) 241 { 242 if (cols < 1) 243 return (BWSLEN(s) + 1); 244 245 if (skip_blanks) 246 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 247 ++start; 248 249 while (start < BWSLEN(s) && cols > 1) { 250 --cols; 251 ++start; 252 } 253 254 if (start >= BWSLEN(s)) 255 *empty_key = true; 256 257 return (start); 258 } 259 260 /* 261 * Skip fields according to specs 262 */ 263 static size_t 264 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 265 { 266 267 if (fields < 2) { 268 if (BWSLEN(s) == 0) 269 *empty_field = true; 270 return (0); 271 } else if (!(sort_opts_vals.tflag)) { 272 size_t cpos = 0; 273 bool pb = true; 274 275 while (cpos < BWSLEN(s)) { 276 bool isblank; 277 278 isblank = iswblank(BWS_GET(s, cpos)); 279 280 if (isblank && !pb) { 281 --fields; 282 if (fields <= 1) 283 return (cpos); 284 } 285 pb = isblank; 286 ++cpos; 287 } 288 if (fields > 1) 289 *empty_field = true; 290 return (cpos); 291 } else { 292 size_t cpos = 0; 293 294 while (cpos < BWSLEN(s)) { 295 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 296 --fields; 297 if (fields <= 1) 298 return (cpos + 1); 299 } 300 ++cpos; 301 } 302 if (fields > 1) 303 *empty_field = true; 304 return (cpos); 305 } 306 } 307 308 /* 309 * Find fields start 310 */ 311 static void 312 find_field_start(const struct bwstring *s, struct key_specs *ks, 313 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 314 { 315 316 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 317 if (!*empty_field) 318 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 319 ks->pos1b, empty_key); 320 else 321 *empty_key = true; 322 } 323 324 /* 325 * Find end key position 326 */ 327 static size_t 328 find_field_end(const struct bwstring *s, struct key_specs *ks) 329 { 330 size_t f2, next_field_start, pos_end; 331 bool empty_field, empty_key; 332 333 empty_field = false; 334 empty_key = false; 335 f2 = ks->f2; 336 337 if (f2 == 0) 338 return (BWSLEN(s) + 1); 339 else { 340 if (ks->c2 == 0) { 341 next_field_start = skip_fields_to_start(s, f2 + 1, 342 &empty_field); 343 if ((next_field_start > 0) && sort_opts_vals.tflag && 344 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 345 next_field_start - 1))) 346 --next_field_start; 347 } else 348 next_field_start = skip_fields_to_start(s, f2, 349 &empty_field); 350 } 351 352 if (empty_field || (next_field_start >= BWSLEN(s))) 353 return (BWSLEN(s) + 1); 354 355 if (ks->c2) { 356 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 357 ks->pos2b, &empty_key); 358 if (pos_end < BWSLEN(s)) 359 ++pos_end; 360 } else 361 pos_end = next_field_start; 362 363 return (pos_end); 364 } 365 366 /* 367 * Cut a field according to the key specs 368 */ 369 static struct bwstring * 370 cut_field(const struct bwstring *s, struct key_specs *ks) 371 { 372 struct bwstring *ret = NULL; 373 374 if (s && ks) { 375 size_t field_start, key_end, key_start, sz; 376 bool empty_field, empty_key; 377 378 field_start = 0; 379 key_start = 0; 380 empty_field = false; 381 empty_key = false; 382 383 find_field_start(s, ks, &field_start, &key_start, 384 &empty_field, &empty_key); 385 386 if (empty_key) 387 sz = 0; 388 else { 389 key_end = find_field_end(s, ks); 390 sz = (key_end < key_start) ? 0 : (key_end - key_start); 391 } 392 393 ret = bwsalloc(sz); 394 if (sz) 395 bwsnocpy(ret, s, key_start, sz); 396 } else 397 ret = bwsalloc(0); 398 399 return (ret); 400 } 401 402 /* 403 * Preprocesses a line applying the necessary transformations 404 * specified by command line options and returns the preprocessed 405 * string, which can be used to compare. 406 */ 407 int 408 preproc(struct bwstring *s, struct keys_array *ka) 409 { 410 411 if (sort_opts_vals.kflag) 412 for (size_t i = 0; i < keys_num; i++) { 413 struct bwstring *key; 414 struct key_specs *kspecs; 415 struct sort_mods *sm; 416 417 kspecs = &(keys[i]); 418 key = cut_field(s, kspecs); 419 420 sm = &(kspecs->sm); 421 if (sm->dflag) 422 key = dictionary_order(key); 423 else if (sm->iflag) 424 key = ignore_nonprinting(key); 425 if (sm->fflag || sm->Mflag) 426 key = ignore_case(key); 427 428 set_key_on_keys_array(ka, key, i); 429 } 430 else { 431 struct bwstring *ret = NULL; 432 struct sort_mods *sm = default_sort_mods; 433 434 if (sm->bflag) { 435 if (ret == NULL) 436 ret = bwsdup(s); 437 ret = ignore_leading_blanks(ret); 438 } 439 if (sm->dflag) { 440 if (ret == NULL) 441 ret = bwsdup(s); 442 ret = dictionary_order(ret); 443 } else if (sm->iflag) { 444 if (ret == NULL) 445 ret = bwsdup(s); 446 ret = ignore_nonprinting(ret); 447 } 448 if (sm->fflag || sm->Mflag) { 449 if (ret == NULL) 450 ret = bwsdup(s); 451 ret = ignore_case(ret); 452 } 453 if (ret == NULL) 454 set_key_on_keys_array(ka, s, 0); 455 else 456 set_key_on_keys_array(ka, ret, 0); 457 } 458 459 return 0; 460 } 461 462 cmpcoll_t 463 get_sort_func(struct sort_mods *sm) 464 { 465 466 if (sm->nflag) 467 return (numcoll); 468 else if (sm->hflag) 469 return (hnumcoll); 470 else if (sm->gflag) 471 return (gnumcoll); 472 else if (sm->Mflag) 473 return (monthcoll); 474 else if (sm->Rflag) 475 return (randomcoll); 476 else if (sm->Vflag) 477 return (versioncoll); 478 else 479 return (wstrcoll); 480 } 481 482 /* 483 * Compares the given strings. Returns a positive number if 484 * the first precedes the second, a negative number if the second is 485 * the preceding one, and zero if they are equal. This function calls 486 * the underlying collate functions, which done the actual comparison. 487 */ 488 int 489 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 490 { 491 struct key_value *kv1, *kv2; 492 struct sort_mods *sm; 493 int res = 0; 494 495 for (size_t i = 0; i < keys_num; ++i) { 496 kv1 = get_key_from_keys_array(ps1, i); 497 kv2 = get_key_from_keys_array(ps2, i); 498 sm = &(keys[i].sm); 499 500 if (sm->rflag) 501 res = sm->func(kv2, kv1, offset); 502 else 503 res = sm->func(kv1, kv2, offset); 504 505 if (res) 506 break; 507 508 /* offset applies to only the first key */ 509 offset = 0; 510 } 511 return (res); 512 } 513 514 /* 515 * Compare two strings. 516 * Plain symbol-by-symbol comparison. 517 */ 518 int 519 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 520 { 521 522 if (default_sort_mods->rflag) { 523 const struct bwstring *tmp; 524 525 tmp = s1; 526 s1 = s2; 527 s2 = tmp; 528 } 529 530 return (bwscoll(s1, s2, 0)); 531 } 532 533 /* 534 * Compare a string and a sort list item, according to the sort specs. 535 */ 536 int 537 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 538 { 539 struct keys_array *ka1; 540 int ret = 0; 541 542 ka1 = keys_array_alloc(); 543 544 preproc(str1, ka1); 545 546 sort_list_item_make_key(*ss2); 547 548 if (debug_sort) { 549 bwsprintf(stdout, str1, "; s1=<", ">"); 550 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 551 } 552 553 ret = key_coll(ka1, &((*ss2)->ka), 0); 554 555 if (debug_sort) 556 printf("; cmp1=%d", ret); 557 558 clean_keys_array(str1, ka1); 559 sort_free(ka1); 560 561 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 562 ret = top_level_str_coll(str1, ((*ss2)->str)); 563 if (debug_sort) 564 printf("; cmp2=%d", ret); 565 } 566 567 if (debug_sort) 568 printf("\n"); 569 570 return (ret); 571 } 572 573 /* 574 * Compare two sort list items, according to the sort specs. 575 */ 576 int 577 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 578 size_t offset) 579 { 580 int ret; 581 582 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 583 584 if (debug_sort) { 585 if (offset) 586 printf("; offset=%d", (int) offset); 587 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 588 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 589 printf("; cmp1=%d\n", ret); 590 } 591 592 if (ret) 593 return (ret); 594 595 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 596 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 597 if (debug_sort) 598 printf("; cmp2=%d\n", ret); 599 } 600 601 return (ret); 602 } 603 604 /* 605 * Compare two sort list items, according to the sort specs. 606 */ 607 int 608 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 609 { 610 611 return (list_coll_offset(ss1, ss2, 0)); 612 } 613 614 #define LSCDEF(N) \ 615 static int \ 616 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 617 { \ 618 \ 619 return (list_coll_offset(ss1, ss2, N)); \ 620 } 621 622 LSCDEF(1) 623 LSCDEF(2) 624 LSCDEF(3) 625 LSCDEF(4) 626 LSCDEF(5) 627 LSCDEF(6) 628 LSCDEF(7) 629 LSCDEF(8) 630 LSCDEF(9) 631 LSCDEF(10) 632 LSCDEF(11) 633 LSCDEF(12) 634 LSCDEF(13) 635 LSCDEF(14) 636 LSCDEF(15) 637 LSCDEF(16) 638 LSCDEF(17) 639 LSCDEF(18) 640 LSCDEF(19) 641 LSCDEF(20) 642 643 listcoll_t 644 get_list_call_func(size_t offset) 645 { 646 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 647 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 648 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 649 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 650 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 651 list_coll_18, list_coll_19, list_coll_20 }; 652 653 if (offset <= 20) 654 return (lsarray[offset]); 655 656 return (list_coll); 657 } 658 659 /* 660 * Compare two sort list items, only by their original string. 661 */ 662 int 663 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 664 { 665 666 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 667 } 668 669 /* 670 * Maximum size of a number in the string (before or after decimal point) 671 */ 672 #define MAX_NUM_SIZE (128) 673 674 /* 675 * Set suffix value 676 */ 677 static void setsuffix(wchar_t c, unsigned char *si) 678 { 679 switch (c){ 680 case L'k': 681 case L'K': 682 *si = 1; 683 break; 684 case L'M': 685 *si = 2; 686 break; 687 case L'G': 688 *si = 3; 689 break; 690 case L'T': 691 *si = 4; 692 break; 693 case L'P': 694 *si = 5; 695 break; 696 case L'E': 697 *si = 6; 698 break; 699 case L'Z': 700 *si = 7; 701 break; 702 case L'Y': 703 *si = 8; 704 break; 705 default: 706 *si = 0; 707 } 708 } 709 710 /* 711 * Read string s and parse the string into a fixed-decimal-point number. 712 * sign equals -1 if the number is negative (explicit plus is not allowed, 713 * according to GNU sort's "info sort". 714 * The number part before decimal point is in the smain, after the decimal 715 * point is in sfrac, tail is the pointer to the remainder of the string. 716 */ 717 static int 718 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 719 { 720 bwstring_iterator s; 721 722 s = bws_begin(s0); 723 724 /* always end the fraction with zero, even if we have no fraction */ 725 sfrac[0] = 0; 726 727 while (iswblank(bws_get_iter_value(s))) 728 s = bws_iterator_inc(s, 1); 729 730 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 731 *sign = -1; 732 s = bws_iterator_inc(s, 1); 733 } 734 735 // This is '0', not '\0', do not change this 736 while (iswdigit(bws_get_iter_value(s)) && 737 (bws_get_iter_value(s) == L'0')) 738 s = bws_iterator_inc(s, 1); 739 740 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 741 if (iswdigit(bws_get_iter_value(s))) { 742 smain[*main_len] = bws_get_iter_value(s); 743 s = bws_iterator_inc(s, 1); 744 *main_len += 1; 745 } else if (symbol_thousands_sep && 746 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 747 s = bws_iterator_inc(s, 1); 748 else 749 break; 750 } 751 752 smain[*main_len] = 0; 753 754 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 755 s = bws_iterator_inc(s, 1); 756 while (iswdigit(bws_get_iter_value(s)) && 757 *frac_len < MAX_NUM_SIZE) { 758 sfrac[*frac_len] = bws_get_iter_value(s); 759 s = bws_iterator_inc(s, 1); 760 *frac_len += 1; 761 } 762 sfrac[*frac_len] = 0; 763 764 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 765 --(*frac_len); 766 sfrac[*frac_len] = L'\0'; 767 } 768 } 769 770 setsuffix(bws_get_iter_value(s),si); 771 772 if ((*main_len + *frac_len) == 0) 773 *sign = 0; 774 775 return (0); 776 } 777 778 /* 779 * Implements string sort. 780 */ 781 static int 782 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 783 { 784 785 if (debug_sort) { 786 if (offset) 787 printf("; offset=%d\n", (int) offset); 788 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 789 printf("(%zu)", BWSLEN(kv1->k)); 790 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 791 printf("(%zu)", BWSLEN(kv2->k)); 792 } 793 794 return (bwscoll(kv1->k, kv2->k, offset)); 795 } 796 797 /* 798 * Compare two suffixes 799 */ 800 static inline int 801 cmpsuffix(unsigned char si1, unsigned char si2) 802 { 803 804 return ((char)si1 - (char)si2); 805 } 806 807 /* 808 * Implements numeric sort for -n and -h. 809 */ 810 static int 811 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 812 size_t offset __unused, bool use_suffix) 813 { 814 struct bwstring *s1, *s2; 815 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 816 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 817 int cmp_res, sign1, sign2; 818 size_t frac1, frac2, main1, main2; 819 unsigned char SI1, SI2; 820 bool e1, e2, key1_read, key2_read; 821 822 s1 = kv1->k; 823 s2 = kv2->k; 824 sign1 = sign2 = 0; 825 main1 = main2 = 0; 826 frac1 = frac2 = 0; 827 828 key1_read = key2_read = false; 829 830 if (debug_sort) { 831 bwsprintf(stdout, s1, "; k1=<", ">"); 832 bwsprintf(stdout, s2, ", k2=<", ">"); 833 } 834 835 if (s1 == s2) 836 return (0); 837 838 if (kv1->hint->status == HS_UNINITIALIZED) { 839 /* read the number from the string */ 840 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 841 key1_read = true; 842 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 843 if(main1 < 1 && frac1 < 1) 844 kv1->hint->v.nh.empty=true; 845 kv1->hint->v.nh.si = SI1; 846 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 847 HS_INITIALIZED : HS_ERROR; 848 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 849 } 850 851 if (kv2->hint->status == HS_UNINITIALIZED) { 852 /* read the number from the string */ 853 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 854 key2_read = true; 855 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 856 if(main2 < 1 && frac2 < 1) 857 kv2->hint->v.nh.empty=true; 858 kv2->hint->v.nh.si = SI2; 859 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 860 HS_INITIALIZED : HS_ERROR; 861 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 862 } 863 864 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 865 HS_INITIALIZED) { 866 unsigned long long n1, n2; 867 bool neg1, neg2; 868 869 e1 = kv1->hint->v.nh.empty; 870 e2 = kv2->hint->v.nh.empty; 871 872 if (e1 && e2) 873 return (0); 874 875 neg1 = kv1->hint->v.nh.neg; 876 neg2 = kv2->hint->v.nh.neg; 877 878 if (neg1 && !neg2) 879 return (-1); 880 if (neg2 && !neg1) 881 return (+1); 882 883 if (e1) 884 return (neg2 ? +1 : -1); 885 else if (e2) 886 return (neg1 ? -1 : +1); 887 888 889 if (use_suffix) { 890 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 891 if (cmp_res) 892 return (neg1 ? -cmp_res : cmp_res); 893 } 894 895 n1 = kv1->hint->v.nh.n1; 896 n2 = kv2->hint->v.nh.n1; 897 if (n1 < n2) 898 return (neg1 ? +1 : -1); 899 else if (n1 > n2) 900 return (neg1 ? -1 : +1); 901 } 902 903 /* read the numbers from the strings */ 904 if (!key1_read) 905 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 906 if (!key2_read) 907 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 908 909 e1 = ((main1 + frac1) == 0); 910 e2 = ((main2 + frac2) == 0); 911 912 if (e1 && e2) 913 return (0); 914 915 /* we know the result if the signs are different */ 916 if (sign1 < 0 && sign2 >= 0) 917 return (-1); 918 if (sign1 >= 0 && sign2 < 0) 919 return (+1); 920 921 if (e1) 922 return ((sign2 < 0) ? +1 : -1); 923 else if (e2) 924 return ((sign1 < 0) ? -1 : +1); 925 926 if (use_suffix) { 927 cmp_res = cmpsuffix(SI1, SI2); 928 if (cmp_res) 929 return ((sign1 < 0) ? -cmp_res : cmp_res); 930 } 931 932 /* if both numbers are empty assume that the strings are equal */ 933 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 934 return (0); 935 936 /* 937 * if the main part is of different size, we know the result 938 * (because the leading zeros are removed) 939 */ 940 if (main1 < main2) 941 cmp_res = -1; 942 else if (main1 > main2) 943 cmp_res = +1; 944 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 945 else 946 cmp_res = wcscmp(smain1, smain2); 947 948 /* check fraction */ 949 if (!cmp_res) 950 cmp_res = wcscmp(sfrac1, sfrac2); 951 952 if (!cmp_res) 953 return (0); 954 955 /* reverse result if the signs are negative */ 956 if (sign1 < 0 && sign2 < 0) 957 cmp_res = -cmp_res; 958 959 return (cmp_res); 960 } 961 962 /* 963 * Implements numeric sort (-n). 964 */ 965 static int 966 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 967 { 968 969 return (numcoll_impl(kv1, kv2, offset, false)); 970 } 971 972 /* 973 * Implements 'human' numeric sort (-h). 974 */ 975 static int 976 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 977 { 978 979 return (numcoll_impl(kv1, kv2, offset, true)); 980 } 981 982 /* 983 * Implements random sort (-R). 984 */ 985 static int 986 randomcoll(struct key_value *kv1, struct key_value *kv2, 987 size_t offset __unused) 988 { 989 struct bwstring *s1, *s2; 990 MD5_CTX ctx1, ctx2; 991 char *b1, *b2; 992 993 s1 = kv1->k; 994 s2 = kv2->k; 995 996 if (debug_sort) { 997 bwsprintf(stdout, s1, "; k1=<", ">"); 998 bwsprintf(stdout, s2, ", k2=<", ">"); 999 } 1000 1001 if (s1 == s2) 1002 return (0); 1003 1004 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 1005 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 1006 1007 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1008 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1009 b1 = MD5End(&ctx1, NULL); 1010 b2 = MD5End(&ctx2, NULL); 1011 if (b1 == NULL) { 1012 if (b2 == NULL) 1013 return (0); 1014 else { 1015 sort_free(b2); 1016 return (-1); 1017 } 1018 } else if (b2 == NULL) { 1019 sort_free(b1); 1020 return (+1); 1021 } else { 1022 int cmp_res; 1023 1024 cmp_res = strcmp(b1,b2); 1025 sort_free(b1); 1026 sort_free(b2); 1027 1028 if (!cmp_res) 1029 cmp_res = bwscoll(s1, s2, 0); 1030 1031 return (cmp_res); 1032 } 1033 } 1034 1035 /* 1036 * Implements version sort (-V). 1037 */ 1038 static int 1039 versioncoll(struct key_value *kv1, struct key_value *kv2, 1040 size_t offset __unused) 1041 { 1042 struct bwstring *s1, *s2; 1043 1044 s1 = kv1->k; 1045 s2 = kv2->k; 1046 1047 if (debug_sort) { 1048 bwsprintf(stdout, s1, "; k1=<", ">"); 1049 bwsprintf(stdout, s2, ", k2=<", ">"); 1050 } 1051 1052 if (s1 == s2) 1053 return (0); 1054 1055 return (vcmp(s1, s2)); 1056 } 1057 1058 /* 1059 * Check for minus infinity 1060 */ 1061 static inline bool 1062 huge_minus(double d, int err1) 1063 { 1064 1065 if (err1 == ERANGE) 1066 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1067 return (+1); 1068 1069 return (0); 1070 } 1071 1072 /* 1073 * Check for plus infinity 1074 */ 1075 static inline bool 1076 huge_plus(double d, int err1) 1077 { 1078 1079 if (err1 == ERANGE) 1080 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1081 return (+1); 1082 1083 return (0); 1084 } 1085 1086 /* 1087 * Check whether a function is a NAN 1088 */ 1089 static bool 1090 is_nan(double d) 1091 { 1092 1093 return ((d == NAN) || (isnan(d))); 1094 } 1095 1096 /* 1097 * Compare two NANs 1098 */ 1099 static int 1100 cmp_nans(double d1, double d2) 1101 { 1102 1103 if (d1 < d2) 1104 return (-1); 1105 if (d1 > d2) 1106 return (+1); 1107 return (0); 1108 } 1109 1110 /* 1111 * Implements general numeric sort (-g). 1112 */ 1113 static int 1114 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1115 size_t offset __unused) 1116 { 1117 double d1, d2; 1118 int err1, err2; 1119 bool empty1, empty2, key1_read, key2_read; 1120 1121 d1 = d2 = 0; 1122 err1 = err2 = 0; 1123 key1_read = key2_read = false; 1124 1125 if (debug_sort) { 1126 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1127 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1128 } 1129 1130 if (kv1->hint->status == HS_UNINITIALIZED) { 1131 errno = 0; 1132 d1 = bwstod(kv1->k, &empty1); 1133 err1 = errno; 1134 1135 if (empty1) 1136 kv1->hint->v.gh.notnum = true; 1137 else if (err1 == 0) { 1138 kv1->hint->v.gh.d = d1; 1139 kv1->hint->v.gh.nan = is_nan(d1); 1140 kv1->hint->status = HS_INITIALIZED; 1141 } else 1142 kv1->hint->status = HS_ERROR; 1143 1144 key1_read = true; 1145 } 1146 1147 if (kv2->hint->status == HS_UNINITIALIZED) { 1148 errno = 0; 1149 d2 = bwstod(kv2->k, &empty2); 1150 err2 = errno; 1151 1152 if (empty2) 1153 kv2->hint->v.gh.notnum = true; 1154 else if (err2 == 0) { 1155 kv2->hint->v.gh.d = d2; 1156 kv2->hint->v.gh.nan = is_nan(d2); 1157 kv2->hint->status = HS_INITIALIZED; 1158 } else 1159 kv2->hint->status = HS_ERROR; 1160 1161 key2_read = true; 1162 } 1163 1164 if (kv1->hint->status == HS_INITIALIZED && 1165 kv2->hint->status == HS_INITIALIZED) { 1166 if (kv1->hint->v.gh.notnum) 1167 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1168 else if (kv2->hint->v.gh.notnum) 1169 return (+1); 1170 1171 if (kv1->hint->v.gh.nan) 1172 return ((kv2->hint->v.gh.nan) ? 1173 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1174 -1); 1175 else if (kv2->hint->v.gh.nan) 1176 return (+1); 1177 1178 d1 = kv1->hint->v.gh.d; 1179 d2 = kv2->hint->v.gh.d; 1180 1181 if (d1 < d2) 1182 return (-1); 1183 else if (d1 > d2) 1184 return (+1); 1185 else 1186 return (0); 1187 } 1188 1189 if (!key1_read) { 1190 errno = 0; 1191 d1 = bwstod(kv1->k, &empty1); 1192 err1 = errno; 1193 } 1194 1195 if (!key2_read) { 1196 errno = 0; 1197 d2 = bwstod(kv2->k, &empty2); 1198 err2 = errno; 1199 } 1200 1201 /* Non-value case: */ 1202 if (empty1) 1203 return (empty2 ? 0 : -1); 1204 else if (empty2) 1205 return (+1); 1206 1207 /* NAN case */ 1208 if (is_nan(d1)) 1209 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1210 else if (is_nan(d2)) 1211 return (+1); 1212 1213 /* Infinities */ 1214 if (err1 == ERANGE || err2 == ERANGE) { 1215 /* Minus infinity case */ 1216 if (huge_minus(d1, err1)) { 1217 if (huge_minus(d2, err2)) { 1218 if (d1 < d2) 1219 return (-1); 1220 if (d1 > d2) 1221 return (+1); 1222 return (0); 1223 } else 1224 return (-1); 1225 1226 } else if (huge_minus(d2, err2)) { 1227 if (huge_minus(d1, err1)) { 1228 if (d1 < d2) 1229 return (-1); 1230 if (d1 > d2) 1231 return (+1); 1232 return (0); 1233 } else 1234 return (+1); 1235 } 1236 1237 /* Plus infinity case */ 1238 if (huge_plus(d1, err1)) { 1239 if (huge_plus(d2, err2)) { 1240 if (d1 < d2) 1241 return (-1); 1242 if (d1 > d2) 1243 return (+1); 1244 return (0); 1245 } else 1246 return (+1); 1247 } else if (huge_plus(d2, err2)) { 1248 if (huge_plus(d1, err1)) { 1249 if (d1 < d2) 1250 return (-1); 1251 if (d1 > d2) 1252 return (+1); 1253 return (0); 1254 } else 1255 return (-1); 1256 } 1257 } 1258 1259 if (d1 < d2) 1260 return (-1); 1261 if (d1 > d2) 1262 return (+1); 1263 1264 return (0); 1265 } 1266 1267 /* 1268 * Implements month sort (-M). 1269 */ 1270 static int 1271 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1272 { 1273 int val1, val2; 1274 bool key1_read, key2_read; 1275 1276 val1 = val2 = 0; 1277 key1_read = key2_read = false; 1278 1279 if (debug_sort) { 1280 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1281 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1282 } 1283 1284 if (kv1->hint->status == HS_UNINITIALIZED) { 1285 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1286 key1_read = true; 1287 kv1->hint->status = HS_INITIALIZED; 1288 } 1289 1290 if (kv2->hint->status == HS_UNINITIALIZED) { 1291 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1292 key2_read = true; 1293 kv2->hint->status = HS_INITIALIZED; 1294 } 1295 1296 if (kv1->hint->status == HS_INITIALIZED) { 1297 val1 = kv1->hint->v.Mh.m; 1298 key1_read = true; 1299 } 1300 1301 if (kv2->hint->status == HS_INITIALIZED) { 1302 val2 = kv2->hint->v.Mh.m; 1303 key2_read = true; 1304 } 1305 1306 if (!key1_read) 1307 val1 = bws_month_score(kv1->k); 1308 if (!key2_read) 1309 val2 = bws_month_score(kv2->k); 1310 1311 if (val1 == val2) { 1312 return (0); 1313 } 1314 if (val1 < val2) 1315 return (-1); 1316 return (+1); 1317 } 1318