1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <sys/types.h> 32 33 #include <errno.h> 34 #include <err.h> 35 #include <langinfo.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <md5.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 #include <wctype.h> 43 44 #include "coll.h" 45 #include "vsort.h" 46 47 struct key_specs *keys; 48 size_t keys_num = 0; 49 50 wint_t symbol_decimal_point = L'.'; 51 /* there is no default thousands separator in collate rules: */ 52 wint_t symbol_thousands_sep = 0; 53 wint_t symbol_negative_sign = L'-'; 54 wint_t symbol_positive_sign = L'+'; 55 56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59 static int numcoll(struct key_value*, struct key_value *, size_t offset); 60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64 /* 65 * Allocate keys array 66 */ 67 struct keys_array * 68 keys_array_alloc(void) 69 { 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_calloc(1, sz); 75 76 return (ka); 77 } 78 79 /* 80 * Calculate whether we need key hint space 81 */ 82 static size_t 83 key_hint_size(void) 84 { 85 86 return (need_hint ? sizeof(struct key_hint) : 0); 87 } 88 89 /* 90 * Calculate keys array size 91 */ 92 size_t 93 keys_array_size(void) 94 { 95 96 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 97 } 98 99 /* 100 * Clean data of keys array 101 */ 102 void 103 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 104 { 105 106 if (ka) { 107 for (size_t i = 0; i < keys_num; ++i) { 108 const struct key_value *kv; 109 110 kv = get_key_from_keys_array(ka, i); 111 if (kv->k && kv->k != s) 112 bwsfree(kv->k); 113 } 114 memset(ka, 0, keys_array_size()); 115 } 116 } 117 118 /* 119 * Get pointer to a key value in the keys set 120 */ 121 struct key_value * 122 get_key_from_keys_array(struct keys_array *ka, size_t ind) 123 { 124 125 return ((struct key_value *)((caddr_t)ka->key + 126 ind * (sizeof(struct key_value) + key_hint_size()))); 127 } 128 129 /* 130 * Set value of a key in the keys set 131 */ 132 void 133 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 134 { 135 136 if (ka && keys_num > ind) { 137 struct key_value *kv; 138 139 kv = get_key_from_keys_array(ka, ind); 140 141 if (kv->k && kv->k != s) 142 bwsfree(kv->k); 143 kv->k = s; 144 } 145 } 146 147 /* 148 * Initialize a sort list item 149 */ 150 struct sort_list_item * 151 sort_list_item_alloc(void) 152 { 153 struct sort_list_item *si; 154 size_t sz; 155 156 sz = sizeof(struct sort_list_item) + keys_array_size(); 157 si = sort_calloc(1, sz); 158 159 return (si); 160 } 161 162 size_t 163 sort_list_item_size(struct sort_list_item *si) 164 { 165 size_t ret = 0; 166 167 if (si) { 168 ret = sizeof(struct sort_list_item) + keys_array_size(); 169 if (si->str) 170 ret += bws_memsize(si->str); 171 for (size_t i = 0; i < keys_num; ++i) { 172 const struct key_value *kv; 173 174 kv = get_key_from_keys_array(&si->ka, i); 175 176 if (kv->k != si->str) 177 ret += bws_memsize(kv->k); 178 } 179 } 180 return (ret); 181 } 182 183 /* 184 * Calculate key for a sort list item 185 */ 186 static void 187 sort_list_item_make_key(struct sort_list_item *si) 188 { 189 190 preproc(si->str, &(si->ka)); 191 } 192 193 /* 194 * Set value of a sort list item. 195 * Return combined string and keys memory size. 196 */ 197 void 198 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 199 { 200 201 if (si) { 202 clean_keys_array(si->str, &(si->ka)); 203 if (si->str) { 204 if (si->str == str) { 205 /* we are trying to reset the same string */ 206 return; 207 } else { 208 bwsfree(si->str); 209 si->str = NULL; 210 } 211 } 212 si->str = str; 213 sort_list_item_make_key(si); 214 } 215 } 216 217 /* 218 * De-allocate a sort list item object memory 219 */ 220 void 221 sort_list_item_clean(struct sort_list_item *si) 222 { 223 224 if (si) { 225 clean_keys_array(si->str, &(si->ka)); 226 if (si->str) { 227 bwsfree(si->str); 228 si->str = NULL; 229 } 230 } 231 } 232 233 /* 234 * Skip columns according to specs 235 */ 236 static size_t 237 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 238 bool skip_blanks, bool *empty_key) 239 { 240 if (cols < 1) 241 return (BWSLEN(s) + 1); 242 243 if (skip_blanks) 244 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 245 ++start; 246 247 while (start < BWSLEN(s) && cols > 1) { 248 --cols; 249 ++start; 250 } 251 252 if (start >= BWSLEN(s)) 253 *empty_key = true; 254 255 return (start); 256 } 257 258 /* 259 * Skip fields according to specs 260 */ 261 static size_t 262 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 263 { 264 265 if (fields < 2) { 266 if (BWSLEN(s) == 0) 267 *empty_field = true; 268 return (0); 269 } else if (!(sort_opts_vals.tflag)) { 270 size_t cpos = 0; 271 bool pb = true; 272 273 while (cpos < BWSLEN(s)) { 274 bool isblank; 275 276 isblank = iswblank(BWS_GET(s, cpos)); 277 278 if (isblank && !pb) { 279 --fields; 280 if (fields <= 1) 281 return (cpos); 282 } 283 pb = isblank; 284 ++cpos; 285 } 286 if (fields > 1) 287 *empty_field = true; 288 return (cpos); 289 } else { 290 size_t cpos = 0; 291 292 while (cpos < BWSLEN(s)) { 293 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 294 --fields; 295 if (fields <= 1) 296 return (cpos + 1); 297 } 298 ++cpos; 299 } 300 if (fields > 1) 301 *empty_field = true; 302 return (cpos); 303 } 304 } 305 306 /* 307 * Find fields start 308 */ 309 static void 310 find_field_start(const struct bwstring *s, struct key_specs *ks, 311 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 312 { 313 314 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 315 if (!*empty_field) 316 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 317 ks->pos1b, empty_key); 318 else 319 *empty_key = true; 320 } 321 322 /* 323 * Find end key position 324 */ 325 static size_t 326 find_field_end(const struct bwstring *s, struct key_specs *ks) 327 { 328 size_t f2, next_field_start, pos_end; 329 bool empty_field, empty_key; 330 331 empty_field = false; 332 empty_key = false; 333 f2 = ks->f2; 334 335 if (f2 == 0) 336 return (BWSLEN(s) + 1); 337 else { 338 if (ks->c2 == 0) { 339 next_field_start = skip_fields_to_start(s, f2 + 1, 340 &empty_field); 341 if ((next_field_start > 0) && sort_opts_vals.tflag && 342 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 343 next_field_start - 1))) 344 --next_field_start; 345 } else 346 next_field_start = skip_fields_to_start(s, f2, 347 &empty_field); 348 } 349 350 if (empty_field || (next_field_start >= BWSLEN(s))) 351 return (BWSLEN(s) + 1); 352 353 if (ks->c2) { 354 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 355 ks->pos2b, &empty_key); 356 if (pos_end < BWSLEN(s)) 357 ++pos_end; 358 } else 359 pos_end = next_field_start; 360 361 return (pos_end); 362 } 363 364 /* 365 * Cut a field according to the key specs 366 */ 367 static struct bwstring * 368 cut_field(const struct bwstring *s, struct key_specs *ks) 369 { 370 struct bwstring *ret = NULL; 371 372 if (s && ks) { 373 size_t field_start, key_end, key_start, sz; 374 bool empty_field, empty_key; 375 376 field_start = 0; 377 key_start = 0; 378 empty_field = false; 379 empty_key = false; 380 381 find_field_start(s, ks, &field_start, &key_start, 382 &empty_field, &empty_key); 383 384 if (empty_key) 385 sz = 0; 386 else { 387 key_end = find_field_end(s, ks); 388 sz = (key_end < key_start) ? 0 : (key_end - key_start); 389 } 390 391 ret = bwsalloc(sz); 392 if (sz) 393 bwsnocpy(ret, s, key_start, sz); 394 } else 395 ret = bwsalloc(0); 396 397 return (ret); 398 } 399 400 /* 401 * Preprocesses a line applying the necessary transformations 402 * specified by command line options and returns the preprocessed 403 * string, which can be used to compare. 404 */ 405 int 406 preproc(struct bwstring *s, struct keys_array *ka) 407 { 408 409 if (sort_opts_vals.kflag) 410 for (size_t i = 0; i < keys_num; i++) { 411 struct bwstring *key; 412 struct key_specs *kspecs; 413 struct sort_mods *sm; 414 415 kspecs = &(keys[i]); 416 key = cut_field(s, kspecs); 417 418 sm = &(kspecs->sm); 419 if (sm->dflag) 420 key = dictionary_order(key); 421 else if (sm->iflag) 422 key = ignore_nonprinting(key); 423 if (sm->fflag || sm->Mflag) 424 key = ignore_case(key); 425 426 set_key_on_keys_array(ka, key, i); 427 } 428 else { 429 struct bwstring *ret = NULL; 430 struct sort_mods *sm = default_sort_mods; 431 432 if (sm->bflag) { 433 if (ret == NULL) 434 ret = bwsdup(s); 435 ret = ignore_leading_blanks(ret); 436 } 437 if (sm->dflag) { 438 if (ret == NULL) 439 ret = bwsdup(s); 440 ret = dictionary_order(ret); 441 } else if (sm->iflag) { 442 if (ret == NULL) 443 ret = bwsdup(s); 444 ret = ignore_nonprinting(ret); 445 } 446 if (sm->fflag || sm->Mflag) { 447 if (ret == NULL) 448 ret = bwsdup(s); 449 ret = ignore_case(ret); 450 } 451 if (ret == NULL) 452 set_key_on_keys_array(ka, s, 0); 453 else 454 set_key_on_keys_array(ka, ret, 0); 455 } 456 457 return 0; 458 } 459 460 cmpcoll_t 461 get_sort_func(struct sort_mods *sm) 462 { 463 464 if (sm->nflag) 465 return (numcoll); 466 else if (sm->hflag) 467 return (hnumcoll); 468 else if (sm->gflag) 469 return (gnumcoll); 470 else if (sm->Mflag) 471 return (monthcoll); 472 else if (sm->Rflag) 473 return (randomcoll); 474 else if (sm->Vflag) 475 return (versioncoll); 476 else 477 return (wstrcoll); 478 } 479 480 /* 481 * Compares the given strings. Returns a positive number if 482 * the first precedes the second, a negative number if the second is 483 * the preceding one, and zero if they are equal. This function calls 484 * the underlying collate functions, which done the actual comparison. 485 */ 486 int 487 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 488 { 489 struct key_value *kv1, *kv2; 490 struct sort_mods *sm; 491 int res = 0; 492 493 for (size_t i = 0; i < keys_num; ++i) { 494 kv1 = get_key_from_keys_array(ps1, i); 495 kv2 = get_key_from_keys_array(ps2, i); 496 sm = &(keys[i].sm); 497 498 if (sm->rflag) 499 res = sm->func(kv2, kv1, offset); 500 else 501 res = sm->func(kv1, kv2, offset); 502 503 if (res) 504 break; 505 506 /* offset applies to only the first key */ 507 offset = 0; 508 } 509 return (res); 510 } 511 512 /* 513 * Compare two strings. 514 * Plain symbol-by-symbol comparison. 515 */ 516 int 517 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 518 { 519 520 if (default_sort_mods->rflag) { 521 const struct bwstring *tmp; 522 523 tmp = s1; 524 s1 = s2; 525 s2 = tmp; 526 } 527 528 return (bwscoll(s1, s2, 0)); 529 } 530 531 /* 532 * Compare a string and a sort list item, according to the sort specs. 533 */ 534 int 535 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 536 { 537 struct keys_array *ka1; 538 int ret = 0; 539 540 ka1 = keys_array_alloc(); 541 542 preproc(str1, ka1); 543 544 sort_list_item_make_key(*ss2); 545 546 if (debug_sort) { 547 bwsprintf(stdout, str1, "; s1=<", ">"); 548 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 549 } 550 551 ret = key_coll(ka1, &((*ss2)->ka), 0); 552 553 if (debug_sort) 554 printf("; cmp1=%d", ret); 555 556 clean_keys_array(str1, ka1); 557 sort_free(ka1); 558 559 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 560 ret = top_level_str_coll(str1, ((*ss2)->str)); 561 if (debug_sort) 562 printf("; cmp2=%d", ret); 563 } 564 565 if (debug_sort) 566 printf("\n"); 567 568 return (ret); 569 } 570 571 /* 572 * Compare two sort list items, according to the sort specs. 573 */ 574 int 575 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 576 size_t offset) 577 { 578 int ret; 579 580 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 581 582 if (debug_sort) { 583 if (offset) 584 printf("; offset=%d", (int) offset); 585 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 586 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 587 printf("; cmp1=%d\n", ret); 588 } 589 590 if (ret) 591 return (ret); 592 593 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 594 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 595 if (debug_sort) 596 printf("; cmp2=%d\n", ret); 597 } 598 599 return (ret); 600 } 601 602 /* 603 * Compare two sort list items, according to the sort specs. 604 */ 605 int 606 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 607 { 608 609 return (list_coll_offset(ss1, ss2, 0)); 610 } 611 612 #define LSCDEF(N) \ 613 static int \ 614 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 615 { \ 616 \ 617 return (list_coll_offset(ss1, ss2, N)); \ 618 } 619 620 LSCDEF(1) 621 LSCDEF(2) 622 LSCDEF(3) 623 LSCDEF(4) 624 LSCDEF(5) 625 LSCDEF(6) 626 LSCDEF(7) 627 LSCDEF(8) 628 LSCDEF(9) 629 LSCDEF(10) 630 LSCDEF(11) 631 LSCDEF(12) 632 LSCDEF(13) 633 LSCDEF(14) 634 LSCDEF(15) 635 LSCDEF(16) 636 LSCDEF(17) 637 LSCDEF(18) 638 LSCDEF(19) 639 LSCDEF(20) 640 641 listcoll_t 642 get_list_call_func(size_t offset) 643 { 644 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 645 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 646 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 647 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 648 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 649 list_coll_18, list_coll_19, list_coll_20 }; 650 651 if (offset <= 20) 652 return (lsarray[offset]); 653 654 return (list_coll); 655 } 656 657 /* 658 * Compare two sort list items, only by their original string. 659 */ 660 int 661 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 662 { 663 664 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 665 } 666 667 /* 668 * Maximum size of a number in the string (before or after decimal point) 669 */ 670 #define MAX_NUM_SIZE (128) 671 672 /* 673 * Set suffix value 674 */ 675 static void setsuffix(wchar_t c, unsigned char *si) 676 { 677 switch (c){ 678 case L'k': 679 case L'K': 680 *si = 1; 681 break; 682 case L'M': 683 *si = 2; 684 break; 685 case L'G': 686 *si = 3; 687 break; 688 case L'T': 689 *si = 4; 690 break; 691 case L'P': 692 *si = 5; 693 break; 694 case L'E': 695 *si = 6; 696 break; 697 case L'Z': 698 *si = 7; 699 break; 700 case L'Y': 701 *si = 8; 702 break; 703 default: 704 *si = 0; 705 } 706 } 707 708 /* 709 * Read string s and parse the string into a fixed-decimal-point number. 710 * sign equals -1 if the number is negative (explicit plus is not allowed, 711 * according to GNU sort's "info sort". 712 * The number part before decimal point is in the smain, after the decimal 713 * point is in sfrac, tail is the pointer to the remainder of the string. 714 */ 715 static int 716 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 717 { 718 bwstring_iterator s; 719 720 s = bws_begin(s0); 721 722 /* always end the fraction with zero, even if we have no fraction */ 723 sfrac[0] = 0; 724 725 while (iswblank(bws_get_iter_value(s))) 726 s = bws_iterator_inc(s, 1); 727 728 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 729 *sign = -1; 730 s = bws_iterator_inc(s, 1); 731 } 732 733 // This is '0', not '\0', do not change this 734 while (iswdigit(bws_get_iter_value(s)) && 735 (bws_get_iter_value(s) == L'0')) 736 s = bws_iterator_inc(s, 1); 737 738 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 739 if (iswdigit(bws_get_iter_value(s))) { 740 smain[*main_len] = bws_get_iter_value(s); 741 s = bws_iterator_inc(s, 1); 742 *main_len += 1; 743 } else if (symbol_thousands_sep && 744 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 745 s = bws_iterator_inc(s, 1); 746 else 747 break; 748 } 749 750 smain[*main_len] = 0; 751 752 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 753 s = bws_iterator_inc(s, 1); 754 while (iswdigit(bws_get_iter_value(s)) && 755 *frac_len < MAX_NUM_SIZE) { 756 sfrac[*frac_len] = bws_get_iter_value(s); 757 s = bws_iterator_inc(s, 1); 758 *frac_len += 1; 759 } 760 sfrac[*frac_len] = 0; 761 762 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 763 --(*frac_len); 764 sfrac[*frac_len] = L'\0'; 765 } 766 } 767 768 setsuffix(bws_get_iter_value(s),si); 769 770 if ((*main_len + *frac_len) == 0) 771 *sign = 0; 772 773 return (0); 774 } 775 776 /* 777 * Implements string sort. 778 */ 779 static int 780 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 781 { 782 783 if (debug_sort) { 784 if (offset) 785 printf("; offset=%d\n", (int) offset); 786 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 787 printf("(%zu)", BWSLEN(kv1->k)); 788 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 789 printf("(%zu)", BWSLEN(kv2->k)); 790 } 791 792 return (bwscoll(kv1->k, kv2->k, offset)); 793 } 794 795 /* 796 * Compare two suffixes 797 */ 798 static inline int 799 cmpsuffix(unsigned char si1, unsigned char si2) 800 { 801 802 return ((char)si1 - (char)si2); 803 } 804 805 /* 806 * Implements numeric sort for -n and -h. 807 */ 808 static int 809 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 810 size_t offset __unused, bool use_suffix) 811 { 812 struct bwstring *s1, *s2; 813 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 814 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 815 int cmp_res, sign1, sign2; 816 size_t frac1, frac2, main1, main2; 817 unsigned char SI1, SI2; 818 bool e1, e2, key1_read, key2_read; 819 820 s1 = kv1->k; 821 s2 = kv2->k; 822 sign1 = sign2 = 0; 823 main1 = main2 = 0; 824 frac1 = frac2 = 0; 825 826 key1_read = key2_read = false; 827 828 if (debug_sort) { 829 bwsprintf(stdout, s1, "; k1=<", ">"); 830 bwsprintf(stdout, s2, ", k2=<", ">"); 831 } 832 833 if (s1 == s2) 834 return (0); 835 836 if (kv1->hint->status == HS_UNINITIALIZED) { 837 /* read the number from the string */ 838 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 839 key1_read = true; 840 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 841 if(main1 < 1 && frac1 < 1) 842 kv1->hint->v.nh.empty=true; 843 kv1->hint->v.nh.si = SI1; 844 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 845 HS_INITIALIZED : HS_ERROR; 846 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 847 } 848 849 if (kv2->hint->status == HS_UNINITIALIZED) { 850 /* read the number from the string */ 851 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 852 key2_read = true; 853 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 854 if(main2 < 1 && frac2 < 1) 855 kv2->hint->v.nh.empty=true; 856 kv2->hint->v.nh.si = SI2; 857 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 858 HS_INITIALIZED : HS_ERROR; 859 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 860 } 861 862 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 863 HS_INITIALIZED) { 864 unsigned long long n1, n2; 865 bool neg1, neg2; 866 867 e1 = kv1->hint->v.nh.empty; 868 e2 = kv2->hint->v.nh.empty; 869 870 if (e1 && e2) 871 return (0); 872 873 neg1 = kv1->hint->v.nh.neg; 874 neg2 = kv2->hint->v.nh.neg; 875 876 if (neg1 && !neg2) 877 return (-1); 878 if (neg2 && !neg1) 879 return (+1); 880 881 if (e1) 882 return (neg2 ? +1 : -1); 883 else if (e2) 884 return (neg1 ? -1 : +1); 885 886 887 if (use_suffix) { 888 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 889 if (cmp_res) 890 return (neg1 ? -cmp_res : cmp_res); 891 } 892 893 n1 = kv1->hint->v.nh.n1; 894 n2 = kv2->hint->v.nh.n1; 895 if (n1 < n2) 896 return (neg1 ? +1 : -1); 897 else if (n1 > n2) 898 return (neg1 ? -1 : +1); 899 } 900 901 /* read the numbers from the strings */ 902 if (!key1_read) 903 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 904 if (!key2_read) 905 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 906 907 e1 = ((main1 + frac1) == 0); 908 e2 = ((main2 + frac2) == 0); 909 910 if (e1 && e2) 911 return (0); 912 913 /* we know the result if the signs are different */ 914 if (sign1 < 0 && sign2 >= 0) 915 return (-1); 916 if (sign1 >= 0 && sign2 < 0) 917 return (+1); 918 919 if (e1) 920 return ((sign2 < 0) ? +1 : -1); 921 else if (e2) 922 return ((sign1 < 0) ? -1 : +1); 923 924 if (use_suffix) { 925 cmp_res = cmpsuffix(SI1, SI2); 926 if (cmp_res) 927 return ((sign1 < 0) ? -cmp_res : cmp_res); 928 } 929 930 /* if both numbers are empty assume that the strings are equal */ 931 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 932 return (0); 933 934 /* 935 * if the main part is of different size, we know the result 936 * (because the leading zeros are removed) 937 */ 938 if (main1 < main2) 939 cmp_res = -1; 940 else if (main1 > main2) 941 cmp_res = +1; 942 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 943 else 944 cmp_res = wcscmp(smain1, smain2); 945 946 /* check fraction */ 947 if (!cmp_res) 948 cmp_res = wcscmp(sfrac1, sfrac2); 949 950 if (!cmp_res) 951 return (0); 952 953 /* reverse result if the signs are negative */ 954 if (sign1 < 0 && sign2 < 0) 955 cmp_res = -cmp_res; 956 957 return (cmp_res); 958 } 959 960 /* 961 * Implements numeric sort (-n). 962 */ 963 static int 964 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 965 { 966 967 return (numcoll_impl(kv1, kv2, offset, false)); 968 } 969 970 /* 971 * Implements 'human' numeric sort (-h). 972 */ 973 static int 974 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 975 { 976 977 return (numcoll_impl(kv1, kv2, offset, true)); 978 } 979 980 /* Use hint space to memoize md5 computations, at least. */ 981 static void 982 randomcoll_init_hint(struct key_value *kv, void *hash) 983 { 984 985 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached)); 986 kv->hint->status = HS_INITIALIZED; 987 } 988 989 /* 990 * Implements random sort (-R). 991 */ 992 static int 993 randomcoll(struct key_value *kv1, struct key_value *kv2, 994 size_t offset __unused) 995 { 996 struct bwstring *s1, *s2; 997 MD5_CTX ctx1, ctx2; 998 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH]; 999 int cmp; 1000 1001 s1 = kv1->k; 1002 s2 = kv2->k; 1003 1004 if (debug_sort) { 1005 bwsprintf(stdout, s1, "; k1=<", ">"); 1006 bwsprintf(stdout, s2, ", k2=<", ">"); 1007 } 1008 1009 if (s1 == s2) 1010 return (0); 1011 1012 if (kv1->hint->status == HS_INITIALIZED && 1013 kv2->hint->status == HS_INITIALIZED) { 1014 cmp = memcmp(kv1->hint->v.Rh.cached, 1015 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached)); 1016 if (cmp != 0) 1017 return (cmp); 1018 } 1019 1020 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 1021 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 1022 1023 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1024 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1025 1026 MD5Final(hash1, &ctx1); 1027 MD5Final(hash2, &ctx2); 1028 1029 if (kv1->hint->status == HS_UNINITIALIZED) 1030 randomcoll_init_hint(kv1, hash1); 1031 if (kv2->hint->status == HS_UNINITIALIZED) 1032 randomcoll_init_hint(kv2, hash2); 1033 1034 return (memcmp(hash1, hash2, sizeof(hash1))); 1035 } 1036 1037 /* 1038 * Implements version sort (-V). 1039 */ 1040 static int 1041 versioncoll(struct key_value *kv1, struct key_value *kv2, 1042 size_t offset __unused) 1043 { 1044 struct bwstring *s1, *s2; 1045 1046 s1 = kv1->k; 1047 s2 = kv2->k; 1048 1049 if (debug_sort) { 1050 bwsprintf(stdout, s1, "; k1=<", ">"); 1051 bwsprintf(stdout, s2, ", k2=<", ">"); 1052 } 1053 1054 if (s1 == s2) 1055 return (0); 1056 1057 return (vcmp(s1, s2)); 1058 } 1059 1060 /* 1061 * Check for minus infinity 1062 */ 1063 static inline bool 1064 huge_minus(double d, int err1) 1065 { 1066 1067 if (err1 == ERANGE) 1068 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1069 return (+1); 1070 1071 return (0); 1072 } 1073 1074 /* 1075 * Check for plus infinity 1076 */ 1077 static inline bool 1078 huge_plus(double d, int err1) 1079 { 1080 1081 if (err1 == ERANGE) 1082 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1083 return (+1); 1084 1085 return (0); 1086 } 1087 1088 /* 1089 * Check whether a function is a NAN 1090 */ 1091 static bool 1092 is_nan(double d) 1093 { 1094 1095 return ((d == NAN) || (isnan(d))); 1096 } 1097 1098 /* 1099 * Compare two NANs 1100 */ 1101 static int 1102 cmp_nans(double d1, double d2) 1103 { 1104 1105 if (d1 < d2) 1106 return (-1); 1107 if (d1 > d2) 1108 return (+1); 1109 return (0); 1110 } 1111 1112 /* 1113 * Implements general numeric sort (-g). 1114 */ 1115 static int 1116 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1117 size_t offset __unused) 1118 { 1119 double d1, d2; 1120 int err1, err2; 1121 bool empty1, empty2, key1_read, key2_read; 1122 1123 d1 = d2 = 0; 1124 err1 = err2 = 0; 1125 key1_read = key2_read = false; 1126 1127 if (debug_sort) { 1128 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1129 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1130 } 1131 1132 if (kv1->hint->status == HS_UNINITIALIZED) { 1133 errno = 0; 1134 d1 = bwstod(kv1->k, &empty1); 1135 err1 = errno; 1136 1137 if (empty1) 1138 kv1->hint->v.gh.notnum = true; 1139 else if (err1 == 0) { 1140 kv1->hint->v.gh.d = d1; 1141 kv1->hint->v.gh.nan = is_nan(d1); 1142 kv1->hint->status = HS_INITIALIZED; 1143 } else 1144 kv1->hint->status = HS_ERROR; 1145 1146 key1_read = true; 1147 } 1148 1149 if (kv2->hint->status == HS_UNINITIALIZED) { 1150 errno = 0; 1151 d2 = bwstod(kv2->k, &empty2); 1152 err2 = errno; 1153 1154 if (empty2) 1155 kv2->hint->v.gh.notnum = true; 1156 else if (err2 == 0) { 1157 kv2->hint->v.gh.d = d2; 1158 kv2->hint->v.gh.nan = is_nan(d2); 1159 kv2->hint->status = HS_INITIALIZED; 1160 } else 1161 kv2->hint->status = HS_ERROR; 1162 1163 key2_read = true; 1164 } 1165 1166 if (kv1->hint->status == HS_INITIALIZED && 1167 kv2->hint->status == HS_INITIALIZED) { 1168 if (kv1->hint->v.gh.notnum) 1169 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1170 else if (kv2->hint->v.gh.notnum) 1171 return (+1); 1172 1173 if (kv1->hint->v.gh.nan) 1174 return ((kv2->hint->v.gh.nan) ? 1175 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1176 -1); 1177 else if (kv2->hint->v.gh.nan) 1178 return (+1); 1179 1180 d1 = kv1->hint->v.gh.d; 1181 d2 = kv2->hint->v.gh.d; 1182 1183 if (d1 < d2) 1184 return (-1); 1185 else if (d1 > d2) 1186 return (+1); 1187 else 1188 return (0); 1189 } 1190 1191 if (!key1_read) { 1192 errno = 0; 1193 d1 = bwstod(kv1->k, &empty1); 1194 err1 = errno; 1195 } 1196 1197 if (!key2_read) { 1198 errno = 0; 1199 d2 = bwstod(kv2->k, &empty2); 1200 err2 = errno; 1201 } 1202 1203 /* Non-value case: */ 1204 if (empty1) 1205 return (empty2 ? 0 : -1); 1206 else if (empty2) 1207 return (+1); 1208 1209 /* NAN case */ 1210 if (is_nan(d1)) 1211 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1212 else if (is_nan(d2)) 1213 return (+1); 1214 1215 /* Infinities */ 1216 if (err1 == ERANGE || err2 == ERANGE) { 1217 /* Minus infinity case */ 1218 if (huge_minus(d1, err1)) { 1219 if (huge_minus(d2, err2)) { 1220 if (d1 < d2) 1221 return (-1); 1222 if (d1 > d2) 1223 return (+1); 1224 return (0); 1225 } else 1226 return (-1); 1227 1228 } else if (huge_minus(d2, err2)) { 1229 if (huge_minus(d1, err1)) { 1230 if (d1 < d2) 1231 return (-1); 1232 if (d1 > d2) 1233 return (+1); 1234 return (0); 1235 } else 1236 return (+1); 1237 } 1238 1239 /* Plus infinity case */ 1240 if (huge_plus(d1, err1)) { 1241 if (huge_plus(d2, err2)) { 1242 if (d1 < d2) 1243 return (-1); 1244 if (d1 > d2) 1245 return (+1); 1246 return (0); 1247 } else 1248 return (+1); 1249 } else if (huge_plus(d2, err2)) { 1250 if (huge_plus(d1, err1)) { 1251 if (d1 < d2) 1252 return (-1); 1253 if (d1 > d2) 1254 return (+1); 1255 return (0); 1256 } else 1257 return (-1); 1258 } 1259 } 1260 1261 if (d1 < d2) 1262 return (-1); 1263 if (d1 > d2) 1264 return (+1); 1265 1266 return (0); 1267 } 1268 1269 /* 1270 * Implements month sort (-M). 1271 */ 1272 static int 1273 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1274 { 1275 int val1, val2; 1276 bool key1_read, key2_read; 1277 1278 val1 = val2 = 0; 1279 key1_read = key2_read = false; 1280 1281 if (debug_sort) { 1282 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1283 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1284 } 1285 1286 if (kv1->hint->status == HS_UNINITIALIZED) { 1287 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1288 key1_read = true; 1289 kv1->hint->status = HS_INITIALIZED; 1290 } 1291 1292 if (kv2->hint->status == HS_UNINITIALIZED) { 1293 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1294 key2_read = true; 1295 kv2->hint->status = HS_INITIALIZED; 1296 } 1297 1298 if (kv1->hint->status == HS_INITIALIZED) { 1299 val1 = kv1->hint->v.Mh.m; 1300 key1_read = true; 1301 } 1302 1303 if (kv2->hint->status == HS_INITIALIZED) { 1304 val2 = kv2->hint->v.Mh.m; 1305 key2_read = true; 1306 } 1307 1308 if (!key1_read) 1309 val1 = bws_month_score(kv1->k); 1310 if (!key2_read) 1311 val2 = bws_month_score(kv2->k); 1312 1313 if (val1 == val2) { 1314 return (0); 1315 } 1316 if (val1 < val2) 1317 return (-1); 1318 return (+1); 1319 } 1320