1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 35 #include <errno.h> 36 #include <err.h> 37 #include <langinfo.h> 38 #include <limits.h> 39 #include <math.h> 40 #include <md5.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 #include <wctype.h> 45 46 #include "coll.h" 47 #include "vsort.h" 48 49 struct key_specs *keys; 50 size_t keys_num = 0; 51 52 wint_t symbol_decimal_point = L'.'; 53 /* there is no default thousands separator in collate rules: */ 54 wint_t symbol_thousands_sep = 0; 55 wint_t symbol_negative_sign = L'-'; 56 wint_t symbol_positive_sign = L'+'; 57 58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61 static int numcoll(struct key_value*, struct key_value *, size_t offset); 62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 64 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 65 66 /* 67 * Allocate keys array 68 */ 69 struct keys_array * 70 keys_array_alloc(void) 71 { 72 struct keys_array *ka; 73 size_t sz; 74 75 sz = keys_array_size(); 76 ka = sort_calloc(1, sz); 77 78 return (ka); 79 } 80 81 /* 82 * Calculate whether we need key hint space 83 */ 84 static size_t 85 key_hint_size(void) 86 { 87 88 return (need_hint ? sizeof(struct key_hint) : 0); 89 } 90 91 /* 92 * Calculate keys array size 93 */ 94 size_t 95 keys_array_size(void) 96 { 97 98 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 99 } 100 101 /* 102 * Clean data of keys array 103 */ 104 void 105 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 106 { 107 108 if (ka) { 109 for (size_t i = 0; i < keys_num; ++i) { 110 const struct key_value *kv; 111 112 kv = get_key_from_keys_array(ka, i); 113 if (kv->k && kv->k != s) 114 bwsfree(kv->k); 115 } 116 memset(ka, 0, keys_array_size()); 117 } 118 } 119 120 /* 121 * Get pointer to a key value in the keys set 122 */ 123 struct key_value * 124 get_key_from_keys_array(struct keys_array *ka, size_t ind) 125 { 126 127 return ((struct key_value *)((caddr_t)ka->key + 128 ind * (sizeof(struct key_value) + key_hint_size()))); 129 } 130 131 /* 132 * Set value of a key in the keys set 133 */ 134 void 135 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 136 { 137 138 if (ka && keys_num > ind) { 139 struct key_value *kv; 140 141 kv = get_key_from_keys_array(ka, ind); 142 143 if (kv->k && kv->k != s) 144 bwsfree(kv->k); 145 kv->k = s; 146 } 147 } 148 149 /* 150 * Initialize a sort list item 151 */ 152 struct sort_list_item * 153 sort_list_item_alloc(void) 154 { 155 struct sort_list_item *si; 156 size_t sz; 157 158 sz = sizeof(struct sort_list_item) + keys_array_size(); 159 si = sort_calloc(1, sz); 160 161 return (si); 162 } 163 164 size_t 165 sort_list_item_size(struct sort_list_item *si) 166 { 167 size_t ret = 0; 168 169 if (si) { 170 ret = sizeof(struct sort_list_item) + keys_array_size(); 171 if (si->str) 172 ret += bws_memsize(si->str); 173 for (size_t i = 0; i < keys_num; ++i) { 174 const struct key_value *kv; 175 176 kv = get_key_from_keys_array(&si->ka, i); 177 178 if (kv->k != si->str) 179 ret += bws_memsize(kv->k); 180 } 181 } 182 return (ret); 183 } 184 185 /* 186 * Calculate key for a sort list item 187 */ 188 static void 189 sort_list_item_make_key(struct sort_list_item *si) 190 { 191 192 preproc(si->str, &(si->ka)); 193 } 194 195 /* 196 * Set value of a sort list item. 197 * Return combined string and keys memory size. 198 */ 199 void 200 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 201 { 202 203 if (si) { 204 clean_keys_array(si->str, &(si->ka)); 205 if (si->str) { 206 if (si->str == str) { 207 /* we are trying to reset the same string */ 208 return; 209 } else { 210 bwsfree(si->str); 211 si->str = NULL; 212 } 213 } 214 si->str = str; 215 sort_list_item_make_key(si); 216 } 217 } 218 219 /* 220 * De-allocate a sort list item object memory 221 */ 222 void 223 sort_list_item_clean(struct sort_list_item *si) 224 { 225 226 if (si) { 227 clean_keys_array(si->str, &(si->ka)); 228 if (si->str) { 229 bwsfree(si->str); 230 si->str = NULL; 231 } 232 } 233 } 234 235 /* 236 * Skip columns according to specs 237 */ 238 static size_t 239 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 240 bool skip_blanks, bool *empty_key) 241 { 242 if (cols < 1) 243 return (BWSLEN(s) + 1); 244 245 if (skip_blanks) 246 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 247 ++start; 248 249 while (start < BWSLEN(s) && cols > 1) { 250 --cols; 251 ++start; 252 } 253 254 if (start >= BWSLEN(s)) 255 *empty_key = true; 256 257 return (start); 258 } 259 260 /* 261 * Skip fields according to specs 262 */ 263 static size_t 264 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 265 { 266 267 if (fields < 2) { 268 if (BWSLEN(s) == 0) 269 *empty_field = true; 270 return (0); 271 } else if (!(sort_opts_vals.tflag)) { 272 size_t cpos = 0; 273 bool pb = true; 274 275 while (cpos < BWSLEN(s)) { 276 bool isblank; 277 278 isblank = iswblank(BWS_GET(s, cpos)); 279 280 if (isblank && !pb) { 281 --fields; 282 if (fields <= 1) 283 return (cpos); 284 } 285 pb = isblank; 286 ++cpos; 287 } 288 if (fields > 1) 289 *empty_field = true; 290 return (cpos); 291 } else { 292 size_t cpos = 0; 293 294 while (cpos < BWSLEN(s)) { 295 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 296 --fields; 297 if (fields <= 1) 298 return (cpos + 1); 299 } 300 ++cpos; 301 } 302 if (fields > 1) 303 *empty_field = true; 304 return (cpos); 305 } 306 } 307 308 /* 309 * Find fields start 310 */ 311 static void 312 find_field_start(const struct bwstring *s, struct key_specs *ks, 313 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 314 { 315 316 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 317 if (!*empty_field) 318 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 319 ks->pos1b, empty_key); 320 else 321 *empty_key = true; 322 } 323 324 /* 325 * Find end key position 326 */ 327 static size_t 328 find_field_end(const struct bwstring *s, struct key_specs *ks) 329 { 330 size_t f2, next_field_start, pos_end; 331 bool empty_field, empty_key; 332 333 empty_field = false; 334 empty_key = false; 335 f2 = ks->f2; 336 337 if (f2 == 0) 338 return (BWSLEN(s) + 1); 339 else { 340 if (ks->c2 == 0) { 341 next_field_start = skip_fields_to_start(s, f2 + 1, 342 &empty_field); 343 if ((next_field_start > 0) && sort_opts_vals.tflag && 344 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 345 next_field_start - 1))) 346 --next_field_start; 347 } else 348 next_field_start = skip_fields_to_start(s, f2, 349 &empty_field); 350 } 351 352 if (empty_field || (next_field_start >= BWSLEN(s))) 353 return (BWSLEN(s) + 1); 354 355 if (ks->c2) { 356 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 357 ks->pos2b, &empty_key); 358 if (pos_end < BWSLEN(s)) 359 ++pos_end; 360 } else 361 pos_end = next_field_start; 362 363 return (pos_end); 364 } 365 366 /* 367 * Cut a field according to the key specs 368 */ 369 static struct bwstring * 370 cut_field(const struct bwstring *s, struct key_specs *ks) 371 { 372 struct bwstring *ret = NULL; 373 374 if (s && ks) { 375 size_t field_start, key_end, key_start, sz; 376 bool empty_field, empty_key; 377 378 field_start = 0; 379 key_start = 0; 380 empty_field = false; 381 empty_key = false; 382 383 find_field_start(s, ks, &field_start, &key_start, 384 &empty_field, &empty_key); 385 386 if (empty_key) 387 sz = 0; 388 else { 389 key_end = find_field_end(s, ks); 390 sz = (key_end < key_start) ? 0 : (key_end - key_start); 391 } 392 393 ret = bwsalloc(sz); 394 if (sz) 395 bwsnocpy(ret, s, key_start, sz); 396 } else 397 ret = bwsalloc(0); 398 399 return (ret); 400 } 401 402 /* 403 * Preprocesses a line applying the necessary transformations 404 * specified by command line options and returns the preprocessed 405 * string, which can be used to compare. 406 */ 407 int 408 preproc(struct bwstring *s, struct keys_array *ka) 409 { 410 411 if (sort_opts_vals.kflag) 412 for (size_t i = 0; i < keys_num; i++) { 413 struct bwstring *key; 414 struct key_specs *kspecs; 415 struct sort_mods *sm; 416 417 kspecs = &(keys[i]); 418 key = cut_field(s, kspecs); 419 420 sm = &(kspecs->sm); 421 if (sm->dflag) 422 key = dictionary_order(key); 423 else if (sm->iflag) 424 key = ignore_nonprinting(key); 425 if (sm->fflag || sm->Mflag) 426 key = ignore_case(key); 427 428 set_key_on_keys_array(ka, key, i); 429 } 430 else { 431 struct bwstring *ret = NULL; 432 struct sort_mods *sm = default_sort_mods; 433 434 if (sm->bflag) { 435 if (ret == NULL) 436 ret = bwsdup(s); 437 ret = ignore_leading_blanks(ret); 438 } 439 if (sm->dflag) { 440 if (ret == NULL) 441 ret = bwsdup(s); 442 ret = dictionary_order(ret); 443 } else if (sm->iflag) { 444 if (ret == NULL) 445 ret = bwsdup(s); 446 ret = ignore_nonprinting(ret); 447 } 448 if (sm->fflag || sm->Mflag) { 449 if (ret == NULL) 450 ret = bwsdup(s); 451 ret = ignore_case(ret); 452 } 453 if (ret == NULL) 454 set_key_on_keys_array(ka, s, 0); 455 else 456 set_key_on_keys_array(ka, ret, 0); 457 } 458 459 return 0; 460 } 461 462 cmpcoll_t 463 get_sort_func(struct sort_mods *sm) 464 { 465 466 if (sm->nflag) 467 return (numcoll); 468 else if (sm->hflag) 469 return (hnumcoll); 470 else if (sm->gflag) 471 return (gnumcoll); 472 else if (sm->Mflag) 473 return (monthcoll); 474 else if (sm->Rflag) 475 return (randomcoll); 476 else if (sm->Vflag) 477 return (versioncoll); 478 else 479 return (wstrcoll); 480 } 481 482 /* 483 * Compares the given strings. Returns a positive number if 484 * the first precedes the second, a negative number if the second is 485 * the preceding one, and zero if they are equal. This function calls 486 * the underlying collate functions, which done the actual comparison. 487 */ 488 int 489 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 490 { 491 struct key_value *kv1, *kv2; 492 struct sort_mods *sm; 493 int res = 0; 494 495 for (size_t i = 0; i < keys_num; ++i) { 496 kv1 = get_key_from_keys_array(ps1, i); 497 kv2 = get_key_from_keys_array(ps2, i); 498 sm = &(keys[i].sm); 499 500 if (sm->rflag) 501 res = sm->func(kv2, kv1, offset); 502 else 503 res = sm->func(kv1, kv2, offset); 504 505 if (res) 506 break; 507 508 /* offset applies to only the first key */ 509 offset = 0; 510 } 511 return (res); 512 } 513 514 /* 515 * Compare two strings. 516 * Plain symbol-by-symbol comparison. 517 */ 518 int 519 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 520 { 521 522 if (default_sort_mods->rflag) { 523 const struct bwstring *tmp; 524 525 tmp = s1; 526 s1 = s2; 527 s2 = tmp; 528 } 529 530 return (bwscoll(s1, s2, 0)); 531 } 532 533 /* 534 * Compare a string and a sort list item, according to the sort specs. 535 */ 536 int 537 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 538 { 539 struct keys_array *ka1; 540 int ret = 0; 541 542 ka1 = keys_array_alloc(); 543 544 preproc(str1, ka1); 545 546 sort_list_item_make_key(*ss2); 547 548 if (debug_sort) { 549 bwsprintf(stdout, str1, "; s1=<", ">"); 550 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 551 } 552 553 ret = key_coll(ka1, &((*ss2)->ka), 0); 554 555 if (debug_sort) 556 printf("; cmp1=%d", ret); 557 558 clean_keys_array(str1, ka1); 559 sort_free(ka1); 560 561 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 562 ret = top_level_str_coll(str1, ((*ss2)->str)); 563 if (debug_sort) 564 printf("; cmp2=%d", ret); 565 } 566 567 if (debug_sort) 568 printf("\n"); 569 570 return (ret); 571 } 572 573 /* 574 * Compare two sort list items, according to the sort specs. 575 */ 576 int 577 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 578 size_t offset) 579 { 580 int ret; 581 582 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 583 584 if (debug_sort) { 585 if (offset) 586 printf("; offset=%d", (int) offset); 587 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 588 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 589 printf("; cmp1=%d\n", ret); 590 } 591 592 if (ret) 593 return (ret); 594 595 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 596 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 597 if (debug_sort) 598 printf("; cmp2=%d\n", ret); 599 } 600 601 return (ret); 602 } 603 604 /* 605 * Compare two sort list items, according to the sort specs. 606 */ 607 int 608 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 609 { 610 611 return (list_coll_offset(ss1, ss2, 0)); 612 } 613 614 #define LSCDEF(N) \ 615 static int \ 616 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 617 { \ 618 \ 619 return (list_coll_offset(ss1, ss2, N)); \ 620 } 621 622 LSCDEF(1) 623 LSCDEF(2) 624 LSCDEF(3) 625 LSCDEF(4) 626 LSCDEF(5) 627 LSCDEF(6) 628 LSCDEF(7) 629 LSCDEF(8) 630 LSCDEF(9) 631 LSCDEF(10) 632 LSCDEF(11) 633 LSCDEF(12) 634 LSCDEF(13) 635 LSCDEF(14) 636 LSCDEF(15) 637 LSCDEF(16) 638 LSCDEF(17) 639 LSCDEF(18) 640 LSCDEF(19) 641 LSCDEF(20) 642 643 listcoll_t 644 get_list_call_func(size_t offset) 645 { 646 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 647 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 648 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 649 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 650 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 651 list_coll_18, list_coll_19, list_coll_20 }; 652 653 if (offset <= 20) 654 return (lsarray[offset]); 655 656 return (list_coll); 657 } 658 659 /* 660 * Compare two sort list items, only by their original string. 661 */ 662 int 663 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 664 { 665 666 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 667 } 668 669 /* 670 * Maximum size of a number in the string (before or after decimal point) 671 */ 672 #define MAX_NUM_SIZE (128) 673 674 /* 675 * Set suffix value 676 */ 677 static void setsuffix(wchar_t c, unsigned char *si) 678 { 679 switch (c){ 680 case L'k': 681 case L'K': 682 *si = 1; 683 break; 684 case L'M': 685 *si = 2; 686 break; 687 case L'G': 688 *si = 3; 689 break; 690 case L'T': 691 *si = 4; 692 break; 693 case L'P': 694 *si = 5; 695 break; 696 case L'E': 697 *si = 6; 698 break; 699 case L'Z': 700 *si = 7; 701 break; 702 case L'Y': 703 *si = 8; 704 break; 705 default: 706 *si = 0; 707 } 708 } 709 710 /* 711 * Read string s and parse the string into a fixed-decimal-point number. 712 * sign equals -1 if the number is negative (explicit plus is not allowed, 713 * according to GNU sort's "info sort". 714 * The number part before decimal point is in the smain, after the decimal 715 * point is in sfrac, tail is the pointer to the remainder of the string. 716 */ 717 static int 718 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 719 { 720 bwstring_iterator s; 721 722 s = bws_begin(s0); 723 724 /* always end the fraction with zero, even if we have no fraction */ 725 sfrac[0] = 0; 726 727 while (iswblank(bws_get_iter_value(s))) 728 s = bws_iterator_inc(s, 1); 729 730 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 731 *sign = -1; 732 s = bws_iterator_inc(s, 1); 733 } 734 735 // This is '0', not '\0', do not change this 736 while (iswdigit(bws_get_iter_value(s)) && 737 (bws_get_iter_value(s) == L'0')) 738 s = bws_iterator_inc(s, 1); 739 740 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 741 if (iswdigit(bws_get_iter_value(s))) { 742 smain[*main_len] = bws_get_iter_value(s); 743 s = bws_iterator_inc(s, 1); 744 *main_len += 1; 745 } else if (symbol_thousands_sep && 746 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 747 s = bws_iterator_inc(s, 1); 748 else 749 break; 750 } 751 752 smain[*main_len] = 0; 753 754 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 755 s = bws_iterator_inc(s, 1); 756 while (iswdigit(bws_get_iter_value(s)) && 757 *frac_len < MAX_NUM_SIZE) { 758 sfrac[*frac_len] = bws_get_iter_value(s); 759 s = bws_iterator_inc(s, 1); 760 *frac_len += 1; 761 } 762 sfrac[*frac_len] = 0; 763 764 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 765 --(*frac_len); 766 sfrac[*frac_len] = L'\0'; 767 } 768 } 769 770 setsuffix(bws_get_iter_value(s),si); 771 772 if ((*main_len + *frac_len) == 0) 773 *sign = 0; 774 775 return (0); 776 } 777 778 /* 779 * Implements string sort. 780 */ 781 static int 782 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 783 { 784 785 if (debug_sort) { 786 if (offset) 787 printf("; offset=%d\n", (int) offset); 788 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 789 printf("(%zu)", BWSLEN(kv1->k)); 790 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 791 printf("(%zu)", BWSLEN(kv2->k)); 792 } 793 794 return (bwscoll(kv1->k, kv2->k, offset)); 795 } 796 797 /* 798 * Compare two suffixes 799 */ 800 static inline int 801 cmpsuffix(unsigned char si1, unsigned char si2) 802 { 803 804 return ((char)si1 - (char)si2); 805 } 806 807 /* 808 * Implements numeric sort for -n and -h. 809 */ 810 static int 811 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 812 size_t offset __unused, bool use_suffix) 813 { 814 struct bwstring *s1, *s2; 815 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 816 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 817 int cmp_res, sign1, sign2; 818 size_t frac1, frac2, main1, main2; 819 unsigned char SI1, SI2; 820 bool e1, e2, key1_read, key2_read; 821 822 s1 = kv1->k; 823 s2 = kv2->k; 824 sign1 = sign2 = 0; 825 main1 = main2 = 0; 826 frac1 = frac2 = 0; 827 828 key1_read = key2_read = false; 829 830 if (debug_sort) { 831 bwsprintf(stdout, s1, "; k1=<", ">"); 832 bwsprintf(stdout, s2, ", k2=<", ">"); 833 } 834 835 if (s1 == s2) 836 return (0); 837 838 if (kv1->hint->status == HS_UNINITIALIZED) { 839 /* read the number from the string */ 840 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 841 key1_read = true; 842 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 843 if(main1 < 1 && frac1 < 1) 844 kv1->hint->v.nh.empty=true; 845 kv1->hint->v.nh.si = SI1; 846 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 847 HS_INITIALIZED : HS_ERROR; 848 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 849 } 850 851 if (kv2->hint->status == HS_UNINITIALIZED) { 852 /* read the number from the string */ 853 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 854 key2_read = true; 855 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 856 if(main2 < 1 && frac2 < 1) 857 kv2->hint->v.nh.empty=true; 858 kv2->hint->v.nh.si = SI2; 859 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 860 HS_INITIALIZED : HS_ERROR; 861 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 862 } 863 864 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 865 HS_INITIALIZED) { 866 unsigned long long n1, n2; 867 bool neg1, neg2; 868 869 e1 = kv1->hint->v.nh.empty; 870 e2 = kv2->hint->v.nh.empty; 871 872 if (e1 && e2) 873 return (0); 874 875 neg1 = kv1->hint->v.nh.neg; 876 neg2 = kv2->hint->v.nh.neg; 877 878 if (neg1 && !neg2) 879 return (-1); 880 if (neg2 && !neg1) 881 return (+1); 882 883 if (e1) 884 return (neg2 ? +1 : -1); 885 else if (e2) 886 return (neg1 ? -1 : +1); 887 888 889 if (use_suffix) { 890 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 891 if (cmp_res) 892 return (neg1 ? -cmp_res : cmp_res); 893 } 894 895 n1 = kv1->hint->v.nh.n1; 896 n2 = kv2->hint->v.nh.n1; 897 if (n1 < n2) 898 return (neg1 ? +1 : -1); 899 else if (n1 > n2) 900 return (neg1 ? -1 : +1); 901 } 902 903 /* read the numbers from the strings */ 904 if (!key1_read) 905 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 906 if (!key2_read) 907 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 908 909 e1 = ((main1 + frac1) == 0); 910 e2 = ((main2 + frac2) == 0); 911 912 if (e1 && e2) 913 return (0); 914 915 /* we know the result if the signs are different */ 916 if (sign1 < 0 && sign2 >= 0) 917 return (-1); 918 if (sign1 >= 0 && sign2 < 0) 919 return (+1); 920 921 if (e1) 922 return ((sign2 < 0) ? +1 : -1); 923 else if (e2) 924 return ((sign1 < 0) ? -1 : +1); 925 926 if (use_suffix) { 927 cmp_res = cmpsuffix(SI1, SI2); 928 if (cmp_res) 929 return ((sign1 < 0) ? -cmp_res : cmp_res); 930 } 931 932 /* if both numbers are empty assume that the strings are equal */ 933 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 934 return (0); 935 936 /* 937 * if the main part is of different size, we know the result 938 * (because the leading zeros are removed) 939 */ 940 if (main1 < main2) 941 cmp_res = -1; 942 else if (main1 > main2) 943 cmp_res = +1; 944 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 945 else 946 cmp_res = wcscmp(smain1, smain2); 947 948 /* check fraction */ 949 if (!cmp_res) 950 cmp_res = wcscmp(sfrac1, sfrac2); 951 952 if (!cmp_res) 953 return (0); 954 955 /* reverse result if the signs are negative */ 956 if (sign1 < 0 && sign2 < 0) 957 cmp_res = -cmp_res; 958 959 return (cmp_res); 960 } 961 962 /* 963 * Implements numeric sort (-n). 964 */ 965 static int 966 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 967 { 968 969 return (numcoll_impl(kv1, kv2, offset, false)); 970 } 971 972 /* 973 * Implements 'human' numeric sort (-h). 974 */ 975 static int 976 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 977 { 978 979 return (numcoll_impl(kv1, kv2, offset, true)); 980 } 981 982 /* Use hint space to memoize md5 computations, at least. */ 983 static void 984 randomcoll_init_hint(struct key_value *kv, void *hash) 985 { 986 987 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached)); 988 kv->hint->status = HS_INITIALIZED; 989 } 990 991 /* 992 * Implements random sort (-R). 993 */ 994 static int 995 randomcoll(struct key_value *kv1, struct key_value *kv2, 996 size_t offset __unused) 997 { 998 struct bwstring *s1, *s2; 999 MD5_CTX ctx1, ctx2; 1000 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH]; 1001 int cmp; 1002 1003 s1 = kv1->k; 1004 s2 = kv2->k; 1005 1006 if (debug_sort) { 1007 bwsprintf(stdout, s1, "; k1=<", ">"); 1008 bwsprintf(stdout, s2, ", k2=<", ">"); 1009 } 1010 1011 if (s1 == s2) 1012 return (0); 1013 1014 if (kv1->hint->status == HS_INITIALIZED && 1015 kv2->hint->status == HS_INITIALIZED) { 1016 cmp = memcmp(kv1->hint->v.Rh.cached, 1017 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached)); 1018 if (cmp != 0) 1019 return (cmp); 1020 } 1021 1022 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 1023 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 1024 1025 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1026 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1027 1028 MD5Final(hash1, &ctx1); 1029 MD5Final(hash2, &ctx2); 1030 1031 if (kv1->hint->status == HS_UNINITIALIZED) 1032 randomcoll_init_hint(kv1, hash1); 1033 if (kv2->hint->status == HS_UNINITIALIZED) 1034 randomcoll_init_hint(kv2, hash2); 1035 1036 return (memcmp(hash1, hash2, sizeof(hash1))); 1037 } 1038 1039 /* 1040 * Implements version sort (-V). 1041 */ 1042 static int 1043 versioncoll(struct key_value *kv1, struct key_value *kv2, 1044 size_t offset __unused) 1045 { 1046 struct bwstring *s1, *s2; 1047 1048 s1 = kv1->k; 1049 s2 = kv2->k; 1050 1051 if (debug_sort) { 1052 bwsprintf(stdout, s1, "; k1=<", ">"); 1053 bwsprintf(stdout, s2, ", k2=<", ">"); 1054 } 1055 1056 if (s1 == s2) 1057 return (0); 1058 1059 return (vcmp(s1, s2)); 1060 } 1061 1062 /* 1063 * Check for minus infinity 1064 */ 1065 static inline bool 1066 huge_minus(double d, int err1) 1067 { 1068 1069 if (err1 == ERANGE) 1070 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1071 return (+1); 1072 1073 return (0); 1074 } 1075 1076 /* 1077 * Check for plus infinity 1078 */ 1079 static inline bool 1080 huge_plus(double d, int err1) 1081 { 1082 1083 if (err1 == ERANGE) 1084 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1085 return (+1); 1086 1087 return (0); 1088 } 1089 1090 /* 1091 * Check whether a function is a NAN 1092 */ 1093 static bool 1094 is_nan(double d) 1095 { 1096 1097 return ((d == NAN) || (isnan(d))); 1098 } 1099 1100 /* 1101 * Compare two NANs 1102 */ 1103 static int 1104 cmp_nans(double d1, double d2) 1105 { 1106 1107 if (d1 < d2) 1108 return (-1); 1109 if (d1 > d2) 1110 return (+1); 1111 return (0); 1112 } 1113 1114 /* 1115 * Implements general numeric sort (-g). 1116 */ 1117 static int 1118 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1119 size_t offset __unused) 1120 { 1121 double d1, d2; 1122 int err1, err2; 1123 bool empty1, empty2, key1_read, key2_read; 1124 1125 d1 = d2 = 0; 1126 err1 = err2 = 0; 1127 key1_read = key2_read = false; 1128 1129 if (debug_sort) { 1130 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1131 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1132 } 1133 1134 if (kv1->hint->status == HS_UNINITIALIZED) { 1135 errno = 0; 1136 d1 = bwstod(kv1->k, &empty1); 1137 err1 = errno; 1138 1139 if (empty1) 1140 kv1->hint->v.gh.notnum = true; 1141 else if (err1 == 0) { 1142 kv1->hint->v.gh.d = d1; 1143 kv1->hint->v.gh.nan = is_nan(d1); 1144 kv1->hint->status = HS_INITIALIZED; 1145 } else 1146 kv1->hint->status = HS_ERROR; 1147 1148 key1_read = true; 1149 } 1150 1151 if (kv2->hint->status == HS_UNINITIALIZED) { 1152 errno = 0; 1153 d2 = bwstod(kv2->k, &empty2); 1154 err2 = errno; 1155 1156 if (empty2) 1157 kv2->hint->v.gh.notnum = true; 1158 else if (err2 == 0) { 1159 kv2->hint->v.gh.d = d2; 1160 kv2->hint->v.gh.nan = is_nan(d2); 1161 kv2->hint->status = HS_INITIALIZED; 1162 } else 1163 kv2->hint->status = HS_ERROR; 1164 1165 key2_read = true; 1166 } 1167 1168 if (kv1->hint->status == HS_INITIALIZED && 1169 kv2->hint->status == HS_INITIALIZED) { 1170 if (kv1->hint->v.gh.notnum) 1171 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1172 else if (kv2->hint->v.gh.notnum) 1173 return (+1); 1174 1175 if (kv1->hint->v.gh.nan) 1176 return ((kv2->hint->v.gh.nan) ? 1177 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1178 -1); 1179 else if (kv2->hint->v.gh.nan) 1180 return (+1); 1181 1182 d1 = kv1->hint->v.gh.d; 1183 d2 = kv2->hint->v.gh.d; 1184 1185 if (d1 < d2) 1186 return (-1); 1187 else if (d1 > d2) 1188 return (+1); 1189 else 1190 return (0); 1191 } 1192 1193 if (!key1_read) { 1194 errno = 0; 1195 d1 = bwstod(kv1->k, &empty1); 1196 err1 = errno; 1197 } 1198 1199 if (!key2_read) { 1200 errno = 0; 1201 d2 = bwstod(kv2->k, &empty2); 1202 err2 = errno; 1203 } 1204 1205 /* Non-value case: */ 1206 if (empty1) 1207 return (empty2 ? 0 : -1); 1208 else if (empty2) 1209 return (+1); 1210 1211 /* NAN case */ 1212 if (is_nan(d1)) 1213 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1214 else if (is_nan(d2)) 1215 return (+1); 1216 1217 /* Infinities */ 1218 if (err1 == ERANGE || err2 == ERANGE) { 1219 /* Minus infinity case */ 1220 if (huge_minus(d1, err1)) { 1221 if (huge_minus(d2, err2)) { 1222 if (d1 < d2) 1223 return (-1); 1224 if (d1 > d2) 1225 return (+1); 1226 return (0); 1227 } else 1228 return (-1); 1229 1230 } else if (huge_minus(d2, err2)) { 1231 if (huge_minus(d1, err1)) { 1232 if (d1 < d2) 1233 return (-1); 1234 if (d1 > d2) 1235 return (+1); 1236 return (0); 1237 } else 1238 return (+1); 1239 } 1240 1241 /* Plus infinity case */ 1242 if (huge_plus(d1, err1)) { 1243 if (huge_plus(d2, err2)) { 1244 if (d1 < d2) 1245 return (-1); 1246 if (d1 > d2) 1247 return (+1); 1248 return (0); 1249 } else 1250 return (+1); 1251 } else if (huge_plus(d2, err2)) { 1252 if (huge_plus(d1, err1)) { 1253 if (d1 < d2) 1254 return (-1); 1255 if (d1 > d2) 1256 return (+1); 1257 return (0); 1258 } else 1259 return (-1); 1260 } 1261 } 1262 1263 if (d1 < d2) 1264 return (-1); 1265 if (d1 > d2) 1266 return (+1); 1267 1268 return (0); 1269 } 1270 1271 /* 1272 * Implements month sort (-M). 1273 */ 1274 static int 1275 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1276 { 1277 int val1, val2; 1278 bool key1_read, key2_read; 1279 1280 val1 = val2 = 0; 1281 key1_read = key2_read = false; 1282 1283 if (debug_sort) { 1284 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1285 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1286 } 1287 1288 if (kv1->hint->status == HS_UNINITIALIZED) { 1289 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1290 key1_read = true; 1291 kv1->hint->status = HS_INITIALIZED; 1292 } 1293 1294 if (kv2->hint->status == HS_UNINITIALIZED) { 1295 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1296 key2_read = true; 1297 kv2->hint->status = HS_INITIALIZED; 1298 } 1299 1300 if (kv1->hint->status == HS_INITIALIZED) { 1301 val1 = kv1->hint->v.Mh.m; 1302 key1_read = true; 1303 } 1304 1305 if (kv2->hint->status == HS_INITIALIZED) { 1306 val2 = kv2->hint->v.Mh.m; 1307 key2_read = true; 1308 } 1309 1310 if (!key1_read) 1311 val1 = bws_month_score(kv1->k); 1312 if (!key2_read) 1313 val2 = bws_month_score(kv2->k); 1314 1315 if (val1 == val2) { 1316 return (0); 1317 } 1318 if (val1 < val2) 1319 return (-1); 1320 return (+1); 1321 } 1322