1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <ctype.h> 34 #include <errno.h> 35 #include <err.h> 36 #include <langinfo.h> 37 #include <math.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "bwstring.h" 44 #include "sort.h" 45 46 bool byte_sort; 47 48 static wchar_t **wmonths; 49 static char **cmonths; 50 51 /* initialise months */ 52 53 void 54 initialise_months(void) 55 { 56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 58 ABMON_11, ABMON_12 }; 59 char *tmp; 60 size_t len; 61 62 if (MB_CUR_MAX == 1) { 63 if (cmonths == NULL) { 64 char *m; 65 66 cmonths = sort_malloc(sizeof(char*) * 12); 67 for (int i = 0; i < 12; i++) { 68 cmonths[i] = NULL; 69 tmp = nl_langinfo(item[i]); 70 if (debug_sort) 71 printf("month[%d]=%s\n", i, tmp); 72 if (*tmp == '\0') 73 continue; 74 m = sort_strdup(tmp); 75 len = strlen(tmp); 76 for (unsigned int j = 0; j < len; j++) 77 m[j] = toupper(m[j]); 78 cmonths[i] = m; 79 } 80 } 81 82 } else { 83 if (wmonths == NULL) { 84 wchar_t *m; 85 86 wmonths = sort_malloc(sizeof(wchar_t *) * 12); 87 for (int i = 0; i < 12; i++) { 88 wmonths[i] = NULL; 89 tmp = nl_langinfo(item[i]); 90 if (debug_sort) 91 printf("month[%d]=%s\n", i, tmp); 92 if (*tmp == '\0') 93 continue; 94 len = strlen(tmp); 95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 96 if (mbstowcs(m, tmp, len) == 97 ((size_t) - 1)) { 98 sort_free(m); 99 continue; 100 } 101 m[len] = L'\0'; 102 for (unsigned int j = 0; j < len; j++) 103 m[j] = towupper(m[j]); 104 wmonths[i] = m; 105 } 106 } 107 } 108 } 109 110 /* 111 * Compare two wide-character strings 112 */ 113 static int 114 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 115 { 116 int ret = 0; 117 118 errno = 0; 119 ret = wcscoll(s1, s2); 120 if (errno == EILSEQ) { 121 errno = 0; 122 ret = wcscmp(s1, s2); 123 if (errno != 0) { 124 for (size_t i = 0; ; ++i) { 125 wchar_t c1 = s1[i]; 126 wchar_t c2 = s2[i]; 127 if (c1 == L'\0') 128 return ((c2 == L'\0') ? 0 : -1); 129 if (c2 == L'\0') 130 return (+1); 131 if (c1 == c2) 132 continue; 133 return ((int)(c1 - c2)); 134 } 135 } 136 } 137 return (ret); 138 } 139 140 /* counterparts of wcs functions */ 141 142 void 143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 144 { 145 146 if (MB_CUR_MAX == 1) 147 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); 148 else 149 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); 150 } 151 152 const void* bwsrawdata(const struct bwstring *bws) 153 { 154 155 return (&(bws->data)); 156 } 157 158 size_t bwsrawlen(const struct bwstring *bws) 159 { 160 161 return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); 162 } 163 164 size_t 165 bws_memsize(const struct bwstring *bws) 166 { 167 168 return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : 169 (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); 170 } 171 172 void 173 bws_setlen(struct bwstring *bws, size_t newlen) 174 { 175 176 if (bws && newlen != bws->len && newlen <= bws->len) { 177 bws->len = newlen; 178 if (MB_CUR_MAX == 1) 179 bws->data.cstr[newlen] = '\0'; 180 else 181 bws->data.wstr[newlen] = L'\0'; 182 } 183 } 184 185 /* 186 * Allocate a new binary string of specified size 187 */ 188 struct bwstring * 189 bwsalloc(size_t sz) 190 { 191 struct bwstring *ret; 192 193 if (MB_CUR_MAX == 1) 194 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 195 else 196 ret = sort_malloc(sizeof(struct bwstring) + 197 SIZEOF_WCHAR_STRING(sz + 1)); 198 ret->len = sz; 199 200 if (MB_CUR_MAX == 1) 201 ret->data.cstr[ret->len] = '\0'; 202 else 203 ret->data.wstr[ret->len] = L'\0'; 204 205 return (ret); 206 } 207 208 /* 209 * Create a copy of binary string. 210 * New string size equals the length of the old string. 211 */ 212 struct bwstring * 213 bwsdup(const struct bwstring *s) 214 { 215 216 if (s == NULL) 217 return (NULL); 218 else { 219 struct bwstring *ret = bwsalloc(s->len); 220 221 if (MB_CUR_MAX == 1) 222 memcpy(ret->data.cstr, s->data.cstr, (s->len)); 223 else 224 memcpy(ret->data.wstr, s->data.wstr, 225 SIZEOF_WCHAR_STRING(s->len)); 226 227 return (ret); 228 } 229 } 230 231 /* 232 * Create a new binary string from a wide character buffer. 233 */ 234 struct bwstring * 235 bwssbdup(const wchar_t *str, size_t len) 236 { 237 238 if (str == NULL) 239 return ((len == 0) ? bwsalloc(0) : NULL); 240 else { 241 struct bwstring *ret; 242 243 ret = bwsalloc(len); 244 245 if (MB_CUR_MAX == 1) 246 for (size_t i = 0; i < len; ++i) 247 ret->data.cstr[i] = (unsigned char) str[i]; 248 else 249 memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); 250 251 return (ret); 252 } 253 } 254 255 /* 256 * Create a new binary string from a raw binary buffer. 257 */ 258 struct bwstring * 259 bwscsbdup(const unsigned char *str, size_t len) 260 { 261 struct bwstring *ret; 262 263 ret = bwsalloc(len); 264 265 if (str) { 266 if (MB_CUR_MAX == 1) 267 memcpy(ret->data.cstr, str, len); 268 else { 269 mbstate_t mbs; 270 const char *s; 271 size_t charlen, chars, cptr; 272 273 chars = 0; 274 cptr = 0; 275 s = (const char *) str; 276 277 memset(&mbs, 0, sizeof(mbs)); 278 279 while (cptr < len) { 280 size_t n = MB_CUR_MAX; 281 282 if (n > len - cptr) 283 n = len - cptr; 284 charlen = mbrlen(s + cptr, n, &mbs); 285 switch (charlen) { 286 case 0: 287 /* FALLTHROUGH */ 288 case (size_t) -1: 289 /* FALLTHROUGH */ 290 case (size_t) -2: 291 ret->data.wstr[chars++] = 292 (unsigned char) s[cptr]; 293 ++cptr; 294 break; 295 default: 296 n = mbrtowc(ret->data.wstr + (chars++), 297 s + cptr, charlen, &mbs); 298 if ((n == (size_t)-1) || (n == (size_t)-2)) 299 /* NOTREACHED */ 300 err(2, "mbrtowc error"); 301 cptr += charlen; 302 } 303 } 304 305 ret->len = chars; 306 ret->data.wstr[ret->len] = L'\0'; 307 } 308 } 309 return (ret); 310 } 311 312 /* 313 * De-allocate object memory 314 */ 315 void 316 bwsfree(const struct bwstring *s) 317 { 318 319 if (s) 320 sort_free(s); 321 } 322 323 /* 324 * Copy content of src binary string to dst. 325 * If the capacity of the dst string is not sufficient, 326 * then the data is truncated. 327 */ 328 size_t 329 bwscpy(struct bwstring *dst, const struct bwstring *src) 330 { 331 size_t nums = src->len; 332 333 if (nums > dst->len) 334 nums = dst->len; 335 dst->len = nums; 336 337 if (MB_CUR_MAX == 1) { 338 memcpy(dst->data.cstr, src->data.cstr, nums); 339 dst->data.cstr[dst->len] = '\0'; 340 } else { 341 memcpy(dst->data.wstr, src->data.wstr, 342 SIZEOF_WCHAR_STRING(nums + 1)); 343 dst->data.wstr[dst->len] = L'\0'; 344 } 345 346 return (nums); 347 } 348 349 /* 350 * Copy content of src binary string to dst, 351 * with specified number of symbols to be copied. 352 * If the capacity of the dst string is not sufficient, 353 * then the data is truncated. 354 */ 355 struct bwstring * 356 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 357 { 358 size_t nums = src->len; 359 360 if (nums > dst->len) 361 nums = dst->len; 362 if (nums > size) 363 nums = size; 364 dst->len = nums; 365 366 if (MB_CUR_MAX == 1) { 367 memcpy(dst->data.cstr, src->data.cstr, nums); 368 dst->data.cstr[dst->len] = '\0'; 369 } else { 370 memcpy(dst->data.wstr, src->data.wstr, 371 SIZEOF_WCHAR_STRING(nums + 1)); 372 dst->data.wstr[dst->len] = L'\0'; 373 } 374 375 return (dst); 376 } 377 378 /* 379 * Copy content of src binary string to dst, 380 * with specified number of symbols to be copied. 381 * An offset value can be specified, from the start of src string. 382 * If the capacity of the dst string is not sufficient, 383 * then the data is truncated. 384 */ 385 struct bwstring * 386 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 387 size_t size) 388 { 389 390 if (offset >= src->len) { 391 dst->data.wstr[0] = 0; 392 dst->len = 0; 393 } else { 394 size_t nums = src->len - offset; 395 396 if (nums > dst->len) 397 nums = dst->len; 398 if (nums > size) 399 nums = size; 400 dst->len = nums; 401 if (MB_CUR_MAX == 1) { 402 memcpy(dst->data.cstr, src->data.cstr + offset, 403 (nums)); 404 dst->data.cstr[dst->len] = '\0'; 405 } else { 406 memcpy(dst->data.wstr, src->data.wstr + offset, 407 SIZEOF_WCHAR_STRING(nums)); 408 dst->data.wstr[dst->len] = L'\0'; 409 } 410 } 411 return (dst); 412 } 413 414 /* 415 * Write binary string to the file. 416 * The output is ended either with '\n' (nl == true) 417 * or '\0' (nl == false). 418 */ 419 size_t 420 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 421 { 422 423 if (MB_CUR_MAX == 1) { 424 size_t len = bws->len; 425 426 if (!zero_ended) { 427 bws->data.cstr[len] = '\n'; 428 429 if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 430 err(2, NULL); 431 432 bws->data.cstr[len] = '\0'; 433 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 434 err(2, NULL); 435 436 return (len + 1); 437 438 } else { 439 wchar_t eols; 440 size_t printed = 0; 441 442 eols = zero_ended ? btowc('\0') : btowc('\n'); 443 444 while (printed < BWSLEN(bws)) { 445 const wchar_t *s = bws->data.wstr + printed; 446 447 if (*s == L'\0') { 448 int nums; 449 450 nums = fwprintf(f, L"%lc", *s); 451 452 if (nums != 1) 453 err(2, NULL); 454 ++printed; 455 } else { 456 int nums; 457 458 nums = fwprintf(f, L"%ls", s); 459 460 if (nums < 1) 461 err(2, NULL); 462 printed += nums; 463 } 464 } 465 fwprintf(f, L"%lc", eols); 466 return (printed + 1); 467 } 468 } 469 470 /* 471 * Allocate and read a binary string from file. 472 * The strings are nl-ended or zero-ended, depending on the sort setting. 473 */ 474 struct bwstring * 475 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 476 { 477 wint_t eols; 478 479 eols = zero_ended ? btowc('\0') : btowc('\n'); 480 481 if (!zero_ended && (MB_CUR_MAX > 1)) { 482 wchar_t *ret; 483 484 ret = fgetwln(f, len); 485 486 if (ret == NULL) { 487 if (!feof(f)) 488 err(2, NULL); 489 return (NULL); 490 } 491 if (*len > 0) { 492 if (ret[*len - 1] == (wchar_t)eols) 493 --(*len); 494 } 495 return (bwssbdup(ret, *len)); 496 497 } else if (!zero_ended && (MB_CUR_MAX == 1)) { 498 char *ret; 499 500 ret = fgetln(f, len); 501 502 if (ret == NULL) { 503 if (!feof(f)) 504 err(2, NULL); 505 return (NULL); 506 } 507 if (*len > 0) { 508 if (ret[*len - 1] == '\n') 509 --(*len); 510 } 511 return (bwscsbdup((unsigned char*)ret, *len)); 512 513 } else { 514 *len = 0; 515 516 if (feof(f)) 517 return (NULL); 518 519 if (2 >= rb->fgetwln_z_buffer_size) { 520 rb->fgetwln_z_buffer_size += 256; 521 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 522 sizeof(wchar_t) * rb->fgetwln_z_buffer_size); 523 } 524 rb->fgetwln_z_buffer[*len] = 0; 525 526 if (MB_CUR_MAX == 1) 527 while (!feof(f)) { 528 int c; 529 530 c = fgetc(f); 531 532 if (c == EOF) { 533 if (*len == 0) 534 return (NULL); 535 goto line_read_done; 536 } 537 if (c == eols) 538 goto line_read_done; 539 540 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 541 rb->fgetwln_z_buffer_size += 256; 542 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 543 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 544 } 545 546 rb->fgetwln_z_buffer[*len] = c; 547 rb->fgetwln_z_buffer[++(*len)] = 0; 548 } 549 else 550 while (!feof(f)) { 551 wint_t c = 0; 552 553 c = fgetwc(f); 554 555 if (c == WEOF) { 556 if (*len == 0) 557 return (NULL); 558 goto line_read_done; 559 } 560 if (c == eols) 561 goto line_read_done; 562 563 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 564 rb->fgetwln_z_buffer_size += 256; 565 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 566 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 567 } 568 569 rb->fgetwln_z_buffer[*len] = c; 570 rb->fgetwln_z_buffer[++(*len)] = 0; 571 } 572 573 line_read_done: 574 /* we do not count the last 0 */ 575 return (bwssbdup(rb->fgetwln_z_buffer, *len)); 576 } 577 } 578 579 int 580 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 581 size_t offset, size_t len) 582 { 583 size_t cmp_len, len1, len2; 584 int res = 0; 585 586 len1 = bws1->len; 587 len2 = bws2->len; 588 589 if (len1 <= offset) { 590 return ((len2 <= offset) ? 0 : -1); 591 } else { 592 if (len2 <= offset) 593 return (+1); 594 else { 595 len1 -= offset; 596 len2 -= offset; 597 598 cmp_len = len1; 599 600 if (len2 < cmp_len) 601 cmp_len = len2; 602 603 if (len < cmp_len) 604 cmp_len = len; 605 606 if (MB_CUR_MAX == 1) { 607 const unsigned char *s1, *s2; 608 609 s1 = bws1->data.cstr + offset; 610 s2 = bws2->data.cstr + offset; 611 612 res = memcmp(s1, s2, cmp_len); 613 614 } else { 615 const wchar_t *s1, *s2; 616 617 s1 = bws1->data.wstr + offset; 618 s2 = bws2->data.wstr + offset; 619 620 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 621 } 622 } 623 } 624 625 if (res == 0) { 626 if (len1 < cmp_len && len1 < len2) 627 res = -1; 628 else if (len2 < cmp_len && len2 < len1) 629 res = +1; 630 } 631 632 return (res); 633 } 634 635 int 636 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 637 { 638 size_t len1, len2, cmp_len; 639 int res; 640 641 len1 = bws1->len; 642 len2 = bws2->len; 643 644 len1 -= offset; 645 len2 -= offset; 646 647 cmp_len = len1; 648 649 if (len2 < cmp_len) 650 cmp_len = len2; 651 652 res = bwsncmp(bws1, bws2, offset, cmp_len); 653 654 if (res == 0) { 655 if( len1 < len2) 656 res = -1; 657 else if (len2 < len1) 658 res = +1; 659 } 660 661 return (res); 662 } 663 664 int 665 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 666 { 667 wchar_t c1, c2; 668 size_t i = 0; 669 670 for (i = 0; i < len; ++i) { 671 c1 = bws_get_iter_value(iter1); 672 c2 = bws_get_iter_value(iter2); 673 if (c1 != c2) 674 return (c1 - c2); 675 iter1 = bws_iterator_inc(iter1, 1); 676 iter2 = bws_iterator_inc(iter2, 1); 677 } 678 679 return (0); 680 } 681 682 int 683 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 684 { 685 size_t len1, len2; 686 687 len1 = bws1->len; 688 len2 = bws2->len; 689 690 if (len1 <= offset) 691 return ((len2 <= offset) ? 0 : -1); 692 else { 693 if (len2 <= offset) 694 return (+1); 695 else { 696 len1 -= offset; 697 len2 -= offset; 698 699 if (MB_CUR_MAX == 1) { 700 const unsigned char *s1, *s2; 701 702 s1 = bws1->data.cstr + offset; 703 s2 = bws2->data.cstr + offset; 704 705 if (byte_sort) { 706 int res = 0; 707 708 if (len1 > len2) { 709 res = memcmp(s1, s2, len2); 710 if (!res) 711 res = +1; 712 } else if (len1 < len2) { 713 res = memcmp(s1, s2, len1); 714 if (!res) 715 res = -1; 716 } else 717 res = memcmp(s1, s2, len1); 718 719 return (res); 720 721 } else { 722 int res = 0; 723 size_t i, maxlen; 724 725 i = 0; 726 maxlen = len1; 727 728 if (maxlen > len2) 729 maxlen = len2; 730 731 while (i < maxlen) { 732 /* goto next non-zero part: */ 733 while ((i < maxlen) && 734 !s1[i] && !s2[i]) 735 ++i; 736 737 if (i >= maxlen) 738 break; 739 740 if (s1[i] == 0) { 741 if (s2[i] == 0) 742 /* NOTREACHED */ 743 err(2, "bwscoll error 01"); 744 else 745 return (-1); 746 } else if (s2[i] == 0) 747 return (+1); 748 749 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 750 if (res) 751 return (res); 752 753 while ((i < maxlen) && 754 s1[i] && s2[i]) 755 ++i; 756 757 if (i >= maxlen) 758 break; 759 760 if (s1[i] == 0) { 761 if (s2[i] == 0) { 762 ++i; 763 continue; 764 } else 765 return (-1); 766 } else if (s2[i] == 0) 767 return (+1); 768 else 769 /* NOTREACHED */ 770 err(2, "bwscoll error 02"); 771 } 772 773 if (len1 < len2) 774 return (-1); 775 else if (len1 > len2) 776 return (+1); 777 778 return (0); 779 } 780 } else { 781 const wchar_t *s1, *s2; 782 size_t i, maxlen; 783 int res = 0; 784 785 s1 = bws1->data.wstr + offset; 786 s2 = bws2->data.wstr + offset; 787 788 i = 0; 789 maxlen = len1; 790 791 if (maxlen > len2) 792 maxlen = len2; 793 794 while (i < maxlen) { 795 796 /* goto next non-zero part: */ 797 while ((i < maxlen) && 798 !s1[i] && !s2[i]) 799 ++i; 800 801 if (i >= maxlen) 802 break; 803 804 if (s1[i] == 0) { 805 if (s2[i] == 0) 806 /* NOTREACHED */ 807 err(2, "bwscoll error 1"); 808 else 809 return (-1); 810 } else if (s2[i] == 0) 811 return (+1); 812 813 res = wide_str_coll(s1 + i, s2 + i); 814 if (res) 815 return (res); 816 817 while ((i < maxlen) && s1[i] && s2[i]) 818 ++i; 819 820 if (i >= maxlen) 821 break; 822 823 if (s1[i] == 0) { 824 if (s2[i] == 0) { 825 ++i; 826 continue; 827 } else 828 return (-1); 829 } else if (s2[i] == 0) 830 return (+1); 831 else 832 /* NOTREACHED */ 833 err(2, "bwscoll error 2"); 834 } 835 836 if (len1 < len2) 837 return (-1); 838 else if (len1 > len2) 839 return (+1); 840 841 return (0); 842 } 843 } 844 } 845 } 846 847 /* 848 * Correction of the system API 849 */ 850 double 851 bwstod(struct bwstring *s0, bool *empty) 852 { 853 double ret = 0; 854 855 if (MB_CUR_MAX == 1) { 856 unsigned char *end, *s; 857 char *ep; 858 859 s = s0->data.cstr; 860 end = s + s0->len; 861 ep = NULL; 862 863 while (isblank(*s) && s < end) 864 ++s; 865 866 if (!isprint(*s)) { 867 *empty = true; 868 return (0); 869 } 870 871 ret = strtod((char*)s, &ep); 872 if ((unsigned char*) ep == s) { 873 *empty = true; 874 return (0); 875 } 876 } else { 877 wchar_t *end, *ep, *s; 878 879 s = s0->data.wstr; 880 end = s + s0->len; 881 ep = NULL; 882 883 while (iswblank(*s) && s < end) 884 ++s; 885 886 if (!iswprint(*s)) { 887 *empty = true; 888 return (0); 889 } 890 891 ret = wcstod(s, &ep); 892 if (ep == s) { 893 *empty = true; 894 return (0); 895 } 896 } 897 898 *empty = false; 899 return (ret); 900 } 901 902 /* 903 * A helper function for monthcoll. If a line matches 904 * a month name, it returns (number of the month - 1), 905 * while if there is no match, it just return -1. 906 */ 907 908 int 909 bws_month_score(const struct bwstring *s0) 910 { 911 912 if (MB_CUR_MAX == 1) { 913 const unsigned char *end, *s; 914 915 s = s0->data.cstr; 916 end = s + s0->len; 917 918 while (isblank(*s) && s < end) 919 ++s; 920 921 for (int i = 11; i >= 0; --i) { 922 if (cmonths[i] && 923 (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) 924 return (i); 925 } 926 927 } else { 928 const wchar_t *end, *s; 929 930 s = s0->data.wstr; 931 end = s + s0->len; 932 933 while (iswblank(*s) && s < end) 934 ++s; 935 936 for (int i = 11; i >= 0; --i) { 937 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 938 return (i); 939 } 940 } 941 942 return (-1); 943 } 944 945 /* 946 * Rips out leading blanks (-b). 947 */ 948 struct bwstring * 949 ignore_leading_blanks(struct bwstring *str) 950 { 951 952 if (MB_CUR_MAX == 1) { 953 unsigned char *dst, *end, *src; 954 955 src = str->data.cstr; 956 dst = src; 957 end = src + str->len; 958 959 while (src < end && isblank(*src)) 960 ++src; 961 962 if (src != dst) { 963 size_t newlen; 964 965 newlen = BWSLEN(str) - (src - dst); 966 967 while (src < end) { 968 *dst = *src; 969 ++dst; 970 ++src; 971 } 972 bws_setlen(str, newlen); 973 } 974 } else { 975 wchar_t *dst, *end, *src; 976 977 src = str->data.wstr; 978 dst = src; 979 end = src + str->len; 980 981 while (src < end && iswblank(*src)) 982 ++src; 983 984 if (src != dst) { 985 986 size_t newlen = BWSLEN(str) - (src - dst); 987 988 while (src < end) { 989 *dst = *src; 990 ++dst; 991 ++src; 992 } 993 bws_setlen(str, newlen); 994 995 } 996 } 997 return (str); 998 } 999 1000 /* 1001 * Rips out nonprinting characters (-i). 1002 */ 1003 struct bwstring * 1004 ignore_nonprinting(struct bwstring *str) 1005 { 1006 size_t newlen = str->len; 1007 1008 if (MB_CUR_MAX == 1) { 1009 unsigned char *dst, *end, *src; 1010 unsigned char c; 1011 1012 src = str->data.cstr; 1013 dst = src; 1014 end = src + str->len; 1015 1016 while (src < end) { 1017 c = *src; 1018 if (isprint(c)) { 1019 *dst = c; 1020 ++dst; 1021 ++src; 1022 } else { 1023 ++src; 1024 --newlen; 1025 } 1026 } 1027 } else { 1028 wchar_t *dst, *end, *src; 1029 wchar_t c; 1030 1031 src = str->data.wstr; 1032 dst = src; 1033 end = src + str->len; 1034 1035 while (src < end) { 1036 c = *src; 1037 if (iswprint(c)) { 1038 *dst = c; 1039 ++dst; 1040 ++src; 1041 } else { 1042 ++src; 1043 --newlen; 1044 } 1045 } 1046 } 1047 bws_setlen(str, newlen); 1048 1049 return (str); 1050 } 1051 1052 /* 1053 * Rips out any characters that are not alphanumeric characters 1054 * nor blanks (-d). 1055 */ 1056 struct bwstring * 1057 dictionary_order(struct bwstring *str) 1058 { 1059 size_t newlen = str->len; 1060 1061 if (MB_CUR_MAX == 1) { 1062 unsigned char *dst, *end, *src; 1063 unsigned char c; 1064 1065 src = str->data.cstr; 1066 dst = src; 1067 end = src + str->len; 1068 1069 while (src < end) { 1070 c = *src; 1071 if (isalnum(c) || isblank(c)) { 1072 *dst = c; 1073 ++dst; 1074 ++src; 1075 } else { 1076 ++src; 1077 --newlen; 1078 } 1079 } 1080 } else { 1081 wchar_t *dst, *end, *src; 1082 wchar_t c; 1083 1084 src = str->data.wstr; 1085 dst = src; 1086 end = src + str->len; 1087 1088 while (src < end) { 1089 c = *src; 1090 if (iswalnum(c) || iswblank(c)) { 1091 *dst = c; 1092 ++dst; 1093 ++src; 1094 } else { 1095 ++src; 1096 --newlen; 1097 } 1098 } 1099 } 1100 bws_setlen(str, newlen); 1101 1102 return (str); 1103 } 1104 1105 /* 1106 * Converts string to lower case(-f). 1107 */ 1108 struct bwstring * 1109 ignore_case(struct bwstring *str) 1110 { 1111 1112 if (MB_CUR_MAX == 1) { 1113 unsigned char *end, *s; 1114 1115 s = str->data.cstr; 1116 end = s + str->len; 1117 1118 while (s < end) { 1119 *s = toupper(*s); 1120 ++s; 1121 } 1122 } else { 1123 wchar_t *end, *s; 1124 1125 s = str->data.wstr; 1126 end = s + str->len; 1127 1128 while (s < end) { 1129 *s = towupper(*s); 1130 ++s; 1131 } 1132 } 1133 return (str); 1134 } 1135 1136 void 1137 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1138 { 1139 1140 if (MB_CUR_MAX == 1) 1141 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); 1142 else 1143 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); 1144 } 1145