1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <ctype.h> 34 #include <errno.h> 35 #include <err.h> 36 #include <langinfo.h> 37 #include <math.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "bwstring.h" 44 #include "sort.h" 45 46 bool byte_sort; 47 48 static wchar_t **wmonths; 49 static char **cmonths; 50 51 /* initialise months */ 52 53 void 54 initialise_months(void) 55 { 56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 58 ABMON_11, ABMON_12 }; 59 char *tmp; 60 size_t len; 61 62 if (mb_cur_max == 1) { 63 if (cmonths == NULL) { 64 char *m; 65 66 cmonths = sort_malloc(sizeof(char*) * 12); 67 for (int i = 0; i < 12; i++) { 68 cmonths[i] = NULL; 69 tmp = nl_langinfo(item[i]); 70 if (debug_sort) 71 printf("month[%d]=%s\n", i, tmp); 72 if (*tmp == '\0') 73 continue; 74 m = sort_strdup(tmp); 75 len = strlen(tmp); 76 for (unsigned int j = 0; j < len; j++) 77 m[j] = toupper(m[j]); 78 cmonths[i] = m; 79 } 80 } 81 82 } else { 83 if (wmonths == NULL) { 84 wchar_t *m; 85 86 wmonths = sort_malloc(sizeof(wchar_t *) * 12); 87 for (int i = 0; i < 12; i++) { 88 wmonths[i] = NULL; 89 tmp = nl_langinfo(item[i]); 90 if (debug_sort) 91 printf("month[%d]=%s\n", i, tmp); 92 if (*tmp == '\0') 93 continue; 94 len = strlen(tmp); 95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 96 if (mbstowcs(m, tmp, len) == 97 ((size_t) - 1)) { 98 sort_free(m); 99 continue; 100 } 101 m[len] = L'\0'; 102 for (unsigned int j = 0; j < len; j++) 103 m[j] = towupper(m[j]); 104 wmonths[i] = m; 105 } 106 } 107 } 108 } 109 110 /* 111 * Compare two wide-character strings 112 */ 113 static int 114 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 115 { 116 int ret; 117 118 errno = 0; 119 ret = wcscoll(s1, s2); 120 if (errno == EILSEQ) { 121 errno = 0; 122 ret = wcscmp(s1, s2); 123 if (errno != 0) { 124 for (size_t i = 0; ; ++i) { 125 wchar_t c1 = s1[i]; 126 wchar_t c2 = s2[i]; 127 if (c1 == L'\0') 128 return ((c2 == L'\0') ? 0 : -1); 129 if (c2 == L'\0') 130 return (+1); 131 if (c1 == c2) 132 continue; 133 return ((int)(c1 - c2)); 134 } 135 } 136 } 137 return (ret); 138 } 139 140 /* counterparts of wcs functions */ 141 142 void 143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 144 { 145 146 if (mb_cur_max == 1) 147 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix); 148 else 149 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix); 150 } 151 152 const void* bwsrawdata(const struct bwstring *bws) 153 { 154 155 return (bws->wdata.str); 156 } 157 158 size_t bwsrawlen(const struct bwstring *bws) 159 { 160 161 return ((mb_cur_max == 1) ? bws->cdata.len : 162 SIZEOF_WCHAR_STRING(bws->wdata.len)); 163 } 164 165 size_t 166 bws_memsize(const struct bwstring *bws) 167 { 168 169 return ((mb_cur_max == 1) ? 170 (bws->cdata.len + 2 + sizeof(struct bwstring)) : 171 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring))); 172 } 173 174 void 175 bws_setlen(struct bwstring *bws, size_t newlen) 176 { 177 178 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len && 179 newlen <= bws->cdata.len) { 180 bws->cdata.len = newlen; 181 bws->cdata.str[newlen] = '\0'; 182 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) { 183 bws->wdata.len = newlen; 184 bws->wdata.str[newlen] = L'\0'; 185 } 186 } 187 188 /* 189 * Allocate a new binary string of specified size 190 */ 191 struct bwstring * 192 bwsalloc(size_t sz) 193 { 194 struct bwstring *ret; 195 196 if (mb_cur_max == 1) { 197 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 198 ret->cdata.len = sz; 199 ret->cdata.str[sz] = '\0'; 200 } else { 201 ret = sort_malloc( 202 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); 203 ret->wdata.len = sz; 204 ret->wdata.str[sz] = L'\0'; 205 } 206 207 return (ret); 208 } 209 210 /* 211 * Create a copy of binary string. 212 * New string size equals the length of the old string. 213 */ 214 struct bwstring * 215 bwsdup(const struct bwstring *s) 216 { 217 218 if (s == NULL) 219 return (NULL); 220 else { 221 struct bwstring *ret = bwsalloc(BWSLEN(s)); 222 223 if (mb_cur_max == 1) 224 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len)); 225 else 226 memcpy(ret->wdata.str, s->wdata.str, 227 SIZEOF_WCHAR_STRING(s->wdata.len)); 228 229 return (ret); 230 } 231 } 232 233 /* 234 * Create a new binary string from a wide character buffer. 235 */ 236 struct bwstring * 237 bwssbdup(const wchar_t *str, size_t len) 238 { 239 240 if (str == NULL) 241 return ((len == 0) ? bwsalloc(0) : NULL); 242 else { 243 struct bwstring *ret; 244 245 ret = bwsalloc(len); 246 247 if (mb_cur_max == 1) 248 for (size_t i = 0; i < len; ++i) 249 ret->cdata.str[i] = (char)str[i]; 250 else 251 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len)); 252 253 return (ret); 254 } 255 } 256 257 /* 258 * Create a new binary string from a raw binary buffer. 259 */ 260 struct bwstring * 261 bwscsbdup(const unsigned char *str, size_t len) 262 { 263 struct bwstring *ret; 264 265 ret = bwsalloc(len); 266 267 if (str) { 268 if (mb_cur_max == 1) 269 memcpy(ret->cdata.str, str, len); 270 else { 271 mbstate_t mbs; 272 const char *s; 273 size_t charlen, chars, cptr; 274 275 chars = 0; 276 cptr = 0; 277 s = (const char *) str; 278 279 memset(&mbs, 0, sizeof(mbs)); 280 281 while (cptr < len) { 282 size_t n = mb_cur_max; 283 284 if (n > len - cptr) 285 n = len - cptr; 286 charlen = mbrlen(s + cptr, n, &mbs); 287 switch (charlen) { 288 case 0: 289 /* FALLTHROUGH */ 290 case (size_t) -1: 291 /* FALLTHROUGH */ 292 case (size_t) -2: 293 ret->wdata.str[chars++] = 294 (unsigned char) s[cptr]; 295 ++cptr; 296 break; 297 default: 298 n = mbrtowc(ret->wdata.str + (chars++), 299 s + cptr, charlen, &mbs); 300 if ((n == (size_t)-1) || (n == (size_t)-2)) 301 /* NOTREACHED */ 302 err(2, "mbrtowc error"); 303 cptr += charlen; 304 } 305 } 306 307 ret->wdata.len = chars; 308 ret->wdata.str[ret->wdata.len] = L'\0'; 309 } 310 } 311 return (ret); 312 } 313 314 /* 315 * De-allocate object memory 316 */ 317 void 318 bwsfree(const struct bwstring *s) 319 { 320 321 if (s) 322 sort_free(s); 323 } 324 325 /* 326 * Copy content of src binary string to dst. 327 * If the capacity of the dst string is not sufficient, 328 * then the data is truncated. 329 */ 330 size_t 331 bwscpy(struct bwstring *dst, const struct bwstring *src) 332 { 333 size_t nums = BWSLEN(src); 334 335 if (nums > BWSLEN(dst)) 336 nums = BWSLEN(dst); 337 338 if (mb_cur_max == 1) { 339 memcpy(dst->cdata.str, src->cdata.str, nums); 340 dst->cdata.len = nums; 341 dst->cdata.str[dst->cdata.len] = '\0'; 342 } else { 343 memcpy(dst->wdata.str, src->wdata.str, 344 SIZEOF_WCHAR_STRING(nums)); 345 dst->wdata.len = nums; 346 dst->wdata.str[nums] = L'\0'; 347 } 348 349 return (nums); 350 } 351 352 /* 353 * Copy content of src binary string to dst, 354 * with specified number of symbols to be copied. 355 * If the capacity of the dst string is not sufficient, 356 * then the data is truncated. 357 */ 358 struct bwstring * 359 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 360 { 361 size_t nums = BWSLEN(src); 362 363 if (nums > BWSLEN(dst)) 364 nums = BWSLEN(dst); 365 if (nums > size) 366 nums = size; 367 368 if (mb_cur_max == 1) { 369 memcpy(dst->cdata.str, src->cdata.str, nums); 370 dst->cdata.len = nums; 371 dst->cdata.str[nums] = '\0'; 372 } else { 373 memcpy(dst->wdata.str, src->wdata.str, 374 SIZEOF_WCHAR_STRING(nums)); 375 dst->wdata.len = nums; 376 dst->wdata.str[nums] = L'\0'; 377 } 378 379 return (dst); 380 } 381 382 /* 383 * Copy content of src binary string to dst, 384 * with specified number of symbols to be copied. 385 * An offset value can be specified, from the start of src string. 386 * If the capacity of the dst string is not sufficient, 387 * then the data is truncated. 388 */ 389 struct bwstring * 390 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 391 size_t size) 392 { 393 394 if (offset >= BWSLEN(src)) { 395 bws_setlen(dst, 0); 396 } else { 397 size_t nums = BWSLEN(src) - offset; 398 399 if (nums > BWSLEN(dst)) 400 nums = BWSLEN(dst); 401 if (nums > size) 402 nums = size; 403 if (mb_cur_max == 1) { 404 memcpy(dst->cdata.str, src->cdata.str + offset, nums); 405 dst->cdata.len = nums; 406 dst->cdata.str[nums] = '\0'; 407 } else { 408 memcpy(dst->wdata.str, src->wdata.str + offset, 409 SIZEOF_WCHAR_STRING(nums)); 410 dst->wdata.len = nums; 411 dst->wdata.str[nums] = L'\0'; 412 } 413 } 414 return (dst); 415 } 416 417 /* 418 * Write binary string to the file. 419 * The output is ended either with '\n' (nl == true) 420 * or '\0' (nl == false). 421 */ 422 size_t 423 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 424 { 425 426 if (mb_cur_max == 1) { 427 size_t len = bws->cdata.len; 428 429 if (!zero_ended) { 430 bws->cdata.str[len] = '\n'; 431 432 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 433 err(2, NULL); 434 435 bws->cdata.str[len] = '\0'; 436 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 437 err(2, NULL); 438 439 return (len + 1); 440 441 } else { 442 wchar_t eols; 443 size_t printed = 0; 444 445 eols = zero_ended ? btowc('\0') : btowc('\n'); 446 447 while (printed < BWSLEN(bws)) { 448 const wchar_t *s = bws->wdata.str + printed; 449 450 if (*s == L'\0') { 451 int nums; 452 453 nums = fwprintf(f, L"%lc", *s); 454 455 if (nums != 1) 456 err(2, NULL); 457 ++printed; 458 } else { 459 int nums; 460 461 nums = fwprintf(f, L"%ls", s); 462 463 if (nums < 1) 464 err(2, NULL); 465 printed += nums; 466 } 467 } 468 fwprintf(f, L"%lc", eols); 469 return (printed + 1); 470 } 471 } 472 473 /* 474 * Allocate and read a binary string from file. 475 * The strings are nl-ended or zero-ended, depending on the sort setting. 476 */ 477 struct bwstring * 478 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 479 { 480 wint_t eols; 481 482 eols = zero_ended ? btowc('\0') : btowc('\n'); 483 484 if (!zero_ended && (mb_cur_max > 1)) { 485 wchar_t *ret; 486 487 ret = fgetwln(f, len); 488 489 if (ret == NULL) { 490 if (!feof(f)) 491 err(2, NULL); 492 return (NULL); 493 } 494 if (*len > 0) { 495 if (ret[*len - 1] == (wchar_t)eols) 496 --(*len); 497 } 498 return (bwssbdup(ret, *len)); 499 500 } else if (!zero_ended && (mb_cur_max == 1)) { 501 char *ret; 502 503 ret = fgetln(f, len); 504 505 if (ret == NULL) { 506 if (!feof(f)) 507 err(2, NULL); 508 return (NULL); 509 } 510 if (*len > 0) { 511 if (ret[*len - 1] == '\n') 512 --(*len); 513 } 514 return (bwscsbdup((unsigned char *)ret, *len)); 515 516 } else { 517 *len = 0; 518 519 if (feof(f)) 520 return (NULL); 521 522 if (2 >= rb->fgetwln_z_buffer_size) { 523 rb->fgetwln_z_buffer_size += 256; 524 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 525 sizeof(wchar_t) * rb->fgetwln_z_buffer_size); 526 } 527 rb->fgetwln_z_buffer[*len] = 0; 528 529 if (mb_cur_max == 1) 530 while (!feof(f)) { 531 int c; 532 533 c = fgetc(f); 534 535 if (c == EOF) { 536 if (*len == 0) 537 return (NULL); 538 goto line_read_done; 539 } 540 if (c == eols) 541 goto line_read_done; 542 543 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 544 rb->fgetwln_z_buffer_size += 256; 545 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 546 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 547 } 548 549 rb->fgetwln_z_buffer[*len] = c; 550 rb->fgetwln_z_buffer[++(*len)] = 0; 551 } 552 else 553 while (!feof(f)) { 554 wint_t c; 555 556 c = fgetwc(f); 557 558 if (c == WEOF) { 559 if (*len == 0) 560 return (NULL); 561 goto line_read_done; 562 } 563 if (c == eols) 564 goto line_read_done; 565 566 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 567 rb->fgetwln_z_buffer_size += 256; 568 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 569 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 570 } 571 572 rb->fgetwln_z_buffer[*len] = c; 573 rb->fgetwln_z_buffer[++(*len)] = 0; 574 } 575 576 line_read_done: 577 /* we do not count the last 0 */ 578 return (bwssbdup(rb->fgetwln_z_buffer, *len)); 579 } 580 } 581 582 int 583 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 584 size_t offset, size_t len) 585 { 586 size_t cmp_len, len1, len2; 587 int res; 588 589 len1 = BWSLEN(bws1); 590 len2 = BWSLEN(bws2); 591 592 if (len1 <= offset) { 593 return ((len2 <= offset) ? 0 : -1); 594 } else { 595 if (len2 <= offset) 596 return (+1); 597 else { 598 len1 -= offset; 599 len2 -= offset; 600 601 cmp_len = len1; 602 603 if (len2 < cmp_len) 604 cmp_len = len2; 605 606 if (len < cmp_len) 607 cmp_len = len; 608 609 if (mb_cur_max == 1) { 610 const char *s1, *s2; 611 612 s1 = bws1->cdata.str + offset; 613 s2 = bws2->cdata.str + offset; 614 615 res = memcmp(s1, s2, cmp_len); 616 617 } else { 618 const wchar_t *s1, *s2; 619 620 s1 = bws1->wdata.str + offset; 621 s2 = bws2->wdata.str + offset; 622 623 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 624 } 625 } 626 } 627 628 if (res == 0) { 629 if (len1 < cmp_len && len1 < len2) 630 res = -1; 631 else if (len2 < cmp_len && len2 < len1) 632 res = +1; 633 } 634 635 return (res); 636 } 637 638 int 639 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 640 { 641 size_t len1, len2, cmp_len; 642 int res; 643 644 len1 = BWSLEN(bws1); 645 len2 = BWSLEN(bws2); 646 647 len1 -= offset; 648 len2 -= offset; 649 650 cmp_len = len1; 651 652 if (len2 < cmp_len) 653 cmp_len = len2; 654 655 res = bwsncmp(bws1, bws2, offset, cmp_len); 656 657 if (res == 0) { 658 if( len1 < len2) 659 res = -1; 660 else if (len2 < len1) 661 res = +1; 662 } 663 664 return (res); 665 } 666 667 int 668 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 669 { 670 wchar_t c1, c2; 671 size_t i; 672 673 for (i = 0; i < len; ++i) { 674 c1 = bws_get_iter_value(iter1); 675 c2 = bws_get_iter_value(iter2); 676 if (c1 != c2) 677 return (c1 - c2); 678 iter1 = bws_iterator_inc(iter1, 1); 679 iter2 = bws_iterator_inc(iter2, 1); 680 } 681 682 return (0); 683 } 684 685 int 686 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 687 { 688 size_t len1, len2; 689 690 len1 = BWSLEN(bws1); 691 len2 = BWSLEN(bws2); 692 693 if (len1 <= offset) 694 return ((len2 <= offset) ? 0 : -1); 695 else { 696 if (len2 <= offset) 697 return (+1); 698 else { 699 len1 -= offset; 700 len2 -= offset; 701 702 if (mb_cur_max == 1) { 703 const char *s1, *s2; 704 705 s1 = bws1->cdata.str + offset; 706 s2 = bws2->cdata.str + offset; 707 708 if (byte_sort) { 709 int res; 710 711 if (len1 > len2) { 712 res = memcmp(s1, s2, len2); 713 if (!res) 714 res = +1; 715 } else if (len1 < len2) { 716 res = memcmp(s1, s2, len1); 717 if (!res) 718 res = -1; 719 } else 720 res = memcmp(s1, s2, len1); 721 722 return (res); 723 724 } else { 725 int res; 726 size_t i, maxlen; 727 728 i = 0; 729 maxlen = len1; 730 731 if (maxlen > len2) 732 maxlen = len2; 733 734 while (i < maxlen) { 735 /* goto next non-zero part: */ 736 while ((i < maxlen) && 737 !s1[i] && !s2[i]) 738 ++i; 739 740 if (i >= maxlen) 741 break; 742 743 if (s1[i] == 0) { 744 if (s2[i] == 0) 745 /* NOTREACHED */ 746 err(2, "bwscoll error 01"); 747 else 748 return (-1); 749 } else if (s2[i] == 0) 750 return (+1); 751 752 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 753 if (res) 754 return (res); 755 756 while ((i < maxlen) && 757 s1[i] && s2[i]) 758 ++i; 759 760 if (i >= maxlen) 761 break; 762 763 if (s1[i] == 0) { 764 if (s2[i] == 0) { 765 ++i; 766 continue; 767 } else 768 return (-1); 769 } else if (s2[i] == 0) 770 return (+1); 771 else 772 /* NOTREACHED */ 773 err(2, "bwscoll error 02"); 774 } 775 776 if (len1 < len2) 777 return (-1); 778 else if (len1 > len2) 779 return (+1); 780 781 return (0); 782 } 783 } else { 784 const wchar_t *s1, *s2; 785 size_t i, maxlen; 786 int res; 787 788 s1 = bws1->wdata.str + offset; 789 s2 = bws2->wdata.str + offset; 790 791 i = 0; 792 maxlen = len1; 793 794 if (maxlen > len2) 795 maxlen = len2; 796 797 while (i < maxlen) { 798 799 /* goto next non-zero part: */ 800 while ((i < maxlen) && 801 !s1[i] && !s2[i]) 802 ++i; 803 804 if (i >= maxlen) 805 break; 806 807 if (s1[i] == 0) { 808 if (s2[i] == 0) 809 /* NOTREACHED */ 810 err(2, "bwscoll error 1"); 811 else 812 return (-1); 813 } else if (s2[i] == 0) 814 return (+1); 815 816 res = wide_str_coll(s1 + i, s2 + i); 817 if (res) 818 return (res); 819 820 while ((i < maxlen) && s1[i] && s2[i]) 821 ++i; 822 823 if (i >= maxlen) 824 break; 825 826 if (s1[i] == 0) { 827 if (s2[i] == 0) { 828 ++i; 829 continue; 830 } else 831 return (-1); 832 } else if (s2[i] == 0) 833 return (+1); 834 else 835 /* NOTREACHED */ 836 err(2, "bwscoll error 2"); 837 } 838 839 if (len1 < len2) 840 return (-1); 841 else if (len1 > len2) 842 return (+1); 843 844 return (0); 845 } 846 } 847 } 848 } 849 850 /* 851 * Correction of the system API 852 */ 853 double 854 bwstod(struct bwstring *s0, bool *empty) 855 { 856 double ret; 857 858 if (mb_cur_max == 1) { 859 char *end, *s; 860 char *ep; 861 862 s = s0->cdata.str; 863 end = s + s0->cdata.len; 864 ep = NULL; 865 866 while (isblank(*s) && s < end) 867 ++s; 868 869 if (!isprint(*s)) { 870 *empty = true; 871 return (0); 872 } 873 874 ret = strtod((char*)s, &ep); 875 if (ep == s) { 876 *empty = true; 877 return (0); 878 } 879 } else { 880 wchar_t *end, *ep, *s; 881 882 s = s0->wdata.str; 883 end = s + s0->wdata.len; 884 ep = NULL; 885 886 while (iswblank(*s) && s < end) 887 ++s; 888 889 if (!iswprint(*s)) { 890 *empty = true; 891 return (0); 892 } 893 894 ret = wcstod(s, &ep); 895 if (ep == s) { 896 *empty = true; 897 return (0); 898 } 899 } 900 901 *empty = false; 902 return (ret); 903 } 904 905 /* 906 * A helper function for monthcoll. If a line matches 907 * a month name, it returns (number of the month - 1), 908 * while if there is no match, it just return -1. 909 */ 910 911 int 912 bws_month_score(const struct bwstring *s0) 913 { 914 915 if (mb_cur_max == 1) { 916 const char *end, *s; 917 918 s = s0->cdata.str; 919 end = s + s0->cdata.len; 920 921 while (isblank(*s) && s < end) 922 ++s; 923 924 for (int i = 11; i >= 0; --i) { 925 if (cmonths[i] && 926 (s == strstr(s, cmonths[i]))) 927 return (i); 928 } 929 930 } else { 931 const wchar_t *end, *s; 932 933 s = s0->wdata.str; 934 end = s + s0->wdata.len; 935 936 while (iswblank(*s) && s < end) 937 ++s; 938 939 for (int i = 11; i >= 0; --i) { 940 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 941 return (i); 942 } 943 } 944 945 return (-1); 946 } 947 948 /* 949 * Rips out leading blanks (-b). 950 */ 951 struct bwstring * 952 ignore_leading_blanks(struct bwstring *str) 953 { 954 955 if (mb_cur_max == 1) { 956 char *dst, *end, *src; 957 958 src = str->cdata.str; 959 dst = src; 960 end = src + str->cdata.len; 961 962 while (src < end && isblank(*src)) 963 ++src; 964 965 if (src != dst) { 966 size_t newlen; 967 968 newlen = BWSLEN(str) - (src - dst); 969 970 while (src < end) { 971 *dst = *src; 972 ++dst; 973 ++src; 974 } 975 bws_setlen(str, newlen); 976 } 977 } else { 978 wchar_t *dst, *end, *src; 979 980 src = str->wdata.str; 981 dst = src; 982 end = src + str->wdata.len; 983 984 while (src < end && iswblank(*src)) 985 ++src; 986 987 if (src != dst) { 988 989 size_t newlen = BWSLEN(str) - (src - dst); 990 991 while (src < end) { 992 *dst = *src; 993 ++dst; 994 ++src; 995 } 996 bws_setlen(str, newlen); 997 998 } 999 } 1000 return (str); 1001 } 1002 1003 /* 1004 * Rips out nonprinting characters (-i). 1005 */ 1006 struct bwstring * 1007 ignore_nonprinting(struct bwstring *str) 1008 { 1009 size_t newlen = BWSLEN(str); 1010 1011 if (mb_cur_max == 1) { 1012 char *dst, *end, *src; 1013 char c; 1014 1015 src = str->cdata.str; 1016 dst = src; 1017 end = src + str->cdata.len; 1018 1019 while (src < end) { 1020 c = *src; 1021 if (isprint(c)) { 1022 *dst = c; 1023 ++dst; 1024 ++src; 1025 } else { 1026 ++src; 1027 --newlen; 1028 } 1029 } 1030 } else { 1031 wchar_t *dst, *end, *src; 1032 wchar_t c; 1033 1034 src = str->wdata.str; 1035 dst = src; 1036 end = src + str->wdata.len; 1037 1038 while (src < end) { 1039 c = *src; 1040 if (iswprint(c)) { 1041 *dst = c; 1042 ++dst; 1043 ++src; 1044 } else { 1045 ++src; 1046 --newlen; 1047 } 1048 } 1049 } 1050 bws_setlen(str, newlen); 1051 1052 return (str); 1053 } 1054 1055 /* 1056 * Rips out any characters that are not alphanumeric characters 1057 * nor blanks (-d). 1058 */ 1059 struct bwstring * 1060 dictionary_order(struct bwstring *str) 1061 { 1062 size_t newlen = BWSLEN(str); 1063 1064 if (mb_cur_max == 1) { 1065 char *dst, *end, *src; 1066 char c; 1067 1068 src = str->cdata.str; 1069 dst = src; 1070 end = src + str->cdata.len; 1071 1072 while (src < end) { 1073 c = *src; 1074 if (isalnum(c) || isblank(c)) { 1075 *dst = c; 1076 ++dst; 1077 ++src; 1078 } else { 1079 ++src; 1080 --newlen; 1081 } 1082 } 1083 } else { 1084 wchar_t *dst, *end, *src; 1085 wchar_t c; 1086 1087 src = str->wdata.str; 1088 dst = src; 1089 end = src + str->wdata.len; 1090 1091 while (src < end) { 1092 c = *src; 1093 if (iswalnum(c) || iswblank(c)) { 1094 *dst = c; 1095 ++dst; 1096 ++src; 1097 } else { 1098 ++src; 1099 --newlen; 1100 } 1101 } 1102 } 1103 bws_setlen(str, newlen); 1104 1105 return (str); 1106 } 1107 1108 /* 1109 * Converts string to lower case(-f). 1110 */ 1111 struct bwstring * 1112 ignore_case(struct bwstring *str) 1113 { 1114 1115 if (mb_cur_max == 1) { 1116 char *end, *s; 1117 1118 s = str->cdata.str; 1119 end = s + str->cdata.len; 1120 1121 while (s < end) { 1122 *s = toupper(*s); 1123 ++s; 1124 } 1125 } else { 1126 wchar_t *end, *s; 1127 1128 s = str->wdata.str; 1129 end = s + str->wdata.len; 1130 1131 while (s < end) { 1132 *s = towupper(*s); 1133 ++s; 1134 } 1135 } 1136 return (str); 1137 } 1138 1139 void 1140 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1141 { 1142 1143 if (mb_cur_max == 1) 1144 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str); 1145 else 1146 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str); 1147 } 1148