1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <ctype.h> 34 #include <errno.h> 35 #include <err.h> 36 #include <langinfo.h> 37 #include <math.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "bwstring.h" 44 #include "sort.h" 45 46 bool byte_sort; 47 48 static wchar_t **wmonths; 49 static char **cmonths; 50 51 /* initialise months */ 52 53 void 54 initialise_months(void) 55 { 56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 58 ABMON_11, ABMON_12 }; 59 char *tmp; 60 size_t len; 61 62 if (mb_cur_max == 1) { 63 if (cmonths == NULL) { 64 char *m; 65 66 cmonths = sort_malloc(sizeof(char*) * 12); 67 for (int i = 0; i < 12; i++) { 68 cmonths[i] = NULL; 69 tmp = nl_langinfo(item[i]); 70 if (debug_sort) 71 printf("month[%d]=%s\n", i, tmp); 72 if (*tmp == '\0') 73 continue; 74 m = sort_strdup(tmp); 75 len = strlen(tmp); 76 for (unsigned int j = 0; j < len; j++) 77 m[j] = toupper(m[j]); 78 cmonths[i] = m; 79 } 80 } 81 82 } else { 83 if (wmonths == NULL) { 84 wchar_t *m; 85 86 wmonths = sort_malloc(sizeof(wchar_t *) * 12); 87 for (int i = 0; i < 12; i++) { 88 wmonths[i] = NULL; 89 tmp = nl_langinfo(item[i]); 90 if (debug_sort) 91 printf("month[%d]=%s\n", i, tmp); 92 if (*tmp == '\0') 93 continue; 94 len = strlen(tmp); 95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 96 if (mbstowcs(m, tmp, len) == 97 ((size_t) - 1)) { 98 sort_free(m); 99 continue; 100 } 101 m[len] = L'\0'; 102 for (unsigned int j = 0; j < len; j++) 103 m[j] = towupper(m[j]); 104 wmonths[i] = m; 105 } 106 } 107 } 108 } 109 110 /* 111 * Compare two wide-character strings 112 */ 113 static int 114 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 115 { 116 int ret; 117 118 errno = 0; 119 ret = wcscoll(s1, s2); 120 if (errno == EILSEQ) { 121 errno = 0; 122 ret = wcscmp(s1, s2); 123 if (errno != 0) { 124 for (size_t i = 0; ; ++i) { 125 wchar_t c1 = s1[i]; 126 wchar_t c2 = s2[i]; 127 if (c1 == L'\0') 128 return ((c2 == L'\0') ? 0 : -1); 129 if (c2 == L'\0') 130 return (+1); 131 if (c1 == c2) 132 continue; 133 return ((int)(c1 - c2)); 134 } 135 } 136 } 137 return (ret); 138 } 139 140 /* counterparts of wcs functions */ 141 142 void 143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 144 { 145 146 if (mb_cur_max == 1) 147 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix); 148 else 149 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix); 150 } 151 152 const void* bwsrawdata(const struct bwstring *bws) 153 { 154 155 return (bws->wdata.str); 156 } 157 158 size_t bwsrawlen(const struct bwstring *bws) 159 { 160 161 return ((mb_cur_max == 1) ? bws->cdata.len : 162 SIZEOF_WCHAR_STRING(bws->wdata.len)); 163 } 164 165 size_t 166 bws_memsize(const struct bwstring *bws) 167 { 168 169 return ((mb_cur_max == 1) ? 170 (bws->cdata.len + 2 + sizeof(struct bwstring)) : 171 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring))); 172 } 173 174 void 175 bws_setlen(struct bwstring *bws, size_t newlen) 176 { 177 178 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len && 179 newlen <= bws->cdata.len) { 180 bws->cdata.len = newlen; 181 bws->cdata.str[newlen] = '\0'; 182 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) { 183 bws->wdata.len = newlen; 184 bws->wdata.str[newlen] = L'\0'; 185 } 186 } 187 188 /* 189 * Allocate a new binary string of specified size 190 */ 191 struct bwstring * 192 bwsalloc(size_t sz) 193 { 194 struct bwstring *ret; 195 196 if (mb_cur_max == 1) { 197 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 198 ret->cdata.len = sz; 199 ret->cdata.str[sz] = '\0'; 200 } else { 201 ret = sort_malloc( 202 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); 203 ret->wdata.len = sz; 204 ret->wdata.str[sz] = L'\0'; 205 } 206 207 return (ret); 208 } 209 210 /* 211 * Create a copy of binary string. 212 * New string size equals the length of the old string. 213 */ 214 struct bwstring * 215 bwsdup(const struct bwstring *s) 216 { 217 218 if (s == NULL) 219 return (NULL); 220 else { 221 struct bwstring *ret = bwsalloc(BWSLEN(s)); 222 223 if (mb_cur_max == 1) 224 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len)); 225 else 226 memcpy(ret->wdata.str, s->wdata.str, 227 SIZEOF_WCHAR_STRING(s->wdata.len)); 228 229 return (ret); 230 } 231 } 232 233 /* 234 * Create a new binary string from a wide character buffer. 235 */ 236 struct bwstring * 237 bwssbdup(const wchar_t *str, size_t len) 238 { 239 240 if (str == NULL) 241 return ((len == 0) ? bwsalloc(0) : NULL); 242 else { 243 struct bwstring *ret; 244 245 ret = bwsalloc(len); 246 247 if (mb_cur_max == 1) 248 for (size_t i = 0; i < len; ++i) 249 ret->cdata.str[i] = (char)str[i]; 250 else 251 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len)); 252 253 return (ret); 254 } 255 } 256 257 /* 258 * Create a new binary string from a raw binary buffer. 259 */ 260 struct bwstring * 261 bwscsbdup(const unsigned char *str, size_t len) 262 { 263 struct bwstring *ret; 264 265 ret = bwsalloc(len); 266 267 if (str) { 268 if (mb_cur_max == 1) 269 memcpy(ret->cdata.str, str, len); 270 else { 271 mbstate_t mbs; 272 const char *s; 273 size_t charlen, chars, cptr; 274 275 chars = 0; 276 cptr = 0; 277 s = (const char *) str; 278 279 memset(&mbs, 0, sizeof(mbs)); 280 281 while (cptr < len) { 282 size_t n = mb_cur_max; 283 284 if (n > len - cptr) 285 n = len - cptr; 286 charlen = mbrlen(s + cptr, n, &mbs); 287 switch (charlen) { 288 case 0: 289 /* FALLTHROUGH */ 290 case (size_t) -1: 291 /* FALLTHROUGH */ 292 case (size_t) -2: 293 ret->wdata.str[chars++] = 294 (unsigned char) s[cptr]; 295 ++cptr; 296 break; 297 default: 298 n = mbrtowc(ret->wdata.str + (chars++), 299 s + cptr, charlen, &mbs); 300 if ((n == (size_t)-1) || (n == (size_t)-2)) 301 /* NOTREACHED */ 302 err(2, "mbrtowc error"); 303 cptr += charlen; 304 } 305 } 306 307 ret->wdata.len = chars; 308 ret->wdata.str[ret->wdata.len] = L'\0'; 309 } 310 } 311 return (ret); 312 } 313 314 /* 315 * De-allocate object memory 316 */ 317 void 318 bwsfree(const struct bwstring *s) 319 { 320 321 if (s) 322 sort_free(s); 323 } 324 325 /* 326 * Copy content of src binary string to dst, 327 * with specified number of symbols to be copied. 328 * An offset value can be specified, from the start of src string. 329 * If the capacity of the dst string is not sufficient, 330 * then the data is truncated. 331 */ 332 struct bwstring * 333 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 334 size_t size) 335 { 336 337 if (offset >= BWSLEN(src)) { 338 bws_setlen(dst, 0); 339 } else { 340 size_t nums = BWSLEN(src) - offset; 341 342 if (nums > BWSLEN(dst)) 343 nums = BWSLEN(dst); 344 if (nums > size) 345 nums = size; 346 if (mb_cur_max == 1) { 347 memcpy(dst->cdata.str, src->cdata.str + offset, nums); 348 dst->cdata.len = nums; 349 dst->cdata.str[nums] = '\0'; 350 } else { 351 memcpy(dst->wdata.str, src->wdata.str + offset, 352 SIZEOF_WCHAR_STRING(nums)); 353 dst->wdata.len = nums; 354 dst->wdata.str[nums] = L'\0'; 355 } 356 } 357 return (dst); 358 } 359 360 /* 361 * Write binary string to the file. 362 * The output is ended either with '\n' (nl == true) 363 * or '\0' (nl == false). 364 */ 365 size_t 366 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 367 { 368 369 if (mb_cur_max == 1) { 370 size_t len = bws->cdata.len; 371 372 if (!zero_ended) { 373 bws->cdata.str[len] = '\n'; 374 375 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 376 err(2, NULL); 377 378 bws->cdata.str[len] = '\0'; 379 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 380 err(2, NULL); 381 382 return (len + 1); 383 384 } else { 385 wchar_t eols; 386 size_t printed = 0; 387 388 eols = zero_ended ? btowc('\0') : btowc('\n'); 389 390 while (printed < BWSLEN(bws)) { 391 const wchar_t *s = bws->wdata.str + printed; 392 393 if (*s == L'\0') { 394 int nums; 395 396 nums = fwprintf(f, L"%lc", *s); 397 398 if (nums != 1) 399 err(2, NULL); 400 ++printed; 401 } else { 402 int nums; 403 404 nums = fwprintf(f, L"%ls", s); 405 406 if (nums < 1) 407 err(2, NULL); 408 printed += nums; 409 } 410 } 411 fwprintf(f, L"%lc", eols); 412 return (printed + 1); 413 } 414 } 415 416 int 417 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 418 size_t offset, size_t len) 419 { 420 size_t cmp_len, len1, len2; 421 int res; 422 423 len1 = BWSLEN(bws1); 424 len2 = BWSLEN(bws2); 425 426 if (len1 <= offset) { 427 return ((len2 <= offset) ? 0 : -1); 428 } else { 429 if (len2 <= offset) 430 return (+1); 431 else { 432 len1 -= offset; 433 len2 -= offset; 434 435 cmp_len = len1; 436 437 if (len2 < cmp_len) 438 cmp_len = len2; 439 440 if (len < cmp_len) 441 cmp_len = len; 442 443 if (mb_cur_max == 1) { 444 const char *s1, *s2; 445 446 s1 = bws1->cdata.str + offset; 447 s2 = bws2->cdata.str + offset; 448 449 res = memcmp(s1, s2, cmp_len); 450 451 } else { 452 const wchar_t *s1, *s2; 453 454 s1 = bws1->wdata.str + offset; 455 s2 = bws2->wdata.str + offset; 456 457 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 458 } 459 } 460 } 461 462 if (res == 0) { 463 if (len1 < cmp_len && len1 < len2) 464 res = -1; 465 else if (len2 < cmp_len && len2 < len1) 466 res = +1; 467 } 468 469 return (res); 470 } 471 472 int 473 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 474 { 475 size_t len1, len2, cmp_len; 476 int res; 477 478 len1 = BWSLEN(bws1); 479 len2 = BWSLEN(bws2); 480 481 len1 -= offset; 482 len2 -= offset; 483 484 cmp_len = len1; 485 486 if (len2 < cmp_len) 487 cmp_len = len2; 488 489 res = bwsncmp(bws1, bws2, offset, cmp_len); 490 491 if (res == 0) { 492 if( len1 < len2) 493 res = -1; 494 else if (len2 < len1) 495 res = +1; 496 } 497 498 return (res); 499 } 500 501 int 502 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 503 { 504 wchar_t c1, c2; 505 size_t i; 506 507 for (i = 0; i < len; ++i) { 508 c1 = bws_get_iter_value(iter1); 509 c2 = bws_get_iter_value(iter2); 510 if (c1 != c2) 511 return (c1 - c2); 512 iter1 = bws_iterator_inc(iter1, 1); 513 iter2 = bws_iterator_inc(iter2, 1); 514 } 515 516 return (0); 517 } 518 519 int 520 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 521 { 522 size_t len1, len2; 523 524 len1 = BWSLEN(bws1); 525 len2 = BWSLEN(bws2); 526 527 if (len1 <= offset) 528 return ((len2 <= offset) ? 0 : -1); 529 else { 530 if (len2 <= offset) 531 return (+1); 532 else { 533 len1 -= offset; 534 len2 -= offset; 535 536 if (mb_cur_max == 1) { 537 const char *s1, *s2; 538 539 s1 = bws1->cdata.str + offset; 540 s2 = bws2->cdata.str + offset; 541 542 if (byte_sort) { 543 int res; 544 545 if (len1 > len2) { 546 res = memcmp(s1, s2, len2); 547 if (!res) 548 res = +1; 549 } else if (len1 < len2) { 550 res = memcmp(s1, s2, len1); 551 if (!res) 552 res = -1; 553 } else 554 res = memcmp(s1, s2, len1); 555 556 return (res); 557 558 } else { 559 int res; 560 size_t i, maxlen; 561 562 i = 0; 563 maxlen = len1; 564 565 if (maxlen > len2) 566 maxlen = len2; 567 568 while (i < maxlen) { 569 /* goto next non-zero part: */ 570 while ((i < maxlen) && 571 !s1[i] && !s2[i]) 572 ++i; 573 574 if (i >= maxlen) 575 break; 576 577 if (s1[i] == 0) { 578 if (s2[i] == 0) 579 /* NOTREACHED */ 580 err(2, "bwscoll error 01"); 581 else 582 return (-1); 583 } else if (s2[i] == 0) 584 return (+1); 585 586 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 587 if (res) 588 return (res); 589 590 while ((i < maxlen) && 591 s1[i] && s2[i]) 592 ++i; 593 594 if (i >= maxlen) 595 break; 596 597 if (s1[i] == 0) { 598 if (s2[i] == 0) { 599 ++i; 600 continue; 601 } else 602 return (-1); 603 } else if (s2[i] == 0) 604 return (+1); 605 else 606 /* NOTREACHED */ 607 err(2, "bwscoll error 02"); 608 } 609 610 if (len1 < len2) 611 return (-1); 612 else if (len1 > len2) 613 return (+1); 614 615 return (0); 616 } 617 } else { 618 const wchar_t *s1, *s2; 619 size_t i, maxlen; 620 int res; 621 622 s1 = bws1->wdata.str + offset; 623 s2 = bws2->wdata.str + offset; 624 625 i = 0; 626 maxlen = len1; 627 628 if (maxlen > len2) 629 maxlen = len2; 630 631 while (i < maxlen) { 632 633 /* goto next non-zero part: */ 634 while ((i < maxlen) && 635 !s1[i] && !s2[i]) 636 ++i; 637 638 if (i >= maxlen) 639 break; 640 641 if (s1[i] == 0) { 642 if (s2[i] == 0) 643 /* NOTREACHED */ 644 err(2, "bwscoll error 1"); 645 else 646 return (-1); 647 } else if (s2[i] == 0) 648 return (+1); 649 650 res = wide_str_coll(s1 + i, s2 + i); 651 if (res) 652 return (res); 653 654 while ((i < maxlen) && s1[i] && s2[i]) 655 ++i; 656 657 if (i >= maxlen) 658 break; 659 660 if (s1[i] == 0) { 661 if (s2[i] == 0) { 662 ++i; 663 continue; 664 } else 665 return (-1); 666 } else if (s2[i] == 0) 667 return (+1); 668 else 669 /* NOTREACHED */ 670 err(2, "bwscoll error 2"); 671 } 672 673 if (len1 < len2) 674 return (-1); 675 else if (len1 > len2) 676 return (+1); 677 678 return (0); 679 } 680 } 681 } 682 } 683 684 /* 685 * Correction of the system API 686 */ 687 double 688 bwstod(struct bwstring *s0, bool *empty) 689 { 690 double ret; 691 692 if (mb_cur_max == 1) { 693 char *end, *s; 694 char *ep; 695 696 s = s0->cdata.str; 697 end = s + s0->cdata.len; 698 ep = NULL; 699 700 while (isblank(*s) && s < end) 701 ++s; 702 703 if (!isprint(*s)) { 704 *empty = true; 705 return (0); 706 } 707 708 ret = strtod((char*)s, &ep); 709 if (ep == s) { 710 *empty = true; 711 return (0); 712 } 713 } else { 714 wchar_t *end, *ep, *s; 715 716 s = s0->wdata.str; 717 end = s + s0->wdata.len; 718 ep = NULL; 719 720 while (iswblank(*s) && s < end) 721 ++s; 722 723 if (!iswprint(*s)) { 724 *empty = true; 725 return (0); 726 } 727 728 ret = wcstod(s, &ep); 729 if (ep == s) { 730 *empty = true; 731 return (0); 732 } 733 } 734 735 *empty = false; 736 return (ret); 737 } 738 739 /* 740 * A helper function for monthcoll. If a line matches 741 * a month name, it returns (number of the month - 1), 742 * while if there is no match, it just return -1. 743 */ 744 745 int 746 bws_month_score(const struct bwstring *s0) 747 { 748 749 if (mb_cur_max == 1) { 750 const char *end, *s; 751 752 s = s0->cdata.str; 753 end = s + s0->cdata.len; 754 755 while (isblank(*s) && s < end) 756 ++s; 757 758 for (int i = 11; i >= 0; --i) { 759 if (cmonths[i] && 760 (s == strstr(s, cmonths[i]))) 761 return (i); 762 } 763 764 } else { 765 const wchar_t *end, *s; 766 767 s = s0->wdata.str; 768 end = s + s0->wdata.len; 769 770 while (iswblank(*s) && s < end) 771 ++s; 772 773 for (int i = 11; i >= 0; --i) { 774 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 775 return (i); 776 } 777 } 778 779 return (-1); 780 } 781 782 /* 783 * Rips out leading blanks (-b). 784 */ 785 struct bwstring * 786 ignore_leading_blanks(struct bwstring *str) 787 { 788 789 if (mb_cur_max == 1) { 790 char *dst, *end, *src; 791 792 src = str->cdata.str; 793 dst = src; 794 end = src + str->cdata.len; 795 796 while (src < end && isblank(*src)) 797 ++src; 798 799 if (src != dst) { 800 size_t newlen; 801 802 newlen = BWSLEN(str) - (src - dst); 803 804 while (src < end) { 805 *dst = *src; 806 ++dst; 807 ++src; 808 } 809 bws_setlen(str, newlen); 810 } 811 } else { 812 wchar_t *dst, *end, *src; 813 814 src = str->wdata.str; 815 dst = src; 816 end = src + str->wdata.len; 817 818 while (src < end && iswblank(*src)) 819 ++src; 820 821 if (src != dst) { 822 823 size_t newlen = BWSLEN(str) - (src - dst); 824 825 while (src < end) { 826 *dst = *src; 827 ++dst; 828 ++src; 829 } 830 bws_setlen(str, newlen); 831 832 } 833 } 834 return (str); 835 } 836 837 /* 838 * Rips out nonprinting characters (-i). 839 */ 840 struct bwstring * 841 ignore_nonprinting(struct bwstring *str) 842 { 843 size_t newlen = BWSLEN(str); 844 845 if (mb_cur_max == 1) { 846 char *dst, *end, *src; 847 char c; 848 849 src = str->cdata.str; 850 dst = src; 851 end = src + str->cdata.len; 852 853 while (src < end) { 854 c = *src; 855 if (isprint(c)) { 856 *dst = c; 857 ++dst; 858 ++src; 859 } else { 860 ++src; 861 --newlen; 862 } 863 } 864 } else { 865 wchar_t *dst, *end, *src; 866 wchar_t c; 867 868 src = str->wdata.str; 869 dst = src; 870 end = src + str->wdata.len; 871 872 while (src < end) { 873 c = *src; 874 if (iswprint(c)) { 875 *dst = c; 876 ++dst; 877 ++src; 878 } else { 879 ++src; 880 --newlen; 881 } 882 } 883 } 884 bws_setlen(str, newlen); 885 886 return (str); 887 } 888 889 /* 890 * Rips out any characters that are not alphanumeric characters 891 * nor blanks (-d). 892 */ 893 struct bwstring * 894 dictionary_order(struct bwstring *str) 895 { 896 size_t newlen = BWSLEN(str); 897 898 if (mb_cur_max == 1) { 899 char *dst, *end, *src; 900 char c; 901 902 src = str->cdata.str; 903 dst = src; 904 end = src + str->cdata.len; 905 906 while (src < end) { 907 c = *src; 908 if (isalnum(c) || isblank(c)) { 909 *dst = c; 910 ++dst; 911 ++src; 912 } else { 913 ++src; 914 --newlen; 915 } 916 } 917 } else { 918 wchar_t *dst, *end, *src; 919 wchar_t c; 920 921 src = str->wdata.str; 922 dst = src; 923 end = src + str->wdata.len; 924 925 while (src < end) { 926 c = *src; 927 if (iswalnum(c) || iswblank(c)) { 928 *dst = c; 929 ++dst; 930 ++src; 931 } else { 932 ++src; 933 --newlen; 934 } 935 } 936 } 937 bws_setlen(str, newlen); 938 939 return (str); 940 } 941 942 /* 943 * Converts string to lower case(-f). 944 */ 945 struct bwstring * 946 ignore_case(struct bwstring *str) 947 { 948 949 if (mb_cur_max == 1) { 950 char *end, *s; 951 952 s = str->cdata.str; 953 end = s + str->cdata.len; 954 955 while (s < end) { 956 *s = toupper(*s); 957 ++s; 958 } 959 } else { 960 wchar_t *end, *s; 961 962 s = str->wdata.str; 963 end = s + str->wdata.len; 964 965 while (s < end) { 966 *s = towupper(*s); 967 ++s; 968 } 969 } 970 return (str); 971 } 972 973 void 974 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 975 { 976 977 if (mb_cur_max == 1) 978 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str); 979 else 980 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str); 981 } 982