1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <ctype.h> 32 #include <errno.h> 33 #include <err.h> 34 #include <langinfo.h> 35 #include <math.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <wchar.h> 39 #include <wctype.h> 40 41 #include "bwstring.h" 42 #include "sort.h" 43 44 bool byte_sort; 45 46 struct wmonth { 47 wchar_t *mon; 48 wchar_t *ab; 49 wchar_t *alt; 50 }; 51 52 struct cmonth { 53 char *mon; 54 char *ab; 55 char *alt; 56 }; 57 58 static struct wmonth *wmonths; 59 static struct cmonth *cmonths; 60 61 static int 62 populate_cmonth(char **field, const nl_item item, int idx) 63 { 64 char *tmp, *m; 65 size_t i, len; 66 67 tmp = nl_langinfo(item); 68 if (debug_sort) 69 printf("month[%d]=%s\n", idx, tmp); 70 if (*tmp == '\0') 71 return (0); 72 m = sort_strdup(tmp); 73 len = strlen(tmp); 74 for (i = 0; i < len; i++) 75 m[i] = toupper(m[i]); 76 *field = m; 77 78 return (1); 79 } 80 81 static int 82 populate_wmonth(wchar_t **field, const nl_item item, int idx) 83 { 84 wchar_t *m; 85 char *tmp; 86 size_t i, len; 87 88 tmp = nl_langinfo(item); 89 if (debug_sort) 90 printf("month[%d]=%s\n", idx, tmp); 91 if (*tmp == '\0') 92 return (0); 93 len = strlen(tmp); 94 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 95 if (mbstowcs(m, tmp, len) == ((size_t) - 1)) { 96 sort_free(m); 97 return (0); 98 } 99 m[len] = L'\0'; 100 for (i = 0; i < len; i++) 101 m[i] = towupper(m[i]); 102 *field = m; 103 104 return (1); 105 } 106 107 void 108 initialise_months(void) 109 { 110 const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4, 111 MON_5, MON_6, MON_7, MON_8, MON_9, MON_10, 112 MON_11, MON_12 }; 113 const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 114 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 115 ABMON_11, ABMON_12 }; 116 #ifdef ALTMON_1 117 const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4, 118 ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10, 119 ALTMON_11, ALTMON_12 }; 120 #endif 121 int i; 122 123 /* 124 * Handle all possible month formats: abbrevation, full name, 125 * standalone name (without case ending). 126 */ 127 if (mb_cur_max == 1) { 128 if (cmonths == NULL) { 129 cmonths = sort_malloc(sizeof(struct cmonth) * 12); 130 for (i = 0; i < 12; i++) { 131 if (!populate_cmonth(&cmonths[i].mon, 132 mon_item[i], i)) 133 continue; 134 if (!populate_cmonth(&cmonths[i].ab, 135 ab_item[i], i)) 136 continue; 137 #ifdef ALTMON_1 138 if (!populate_cmonth(&cmonths[i].alt, 139 alt_item[i], i)) 140 continue; 141 #else 142 cmonths[i].alt = NULL; 143 #endif 144 } 145 } 146 147 } else { 148 if (wmonths == NULL) { 149 wmonths = sort_malloc(sizeof(struct wmonth) * 12); 150 for (i = 0; i < 12; i++) { 151 if (!populate_wmonth(&wmonths[i].mon, 152 mon_item[i], i)) 153 continue; 154 if (!populate_wmonth(&wmonths[i].ab, 155 ab_item[i], i)) 156 continue; 157 #ifdef ALTMON_1 158 if (!populate_wmonth(&wmonths[i].alt, 159 alt_item[i], i)) 160 continue; 161 #else 162 wmonths[i].alt = NULL; 163 #endif 164 } 165 } 166 } 167 } 168 169 /* 170 * Compare two wide-character strings 171 */ 172 static int 173 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 174 { 175 int ret; 176 177 errno = 0; 178 ret = wcscoll(s1, s2); 179 if (errno == EILSEQ) { 180 errno = 0; 181 ret = wcscmp(s1, s2); 182 if (errno != 0) { 183 for (size_t i = 0; ; ++i) { 184 wchar_t c1 = s1[i]; 185 wchar_t c2 = s2[i]; 186 if (c1 == L'\0') 187 return ((c2 == L'\0') ? 0 : -1); 188 if (c2 == L'\0') 189 return (+1); 190 if (c1 == c2) 191 continue; 192 return ((int)(c1 - c2)); 193 } 194 } 195 } 196 return (ret); 197 } 198 199 /* counterparts of wcs functions */ 200 201 void 202 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 203 { 204 205 if (mb_cur_max == 1) 206 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix); 207 else 208 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix); 209 } 210 211 const void* bwsrawdata(const struct bwstring *bws) 212 { 213 214 return (bws->wdata.str); 215 } 216 217 size_t bwsrawlen(const struct bwstring *bws) 218 { 219 220 return ((mb_cur_max == 1) ? bws->cdata.len : 221 SIZEOF_WCHAR_STRING(bws->wdata.len)); 222 } 223 224 size_t 225 bws_memsize(const struct bwstring *bws) 226 { 227 228 return ((mb_cur_max == 1) ? 229 (bws->cdata.len + 2 + sizeof(struct bwstring)) : 230 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring))); 231 } 232 233 void 234 bws_setlen(struct bwstring *bws, size_t newlen) 235 { 236 237 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len && 238 newlen <= bws->cdata.len) { 239 bws->cdata.len = newlen; 240 bws->cdata.str[newlen] = '\0'; 241 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) { 242 bws->wdata.len = newlen; 243 bws->wdata.str[newlen] = L'\0'; 244 } 245 } 246 247 /* 248 * Allocate a new binary string of specified size 249 */ 250 struct bwstring * 251 bwsalloc(size_t sz) 252 { 253 struct bwstring *ret; 254 255 if (mb_cur_max == 1) { 256 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 257 ret->cdata.len = sz; 258 ret->cdata.str[sz] = '\0'; 259 } else { 260 ret = sort_malloc( 261 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); 262 ret->wdata.len = sz; 263 ret->wdata.str[sz] = L'\0'; 264 } 265 266 return (ret); 267 } 268 269 /* 270 * Create a copy of binary string. 271 * New string size equals the length of the old string. 272 */ 273 struct bwstring * 274 bwsdup(const struct bwstring *s) 275 { 276 277 if (s == NULL) 278 return (NULL); 279 else { 280 struct bwstring *ret = bwsalloc(BWSLEN(s)); 281 282 if (mb_cur_max == 1) 283 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len)); 284 else 285 memcpy(ret->wdata.str, s->wdata.str, 286 SIZEOF_WCHAR_STRING(s->wdata.len)); 287 288 return (ret); 289 } 290 } 291 292 /* 293 * Create a new binary string from a wide character buffer. 294 */ 295 struct bwstring * 296 bwssbdup(const wchar_t *str, size_t len) 297 { 298 299 if (str == NULL) 300 return ((len == 0) ? bwsalloc(0) : NULL); 301 else { 302 struct bwstring *ret; 303 304 ret = bwsalloc(len); 305 306 if (mb_cur_max == 1) 307 for (size_t i = 0; i < len; ++i) 308 ret->cdata.str[i] = (char)str[i]; 309 else 310 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len)); 311 312 return (ret); 313 } 314 } 315 316 /* 317 * Create a new binary string from a raw binary buffer. 318 */ 319 struct bwstring * 320 bwscsbdup(const unsigned char *str, size_t len) 321 { 322 struct bwstring *ret; 323 324 ret = bwsalloc(len); 325 326 if (str) { 327 if (mb_cur_max == 1) 328 memcpy(ret->cdata.str, str, len); 329 else { 330 mbstate_t mbs; 331 const char *s; 332 size_t charlen, chars, cptr; 333 334 chars = 0; 335 cptr = 0; 336 s = (const char *) str; 337 338 memset(&mbs, 0, sizeof(mbs)); 339 340 while (cptr < len) { 341 size_t n = mb_cur_max; 342 343 if (n > len - cptr) 344 n = len - cptr; 345 charlen = mbrlen(s + cptr, n, &mbs); 346 switch (charlen) { 347 case 0: 348 /* FALLTHROUGH */ 349 case (size_t) -1: 350 /* FALLTHROUGH */ 351 case (size_t) -2: 352 ret->wdata.str[chars++] = 353 (unsigned char) s[cptr]; 354 ++cptr; 355 break; 356 default: 357 n = mbrtowc(ret->wdata.str + (chars++), 358 s + cptr, charlen, &mbs); 359 if ((n == (size_t)-1) || (n == (size_t)-2)) 360 /* NOTREACHED */ 361 err(2, "mbrtowc error"); 362 cptr += charlen; 363 } 364 } 365 366 ret->wdata.len = chars; 367 ret->wdata.str[ret->wdata.len] = L'\0'; 368 } 369 } 370 return (ret); 371 } 372 373 /* 374 * De-allocate object memory 375 */ 376 void 377 bwsfree(const struct bwstring *s) 378 { 379 380 if (s) 381 sort_free(s); 382 } 383 384 /* 385 * Copy content of src binary string to dst, 386 * with specified number of symbols to be copied. 387 * An offset value can be specified, from the start of src string. 388 * If the capacity of the dst string is not sufficient, 389 * then the data is truncated. 390 */ 391 struct bwstring * 392 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 393 size_t size) 394 { 395 396 if (offset >= BWSLEN(src)) { 397 bws_setlen(dst, 0); 398 } else { 399 size_t nums = BWSLEN(src) - offset; 400 401 if (nums > BWSLEN(dst)) 402 nums = BWSLEN(dst); 403 if (nums > size) 404 nums = size; 405 if (mb_cur_max == 1) { 406 memcpy(dst->cdata.str, src->cdata.str + offset, nums); 407 dst->cdata.len = nums; 408 dst->cdata.str[nums] = '\0'; 409 } else { 410 memcpy(dst->wdata.str, src->wdata.str + offset, 411 SIZEOF_WCHAR_STRING(nums)); 412 dst->wdata.len = nums; 413 dst->wdata.str[nums] = L'\0'; 414 } 415 } 416 return (dst); 417 } 418 419 /* 420 * Write binary string to the file. 421 * The output is ended either with '\n' (nl == true) 422 * or '\0' (nl == false). 423 */ 424 size_t 425 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 426 { 427 428 if (mb_cur_max == 1) { 429 size_t len = bws->cdata.len; 430 431 if (!zero_ended) { 432 bws->cdata.str[len] = '\n'; 433 434 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 435 err(2, NULL); 436 437 bws->cdata.str[len] = '\0'; 438 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 439 err(2, NULL); 440 441 return (len + 1); 442 443 } else { 444 wchar_t eols; 445 size_t printed = 0; 446 447 eols = zero_ended ? btowc('\0') : btowc('\n'); 448 449 while (printed < BWSLEN(bws)) { 450 const wchar_t *s = bws->wdata.str + printed; 451 452 if (*s == L'\0') { 453 int nums; 454 455 nums = fwprintf(f, L"%lc", *s); 456 457 if (nums != 1) 458 err(2, NULL); 459 ++printed; 460 } else { 461 int nums; 462 463 nums = fwprintf(f, L"%ls", s); 464 465 if (nums < 1) 466 err(2, NULL); 467 printed += nums; 468 } 469 } 470 fwprintf(f, L"%lc", eols); 471 return (printed + 1); 472 } 473 } 474 475 int 476 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 477 size_t offset, size_t len) 478 { 479 size_t cmp_len, len1, len2; 480 int res; 481 482 len1 = BWSLEN(bws1); 483 len2 = BWSLEN(bws2); 484 485 if (len1 <= offset) { 486 return ((len2 <= offset) ? 0 : -1); 487 } else { 488 if (len2 <= offset) 489 return (+1); 490 else { 491 len1 -= offset; 492 len2 -= offset; 493 494 cmp_len = len1; 495 496 if (len2 < cmp_len) 497 cmp_len = len2; 498 499 if (len < cmp_len) 500 cmp_len = len; 501 502 if (mb_cur_max == 1) { 503 const char *s1, *s2; 504 505 s1 = bws1->cdata.str + offset; 506 s2 = bws2->cdata.str + offset; 507 508 res = memcmp(s1, s2, cmp_len); 509 510 } else { 511 const wchar_t *s1, *s2; 512 513 s1 = bws1->wdata.str + offset; 514 s2 = bws2->wdata.str + offset; 515 516 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 517 } 518 } 519 } 520 521 if (res == 0) { 522 if (len1 < cmp_len && len1 < len2) 523 res = -1; 524 else if (len2 < cmp_len && len2 < len1) 525 res = +1; 526 } 527 528 return (res); 529 } 530 531 int 532 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 533 { 534 size_t len1, len2, cmp_len; 535 int res; 536 537 len1 = BWSLEN(bws1); 538 len2 = BWSLEN(bws2); 539 540 len1 -= offset; 541 len2 -= offset; 542 543 cmp_len = len1; 544 545 if (len2 < cmp_len) 546 cmp_len = len2; 547 548 res = bwsncmp(bws1, bws2, offset, cmp_len); 549 550 if (res == 0) { 551 if( len1 < len2) 552 res = -1; 553 else if (len2 < len1) 554 res = +1; 555 } 556 557 return (res); 558 } 559 560 int 561 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 562 { 563 wchar_t c1, c2; 564 size_t i; 565 566 for (i = 0; i < len; ++i) { 567 c1 = bws_get_iter_value(iter1); 568 c2 = bws_get_iter_value(iter2); 569 if (c1 != c2) 570 return (c1 - c2); 571 iter1 = bws_iterator_inc(iter1, 1); 572 iter2 = bws_iterator_inc(iter2, 1); 573 } 574 575 return (0); 576 } 577 578 int 579 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 580 { 581 size_t len1, len2; 582 583 len1 = BWSLEN(bws1); 584 len2 = BWSLEN(bws2); 585 586 if (len1 <= offset) 587 return ((len2 <= offset) ? 0 : -1); 588 else { 589 if (len2 <= offset) 590 return (+1); 591 else { 592 len1 -= offset; 593 len2 -= offset; 594 595 if (mb_cur_max == 1) { 596 const char *s1, *s2; 597 598 s1 = bws1->cdata.str + offset; 599 s2 = bws2->cdata.str + offset; 600 601 if (byte_sort) { 602 int res; 603 604 if (len1 > len2) { 605 res = memcmp(s1, s2, len2); 606 if (!res) 607 res = +1; 608 } else if (len1 < len2) { 609 res = memcmp(s1, s2, len1); 610 if (!res) 611 res = -1; 612 } else 613 res = memcmp(s1, s2, len1); 614 615 return (res); 616 617 } else { 618 int res; 619 size_t i, maxlen; 620 621 i = 0; 622 maxlen = len1; 623 624 if (maxlen > len2) 625 maxlen = len2; 626 627 while (i < maxlen) { 628 /* goto next non-zero part: */ 629 while ((i < maxlen) && 630 !s1[i] && !s2[i]) 631 ++i; 632 633 if (i >= maxlen) 634 break; 635 636 if (s1[i] == 0) { 637 if (s2[i] == 0) 638 /* NOTREACHED */ 639 err(2, "bwscoll error 01"); 640 else 641 return (-1); 642 } else if (s2[i] == 0) 643 return (+1); 644 645 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 646 if (res) 647 return (res); 648 649 while ((i < maxlen) && 650 s1[i] && s2[i]) 651 ++i; 652 653 if (i >= maxlen) 654 break; 655 656 if (s1[i] == 0) { 657 if (s2[i] == 0) { 658 ++i; 659 continue; 660 } else 661 return (-1); 662 } else if (s2[i] == 0) 663 return (+1); 664 else 665 /* NOTREACHED */ 666 err(2, "bwscoll error 02"); 667 } 668 669 if (len1 < len2) 670 return (-1); 671 else if (len1 > len2) 672 return (+1); 673 674 return (0); 675 } 676 } else { 677 const wchar_t *s1, *s2; 678 size_t i, maxlen; 679 int res; 680 681 s1 = bws1->wdata.str + offset; 682 s2 = bws2->wdata.str + offset; 683 684 i = 0; 685 maxlen = len1; 686 687 if (maxlen > len2) 688 maxlen = len2; 689 690 while (i < maxlen) { 691 692 /* goto next non-zero part: */ 693 while ((i < maxlen) && 694 !s1[i] && !s2[i]) 695 ++i; 696 697 if (i >= maxlen) 698 break; 699 700 if (s1[i] == 0) { 701 if (s2[i] == 0) 702 /* NOTREACHED */ 703 err(2, "bwscoll error 1"); 704 else 705 return (-1); 706 } else if (s2[i] == 0) 707 return (+1); 708 709 res = wide_str_coll(s1 + i, s2 + i); 710 if (res) 711 return (res); 712 713 while ((i < maxlen) && s1[i] && s2[i]) 714 ++i; 715 716 if (i >= maxlen) 717 break; 718 719 if (s1[i] == 0) { 720 if (s2[i] == 0) { 721 ++i; 722 continue; 723 } else 724 return (-1); 725 } else if (s2[i] == 0) 726 return (+1); 727 else 728 /* NOTREACHED */ 729 err(2, "bwscoll error 2"); 730 } 731 732 if (len1 < len2) 733 return (-1); 734 else if (len1 > len2) 735 return (+1); 736 737 return (0); 738 } 739 } 740 } 741 } 742 743 /* 744 * Correction of the system API 745 */ 746 double 747 bwstod(struct bwstring *s0, bool *empty) 748 { 749 double ret; 750 751 if (mb_cur_max == 1) { 752 char *end, *s; 753 char *ep; 754 755 s = s0->cdata.str; 756 end = s + s0->cdata.len; 757 ep = NULL; 758 759 while (isblank(*s) && s < end) 760 ++s; 761 762 if (!isprint(*s)) { 763 *empty = true; 764 return (0); 765 } 766 767 ret = strtod((char*)s, &ep); 768 if (ep == s) { 769 *empty = true; 770 return (0); 771 } 772 } else { 773 wchar_t *end, *ep, *s; 774 775 s = s0->wdata.str; 776 end = s + s0->wdata.len; 777 ep = NULL; 778 779 while (iswblank(*s) && s < end) 780 ++s; 781 782 if (!iswprint(*s)) { 783 *empty = true; 784 return (0); 785 } 786 787 ret = wcstod(s, &ep); 788 if (ep == s) { 789 *empty = true; 790 return (0); 791 } 792 } 793 794 *empty = false; 795 return (ret); 796 } 797 798 /* 799 * A helper function for monthcoll. If a line matches 800 * a month name, it returns (number of the month - 1), 801 * while if there is no match, it just return -1. 802 */ 803 804 int 805 bws_month_score(const struct bwstring *s0) 806 { 807 808 if (mb_cur_max == 1) { 809 const char *end, *s; 810 811 s = s0->cdata.str; 812 end = s + s0->cdata.len; 813 814 while (isblank(*s) && s < end) 815 ++s; 816 817 for (int i = 11; i >= 0; --i) { 818 if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon))) 819 return (i); 820 if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab))) 821 return (i); 822 if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt))) 823 return (i); 824 } 825 826 } else { 827 const wchar_t *end, *s; 828 829 s = s0->wdata.str; 830 end = s + s0->wdata.len; 831 832 while (iswblank(*s) && s < end) 833 ++s; 834 835 for (int i = 11; i >= 0; --i) { 836 if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab))) 837 return (i); 838 if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon))) 839 return (i); 840 if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt))) 841 return (i); 842 } 843 } 844 845 return (-1); 846 } 847 848 /* 849 * Rips out leading blanks (-b). 850 */ 851 struct bwstring * 852 ignore_leading_blanks(struct bwstring *str) 853 { 854 855 if (mb_cur_max == 1) { 856 char *dst, *end, *src; 857 858 src = str->cdata.str; 859 dst = src; 860 end = src + str->cdata.len; 861 862 while (src < end && isblank(*src)) 863 ++src; 864 865 if (src != dst) { 866 size_t newlen; 867 868 newlen = BWSLEN(str) - (src - dst); 869 870 while (src < end) { 871 *dst = *src; 872 ++dst; 873 ++src; 874 } 875 bws_setlen(str, newlen); 876 } 877 } else { 878 wchar_t *dst, *end, *src; 879 880 src = str->wdata.str; 881 dst = src; 882 end = src + str->wdata.len; 883 884 while (src < end && iswblank(*src)) 885 ++src; 886 887 if (src != dst) { 888 889 size_t newlen = BWSLEN(str) - (src - dst); 890 891 while (src < end) { 892 *dst = *src; 893 ++dst; 894 ++src; 895 } 896 bws_setlen(str, newlen); 897 898 } 899 } 900 return (str); 901 } 902 903 /* 904 * Rips out nonprinting characters (-i). 905 */ 906 struct bwstring * 907 ignore_nonprinting(struct bwstring *str) 908 { 909 size_t newlen = BWSLEN(str); 910 911 if (mb_cur_max == 1) { 912 char *dst, *end, *src; 913 char c; 914 915 src = str->cdata.str; 916 dst = src; 917 end = src + str->cdata.len; 918 919 while (src < end) { 920 c = *src; 921 if (isprint(c)) { 922 *dst = c; 923 ++dst; 924 ++src; 925 } else { 926 ++src; 927 --newlen; 928 } 929 } 930 } else { 931 wchar_t *dst, *end, *src; 932 wchar_t c; 933 934 src = str->wdata.str; 935 dst = src; 936 end = src + str->wdata.len; 937 938 while (src < end) { 939 c = *src; 940 if (iswprint(c)) { 941 *dst = c; 942 ++dst; 943 ++src; 944 } else { 945 ++src; 946 --newlen; 947 } 948 } 949 } 950 bws_setlen(str, newlen); 951 952 return (str); 953 } 954 955 /* 956 * Rips out any characters that are not alphanumeric characters 957 * nor blanks (-d). 958 */ 959 struct bwstring * 960 dictionary_order(struct bwstring *str) 961 { 962 size_t newlen = BWSLEN(str); 963 964 if (mb_cur_max == 1) { 965 char *dst, *end, *src; 966 char c; 967 968 src = str->cdata.str; 969 dst = src; 970 end = src + str->cdata.len; 971 972 while (src < end) { 973 c = *src; 974 if (isalnum(c) || isblank(c)) { 975 *dst = c; 976 ++dst; 977 ++src; 978 } else { 979 ++src; 980 --newlen; 981 } 982 } 983 } else { 984 wchar_t *dst, *end, *src; 985 wchar_t c; 986 987 src = str->wdata.str; 988 dst = src; 989 end = src + str->wdata.len; 990 991 while (src < end) { 992 c = *src; 993 if (iswalnum(c) || iswblank(c)) { 994 *dst = c; 995 ++dst; 996 ++src; 997 } else { 998 ++src; 999 --newlen; 1000 } 1001 } 1002 } 1003 bws_setlen(str, newlen); 1004 1005 return (str); 1006 } 1007 1008 /* 1009 * Converts string to lower case(-f). 1010 */ 1011 struct bwstring * 1012 ignore_case(struct bwstring *str) 1013 { 1014 1015 if (mb_cur_max == 1) { 1016 char *end, *s; 1017 1018 s = str->cdata.str; 1019 end = s + str->cdata.len; 1020 1021 while (s < end) { 1022 *s = toupper(*s); 1023 ++s; 1024 } 1025 } else { 1026 wchar_t *end, *s; 1027 1028 s = str->wdata.str; 1029 end = s + str->wdata.len; 1030 1031 while (s < end) { 1032 *s = towupper(*s); 1033 ++s; 1034 } 1035 } 1036 return (str); 1037 } 1038 1039 void 1040 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1041 { 1042 1043 if (mb_cur_max == 1) 1044 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str); 1045 else 1046 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str); 1047 } 1048