1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <ctype.h> 32 #include <errno.h> 33 #include <err.h> 34 #include <langinfo.h> 35 #include <math.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <wchar.h> 39 #include <wctype.h> 40 41 #include "bwstring.h" 42 #include "sort.h" 43 44 bool byte_sort; 45 46 struct wmonth { 47 wchar_t *mon; 48 wchar_t *ab; 49 wchar_t *alt; 50 }; 51 52 struct cmonth { 53 char *mon; 54 char *ab; 55 char *alt; 56 }; 57 58 static struct wmonth *wmonths; 59 static struct cmonth *cmonths; 60 61 static int 62 populate_cmonth(char **field, const nl_item item, int idx) 63 { 64 char *tmp, *m; 65 size_t i, len; 66 67 tmp = nl_langinfo(item); 68 if (debug_sort) 69 printf("month[%d]=%s\n", idx, tmp); 70 if (*tmp == '\0') 71 return (0); 72 m = sort_strdup(tmp); 73 len = strlen(tmp); 74 for (i = 0; i < len; i++) 75 m[i] = toupper(m[i]); 76 *field = m; 77 78 return (1); 79 } 80 81 static int 82 populate_wmonth(wchar_t **field, const nl_item item, int idx) 83 { 84 wchar_t *m; 85 char *tmp; 86 size_t i, len; 87 88 tmp = nl_langinfo(item); 89 if (debug_sort) 90 printf("month[%d]=%s\n", idx, tmp); 91 if (*tmp == '\0') 92 return (0); 93 len = strlen(tmp); 94 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 95 if (mbstowcs(m, tmp, len) == ((size_t) - 1)) { 96 sort_free(m); 97 return (0); 98 } 99 m[len] = L'\0'; 100 for (i = 0; i < len; i++) 101 m[i] = towupper(m[i]); 102 *field = m; 103 104 return (1); 105 } 106 107 void 108 initialise_months(void) 109 { 110 const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4, 111 MON_5, MON_6, MON_7, MON_8, MON_9, MON_10, 112 MON_11, MON_12 }; 113 const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 114 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 115 ABMON_11, ABMON_12 }; 116 const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4, 117 ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10, 118 ALTMON_11, ALTMON_12 }; 119 int i; 120 121 /* 122 * Handle all possible month formats: abbrevation, full name, 123 * standalone name (without case ending). 124 */ 125 if (mb_cur_max == 1) { 126 if (cmonths == NULL) { 127 cmonths = sort_malloc(sizeof(struct cmonth) * 12); 128 for (i = 0; i < 12; i++) { 129 if (!populate_cmonth(&cmonths[i].mon, 130 mon_item[i], i)) 131 continue; 132 if (!populate_cmonth(&cmonths[i].ab, 133 ab_item[i], i)) 134 continue; 135 if (!populate_cmonth(&cmonths[i].alt, 136 alt_item[i], i)) 137 continue; 138 } 139 } 140 141 } else { 142 if (wmonths == NULL) { 143 wmonths = sort_malloc(sizeof(struct wmonth) * 12); 144 for (i = 0; i < 12; i++) { 145 if (!populate_wmonth(&wmonths[i].mon, 146 mon_item[i], i)) 147 continue; 148 if (!populate_wmonth(&wmonths[i].ab, 149 ab_item[i], i)) 150 continue; 151 if (!populate_wmonth(&wmonths[i].alt, 152 alt_item[i], i)) 153 continue; 154 } 155 } 156 } 157 } 158 159 /* 160 * Compare two wide-character strings 161 */ 162 static int 163 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 164 { 165 int ret; 166 167 errno = 0; 168 ret = wcscoll(s1, s2); 169 if (errno == EILSEQ) { 170 errno = 0; 171 ret = wcscmp(s1, s2); 172 if (errno != 0) { 173 for (size_t i = 0; ; ++i) { 174 wchar_t c1 = s1[i]; 175 wchar_t c2 = s2[i]; 176 if (c1 == L'\0') 177 return ((c2 == L'\0') ? 0 : -1); 178 if (c2 == L'\0') 179 return (+1); 180 if (c1 == c2) 181 continue; 182 return ((int)(c1 - c2)); 183 } 184 } 185 } 186 return (ret); 187 } 188 189 /* counterparts of wcs functions */ 190 191 void 192 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 193 { 194 195 if (mb_cur_max == 1) 196 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix); 197 else 198 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix); 199 } 200 201 const void* bwsrawdata(const struct bwstring *bws) 202 { 203 204 return (bws->wdata.str); 205 } 206 207 size_t bwsrawlen(const struct bwstring *bws) 208 { 209 210 return ((mb_cur_max == 1) ? bws->cdata.len : 211 SIZEOF_WCHAR_STRING(bws->wdata.len)); 212 } 213 214 size_t 215 bws_memsize(const struct bwstring *bws) 216 { 217 218 return ((mb_cur_max == 1) ? 219 (bws->cdata.len + 2 + sizeof(struct bwstring)) : 220 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring))); 221 } 222 223 void 224 bws_setlen(struct bwstring *bws, size_t newlen) 225 { 226 227 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len && 228 newlen <= bws->cdata.len) { 229 bws->cdata.len = newlen; 230 bws->cdata.str[newlen] = '\0'; 231 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) { 232 bws->wdata.len = newlen; 233 bws->wdata.str[newlen] = L'\0'; 234 } 235 } 236 237 /* 238 * Allocate a new binary string of specified size 239 */ 240 struct bwstring * 241 bwsalloc(size_t sz) 242 { 243 struct bwstring *ret; 244 245 if (mb_cur_max == 1) { 246 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 247 ret->cdata.len = sz; 248 ret->cdata.str[sz] = '\0'; 249 } else { 250 ret = sort_malloc( 251 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); 252 ret->wdata.len = sz; 253 ret->wdata.str[sz] = L'\0'; 254 } 255 256 return (ret); 257 } 258 259 /* 260 * Create a copy of binary string. 261 * New string size equals the length of the old string. 262 */ 263 struct bwstring * 264 bwsdup(const struct bwstring *s) 265 { 266 267 if (s == NULL) 268 return (NULL); 269 else { 270 struct bwstring *ret = bwsalloc(BWSLEN(s)); 271 272 if (mb_cur_max == 1) 273 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len)); 274 else 275 memcpy(ret->wdata.str, s->wdata.str, 276 SIZEOF_WCHAR_STRING(s->wdata.len)); 277 278 return (ret); 279 } 280 } 281 282 /* 283 * Create a new binary string from a wide character buffer. 284 */ 285 struct bwstring * 286 bwssbdup(const wchar_t *str, size_t len) 287 { 288 289 if (str == NULL) 290 return ((len == 0) ? bwsalloc(0) : NULL); 291 else { 292 struct bwstring *ret; 293 294 ret = bwsalloc(len); 295 296 if (mb_cur_max == 1) 297 for (size_t i = 0; i < len; ++i) 298 ret->cdata.str[i] = (char)str[i]; 299 else 300 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len)); 301 302 return (ret); 303 } 304 } 305 306 /* 307 * Create a new binary string from a raw binary buffer. 308 */ 309 struct bwstring * 310 bwscsbdup(const unsigned char *str, size_t len) 311 { 312 struct bwstring *ret; 313 314 ret = bwsalloc(len); 315 316 if (str) { 317 if (mb_cur_max == 1) 318 memcpy(ret->cdata.str, str, len); 319 else { 320 mbstate_t mbs; 321 const char *s; 322 size_t charlen, chars, cptr; 323 324 chars = 0; 325 cptr = 0; 326 s = (const char *) str; 327 328 memset(&mbs, 0, sizeof(mbs)); 329 330 while (cptr < len) { 331 size_t n = mb_cur_max; 332 333 if (n > len - cptr) 334 n = len - cptr; 335 charlen = mbrlen(s + cptr, n, &mbs); 336 switch (charlen) { 337 case 0: 338 /* FALLTHROUGH */ 339 case (size_t) -1: 340 /* FALLTHROUGH */ 341 case (size_t) -2: 342 ret->wdata.str[chars++] = 343 (unsigned char) s[cptr]; 344 ++cptr; 345 break; 346 default: 347 n = mbrtowc(ret->wdata.str + (chars++), 348 s + cptr, charlen, &mbs); 349 if ((n == (size_t)-1) || (n == (size_t)-2)) 350 /* NOTREACHED */ 351 err(2, "mbrtowc error"); 352 cptr += charlen; 353 } 354 } 355 356 ret->wdata.len = chars; 357 ret->wdata.str[ret->wdata.len] = L'\0'; 358 } 359 } 360 return (ret); 361 } 362 363 /* 364 * De-allocate object memory 365 */ 366 void 367 bwsfree(const struct bwstring *s) 368 { 369 370 if (s) 371 sort_free(s); 372 } 373 374 /* 375 * Copy content of src binary string to dst, 376 * with specified number of symbols to be copied. 377 * An offset value can be specified, from the start of src string. 378 * If the capacity of the dst string is not sufficient, 379 * then the data is truncated. 380 */ 381 struct bwstring * 382 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 383 size_t size) 384 { 385 386 if (offset >= BWSLEN(src)) { 387 bws_setlen(dst, 0); 388 } else { 389 size_t nums = BWSLEN(src) - offset; 390 391 if (nums > BWSLEN(dst)) 392 nums = BWSLEN(dst); 393 if (nums > size) 394 nums = size; 395 if (mb_cur_max == 1) { 396 memcpy(dst->cdata.str, src->cdata.str + offset, nums); 397 dst->cdata.len = nums; 398 dst->cdata.str[nums] = '\0'; 399 } else { 400 memcpy(dst->wdata.str, src->wdata.str + offset, 401 SIZEOF_WCHAR_STRING(nums)); 402 dst->wdata.len = nums; 403 dst->wdata.str[nums] = L'\0'; 404 } 405 } 406 return (dst); 407 } 408 409 /* 410 * Write binary string to the file. 411 * The output is ended either with '\n' (nl == true) 412 * or '\0' (nl == false). 413 */ 414 size_t 415 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 416 { 417 418 if (mb_cur_max == 1) { 419 size_t len = bws->cdata.len; 420 421 if (!zero_ended) { 422 bws->cdata.str[len] = '\n'; 423 424 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 425 err(2, NULL); 426 427 bws->cdata.str[len] = '\0'; 428 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) 429 err(2, NULL); 430 431 return (len + 1); 432 433 } else { 434 wchar_t eols; 435 size_t printed = 0; 436 437 eols = zero_ended ? btowc('\0') : btowc('\n'); 438 439 while (printed < BWSLEN(bws)) { 440 const wchar_t *s = bws->wdata.str + printed; 441 442 if (*s == L'\0') { 443 int nums; 444 445 nums = fwprintf(f, L"%lc", *s); 446 447 if (nums != 1) 448 err(2, NULL); 449 ++printed; 450 } else { 451 int nums; 452 453 nums = fwprintf(f, L"%ls", s); 454 455 if (nums < 1) 456 err(2, NULL); 457 printed += nums; 458 } 459 } 460 fwprintf(f, L"%lc", eols); 461 return (printed + 1); 462 } 463 } 464 465 int 466 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 467 size_t offset, size_t len) 468 { 469 size_t cmp_len, len1, len2; 470 int res; 471 472 len1 = BWSLEN(bws1); 473 len2 = BWSLEN(bws2); 474 475 if (len1 <= offset) { 476 return ((len2 <= offset) ? 0 : -1); 477 } else { 478 if (len2 <= offset) 479 return (+1); 480 else { 481 len1 -= offset; 482 len2 -= offset; 483 484 cmp_len = len1; 485 486 if (len2 < cmp_len) 487 cmp_len = len2; 488 489 if (len < cmp_len) 490 cmp_len = len; 491 492 if (mb_cur_max == 1) { 493 const char *s1, *s2; 494 495 s1 = bws1->cdata.str + offset; 496 s2 = bws2->cdata.str + offset; 497 498 res = memcmp(s1, s2, cmp_len); 499 500 } else { 501 const wchar_t *s1, *s2; 502 503 s1 = bws1->wdata.str + offset; 504 s2 = bws2->wdata.str + offset; 505 506 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 507 } 508 } 509 } 510 511 if (res == 0) { 512 if (len1 < cmp_len && len1 < len2) 513 res = -1; 514 else if (len2 < cmp_len && len2 < len1) 515 res = +1; 516 } 517 518 return (res); 519 } 520 521 int 522 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 523 { 524 size_t len1, len2, cmp_len; 525 int res; 526 527 len1 = BWSLEN(bws1); 528 len2 = BWSLEN(bws2); 529 530 len1 -= offset; 531 len2 -= offset; 532 533 cmp_len = len1; 534 535 if (len2 < cmp_len) 536 cmp_len = len2; 537 538 res = bwsncmp(bws1, bws2, offset, cmp_len); 539 540 if (res == 0) { 541 if( len1 < len2) 542 res = -1; 543 else if (len2 < len1) 544 res = +1; 545 } 546 547 return (res); 548 } 549 550 int 551 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 552 { 553 wchar_t c1, c2; 554 size_t i; 555 556 for (i = 0; i < len; ++i) { 557 c1 = bws_get_iter_value(iter1); 558 c2 = bws_get_iter_value(iter2); 559 if (c1 != c2) 560 return (c1 - c2); 561 iter1 = bws_iterator_inc(iter1, 1); 562 iter2 = bws_iterator_inc(iter2, 1); 563 } 564 565 return (0); 566 } 567 568 int 569 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 570 { 571 size_t len1, len2; 572 573 len1 = BWSLEN(bws1); 574 len2 = BWSLEN(bws2); 575 576 if (len1 <= offset) 577 return ((len2 <= offset) ? 0 : -1); 578 else { 579 if (len2 <= offset) 580 return (+1); 581 else { 582 len1 -= offset; 583 len2 -= offset; 584 585 if (mb_cur_max == 1) { 586 const char *s1, *s2; 587 588 s1 = bws1->cdata.str + offset; 589 s2 = bws2->cdata.str + offset; 590 591 if (byte_sort) { 592 int res; 593 594 if (len1 > len2) { 595 res = memcmp(s1, s2, len2); 596 if (!res) 597 res = +1; 598 } else if (len1 < len2) { 599 res = memcmp(s1, s2, len1); 600 if (!res) 601 res = -1; 602 } else 603 res = memcmp(s1, s2, len1); 604 605 return (res); 606 607 } else { 608 int res; 609 size_t i, maxlen; 610 611 i = 0; 612 maxlen = len1; 613 614 if (maxlen > len2) 615 maxlen = len2; 616 617 while (i < maxlen) { 618 /* goto next non-zero part: */ 619 while ((i < maxlen) && 620 !s1[i] && !s2[i]) 621 ++i; 622 623 if (i >= maxlen) 624 break; 625 626 if (s1[i] == 0) { 627 if (s2[i] == 0) 628 /* NOTREACHED */ 629 err(2, "bwscoll error 01"); 630 else 631 return (-1); 632 } else if (s2[i] == 0) 633 return (+1); 634 635 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 636 if (res) 637 return (res); 638 639 while ((i < maxlen) && 640 s1[i] && s2[i]) 641 ++i; 642 643 if (i >= maxlen) 644 break; 645 646 if (s1[i] == 0) { 647 if (s2[i] == 0) { 648 ++i; 649 continue; 650 } else 651 return (-1); 652 } else if (s2[i] == 0) 653 return (+1); 654 else 655 /* NOTREACHED */ 656 err(2, "bwscoll error 02"); 657 } 658 659 if (len1 < len2) 660 return (-1); 661 else if (len1 > len2) 662 return (+1); 663 664 return (0); 665 } 666 } else { 667 const wchar_t *s1, *s2; 668 size_t i, maxlen; 669 int res; 670 671 s1 = bws1->wdata.str + offset; 672 s2 = bws2->wdata.str + offset; 673 674 i = 0; 675 maxlen = len1; 676 677 if (maxlen > len2) 678 maxlen = len2; 679 680 while (i < maxlen) { 681 682 /* goto next non-zero part: */ 683 while ((i < maxlen) && 684 !s1[i] && !s2[i]) 685 ++i; 686 687 if (i >= maxlen) 688 break; 689 690 if (s1[i] == 0) { 691 if (s2[i] == 0) 692 /* NOTREACHED */ 693 err(2, "bwscoll error 1"); 694 else 695 return (-1); 696 } else if (s2[i] == 0) 697 return (+1); 698 699 res = wide_str_coll(s1 + i, s2 + i); 700 if (res) 701 return (res); 702 703 while ((i < maxlen) && s1[i] && s2[i]) 704 ++i; 705 706 if (i >= maxlen) 707 break; 708 709 if (s1[i] == 0) { 710 if (s2[i] == 0) { 711 ++i; 712 continue; 713 } else 714 return (-1); 715 } else if (s2[i] == 0) 716 return (+1); 717 else 718 /* NOTREACHED */ 719 err(2, "bwscoll error 2"); 720 } 721 722 if (len1 < len2) 723 return (-1); 724 else if (len1 > len2) 725 return (+1); 726 727 return (0); 728 } 729 } 730 } 731 } 732 733 /* 734 * Correction of the system API 735 */ 736 double 737 bwstod(struct bwstring *s0, bool *empty) 738 { 739 double ret; 740 741 if (mb_cur_max == 1) { 742 char *end, *s; 743 char *ep; 744 745 s = s0->cdata.str; 746 end = s + s0->cdata.len; 747 ep = NULL; 748 749 while (isblank(*s) && s < end) 750 ++s; 751 752 if (!isprint(*s)) { 753 *empty = true; 754 return (0); 755 } 756 757 ret = strtod((char*)s, &ep); 758 if (ep == s) { 759 *empty = true; 760 return (0); 761 } 762 } else { 763 wchar_t *end, *ep, *s; 764 765 s = s0->wdata.str; 766 end = s + s0->wdata.len; 767 ep = NULL; 768 769 while (iswblank(*s) && s < end) 770 ++s; 771 772 if (!iswprint(*s)) { 773 *empty = true; 774 return (0); 775 } 776 777 ret = wcstod(s, &ep); 778 if (ep == s) { 779 *empty = true; 780 return (0); 781 } 782 } 783 784 *empty = false; 785 return (ret); 786 } 787 788 /* 789 * A helper function for monthcoll. If a line matches 790 * a month name, it returns (number of the month - 1), 791 * while if there is no match, it just return -1. 792 */ 793 794 int 795 bws_month_score(const struct bwstring *s0) 796 { 797 798 if (mb_cur_max == 1) { 799 const char *end, *s; 800 801 s = s0->cdata.str; 802 end = s + s0->cdata.len; 803 804 while (isblank(*s) && s < end) 805 ++s; 806 807 for (int i = 11; i >= 0; --i) { 808 if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon))) 809 return (i); 810 if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab))) 811 return (i); 812 if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt))) 813 return (i); 814 } 815 816 } else { 817 const wchar_t *end, *s; 818 819 s = s0->wdata.str; 820 end = s + s0->wdata.len; 821 822 while (iswblank(*s) && s < end) 823 ++s; 824 825 for (int i = 11; i >= 0; --i) { 826 if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab))) 827 return (i); 828 if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon))) 829 return (i); 830 if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt))) 831 return (i); 832 } 833 } 834 835 return (-1); 836 } 837 838 /* 839 * Rips out leading blanks (-b). 840 */ 841 struct bwstring * 842 ignore_leading_blanks(struct bwstring *str) 843 { 844 845 if (mb_cur_max == 1) { 846 char *dst, *end, *src; 847 848 src = str->cdata.str; 849 dst = src; 850 end = src + str->cdata.len; 851 852 while (src < end && isblank(*src)) 853 ++src; 854 855 if (src != dst) { 856 size_t newlen; 857 858 newlen = BWSLEN(str) - (src - dst); 859 860 while (src < end) { 861 *dst = *src; 862 ++dst; 863 ++src; 864 } 865 bws_setlen(str, newlen); 866 } 867 } else { 868 wchar_t *dst, *end, *src; 869 870 src = str->wdata.str; 871 dst = src; 872 end = src + str->wdata.len; 873 874 while (src < end && iswblank(*src)) 875 ++src; 876 877 if (src != dst) { 878 879 size_t newlen = BWSLEN(str) - (src - dst); 880 881 while (src < end) { 882 *dst = *src; 883 ++dst; 884 ++src; 885 } 886 bws_setlen(str, newlen); 887 888 } 889 } 890 return (str); 891 } 892 893 /* 894 * Rips out nonprinting characters (-i). 895 */ 896 struct bwstring * 897 ignore_nonprinting(struct bwstring *str) 898 { 899 size_t newlen = BWSLEN(str); 900 901 if (mb_cur_max == 1) { 902 char *dst, *end, *src; 903 char c; 904 905 src = str->cdata.str; 906 dst = src; 907 end = src + str->cdata.len; 908 909 while (src < end) { 910 c = *src; 911 if (isprint(c)) { 912 *dst = c; 913 ++dst; 914 ++src; 915 } else { 916 ++src; 917 --newlen; 918 } 919 } 920 } else { 921 wchar_t *dst, *end, *src; 922 wchar_t c; 923 924 src = str->wdata.str; 925 dst = src; 926 end = src + str->wdata.len; 927 928 while (src < end) { 929 c = *src; 930 if (iswprint(c)) { 931 *dst = c; 932 ++dst; 933 ++src; 934 } else { 935 ++src; 936 --newlen; 937 } 938 } 939 } 940 bws_setlen(str, newlen); 941 942 return (str); 943 } 944 945 /* 946 * Rips out any characters that are not alphanumeric characters 947 * nor blanks (-d). 948 */ 949 struct bwstring * 950 dictionary_order(struct bwstring *str) 951 { 952 size_t newlen = BWSLEN(str); 953 954 if (mb_cur_max == 1) { 955 char *dst, *end, *src; 956 char c; 957 958 src = str->cdata.str; 959 dst = src; 960 end = src + str->cdata.len; 961 962 while (src < end) { 963 c = *src; 964 if (isalnum(c) || isblank(c)) { 965 *dst = c; 966 ++dst; 967 ++src; 968 } else { 969 ++src; 970 --newlen; 971 } 972 } 973 } else { 974 wchar_t *dst, *end, *src; 975 wchar_t c; 976 977 src = str->wdata.str; 978 dst = src; 979 end = src + str->wdata.len; 980 981 while (src < end) { 982 c = *src; 983 if (iswalnum(c) || iswblank(c)) { 984 *dst = c; 985 ++dst; 986 ++src; 987 } else { 988 ++src; 989 --newlen; 990 } 991 } 992 } 993 bws_setlen(str, newlen); 994 995 return (str); 996 } 997 998 /* 999 * Converts string to lower case(-f). 1000 */ 1001 struct bwstring * 1002 ignore_case(struct bwstring *str) 1003 { 1004 1005 if (mb_cur_max == 1) { 1006 char *end, *s; 1007 1008 s = str->cdata.str; 1009 end = s + str->cdata.len; 1010 1011 while (s < end) { 1012 *s = toupper(*s); 1013 ++s; 1014 } 1015 } else { 1016 wchar_t *end, *s; 1017 1018 s = str->wdata.str; 1019 end = s + str->wdata.len; 1020 1021 while (s < end) { 1022 *s = towupper(*s); 1023 ++s; 1024 } 1025 } 1026 return (str); 1027 } 1028 1029 void 1030 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1031 { 1032 1033 if (mb_cur_max == 1) 1034 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str); 1035 else 1036 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str); 1037 } 1038