1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <math.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <wchar.h>
39 #include <wctype.h>
40
41 #include "bwstring.h"
42 #include "sort.h"
43
/*
 * When true, bwscoll() orders single-byte strings by raw byte value
 * (memcmp) instead of going through strcoll().
 */
bool byte_sort;
45
/*
 * Upper-cased wide-character names of one month, filled in by
 * initialise_months() when mb_cur_max != 1.
 */
struct wmonth {
	wchar_t *mon;	/* full name (MON_n) */
	wchar_t *ab;	/* abbreviated name (ABMON_n) */
	wchar_t *alt;	/* ALTMON_n name; NULL when ALTMON_1 is not defined */
};
51
/*
 * Upper-cased single-byte names of one month, filled in by
 * initialise_months() when mb_cur_max == 1.
 */
struct cmonth {
	char *mon;	/* full name (MON_n) */
	char *ab;	/* abbreviated name (ABMON_n) */
	char *alt;	/* ALTMON_n name; NULL when ALTMON_1 is not defined */
};
57
/*
 * Month-name tables of 12 entries each, allocated on the first call to
 * initialise_months(); only the one matching mb_cur_max is created.
 */
static struct wmonth *wmonths;	/* used when mb_cur_max != 1 */
static struct cmonth *cmonths;	/* used when mb_cur_max == 1 */
60
61 static int
populate_cmonth(char ** field,const nl_item item,int idx)62 populate_cmonth(char **field, const nl_item item, int idx)
63 {
64 char *tmp, *m;
65 size_t i, len;
66
67 tmp = nl_langinfo(item);
68 if (debug_sort)
69 printf("month[%d]=%s\n", idx, tmp);
70 if (*tmp == '\0')
71 return (0);
72 m = sort_strdup(tmp);
73 len = strlen(tmp);
74 for (i = 0; i < len; i++)
75 m[i] = toupper(m[i]);
76 *field = m;
77
78 return (1);
79 }
80
81 static int
populate_wmonth(wchar_t ** field,const nl_item item,int idx)82 populate_wmonth(wchar_t **field, const nl_item item, int idx)
83 {
84 wchar_t *m;
85 char *tmp;
86 size_t i, len;
87
88 tmp = nl_langinfo(item);
89 if (debug_sort)
90 printf("month[%d]=%s\n", idx, tmp);
91 if (*tmp == '\0')
92 return (0);
93 len = strlen(tmp);
94 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
95 if (mbstowcs(m, tmp, len) == ((size_t) - 1)) {
96 sort_free(m);
97 return (0);
98 }
99 m[len] = L'\0';
100 for (i = 0; i < len; i++)
101 m[i] = towupper(m[i]);
102 *field = m;
103
104 return (1);
105 }
106
107 void
initialise_months(void)108 initialise_months(void)
109 {
110 const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4,
111 MON_5, MON_6, MON_7, MON_8, MON_9, MON_10,
112 MON_11, MON_12 };
113 const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
114 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
115 ABMON_11, ABMON_12 };
116 #ifdef ALTMON_1
117 const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4,
118 ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10,
119 ALTMON_11, ALTMON_12 };
120 #endif
121 int i;
122
123 /*
124 * Handle all possible month formats: abbrevation, full name,
125 * standalone name (without case ending).
126 */
127 if (mb_cur_max == 1) {
128 if (cmonths == NULL) {
129 cmonths = sort_malloc(sizeof(struct cmonth) * 12);
130 for (i = 0; i < 12; i++) {
131 if (!populate_cmonth(&cmonths[i].mon,
132 mon_item[i], i))
133 continue;
134 if (!populate_cmonth(&cmonths[i].ab,
135 ab_item[i], i))
136 continue;
137 #ifdef ALTMON_1
138 if (!populate_cmonth(&cmonths[i].alt,
139 alt_item[i], i))
140 continue;
141 #else
142 cmonths[i].alt = NULL;
143 #endif
144 }
145 }
146
147 } else {
148 if (wmonths == NULL) {
149 wmonths = sort_malloc(sizeof(struct wmonth) * 12);
150 for (i = 0; i < 12; i++) {
151 if (!populate_wmonth(&wmonths[i].mon,
152 mon_item[i], i))
153 continue;
154 if (!populate_wmonth(&wmonths[i].ab,
155 ab_item[i], i))
156 continue;
157 #ifdef ALTMON_1
158 if (!populate_wmonth(&wmonths[i].alt,
159 alt_item[i], i))
160 continue;
161 #else
162 wmonths[i].alt = NULL;
163 #endif
164 }
165 }
166 }
167 }
168
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the result of wcscmp()
 * is used instead, and should that leave errno set as well, the strings
 * are compared element by element.  errno is cleared before each attempt.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int order;

	errno = 0;
	order = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (order);

	errno = 0;
	order = wcscmp(s1, s2);
	if (errno == 0)
		return (order);

	/* Last resort: plain element-wise comparison. */
	for (const wchar_t *p = s1, *q = s2; ; ++p, ++q) {
		if (*p == L'\0')
			return ((*q == L'\0') ? 0 : -1);
		if (*q == L'\0')
			return (+1);
		if (*p != *q)
			return ((int)(*p - *q));
	}
}
198
199 /* counterparts of wcs functions */
200
201 void
bwsprintf(FILE * f,struct bwstring * bws,const char * prefix,const char * suffix)202 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
203 {
204
205 if (mb_cur_max == 1)
206 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
207 else
208 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
209 }
210
bwsrawdata(const struct bwstring * bws)211 const void* bwsrawdata(const struct bwstring *bws)
212 {
213
214 return (bws->wdata.str);
215 }
216
bwsrawlen(const struct bwstring * bws)217 size_t bwsrawlen(const struct bwstring *bws)
218 {
219
220 return ((mb_cur_max == 1) ? bws->cdata.len :
221 SIZEOF_WCHAR_STRING(bws->wdata.len));
222 }
223
224 size_t
bws_memsize(const struct bwstring * bws)225 bws_memsize(const struct bwstring *bws)
226 {
227
228 return ((mb_cur_max == 1) ?
229 (bws->cdata.len + 2 + sizeof(struct bwstring)) :
230 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
231 }
232
233 void
bws_setlen(struct bwstring * bws,size_t newlen)234 bws_setlen(struct bwstring *bws, size_t newlen)
235 {
236
237 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
238 newlen <= bws->cdata.len) {
239 bws->cdata.len = newlen;
240 bws->cdata.str[newlen] = '\0';
241 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
242 bws->wdata.len = newlen;
243 bws->wdata.str[newlen] = L'\0';
244 }
245 }
246
247 /*
248 * Allocate a new binary string of specified size
249 */
250 struct bwstring *
bwsalloc(size_t sz)251 bwsalloc(size_t sz)
252 {
253 struct bwstring *ret;
254
255 if (mb_cur_max == 1) {
256 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
257 ret->cdata.len = sz;
258 ret->cdata.str[sz] = '\0';
259 } else {
260 ret = sort_malloc(
261 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
262 ret->wdata.len = sz;
263 ret->wdata.str[sz] = L'\0';
264 }
265
266 return (ret);
267 }
268
269 /*
270 * Create a copy of binary string.
271 * New string size equals the length of the old string.
272 */
273 struct bwstring *
bwsdup(const struct bwstring * s)274 bwsdup(const struct bwstring *s)
275 {
276
277 if (s == NULL)
278 return (NULL);
279 else {
280 struct bwstring *ret = bwsalloc(BWSLEN(s));
281
282 if (mb_cur_max == 1)
283 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
284 else
285 memcpy(ret->wdata.str, s->wdata.str,
286 SIZEOF_WCHAR_STRING(s->wdata.len));
287
288 return (ret);
289 }
290 }
291
292 /*
293 * Create a new binary string from a wide character buffer.
294 */
295 struct bwstring *
bwssbdup(const wchar_t * str,size_t len)296 bwssbdup(const wchar_t *str, size_t len)
297 {
298
299 if (str == NULL)
300 return ((len == 0) ? bwsalloc(0) : NULL);
301 else {
302 struct bwstring *ret;
303
304 ret = bwsalloc(len);
305
306 if (mb_cur_max == 1)
307 for (size_t i = 0; i < len; ++i)
308 ret->cdata.str[i] = (char)str[i];
309 else
310 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
311
312 return (ret);
313 }
314 }
315
316 /*
317 * Create a new binary string from a raw binary buffer.
318 */
319 struct bwstring *
bwscsbdup(const unsigned char * str,size_t len)320 bwscsbdup(const unsigned char *str, size_t len)
321 {
322 struct bwstring *ret;
323
324 ret = bwsalloc(len);
325
326 if (str) {
327 if (mb_cur_max == 1)
328 memcpy(ret->cdata.str, str, len);
329 else {
330 mbstate_t mbs;
331 const char *s;
332 size_t charlen, chars, cptr;
333
334 chars = 0;
335 cptr = 0;
336 s = (const char *) str;
337
338 memset(&mbs, 0, sizeof(mbs));
339
340 while (cptr < len) {
341 size_t n = mb_cur_max;
342
343 if (n > len - cptr)
344 n = len - cptr;
345 charlen = mbrlen(s + cptr, n, &mbs);
346 switch (charlen) {
347 case 0:
348 /* FALLTHROUGH */
349 case (size_t) -1:
350 /* FALLTHROUGH */
351 case (size_t) -2:
352 ret->wdata.str[chars++] =
353 (unsigned char) s[cptr];
354 ++cptr;
355 break;
356 default:
357 n = mbrtowc(ret->wdata.str + (chars++),
358 s + cptr, charlen, &mbs);
359 if ((n == (size_t)-1) || (n == (size_t)-2))
360 /* NOTREACHED */
361 err(2, "mbrtowc error");
362 cptr += charlen;
363 }
364 }
365
366 ret->wdata.len = chars;
367 ret->wdata.str[ret->wdata.len] = L'\0';
368 }
369 }
370 return (ret);
371 }
372
/*
 * Release a string.  A NULL pointer is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;

	sort_free(s);
}
383
384 /*
385 * Copy content of src binary string to dst,
386 * with specified number of symbols to be copied.
387 * An offset value can be specified, from the start of src string.
388 * If the capacity of the dst string is not sufficient,
389 * then the data is truncated.
390 */
391 struct bwstring *
bwsnocpy(struct bwstring * dst,const struct bwstring * src,size_t offset,size_t size)392 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
393 size_t size)
394 {
395
396 if (offset >= BWSLEN(src)) {
397 bws_setlen(dst, 0);
398 } else {
399 size_t nums = BWSLEN(src) - offset;
400
401 if (nums > BWSLEN(dst))
402 nums = BWSLEN(dst);
403 if (nums > size)
404 nums = size;
405 if (mb_cur_max == 1) {
406 memcpy(dst->cdata.str, src->cdata.str + offset, nums);
407 dst->cdata.len = nums;
408 dst->cdata.str[nums] = '\0';
409 } else {
410 memcpy(dst->wdata.str, src->wdata.str + offset,
411 SIZEOF_WCHAR_STRING(nums));
412 dst->wdata.len = nums;
413 dst->wdata.str[nums] = L'\0';
414 }
415 }
416 return (dst);
417 }
418
419 /*
420 * Write binary string to the file.
421 * The output is ended either with '\n' (nl == true)
422 * or '\0' (nl == false).
423 */
424 size_t
bwsfwrite(struct bwstring * bws,FILE * f,bool zero_ended)425 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
426 {
427
428 if (mb_cur_max == 1) {
429 size_t len = bws->cdata.len;
430
431 if (!zero_ended) {
432 bws->cdata.str[len] = '\n';
433
434 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
435 err(2, NULL);
436
437 bws->cdata.str[len] = '\0';
438 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
439 err(2, NULL);
440
441 return (len + 1);
442
443 } else {
444 wchar_t eols;
445 size_t printed = 0;
446
447 eols = zero_ended ? btowc('\0') : btowc('\n');
448
449 while (printed < BWSLEN(bws)) {
450 const wchar_t *s = bws->wdata.str + printed;
451
452 if (*s == L'\0') {
453 int nums;
454
455 nums = fwprintf(f, L"%lc", *s);
456
457 if (nums != 1)
458 err(2, NULL);
459 ++printed;
460 } else {
461 int nums;
462
463 nums = fwprintf(f, L"%ls", s);
464
465 if (nums < 1)
466 err(2, NULL);
467 printed += nums;
468 }
469 }
470 fwprintf(f, L"%lc", eols);
471 return (printed + 1);
472 }
473 }
474
475 int
bwsncmp(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset,size_t len)476 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
477 size_t offset, size_t len)
478 {
479 size_t cmp_len, len1, len2;
480 int res;
481
482 len1 = BWSLEN(bws1);
483 len2 = BWSLEN(bws2);
484
485 if (len1 <= offset) {
486 return ((len2 <= offset) ? 0 : -1);
487 } else {
488 if (len2 <= offset)
489 return (+1);
490 else {
491 len1 -= offset;
492 len2 -= offset;
493
494 cmp_len = len1;
495
496 if (len2 < cmp_len)
497 cmp_len = len2;
498
499 if (len < cmp_len)
500 cmp_len = len;
501
502 if (mb_cur_max == 1) {
503 const char *s1, *s2;
504
505 s1 = bws1->cdata.str + offset;
506 s2 = bws2->cdata.str + offset;
507
508 res = memcmp(s1, s2, cmp_len);
509
510 } else {
511 const wchar_t *s1, *s2;
512
513 s1 = bws1->wdata.str + offset;
514 s2 = bws2->wdata.str + offset;
515
516 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
517 }
518 }
519 }
520
521 if (res == 0) {
522 if (len1 < cmp_len && len1 < len2)
523 res = -1;
524 else if (len2 < cmp_len && len2 < len1)
525 res = +1;
526 }
527
528 return (res);
529 }
530
/*
 * Compare two strings from offset onwards with bwsncmp(); when the common
 * part is equal, the shorter remainder sorts first.
 *
 * Returns a negative value, 0 or a positive value.  Two strings that both
 * end at or before offset compare equal.
 */
int
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
	size_t len1, len2, cmp_len;
	int res;

	len1 = BWSLEN(bws1);
	len2 = BWSLEN(bws2);

	/*
	 * Clamp instead of subtracting blindly: with offset beyond the end
	 * of a string the unsigned subtraction would wrap around and make
	 * the length tie-break below compare meaningless values.
	 */
	len1 = (len1 > offset) ? (len1 - offset) : 0;
	len2 = (len2 > offset) ? (len2 - offset) : 0;

	cmp_len = len1;

	if (len2 < cmp_len)
		cmp_len = len2;

	res = bwsncmp(bws1, bws2, offset, cmp_len);

	if (res == 0) {
		if (len1 < len2)
			res = -1;
		else if (len2 < len1)
			res = +1;
	}

	return (res);
}
559
560 int
bws_iterator_cmp(bwstring_iterator iter1,bwstring_iterator iter2,size_t len)561 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
562 {
563 wchar_t c1, c2;
564 size_t i;
565
566 for (i = 0; i < len; ++i) {
567 c1 = bws_get_iter_value(iter1);
568 c2 = bws_get_iter_value(iter2);
569 if (c1 != c2)
570 return (c1 - c2);
571 iter1 = bws_iterator_inc(iter1, 1);
572 iter2 = bws_iterator_inc(iter2, 1);
573 }
574
575 return (0);
576 }
577
578 int
bwscoll(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset)579 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
580 {
581 size_t len1, len2;
582
583 len1 = BWSLEN(bws1);
584 len2 = BWSLEN(bws2);
585
586 if (len1 <= offset)
587 return ((len2 <= offset) ? 0 : -1);
588 else {
589 if (len2 <= offset)
590 return (+1);
591 else {
592 len1 -= offset;
593 len2 -= offset;
594
595 if (mb_cur_max == 1) {
596 const char *s1, *s2;
597
598 s1 = bws1->cdata.str + offset;
599 s2 = bws2->cdata.str + offset;
600
601 if (byte_sort) {
602 int res;
603
604 if (len1 > len2) {
605 res = memcmp(s1, s2, len2);
606 if (!res)
607 res = +1;
608 } else if (len1 < len2) {
609 res = memcmp(s1, s2, len1);
610 if (!res)
611 res = -1;
612 } else
613 res = memcmp(s1, s2, len1);
614
615 return (res);
616
617 } else {
618 int res;
619 size_t i, maxlen;
620
621 i = 0;
622 maxlen = len1;
623
624 if (maxlen > len2)
625 maxlen = len2;
626
627 while (i < maxlen) {
628 /* goto next non-zero part: */
629 while ((i < maxlen) &&
630 !s1[i] && !s2[i])
631 ++i;
632
633 if (i >= maxlen)
634 break;
635
636 if (s1[i] == 0) {
637 if (s2[i] == 0)
638 /* NOTREACHED */
639 err(2, "bwscoll error 01");
640 else
641 return (-1);
642 } else if (s2[i] == 0)
643 return (+1);
644
645 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
646 if (res)
647 return (res);
648
649 while ((i < maxlen) &&
650 s1[i] && s2[i])
651 ++i;
652
653 if (i >= maxlen)
654 break;
655
656 if (s1[i] == 0) {
657 if (s2[i] == 0) {
658 ++i;
659 continue;
660 } else
661 return (-1);
662 } else if (s2[i] == 0)
663 return (+1);
664 else
665 /* NOTREACHED */
666 err(2, "bwscoll error 02");
667 }
668
669 if (len1 < len2)
670 return (-1);
671 else if (len1 > len2)
672 return (+1);
673
674 return (0);
675 }
676 } else {
677 const wchar_t *s1, *s2;
678 size_t i, maxlen;
679 int res;
680
681 s1 = bws1->wdata.str + offset;
682 s2 = bws2->wdata.str + offset;
683
684 i = 0;
685 maxlen = len1;
686
687 if (maxlen > len2)
688 maxlen = len2;
689
690 while (i < maxlen) {
691
692 /* goto next non-zero part: */
693 while ((i < maxlen) &&
694 !s1[i] && !s2[i])
695 ++i;
696
697 if (i >= maxlen)
698 break;
699
700 if (s1[i] == 0) {
701 if (s2[i] == 0)
702 /* NOTREACHED */
703 err(2, "bwscoll error 1");
704 else
705 return (-1);
706 } else if (s2[i] == 0)
707 return (+1);
708
709 res = wide_str_coll(s1 + i, s2 + i);
710 if (res)
711 return (res);
712
713 while ((i < maxlen) && s1[i] && s2[i])
714 ++i;
715
716 if (i >= maxlen)
717 break;
718
719 if (s1[i] == 0) {
720 if (s2[i] == 0) {
721 ++i;
722 continue;
723 } else
724 return (-1);
725 } else if (s2[i] == 0)
726 return (+1);
727 else
728 /* NOTREACHED */
729 err(2, "bwscoll error 2");
730 }
731
732 if (len1 < len2)
733 return (-1);
734 else if (len1 > len2)
735 return (+1);
736
737 return (0);
738 }
739 }
740 }
741 }
742
743 /*
744 * Correction of the system API
745 */
746 double
bwstod(struct bwstring * s0,bool * empty)747 bwstod(struct bwstring *s0, bool *empty)
748 {
749 double ret;
750
751 if (mb_cur_max == 1) {
752 char *end, *s;
753 char *ep;
754
755 s = s0->cdata.str;
756 end = s + s0->cdata.len;
757 ep = NULL;
758
759 while (isblank(*s) && s < end)
760 ++s;
761
762 if (!isprint(*s)) {
763 *empty = true;
764 return (0);
765 }
766
767 ret = strtod((char*)s, &ep);
768 if (ep == s) {
769 *empty = true;
770 return (0);
771 }
772 } else {
773 wchar_t *end, *ep, *s;
774
775 s = s0->wdata.str;
776 end = s + s0->wdata.len;
777 ep = NULL;
778
779 while (iswblank(*s) && s < end)
780 ++s;
781
782 if (!iswprint(*s)) {
783 *empty = true;
784 return (0);
785 }
786
787 ret = wcstod(s, &ep);
788 if (ep == s) {
789 *empty = true;
790 return (0);
791 }
792 }
793
794 *empty = false;
795 return (ret);
796 }
797
798 /*
799 * A helper function for monthcoll. If a line matches
800 * a month name, it returns (number of the month - 1),
801 * while if there is no match, it just return -1.
802 */
803
804 int
bws_month_score(const struct bwstring * s0)805 bws_month_score(const struct bwstring *s0)
806 {
807
808 if (mb_cur_max == 1) {
809 const char *end, *s;
810
811 s = s0->cdata.str;
812 end = s + s0->cdata.len;
813
814 while (isblank(*s) && s < end)
815 ++s;
816
817 for (int i = 11; i >= 0; --i) {
818 if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon)))
819 return (i);
820 if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab)))
821 return (i);
822 if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt)))
823 return (i);
824 }
825
826 } else {
827 const wchar_t *end, *s;
828
829 s = s0->wdata.str;
830 end = s + s0->wdata.len;
831
832 while (iswblank(*s) && s < end)
833 ++s;
834
835 for (int i = 11; i >= 0; --i) {
836 if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab)))
837 return (i);
838 if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon)))
839 return (i);
840 if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt)))
841 return (i);
842 }
843 }
844
845 return (-1);
846 }
847
848 /*
849 * Rips out leading blanks (-b).
850 */
851 struct bwstring *
ignore_leading_blanks(struct bwstring * str)852 ignore_leading_blanks(struct bwstring *str)
853 {
854
855 if (mb_cur_max == 1) {
856 char *dst, *end, *src;
857
858 src = str->cdata.str;
859 dst = src;
860 end = src + str->cdata.len;
861
862 while (src < end && isblank(*src))
863 ++src;
864
865 if (src != dst) {
866 size_t newlen;
867
868 newlen = BWSLEN(str) - (src - dst);
869
870 while (src < end) {
871 *dst = *src;
872 ++dst;
873 ++src;
874 }
875 bws_setlen(str, newlen);
876 }
877 } else {
878 wchar_t *dst, *end, *src;
879
880 src = str->wdata.str;
881 dst = src;
882 end = src + str->wdata.len;
883
884 while (src < end && iswblank(*src))
885 ++src;
886
887 if (src != dst) {
888
889 size_t newlen = BWSLEN(str) - (src - dst);
890
891 while (src < end) {
892 *dst = *src;
893 ++dst;
894 ++src;
895 }
896 bws_setlen(str, newlen);
897
898 }
899 }
900 return (str);
901 }
902
903 /*
904 * Rips out nonprinting characters (-i).
905 */
906 struct bwstring *
ignore_nonprinting(struct bwstring * str)907 ignore_nonprinting(struct bwstring *str)
908 {
909 size_t newlen = BWSLEN(str);
910
911 if (mb_cur_max == 1) {
912 char *dst, *end, *src;
913 char c;
914
915 src = str->cdata.str;
916 dst = src;
917 end = src + str->cdata.len;
918
919 while (src < end) {
920 c = *src;
921 if (isprint(c)) {
922 *dst = c;
923 ++dst;
924 ++src;
925 } else {
926 ++src;
927 --newlen;
928 }
929 }
930 } else {
931 wchar_t *dst, *end, *src;
932 wchar_t c;
933
934 src = str->wdata.str;
935 dst = src;
936 end = src + str->wdata.len;
937
938 while (src < end) {
939 c = *src;
940 if (iswprint(c)) {
941 *dst = c;
942 ++dst;
943 ++src;
944 } else {
945 ++src;
946 --newlen;
947 }
948 }
949 }
950 bws_setlen(str, newlen);
951
952 return (str);
953 }
954
955 /*
956 * Rips out any characters that are not alphanumeric characters
957 * nor blanks (-d).
958 */
959 struct bwstring *
dictionary_order(struct bwstring * str)960 dictionary_order(struct bwstring *str)
961 {
962 size_t newlen = BWSLEN(str);
963
964 if (mb_cur_max == 1) {
965 char *dst, *end, *src;
966 char c;
967
968 src = str->cdata.str;
969 dst = src;
970 end = src + str->cdata.len;
971
972 while (src < end) {
973 c = *src;
974 if (isalnum(c) || isblank(c)) {
975 *dst = c;
976 ++dst;
977 ++src;
978 } else {
979 ++src;
980 --newlen;
981 }
982 }
983 } else {
984 wchar_t *dst, *end, *src;
985 wchar_t c;
986
987 src = str->wdata.str;
988 dst = src;
989 end = src + str->wdata.len;
990
991 while (src < end) {
992 c = *src;
993 if (iswalnum(c) || iswblank(c)) {
994 *dst = c;
995 ++dst;
996 ++src;
997 } else {
998 ++src;
999 --newlen;
1000 }
1001 }
1002 }
1003 bws_setlen(str, newlen);
1004
1005 return (str);
1006 }
1007
1008 /*
1009 * Converts string to lower case(-f).
1010 */
1011 struct bwstring *
ignore_case(struct bwstring * str)1012 ignore_case(struct bwstring *str)
1013 {
1014
1015 if (mb_cur_max == 1) {
1016 char *end, *s;
1017
1018 s = str->cdata.str;
1019 end = s + str->cdata.len;
1020
1021 while (s < end) {
1022 *s = toupper(*s);
1023 ++s;
1024 }
1025 } else {
1026 wchar_t *end, *s;
1027
1028 s = str->wdata.str;
1029 end = s + str->wdata.len;
1030
1031 while (s < end) {
1032 *s = towupper(*s);
1033 ++s;
1034 }
1035 }
1036 return (str);
1037 }
1038
1039 void
bws_disorder_warnx(struct bwstring * s,const char * fn,size_t pos)1040 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1041 {
1042
1043 if (mb_cur_max == 1)
1044 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
1045 else
1046 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);
1047 }
1048