1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 5 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 6 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 7 * at Electronni Visti IA, Kiev, Ukraine. 8 * All rights reserved. 9 * 10 * Copyright (c) 2011 The FreeBSD Foundation 11 * 12 * Portions of this software were developed by David Chisnall 13 * under sponsorship from the FreeBSD Foundation. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * Adapted to xlocale by John Marino <draco@marino.st> 37 */ 38 39 #include "namespace.h" 40 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <sys/mman.h> 44 45 #include <assert.h> 46 #include <ctype.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <wchar.h> 51 #include <errno.h> 52 #include <unistd.h> 53 #include <fcntl.h> 54 #include "un-namespace.h" 55 56 #include "collate.h" 57 #include "setlocale.h" 58 #include "ldpart.h" 59 #include "libc_private.h" 60 61 struct xlocale_collate __xlocale_global_collate = { 62 {{0}, "C"}, 1, 0, 0, 0 63 }; 64 65 struct xlocale_collate __xlocale_C_collate = { 66 {{0}, "C"}, 1, 0, 0, 0 67 }; 68 69 struct xlocale_collate __xlocale_POSIX_collate = { 70 {{0}, "POSIX"}, 1, 0, 0, 0 71 }; 72 73 struct xlocale_collate __xlocale_CUTF8_collate = { 74 {{0}, "C.UTF-8"}, 1, 0, 0, 0 75 }; 76 77 static int 78 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 79 80 static void 81 destruct_collate(void *t) 82 { 83 struct xlocale_collate *table = t; 84 if (table->map && (table->maplen > 0)) { 85 (void) munmap(table->map, table->maplen); 86 } 87 free(t); 88 } 89 90 void * 91 __collate_load(const char *encoding, __unused locale_t unused) 92 { 93 if (strcmp(encoding, "C") == 0) 94 return (&__xlocale_C_collate); 95 else if (strcmp(encoding, "POSIX") == 0) 96 return (&__xlocale_POSIX_collate); 97 else if (strcmp(encoding, "C.UTF-8") == 0) 98 return (&__xlocale_CUTF8_collate); 99 100 struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 101 1); 102 if (table == NULL) 103 return (NULL); 104 table->header.header.destructor = destruct_collate; 105 106 /* 107 * FIXME: Make sure that _LDP_CACHE is never returned. We 108 * should be doing the caching outside of this section. 109 */ 110 if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 111 xlocale_release(table); 112 return (NULL); 113 } 114 return (table); 115 } 116 117 /** 118 * Load the collation tables for the specified encoding into the global table. 119 */ 120 int 121 __collate_load_tables(const char *encoding) 122 { 123 124 return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); 125 } 126 127 static int 128 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 129 { 130 int i, chains, z; 131 char *buf; 132 char *TMP; 133 char *map; 134 collate_info_t *info; 135 struct stat sbuf; 136 int fd; 137 138 table->__collate_load_error = 1; 139 140 /* 'encoding' must be already checked. */ 141 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || 142 strncmp(encoding, "C.", 2) == 0) { 143 return (_LDP_CACHE); 144 } 145 146 if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) 147 return (_LDP_ERROR); 148 149 if ((fd = _open(buf, O_RDONLY | O_CLOEXEC)) < 0) { 150 free(buf); 151 return (_LDP_ERROR); 152 } 153 free(buf); 154 if (_fstat(fd, &sbuf) < 0) { 155 (void) _close(fd); 156 return (_LDP_ERROR); 157 } 158 if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN + 159 XLOCALE_DEF_VERSION_LEN + 160 sizeof (*info))) { 161 (void) _close(fd); 162 errno = EINVAL; 163 return (_LDP_ERROR); 164 } 165 map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 166 (void) _close(fd); 167 if ((TMP = map) == MAP_FAILED) { 168 return (_LDP_ERROR); 169 } 170 171 if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) { 172 (void) munmap(map, sbuf.st_size); 173 errno = EINVAL; 174 return (_LDP_ERROR); 175 } 176 TMP += COLLATE_FMT_VERSION_LEN; 177 strlcat(table->header.version, TMP, sizeof (table->header.version)); 178 TMP += XLOCALE_DEF_VERSION_LEN; 179 180 info = (void *)TMP; 181 TMP += sizeof (*info); 182 183 if ((info->directive_count < 1) || 184 (info->directive_count >= COLL_WEIGHTS_MAX) || 185 ((chains = info->chain_count) < 0)) { 186 (void) munmap(map, sbuf.st_size); 187 errno = EINVAL; 188 return (_LDP_ERROR); 189 } 190 191 i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 192 (sizeof (collate_chain_t) * chains) + 193 (sizeof (collate_large_t) * info->large_count); 194 for (z = 0; z < info->directive_count; z++) { 195 i += sizeof (collate_subst_t) * info->subst_count[z]; 196 } 197 if (i != (sbuf.st_size - (TMP - map))) { 198 (void) munmap(map, sbuf.st_size); 199 errno = EINVAL; 200 return (_LDP_ERROR); 201 } 202 203 if (table->map && (table->maplen > 0)) { 204 (void) munmap(table->map, table->maplen); 205 } 206 table->map = map; 207 table->maplen = sbuf.st_size; 208 table->info = info; 209 table->char_pri_table = (void *)TMP; 210 TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 211 212 for (z = 0; z < info->directive_count; z++) { 213 if (info->subst_count[z] > 0) { 214 table->subst_table[z] = (void *)TMP; 215 TMP += info->subst_count[z] * sizeof (collate_subst_t); 216 } else { 217 table->subst_table[z] = NULL; 218 } 219 } 220 221 if (chains > 0) { 222 table->chain_pri_table = (void *)TMP; 223 TMP += chains * sizeof (collate_chain_t); 224 } else 225 table->chain_pri_table = NULL; 226 if (info->large_count > 0) 227 table->large_pri_table = (void *)TMP; 228 else 229 table->large_pri_table = NULL; 230 231 table->__collate_load_error = 0; 232 return (_LDP_LOADED); 233 } 234 235 static const int32_t * 236 substsearch(struct xlocale_collate *table, const wchar_t key, int pass) 237 { 238 const collate_subst_t *p; 239 int n = table->info->subst_count[pass]; 240 241 if (n == 0) 242 return (NULL); 243 244 if (pass >= table->info->directive_count) 245 return (NULL); 246 247 if (!(key & COLLATE_SUBST_PRIORITY)) 248 return (NULL); 249 250 p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 251 assert(p->key == key); 252 253 return (p->pri); 254 } 255 256 static collate_chain_t * 257 chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 258 { 259 int low = 0; 260 int high = table->info->chain_count - 1; 261 int next, compar, l; 262 collate_chain_t *p; 263 collate_chain_t *tab = table->chain_pri_table; 264 265 if (high < 0) 266 return (NULL); 267 268 while (low <= high) { 269 next = (low + high) / 2; 270 p = tab + next; 271 compar = *key - *p->str; 272 if (compar == 0) { 273 l = wcsnlen(p->str, COLLATE_STR_LEN); 274 compar = wcsncmp(key, p->str, l); 275 if (compar == 0) { 276 *len = l; 277 return (p); 278 } 279 } 280 if (compar > 0) 281 low = next + 1; 282 else 283 high = next - 1; 284 } 285 return (NULL); 286 } 287 288 static collate_large_t * 289 largesearch(struct xlocale_collate *table, const wchar_t key) 290 { 291 int low = 0; 292 int high = table->info->large_count - 1; 293 int next, compar; 294 collate_large_t *p; 295 collate_large_t *tab = table->large_pri_table; 296 297 if (high < 0) 298 return (NULL); 299 300 while (low <= high) { 301 next = (low + high) / 2; 302 p = tab + next; 303 compar = key - p->val; 304 if (compar == 0) 305 return (p); 306 if (compar > 0) 307 low = next + 1; 308 else 309 high = next - 1; 310 } 311 return (NULL); 312 } 313 314 void 315 _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 316 int *pri, int which, const int **state) 317 { 318 collate_chain_t *p2; 319 collate_large_t *match; 320 int p, l; 321 const int *sptr; 322 323 /* 324 * If this is the "last" pass for the UNDEFINED, then 325 * we just return the priority itself. 326 */ 327 if (which >= table->info->directive_count) { 328 *pri = *t; 329 *len = 1; 330 *state = NULL; 331 return; 332 } 333 334 /* 335 * If we have remaining substitution data from a previous 336 * call, consume it first. 337 */ 338 if ((sptr = *state) != NULL) { 339 *pri = *sptr; 340 sptr++; 341 if ((sptr == *state) || (sptr == NULL)) 342 *state = NULL; 343 else 344 *state = sptr; 345 *len = 0; 346 return; 347 } 348 349 /* No active substitutions */ 350 *len = 1; 351 352 /* 353 * Check for composites such as diphthongs that collate as a 354 * single element (aka chains or collating-elements). 355 */ 356 if (((p2 = chainsearch(table, t, &l)) != NULL) && 357 ((p = p2->pri[which]) >= 0)) { 358 359 *len = l; 360 *pri = p; 361 362 } else if (*t <= UCHAR_MAX) { 363 364 /* 365 * Character is a small (8-bit) character. 366 * We just look these up directly for speed. 367 */ 368 *pri = table->char_pri_table[*t].pri[which]; 369 370 } else if ((table->info->large_count > 0) && 371 ((match = largesearch(table, *t)) != NULL)) { 372 373 /* 374 * Character was found in the extended table. 375 */ 376 *pri = match->pri.pri[which]; 377 378 } else { 379 /* 380 * Character lacks a specific definition. 381 */ 382 if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 383 /* Mask off sign bit to prevent ordering confusion. */ 384 *pri = (*t & COLLATE_MAX_PRIORITY); 385 } else { 386 *pri = table->info->undef_pri[which]; 387 } 388 /* No substitutions for undefined characters! */ 389 return; 390 } 391 392 /* 393 * Try substituting (expanding) the character. We are 394 * currently doing this *after* the chain compression. I 395 * think it should not matter, but this way might be slightly 396 * faster. 397 * 398 * We do this after the priority search, as this will help us 399 * to identify a single key value. In order for this to work, 400 * its important that the priority assigned to a given element 401 * to be substituted be unique for that level. The localedef 402 * code ensures this for us. 403 */ 404 if ((sptr = substsearch(table, *pri, which)) != NULL) { 405 if ((*pri = *sptr) > 0) { 406 sptr++; 407 *state = *sptr ? sptr : NULL; 408 } 409 } 410 411 } 412 413 /* 414 * This is the meaty part of wcsxfrm & strxfrm. Note that it does 415 * NOT NULL terminate. That is left to the caller. 416 */ 417 size_t 418 _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 419 size_t room) 420 { 421 int pri; 422 int len; 423 const wchar_t *t; 424 wchar_t *tr = NULL; 425 int direc; 426 int pass; 427 const int32_t *state; 428 size_t want = 0; 429 size_t need = 0; 430 int ndir = table->info->directive_count; 431 432 assert(src); 433 434 for (pass = 0; pass <= ndir; pass++) { 435 436 state = NULL; 437 438 if (pass != 0) { 439 /* insert level separator from the previous pass */ 440 if (room) { 441 *xf++ = 1; 442 room--; 443 } 444 want++; 445 } 446 447 /* special pass for undefined */ 448 if (pass == ndir) { 449 direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 450 } else { 451 direc = table->info->directive[pass]; 452 } 453 454 t = src; 455 456 if (direc & DIRECTIVE_BACKWARD) { 457 wchar_t *bp, *fp, c; 458 free(tr); 459 if ((tr = wcsdup(t)) == NULL) { 460 errno = ENOMEM; 461 goto fail; 462 } 463 bp = tr; 464 fp = tr + wcslen(tr) - 1; 465 while (bp < fp) { 466 c = *bp; 467 *bp++ = *fp; 468 *fp-- = c; 469 } 470 t = (const wchar_t *)tr; 471 } 472 473 if (direc & DIRECTIVE_POSITION) { 474 while (*t || state) { 475 _collate_lookup(table, t, &len, &pri, pass, &state); 476 t += len; 477 if (pri <= 0) { 478 if (pri < 0) { 479 errno = EINVAL; 480 goto fail; 481 } 482 state = NULL; 483 pri = COLLATE_MAX_PRIORITY; 484 } 485 if (room) { 486 *xf++ = pri; 487 room--; 488 } 489 want++; 490 need = want; 491 } 492 } else { 493 while (*t || state) { 494 _collate_lookup(table, t, &len, &pri, pass, &state); 495 t += len; 496 if (pri <= 0) { 497 if (pri < 0) { 498 errno = EINVAL; 499 goto fail; 500 } 501 state = NULL; 502 continue; 503 } 504 if (room) { 505 *xf++ = pri; 506 room--; 507 } 508 want++; 509 need = want; 510 } 511 } 512 } 513 free(tr); 514 return (need); 515 516 fail: 517 free(tr); 518 return ((size_t)(-1)); 519 } 520 521 /* 522 * In the non-POSIX case, we transform each character into a string of 523 * characters representing the character's priority. Since char is usually 524 * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 525 * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 526 * bits per byte. 527 * 528 * It turns out that we sometimes have real priorities that are 529 * 31-bits wide. (But: be careful using priorities where the high 530 * order bit is set -- i.e. the priority is negative. The sort order 531 * may be surprising!) 532 * 533 * TODO: This would be a good area to optimize somewhat. It turns out 534 * that real prioririties *except for the last UNDEFINED pass* are generally 535 * very small. We need the localedef code to precalculate the max 536 * priority for us, and ideally also give us a mask, and then we could 537 * severely limit what we expand to. 538 */ 539 #define XFRM_BYTES 6 540 #define XFRM_OFFSET ('0') /* make all printable characters */ 541 #define XFRM_SHIFT 6 542 #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 543 #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 544 545 static int 546 xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 547 { 548 /* we use unsigned to ensure zero fill on right shift */ 549 uint32_t val = (uint32_t)table->info->pri_count[pass]; 550 int nc = 0; 551 552 while (val) { 553 *p = (pri & XFRM_MASK) + XFRM_OFFSET; 554 pri >>= XFRM_SHIFT; 555 val >>= XFRM_SHIFT; 556 p++; 557 nc++; 558 } 559 return (nc); 560 } 561 562 size_t 563 _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 564 size_t room) 565 { 566 int pri; 567 int len; 568 const wchar_t *t; 569 wchar_t *tr = NULL; 570 int direc; 571 int pass; 572 const int32_t *state; 573 size_t want = 0; 574 size_t need = 0; 575 int b; 576 uint8_t buf[XFRM_BYTES]; 577 int ndir = table->info->directive_count; 578 579 assert(src); 580 581 for (pass = 0; pass <= ndir; pass++) { 582 583 state = NULL; 584 585 if (pass != 0) { 586 /* insert level separator from the previous pass */ 587 if (room) { 588 *xf++ = XFRM_SEP; 589 room--; 590 } 591 want++; 592 } 593 594 /* special pass for undefined */ 595 if (pass == ndir) { 596 direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 597 } else { 598 direc = table->info->directive[pass]; 599 } 600 601 t = src; 602 603 if (direc & DIRECTIVE_BACKWARD) { 604 wchar_t *bp, *fp, c; 605 free(tr); 606 if ((tr = wcsdup(t)) == NULL) { 607 errno = ENOMEM; 608 goto fail; 609 } 610 bp = tr; 611 fp = tr + wcslen(tr) - 1; 612 while (bp < fp) { 613 c = *bp; 614 *bp++ = *fp; 615 *fp-- = c; 616 } 617 t = (const wchar_t *)tr; 618 } 619 620 if (direc & DIRECTIVE_POSITION) { 621 while (*t || state) { 622 623 _collate_lookup(table, t, &len, &pri, pass, &state); 624 t += len; 625 if (pri <= 0) { 626 if (pri < 0) { 627 errno = EINVAL; 628 goto fail; 629 } 630 state = NULL; 631 pri = COLLATE_MAX_PRIORITY; 632 } 633 634 b = xfrm(table, buf, pri, pass); 635 want += b; 636 if (room) { 637 while (b) { 638 b--; 639 if (room) { 640 *xf++ = buf[b]; 641 room--; 642 } 643 } 644 } 645 need = want; 646 } 647 } else { 648 while (*t || state) { 649 _collate_lookup(table, t, &len, &pri, pass, &state); 650 t += len; 651 if (pri <= 0) { 652 if (pri < 0) { 653 errno = EINVAL; 654 goto fail; 655 } 656 state = NULL; 657 continue; 658 } 659 660 b = xfrm(table, buf, pri, pass); 661 want += b; 662 if (room) { 663 664 while (b) { 665 b--; 666 if (room) { 667 *xf++ = buf[b]; 668 room--; 669 } 670 } 671 } 672 need = want; 673 } 674 } 675 } 676 free(tr); 677 return (need); 678 679 fail: 680 free(tr); 681 return ((size_t)(-1)); 682 } 683 684 /* 685 * __collate_equiv_value returns the primary collation value for the given 686 * collating symbol specified by str and len. Zero or negative is returned 687 * if the collating symbol was not found. This function is used by bracket 688 * code in the TRE regex library. 689 */ 690 int 691 __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 692 { 693 int32_t e; 694 695 if (len < 1 || len >= COLLATE_STR_LEN) 696 return (-1); 697 698 FIX_LOCALE(locale); 699 struct xlocale_collate *table = 700 (struct xlocale_collate*)locale->components[XLC_COLLATE]; 701 702 if (table->__collate_load_error) 703 return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 704 705 if (len == 1) { 706 e = -1; 707 if (*str <= UCHAR_MAX) 708 e = table->char_pri_table[*str].pri[0]; 709 else if (table->info->large_count > 0) { 710 collate_large_t *match_large; 711 match_large = largesearch(table, *str); 712 if (match_large) 713 e = match_large->pri.pri[0]; 714 } 715 if (e == 0) 716 return (1); 717 return (e > 0 ? e : 0); 718 } 719 if (table->info->chain_count > 0) { 720 wchar_t name[COLLATE_STR_LEN]; 721 collate_chain_t *match_chain; 722 int clen; 723 724 wcsncpy (name, str, len); 725 name[len] = 0; 726 match_chain = chainsearch(table, name, &clen); 727 if (match_chain) { 728 e = match_chain->pri[0]; 729 if (e == 0) 730 return (1); 731 return (e < 0 ? -e : e); 732 } 733 } 734 return (0); 735 } 736 737 /* 738 * __collate_collating_symbol takes the multibyte string specified by 739 * src and slen, and using ps, converts that to a wide character. Then 740 * it is checked to verify it is a collating symbol, and then copies 741 * it to the wide character string specified by dst and dlen (the 742 * results are not null terminated). The length of the wide characters 743 * copied to dst is returned if successful. Zero is returned if no such 744 * collating symbol exists. (size_t)-1 is returned if there are wide-character 745 * conversion errors, if the length of the converted string is greater that 746 * COLLATE_STR_LEN or if dlen is too small. It is up to the calling routine to 747 * preserve the mbstate_t structure as needed. 748 */ 749 size_t 750 __collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, 751 size_t slen, mbstate_t *ps) 752 { 753 wchar_t wname[COLLATE_STR_LEN]; 754 wchar_t w, *wp; 755 struct xlocale_collate *table; 756 size_t len, l; 757 758 table = 759 (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE]; 760 /* POSIX locale */ 761 if (table->__collate_load_error) { 762 if (dlen < 1) 763 return ((size_t)-1); 764 if (slen != 1 || !isascii(*src)) 765 return (0); 766 *dst = *src; 767 return (1); 768 } 769 for (wp = wname, len = 0; slen > 0; len++) { 770 l = mbrtowc(&w, src, slen, ps); 771 if (l == (size_t)-1 || l == (size_t)-2) 772 return ((size_t)-1); 773 if (l == 0) 774 break; 775 if (len >= COLLATE_STR_LEN) 776 return ((size_t)-1); 777 *wp++ = w; 778 src += l; 779 slen -= l; 780 } 781 if (len == 0 || len > dlen) 782 return ((size_t)-1); 783 if (len == 1) { 784 if (*wname <= UCHAR_MAX) { 785 if (table->char_pri_table[*wname].pri[0] >= 0) { 786 if (dlen > 0) 787 *dst = *wname; 788 return (1); 789 } 790 return (0); 791 } else if (table->info->large_count > 0) { 792 collate_large_t *match; 793 match = largesearch(table, *wname); 794 if (match && match->pri.pri[0] >= 0) { 795 if (dlen > 0) 796 *dst = *wname; 797 return (1); 798 } 799 } 800 return (0); 801 } 802 *wp = 0; 803 if (table->info->chain_count > 0) { 804 collate_chain_t *match; 805 int ll; 806 match = chainsearch(table, wname, &ll); 807 if (match) { 808 if (ll < dlen) 809 dlen = ll; 810 wcsncpy(dst, wname, dlen); 811 return (dlen); 812 } 813 } 814 return (0); 815 } 816 817 /* 818 * __collate_equiv_class returns the equivalence class number for the symbol 819 * specified by src and slen, using ps to convert from multi-byte to wide 820 * character. Zero is returned if the symbol is not in an equivalence 821 * class. -1 is returned if there are wide character conversion errors, 822 * if there are any greater-than-8-bit characters or if a multi-byte symbol 823 * is greater or equal to COLLATE_STR_LEN in length. It is up to the calling 824 * routine to preserve the mbstate_t structure as needed. 825 */ 826 int 827 __collate_equiv_class(const char *src, size_t slen, mbstate_t *ps) 828 { 829 wchar_t wname[COLLATE_STR_LEN]; 830 wchar_t w, *wp; 831 struct xlocale_collate *table; 832 size_t len, l; 833 int e; 834 835 table = 836 (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE]; 837 /* POSIX locale */ 838 if (table->__collate_load_error) 839 return (0); 840 for (wp = wname, len = 0; slen > 0; len++) { 841 l = mbrtowc(&w, src, slen, ps); 842 if (l == (size_t)-1 || l == (size_t)-2) 843 return (-1); 844 if (l == 0) 845 break; 846 if (len >= COLLATE_STR_LEN) 847 return (-1); 848 *wp++ = w; 849 src += l; 850 slen -= l; 851 } 852 if (len == 0) 853 return (-1); 854 if (len == 1) { 855 e = -1; 856 if (*wname <= UCHAR_MAX) 857 e = table->char_pri_table[*wname].pri[0]; 858 else if (table->info->large_count > 0) { 859 collate_large_t *match; 860 match = largesearch(table, *wname); 861 if (match) 862 e = match->pri.pri[0]; 863 } 864 if (e == 0) 865 return (IGNORE_EQUIV_CLASS); 866 return (e > 0 ? e : 0); 867 } 868 *wp = 0; 869 if (table->info->chain_count > 0) { 870 collate_chain_t *match; 871 int ll; 872 match = chainsearch(table, wname, &ll); 873 if (match) { 874 e = match->pri[0]; 875 if (e == 0) 876 return (IGNORE_EQUIV_CLASS); 877 return (e < 0 ? -e : e); 878 } 879 } 880 return (0); 881 } 882 883 884 /* 885 * __collate_equiv_match tries to match any single or multi-character symbol 886 * in equivalence class equiv_class in the multi-byte string specified by src 887 * and slen. If start is non-zero, it is taken to be the first (pre-converted) 888 * wide character. Subsequence wide characters, if needed, will use ps in 889 * the conversion. On a successful match, the length of the matched string 890 * is returned (including the start character). If dst is non-NULL, the 891 * matched wide-character string is copied to dst, a wide character array of 892 * length dlen (the results are not zero-terminated). If rlen is non-NULL, 893 * the number of character in src actually used is returned. Zero is 894 * returned by __collate_equiv_match if there is no match. (size_t)-1 is 895 * returned on error: if there were conversion errors or if dlen is too small 896 * to accept the results. On no match or error, ps is restored to its incoming 897 * state. 898 */ 899 size_t 900 __collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, 901 const char *src, size_t slen, mbstate_t *ps, size_t *rlen) 902 { 903 wchar_t w; 904 size_t len, l, clen; 905 int i; 906 wchar_t buf[COLLATE_STR_LEN], *wp; 907 mbstate_t save; 908 const char *s = src; 909 struct xlocale_collate *table; 910 size_t sl = slen; 911 collate_chain_t *ch = NULL; 912 913 table = 914 (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE]; 915 /* POSIX locale */ 916 if (table->__collate_load_error) 917 return ((size_t)-1); 918 if (equiv_class == IGNORE_EQUIV_CLASS) 919 equiv_class = 0; 920 if (ps) 921 save = *ps; 922 wp = buf; 923 len = clen = 0; 924 if (start) { 925 *wp++ = start; 926 len = 1; 927 } 928 /* convert up to the max chain length */ 929 while (sl > 0 && len < table->info->chain_max_len) { 930 l = mbrtowc(&w, s, sl, ps); 931 if (l == (size_t)-1 || l == (size_t)-2 || l == 0) 932 break; 933 *wp++ = w; 934 s += l; 935 clen += l; 936 sl -= l; 937 len++; 938 } 939 *wp = 0; 940 if (len > 1 && (ch = chainsearch(table, buf, &i)) != NULL) { 941 int e = ch->pri[0]; 942 if (e < 0) 943 e = -e; 944 if (e == equiv_class) 945 goto found; 946 } 947 /* try single character */ 948 i = 1; 949 if (*buf <= UCHAR_MAX) { 950 if (equiv_class == table->char_pri_table[*buf].pri[0]) 951 goto found; 952 } else if (table->info->large_count > 0) { 953 collate_large_t *match; 954 match = largesearch(table, *buf); 955 if (match && equiv_class == match->pri.pri[0]) 956 goto found; 957 } 958 /* no match */ 959 if (ps) 960 *ps = save; 961 return (0); 962 found: 963 /* 964 * If we converted more than we used, restore to initial 965 * and reconvert up to what did match. 966 */ 967 if (i < len) { 968 len = i; 969 if (ps) 970 *ps = save; 971 if (start) 972 i--; 973 clen = 0; 974 while (i-- > 0) { 975 l = mbrtowc(&w, src, slen, ps); 976 src += l; 977 clen += l; 978 slen -= l; 979 } 980 } 981 if (dst) { 982 if (dlen < len) { 983 if (ps) 984 *ps = save; 985 return ((size_t)-1); 986 } 987 for (wp = buf; len > 0; len--) 988 *dst++ = *wp++; 989 } 990 if (rlen) 991 *rlen = clen; 992 return (len); 993 } 994