14297a3b0SGarrett D'Amore /* 22d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org> 3475b496bSGarrett D'Amore * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 44297a3b0SGarrett D'Amore * Copyright (c) 2002-2004 Tim J. Robbins 54297a3b0SGarrett D'Amore * All rights reserved. 64297a3b0SGarrett D'Amore * 74297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 84297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 94297a3b0SGarrett D'Amore * are met: 104297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 114297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 124297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 134297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 144297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 154297a3b0SGarrett D'Amore * 164297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 174297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 184297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 194297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 204297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 214297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 224297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 234297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 244297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 254297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 264297a3b0SGarrett D'Amore * SUCH DAMAGE. 274297a3b0SGarrett D'Amore */ 284297a3b0SGarrett D'Amore 294297a3b0SGarrett D'Amore #include "lint.h" 304297a3b0SGarrett D'Amore #include <errno.h> 314297a3b0SGarrett D'Amore #include <limits.h> 324297a3b0SGarrett D'Amore #include <stdlib.h> 334297a3b0SGarrett D'Amore #include <string.h> 344297a3b0SGarrett D'Amore #include <wchar.h> 354297a3b0SGarrett D'Amore #include "mblocal.h" 362d08521bSGarrett D'Amore #include "lctype.h" 374297a3b0SGarrett D'Amore 384297a3b0SGarrett D'Amore static size_t _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD, 394297a3b0SGarrett D'Amore const char *_RESTRICT_KYWD, 40*d8e0a9a1SRobert Mustacchi size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 414297a3b0SGarrett D'Amore static int _UTF8_mbsinit(const mbstate_t *); 424297a3b0SGarrett D'Amore static size_t _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 434297a3b0SGarrett D'Amore const char **_RESTRICT_KYWD, size_t, size_t, 444297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD); 454297a3b0SGarrett D'Amore static size_t _UTF8_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 464297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD); 474297a3b0SGarrett D'Amore static size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD, 484297a3b0SGarrett D'Amore const wchar_t **_RESTRICT_KYWD, 494297a3b0SGarrett D'Amore size_t, size_t, mbstate_t *_RESTRICT_KYWD); 504297a3b0SGarrett D'Amore 512d08521bSGarrett D'Amore void 522d08521bSGarrett D'Amore _UTF8_init(struct lc_ctype *lct) 534297a3b0SGarrett D'Amore { 542d08521bSGarrett D'Amore lct->lc_mbrtowc = _UTF8_mbrtowc; 552d08521bSGarrett D'Amore lct->lc_wcrtomb = _UTF8_wcrtomb; 562d08521bSGarrett D'Amore lct->lc_mbsinit = _UTF8_mbsinit; 572d08521bSGarrett D'Amore lct->lc_mbsnrtowcs = _UTF8_mbsnrtowcs; 582d08521bSGarrett D'Amore lct->lc_wcsnrtombs = _UTF8_wcsnrtombs; 592d08521bSGarrett D'Amore lct->lc_is_ascii = 0; 602d08521bSGarrett D'Amore lct->lc_max_mblen = 4; 614297a3b0SGarrett D'Amore } 624297a3b0SGarrett D'Amore 634297a3b0SGarrett D'Amore static int 644297a3b0SGarrett D'Amore _UTF8_mbsinit(const mbstate_t *ps) 654297a3b0SGarrett D'Amore { 664297a3b0SGarrett D'Amore 674297a3b0SGarrett D'Amore return (ps == NULL || ((const _UTF8State *)ps)->want == 0); 684297a3b0SGarrett D'Amore } 694297a3b0SGarrett D'Amore 704297a3b0SGarrett D'Amore static size_t 714297a3b0SGarrett D'Amore _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 72*d8e0a9a1SRobert Mustacchi size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 734297a3b0SGarrett D'Amore { 744297a3b0SGarrett D'Amore _UTF8State *us; 754297a3b0SGarrett D'Amore int ch, i, mask, want; 764297a3b0SGarrett D'Amore wchar_t lbound, wch; 774297a3b0SGarrett D'Amore 784297a3b0SGarrett D'Amore us = (_UTF8State *)ps; 794297a3b0SGarrett D'Amore 804297a3b0SGarrett D'Amore if (us->want < 0 || us->want > 6) { 814297a3b0SGarrett D'Amore errno = EINVAL; 824297a3b0SGarrett D'Amore return ((size_t)-1); 834297a3b0SGarrett D'Amore } 844297a3b0SGarrett D'Amore 854297a3b0SGarrett D'Amore if (s == NULL) { 864297a3b0SGarrett D'Amore s = ""; 874297a3b0SGarrett D'Amore n = 1; 884297a3b0SGarrett D'Amore pwc = NULL; 894297a3b0SGarrett D'Amore } 904297a3b0SGarrett D'Amore 914297a3b0SGarrett D'Amore if (n == 0) 924297a3b0SGarrett D'Amore /* Incomplete multibyte sequence */ 934297a3b0SGarrett D'Amore return ((size_t)-2); 944297a3b0SGarrett D'Amore 954297a3b0SGarrett D'Amore if (us->want == 0) { 964297a3b0SGarrett D'Amore /* 974297a3b0SGarrett D'Amore * Determine the number of octets that make up this character 984297a3b0SGarrett D'Amore * from the first octet, and a mask that extracts the 994297a3b0SGarrett D'Amore * interesting bits of the first octet. We already know 1004297a3b0SGarrett D'Amore * the character is at least two bytes long. 1014297a3b0SGarrett D'Amore * 1024297a3b0SGarrett D'Amore * We also specify a lower bound for the character code to 1034297a3b0SGarrett D'Amore * detect redundant, non-"shortest form" encodings. For 1044297a3b0SGarrett D'Amore * example, the sequence C0 80 is _not_ a legal representation 1054297a3b0SGarrett D'Amore * of the null character. This enforces a 1-to-1 mapping 1064297a3b0SGarrett D'Amore * between character codes and their multibyte representations. 1074297a3b0SGarrett D'Amore */ 1084297a3b0SGarrett D'Amore ch = (unsigned char)*s; 1094297a3b0SGarrett D'Amore if ((ch & 0x80) == 0) { 110475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */ 111475b496bSGarrett D'Amore if (pwc != NULL) 112475b496bSGarrett D'Amore *pwc = ch; 113*d8e0a9a1SRobert Mustacchi if (zero || ch != '\0') { 114*d8e0a9a1SRobert Mustacchi return (1); 115*d8e0a9a1SRobert Mustacchi } else { 116*d8e0a9a1SRobert Mustacchi return (0); 117*d8e0a9a1SRobert Mustacchi } 118475b496bSGarrett D'Amore } 119475b496bSGarrett D'Amore if ((ch & 0xe0) == 0xc0) { 1204297a3b0SGarrett D'Amore mask = 0x1f; 1214297a3b0SGarrett D'Amore want = 2; 1224297a3b0SGarrett D'Amore lbound = 0x80; 1234297a3b0SGarrett D'Amore } else if ((ch & 0xf0) == 0xe0) { 1244297a3b0SGarrett D'Amore mask = 0x0f; 1254297a3b0SGarrett D'Amore want = 3; 1264297a3b0SGarrett D'Amore lbound = 0x800; 1274297a3b0SGarrett D'Amore } else if ((ch & 0xf8) == 0xf0) { 1284297a3b0SGarrett D'Amore mask = 0x07; 1294297a3b0SGarrett D'Amore want = 4; 1304297a3b0SGarrett D'Amore lbound = 0x10000; 1314297a3b0SGarrett D'Amore #if 0 1324297a3b0SGarrett D'Amore /* These would be illegal in the UTF-8 space */ 1334297a3b0SGarrett D'Amore 1344297a3b0SGarrett D'Amore } else if ((ch & 0xfc) == 0xf8) { 1354297a3b0SGarrett D'Amore mask = 0x03; 1364297a3b0SGarrett D'Amore want = 5; 1374297a3b0SGarrett D'Amore lbound = 0x200000; 1384297a3b0SGarrett D'Amore } else if ((ch & 0xfe) == 0xfc) { 1394297a3b0SGarrett D'Amore mask = 0x01; 1404297a3b0SGarrett D'Amore want = 6; 1414297a3b0SGarrett D'Amore lbound = 0x4000000; 1424297a3b0SGarrett D'Amore #endif 1434297a3b0SGarrett D'Amore } else { 1444297a3b0SGarrett D'Amore /* 1454297a3b0SGarrett D'Amore * Malformed input; input is not UTF-8. 1464297a3b0SGarrett D'Amore */ 1474297a3b0SGarrett D'Amore errno = EILSEQ; 1484297a3b0SGarrett D'Amore return ((size_t)-1); 1494297a3b0SGarrett D'Amore } 1504297a3b0SGarrett D'Amore } else { 1514297a3b0SGarrett D'Amore want = us->want; 1524297a3b0SGarrett D'Amore lbound = us->lbound; 1534297a3b0SGarrett D'Amore } 1544297a3b0SGarrett D'Amore 1554297a3b0SGarrett D'Amore /* 1564297a3b0SGarrett D'Amore * Decode the octet sequence representing the character in chunks 1574297a3b0SGarrett D'Amore * of 6 bits, most significant first. 1584297a3b0SGarrett D'Amore */ 1594297a3b0SGarrett D'Amore if (us->want == 0) 1604297a3b0SGarrett D'Amore wch = (unsigned char)*s++ & mask; 1614297a3b0SGarrett D'Amore else 1624297a3b0SGarrett D'Amore wch = us->ch; 1634297a3b0SGarrett D'Amore 1644297a3b0SGarrett D'Amore for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 1654297a3b0SGarrett D'Amore if ((*s & 0xc0) != 0x80) { 1664297a3b0SGarrett D'Amore /* 1674297a3b0SGarrett D'Amore * Malformed input; bad characters in the middle 1684297a3b0SGarrett D'Amore * of a character. 1694297a3b0SGarrett D'Amore */ 1704297a3b0SGarrett D'Amore errno = EILSEQ; 1714297a3b0SGarrett D'Amore return ((size_t)-1); 1724297a3b0SGarrett D'Amore } 1734297a3b0SGarrett D'Amore wch <<= 6; 1744297a3b0SGarrett D'Amore wch |= *s++ & 0x3f; 1754297a3b0SGarrett D'Amore } 1764297a3b0SGarrett D'Amore if (i < want) { 1774297a3b0SGarrett D'Amore /* Incomplete multibyte sequence. */ 1784297a3b0SGarrett D'Amore us->want = want - i; 1794297a3b0SGarrett D'Amore us->lbound = lbound; 1804297a3b0SGarrett D'Amore us->ch = wch; 1814297a3b0SGarrett D'Amore return ((size_t)-2); 1824297a3b0SGarrett D'Amore } 1834297a3b0SGarrett D'Amore if (wch < lbound) { 1844297a3b0SGarrett D'Amore /* 1854297a3b0SGarrett D'Amore * Malformed input; redundant encoding. 1864297a3b0SGarrett D'Amore */ 1874297a3b0SGarrett D'Amore errno = EILSEQ; 1884297a3b0SGarrett D'Amore return ((size_t)-1); 1894297a3b0SGarrett D'Amore } 1904297a3b0SGarrett D'Amore if (pwc != NULL) 1914297a3b0SGarrett D'Amore *pwc = wch; 1924297a3b0SGarrett D'Amore us->want = 0; 193*d8e0a9a1SRobert Mustacchi if (zero || wch != L'\0') { 194*d8e0a9a1SRobert Mustacchi return (want); 195*d8e0a9a1SRobert Mustacchi } else { 196*d8e0a9a1SRobert Mustacchi return (0); 197*d8e0a9a1SRobert Mustacchi } 1984297a3b0SGarrett D'Amore } 1994297a3b0SGarrett D'Amore 2004297a3b0SGarrett D'Amore static size_t 2014297a3b0SGarrett D'Amore _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, 2024297a3b0SGarrett D'Amore size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 2034297a3b0SGarrett D'Amore { 2044297a3b0SGarrett D'Amore _UTF8State *us; 2054297a3b0SGarrett D'Amore const char *s; 2064297a3b0SGarrett D'Amore size_t nchr; 2074297a3b0SGarrett D'Amore wchar_t wc; 2084297a3b0SGarrett D'Amore size_t nb; 2094297a3b0SGarrett D'Amore 2104297a3b0SGarrett D'Amore us = (_UTF8State *)ps; 2114297a3b0SGarrett D'Amore 2124297a3b0SGarrett D'Amore s = *src; 2134297a3b0SGarrett D'Amore nchr = 0; 2144297a3b0SGarrett D'Amore 2154297a3b0SGarrett D'Amore if (dst == NULL) { 2164297a3b0SGarrett D'Amore /* 2174297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII 2184297a3b0SGarrett D'Amore * character appears as anything but the first byte of a 2194297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop. 2204297a3b0SGarrett D'Amore */ 2214297a3b0SGarrett D'Amore if (nms > 0 && us->want > 0 && (signed char)*s > 0) { 2224297a3b0SGarrett D'Amore errno = EILSEQ; 2234297a3b0SGarrett D'Amore return ((size_t)-1); 2244297a3b0SGarrett D'Amore } 2254297a3b0SGarrett D'Amore for (;;) { 226*d8e0a9a1SRobert Mustacchi if (nms > 0 && (signed char)*s > 0) { 2274297a3b0SGarrett D'Amore /* 2284297a3b0SGarrett D'Amore * Fast path for plain ASCII characters 2294297a3b0SGarrett D'Amore * excluding NUL. 2304297a3b0SGarrett D'Amore */ 2314297a3b0SGarrett D'Amore nb = 1; 232*d8e0a9a1SRobert Mustacchi } else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps, 233*d8e0a9a1SRobert Mustacchi B_FALSE)) == (size_t)-1) { 2344297a3b0SGarrett D'Amore /* Invalid sequence - mbrtowc() sets errno. */ 2354297a3b0SGarrett D'Amore return ((size_t)-1); 236*d8e0a9a1SRobert Mustacchi } else if (nb == 0 || nb == (size_t)-2) { 2374297a3b0SGarrett D'Amore return (nchr); 238*d8e0a9a1SRobert Mustacchi } 2394297a3b0SGarrett D'Amore s += nb; 2404297a3b0SGarrett D'Amore nms -= nb; 2414297a3b0SGarrett D'Amore nchr++; 2424297a3b0SGarrett D'Amore } 2434297a3b0SGarrett D'Amore /*NOTREACHED*/ 2444297a3b0SGarrett D'Amore } 2454297a3b0SGarrett D'Amore 2464297a3b0SGarrett D'Amore /* 2474297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII 2484297a3b0SGarrett D'Amore * character appears as anything but the first byte of a 2494297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop. 2504297a3b0SGarrett D'Amore */ 2514297a3b0SGarrett D'Amore if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) { 2524297a3b0SGarrett D'Amore errno = EILSEQ; 2534297a3b0SGarrett D'Amore return ((size_t)-1); 2544297a3b0SGarrett D'Amore } 2554297a3b0SGarrett D'Amore while (len-- > 0) { 2564297a3b0SGarrett D'Amore if (nms > 0 && (signed char)*s > 0) { 2574297a3b0SGarrett D'Amore /* 2584297a3b0SGarrett D'Amore * Fast path for plain ASCII characters 2594297a3b0SGarrett D'Amore * excluding NUL. 2604297a3b0SGarrett D'Amore */ 2614297a3b0SGarrett D'Amore *dst = (wchar_t)*s; 2624297a3b0SGarrett D'Amore nb = 1; 263*d8e0a9a1SRobert Mustacchi } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps, B_FALSE)) == 2644297a3b0SGarrett D'Amore (size_t)-1) { 2654297a3b0SGarrett D'Amore *src = s; 2664297a3b0SGarrett D'Amore return ((size_t)-1); 2674297a3b0SGarrett D'Amore } else if (nb == (size_t)-2) { 2684297a3b0SGarrett D'Amore *src = s + nms; 2694297a3b0SGarrett D'Amore return (nchr); 2704297a3b0SGarrett D'Amore } else if (nb == 0) { 2714297a3b0SGarrett D'Amore *src = NULL; 2724297a3b0SGarrett D'Amore return (nchr); 2734297a3b0SGarrett D'Amore } 2744297a3b0SGarrett D'Amore s += nb; 2754297a3b0SGarrett D'Amore nms -= nb; 2764297a3b0SGarrett D'Amore nchr++; 2774297a3b0SGarrett D'Amore dst++; 2784297a3b0SGarrett D'Amore } 2794297a3b0SGarrett D'Amore *src = s; 2804297a3b0SGarrett D'Amore return (nchr); 2814297a3b0SGarrett D'Amore } 2824297a3b0SGarrett D'Amore 2834297a3b0SGarrett D'Amore static size_t 2844297a3b0SGarrett D'Amore _UTF8_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) 2854297a3b0SGarrett D'Amore { 2864297a3b0SGarrett D'Amore _UTF8State *us; 2874297a3b0SGarrett D'Amore unsigned char lead; 2884297a3b0SGarrett D'Amore int i, len; 2894297a3b0SGarrett D'Amore 2904297a3b0SGarrett D'Amore us = (_UTF8State *)ps; 2914297a3b0SGarrett D'Amore 2924297a3b0SGarrett D'Amore if (us->want != 0) { 2934297a3b0SGarrett D'Amore errno = EINVAL; 2944297a3b0SGarrett D'Amore return ((size_t)-1); 2954297a3b0SGarrett D'Amore } 2964297a3b0SGarrett D'Amore 2974297a3b0SGarrett D'Amore if (s == NULL) 2984297a3b0SGarrett D'Amore /* Reset to initial shift state (no-op) */ 2994297a3b0SGarrett D'Amore return (1); 3004297a3b0SGarrett D'Amore 3014297a3b0SGarrett D'Amore /* 3024297a3b0SGarrett D'Amore * Determine the number of octets needed to represent this character. 3034297a3b0SGarrett D'Amore * We always output the shortest sequence possible. Also specify the 3044297a3b0SGarrett D'Amore * first few bits of the first octet, which contains the information 3054297a3b0SGarrett D'Amore * about the sequence length. 3064297a3b0SGarrett D'Amore */ 3074297a3b0SGarrett D'Amore if ((wc & ~0x7f) == 0) { 308475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */ 309475b496bSGarrett D'Amore *s = (char)wc; 310475b496bSGarrett D'Amore return (1); 3114297a3b0SGarrett D'Amore } else if ((wc & ~0x7ff) == 0) { 3124297a3b0SGarrett D'Amore lead = 0xc0; 3134297a3b0SGarrett D'Amore len = 2; 3144297a3b0SGarrett D'Amore } else if ((wc & ~0xffff) == 0) { 3154297a3b0SGarrett D'Amore lead = 0xe0; 3164297a3b0SGarrett D'Amore len = 3; 3174297a3b0SGarrett D'Amore } else if ((wc & ~0x1fffff) == 0) { 3184297a3b0SGarrett D'Amore lead = 0xf0; 3194297a3b0SGarrett D'Amore len = 4; 3204297a3b0SGarrett D'Amore #if 0 3214297a3b0SGarrett D'Amore /* Again, 5 and 6 byte encodings are simply not permitted */ 3224297a3b0SGarrett D'Amore } else if ((wc & ~0x3ffffff) == 0) { 3234297a3b0SGarrett D'Amore lead = 0xf8; 3244297a3b0SGarrett D'Amore len = 5; 3254297a3b0SGarrett D'Amore } else if ((wc & ~0x7fffffff) == 0) { 3264297a3b0SGarrett D'Amore lead = 0xfc; 3274297a3b0SGarrett D'Amore len = 6; 3284297a3b0SGarrett D'Amore #endif 3294297a3b0SGarrett D'Amore } else { 3304297a3b0SGarrett D'Amore errno = EILSEQ; 3314297a3b0SGarrett D'Amore return ((size_t)-1); 3324297a3b0SGarrett D'Amore } 3334297a3b0SGarrett D'Amore 3344297a3b0SGarrett D'Amore /* 3354297a3b0SGarrett D'Amore * Output the octets representing the character in chunks 3364297a3b0SGarrett D'Amore * of 6 bits, least significant last. The first octet is 3374297a3b0SGarrett D'Amore * a special case because it contains the sequence length 3384297a3b0SGarrett D'Amore * information. 3394297a3b0SGarrett D'Amore */ 3404297a3b0SGarrett D'Amore for (i = len - 1; i > 0; i--) { 3414297a3b0SGarrett D'Amore s[i] = (wc & 0x3f) | 0x80; 3424297a3b0SGarrett D'Amore wc >>= 6; 3434297a3b0SGarrett D'Amore } 3444297a3b0SGarrett D'Amore *s = (wc & 0xff) | lead; 3454297a3b0SGarrett D'Amore 3464297a3b0SGarrett D'Amore return (len); 3474297a3b0SGarrett D'Amore } 3484297a3b0SGarrett D'Amore 3494297a3b0SGarrett D'Amore static size_t 3504297a3b0SGarrett D'Amore _UTF8_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 3514297a3b0SGarrett D'Amore size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 3524297a3b0SGarrett D'Amore { 3534297a3b0SGarrett D'Amore _UTF8State *us; 3544297a3b0SGarrett D'Amore char buf[MB_LEN_MAX]; 3554297a3b0SGarrett D'Amore const wchar_t *s; 3564297a3b0SGarrett D'Amore size_t nbytes; 3574297a3b0SGarrett D'Amore size_t nb; 3584297a3b0SGarrett D'Amore 3594297a3b0SGarrett D'Amore us = (_UTF8State *)ps; 3604297a3b0SGarrett D'Amore 3614297a3b0SGarrett D'Amore if (us->want != 0) { 3624297a3b0SGarrett D'Amore errno = EINVAL; 3634297a3b0SGarrett D'Amore return ((size_t)-1); 3644297a3b0SGarrett D'Amore } 3654297a3b0SGarrett D'Amore 3664297a3b0SGarrett D'Amore s = *src; 3674297a3b0SGarrett D'Amore nbytes = 0; 3684297a3b0SGarrett D'Amore 3694297a3b0SGarrett D'Amore if (dst == NULL) { 3704297a3b0SGarrett D'Amore while (nwc-- > 0) { 3714297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80) 3724297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */ 3734297a3b0SGarrett D'Amore nb = 1; 3744297a3b0SGarrett D'Amore else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == 3754297a3b0SGarrett D'Amore (size_t)-1) 3764297a3b0SGarrett D'Amore /* Invalid character - wcrtomb() sets errno. */ 3774297a3b0SGarrett D'Amore return ((size_t)-1); 3784297a3b0SGarrett D'Amore if (*s == L'\0') 3794297a3b0SGarrett D'Amore return (nbytes + nb - 1); 3804297a3b0SGarrett D'Amore s++; 3814297a3b0SGarrett D'Amore nbytes += nb; 3824297a3b0SGarrett D'Amore } 3834297a3b0SGarrett D'Amore return (nbytes); 3844297a3b0SGarrett D'Amore } 3854297a3b0SGarrett D'Amore 3864297a3b0SGarrett D'Amore while (len > 0 && nwc-- > 0) { 3874297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80) { 3884297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */ 3894297a3b0SGarrett D'Amore nb = 1; 3904297a3b0SGarrett D'Amore *dst = *s; 3914297a3b0SGarrett D'Amore } else if (len > (size_t)MB_CUR_MAX) { 3924297a3b0SGarrett D'Amore /* Enough space to translate in-place. */ 3934297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) { 3944297a3b0SGarrett D'Amore *src = s; 3954297a3b0SGarrett D'Amore return ((size_t)-1); 3964297a3b0SGarrett D'Amore } 3974297a3b0SGarrett D'Amore } else { 3984297a3b0SGarrett D'Amore /* 3994297a3b0SGarrett D'Amore * May not be enough space; use temp. buffer. 4004297a3b0SGarrett D'Amore */ 4014297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) { 4024297a3b0SGarrett D'Amore *src = s; 4034297a3b0SGarrett D'Amore return ((size_t)-1); 4044297a3b0SGarrett D'Amore } 4054297a3b0SGarrett D'Amore if (nb > (int)len) 4064297a3b0SGarrett D'Amore /* MB sequence for character won't fit. */ 4074297a3b0SGarrett D'Amore break; 4084297a3b0SGarrett D'Amore (void) memcpy(dst, buf, nb); 4094297a3b0SGarrett D'Amore } 4104297a3b0SGarrett D'Amore if (*s == L'\0') { 4114297a3b0SGarrett D'Amore *src = NULL; 4124297a3b0SGarrett D'Amore return (nbytes + nb - 1); 4134297a3b0SGarrett D'Amore } 4144297a3b0SGarrett D'Amore s++; 4154297a3b0SGarrett D'Amore dst += nb; 4164297a3b0SGarrett D'Amore len -= nb; 4174297a3b0SGarrett D'Amore nbytes += nb; 4184297a3b0SGarrett D'Amore } 4194297a3b0SGarrett D'Amore *src = s; 4204297a3b0SGarrett D'Amore return (nbytes); 4214297a3b0SGarrett D'Amore } 422