/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2002-2004 Tim J. Robbins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
284297a3b0SGarrett D'Amore
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <limits.h>
324297a3b0SGarrett D'Amore #include <stdlib.h>
334297a3b0SGarrett D'Amore #include <string.h>
344297a3b0SGarrett D'Amore #include <wchar.h>
354297a3b0SGarrett D'Amore #include "mblocal.h"
36*2d08521bSGarrett D'Amore #include "lctype.h"
374297a3b0SGarrett D'Amore
384297a3b0SGarrett D'Amore static size_t _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD,
394297a3b0SGarrett D'Amore const char *_RESTRICT_KYWD,
404297a3b0SGarrett D'Amore size_t, mbstate_t *_RESTRICT_KYWD);
414297a3b0SGarrett D'Amore static int _UTF8_mbsinit(const mbstate_t *);
424297a3b0SGarrett D'Amore static size_t _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
434297a3b0SGarrett D'Amore const char **_RESTRICT_KYWD, size_t, size_t,
444297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
454297a3b0SGarrett D'Amore static size_t _UTF8_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
464297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
474297a3b0SGarrett D'Amore static size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD,
484297a3b0SGarrett D'Amore const wchar_t **_RESTRICT_KYWD,
494297a3b0SGarrett D'Amore size_t, size_t, mbstate_t *_RESTRICT_KYWD);
504297a3b0SGarrett D'Amore
/*
 * Decoder state carried between calls.  The conversion routines in this
 * file obtain it by casting the caller's mbstate_t pointer to this type.
 */
typedef struct {
	wchar_t	ch;	/* code point bits accumulated so far */
	int	want;	/* bytes still expected; 0 means initial state */
	wchar_t	lbound;	/* smallest value legal for this sequence length */
} _UTF8State;
564297a3b0SGarrett D'Amore
57*2d08521bSGarrett D'Amore void
_UTF8_init(struct lc_ctype * lct)58*2d08521bSGarrett D'Amore _UTF8_init(struct lc_ctype *lct)
594297a3b0SGarrett D'Amore {
60*2d08521bSGarrett D'Amore lct->lc_mbrtowc = _UTF8_mbrtowc;
61*2d08521bSGarrett D'Amore lct->lc_wcrtomb = _UTF8_wcrtomb;
62*2d08521bSGarrett D'Amore lct->lc_mbsinit = _UTF8_mbsinit;
63*2d08521bSGarrett D'Amore lct->lc_mbsnrtowcs = _UTF8_mbsnrtowcs;
64*2d08521bSGarrett D'Amore lct->lc_wcsnrtombs = _UTF8_wcsnrtombs;
65*2d08521bSGarrett D'Amore lct->lc_is_ascii = 0;
66*2d08521bSGarrett D'Amore lct->lc_max_mblen = 4;
674297a3b0SGarrett D'Amore }
684297a3b0SGarrett D'Amore
694297a3b0SGarrett D'Amore static int
_UTF8_mbsinit(const mbstate_t * ps)704297a3b0SGarrett D'Amore _UTF8_mbsinit(const mbstate_t *ps)
714297a3b0SGarrett D'Amore {
724297a3b0SGarrett D'Amore
734297a3b0SGarrett D'Amore return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
744297a3b0SGarrett D'Amore }
754297a3b0SGarrett D'Amore
764297a3b0SGarrett D'Amore static size_t
_UTF8_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps)774297a3b0SGarrett D'Amore _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
784297a3b0SGarrett D'Amore size_t n, mbstate_t *_RESTRICT_KYWD ps)
794297a3b0SGarrett D'Amore {
804297a3b0SGarrett D'Amore _UTF8State *us;
814297a3b0SGarrett D'Amore int ch, i, mask, want;
824297a3b0SGarrett D'Amore wchar_t lbound, wch;
834297a3b0SGarrett D'Amore
844297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
854297a3b0SGarrett D'Amore
864297a3b0SGarrett D'Amore if (us->want < 0 || us->want > 6) {
874297a3b0SGarrett D'Amore errno = EINVAL;
884297a3b0SGarrett D'Amore return ((size_t)-1);
894297a3b0SGarrett D'Amore }
904297a3b0SGarrett D'Amore
914297a3b0SGarrett D'Amore if (s == NULL) {
924297a3b0SGarrett D'Amore s = "";
934297a3b0SGarrett D'Amore n = 1;
944297a3b0SGarrett D'Amore pwc = NULL;
954297a3b0SGarrett D'Amore }
964297a3b0SGarrett D'Amore
974297a3b0SGarrett D'Amore if (n == 0)
984297a3b0SGarrett D'Amore /* Incomplete multibyte sequence */
994297a3b0SGarrett D'Amore return ((size_t)-2);
1004297a3b0SGarrett D'Amore
1014297a3b0SGarrett D'Amore if (us->want == 0) {
1024297a3b0SGarrett D'Amore /*
1034297a3b0SGarrett D'Amore * Determine the number of octets that make up this character
1044297a3b0SGarrett D'Amore * from the first octet, and a mask that extracts the
1054297a3b0SGarrett D'Amore * interesting bits of the first octet. We already know
1064297a3b0SGarrett D'Amore * the character is at least two bytes long.
1074297a3b0SGarrett D'Amore *
1084297a3b0SGarrett D'Amore * We also specify a lower bound for the character code to
1094297a3b0SGarrett D'Amore * detect redundant, non-"shortest form" encodings. For
1104297a3b0SGarrett D'Amore * example, the sequence C0 80 is _not_ a legal representation
1114297a3b0SGarrett D'Amore * of the null character. This enforces a 1-to-1 mapping
1124297a3b0SGarrett D'Amore * between character codes and their multibyte representations.
1134297a3b0SGarrett D'Amore */
1144297a3b0SGarrett D'Amore ch = (unsigned char)*s;
1154297a3b0SGarrett D'Amore if ((ch & 0x80) == 0) {
116475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */
117475b496bSGarrett D'Amore if (pwc != NULL)
118475b496bSGarrett D'Amore *pwc = ch;
119475b496bSGarrett D'Amore return (ch != '\0' ? 1 : 0);
120475b496bSGarrett D'Amore }
121475b496bSGarrett D'Amore if ((ch & 0xe0) == 0xc0) {
1224297a3b0SGarrett D'Amore mask = 0x1f;
1234297a3b0SGarrett D'Amore want = 2;
1244297a3b0SGarrett D'Amore lbound = 0x80;
1254297a3b0SGarrett D'Amore } else if ((ch & 0xf0) == 0xe0) {
1264297a3b0SGarrett D'Amore mask = 0x0f;
1274297a3b0SGarrett D'Amore want = 3;
1284297a3b0SGarrett D'Amore lbound = 0x800;
1294297a3b0SGarrett D'Amore } else if ((ch & 0xf8) == 0xf0) {
1304297a3b0SGarrett D'Amore mask = 0x07;
1314297a3b0SGarrett D'Amore want = 4;
1324297a3b0SGarrett D'Amore lbound = 0x10000;
1334297a3b0SGarrett D'Amore #if 0
1344297a3b0SGarrett D'Amore /* These would be illegal in the UTF-8 space */
1354297a3b0SGarrett D'Amore
1364297a3b0SGarrett D'Amore } else if ((ch & 0xfc) == 0xf8) {
1374297a3b0SGarrett D'Amore mask = 0x03;
1384297a3b0SGarrett D'Amore want = 5;
1394297a3b0SGarrett D'Amore lbound = 0x200000;
1404297a3b0SGarrett D'Amore } else if ((ch & 0xfe) == 0xfc) {
1414297a3b0SGarrett D'Amore mask = 0x01;
1424297a3b0SGarrett D'Amore want = 6;
1434297a3b0SGarrett D'Amore lbound = 0x4000000;
1444297a3b0SGarrett D'Amore #endif
1454297a3b0SGarrett D'Amore } else {
1464297a3b0SGarrett D'Amore /*
1474297a3b0SGarrett D'Amore * Malformed input; input is not UTF-8.
1484297a3b0SGarrett D'Amore */
1494297a3b0SGarrett D'Amore errno = EILSEQ;
1504297a3b0SGarrett D'Amore return ((size_t)-1);
1514297a3b0SGarrett D'Amore }
1524297a3b0SGarrett D'Amore } else {
1534297a3b0SGarrett D'Amore want = us->want;
1544297a3b0SGarrett D'Amore lbound = us->lbound;
1554297a3b0SGarrett D'Amore }
1564297a3b0SGarrett D'Amore
1574297a3b0SGarrett D'Amore /*
1584297a3b0SGarrett D'Amore * Decode the octet sequence representing the character in chunks
1594297a3b0SGarrett D'Amore * of 6 bits, most significant first.
1604297a3b0SGarrett D'Amore */
1614297a3b0SGarrett D'Amore if (us->want == 0)
1624297a3b0SGarrett D'Amore wch = (unsigned char)*s++ & mask;
1634297a3b0SGarrett D'Amore else
1644297a3b0SGarrett D'Amore wch = us->ch;
1654297a3b0SGarrett D'Amore
1664297a3b0SGarrett D'Amore for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
1674297a3b0SGarrett D'Amore if ((*s & 0xc0) != 0x80) {
1684297a3b0SGarrett D'Amore /*
1694297a3b0SGarrett D'Amore * Malformed input; bad characters in the middle
1704297a3b0SGarrett D'Amore * of a character.
1714297a3b0SGarrett D'Amore */
1724297a3b0SGarrett D'Amore errno = EILSEQ;
1734297a3b0SGarrett D'Amore return ((size_t)-1);
1744297a3b0SGarrett D'Amore }
1754297a3b0SGarrett D'Amore wch <<= 6;
1764297a3b0SGarrett D'Amore wch |= *s++ & 0x3f;
1774297a3b0SGarrett D'Amore }
1784297a3b0SGarrett D'Amore if (i < want) {
1794297a3b0SGarrett D'Amore /* Incomplete multibyte sequence. */
1804297a3b0SGarrett D'Amore us->want = want - i;
1814297a3b0SGarrett D'Amore us->lbound = lbound;
1824297a3b0SGarrett D'Amore us->ch = wch;
1834297a3b0SGarrett D'Amore return ((size_t)-2);
1844297a3b0SGarrett D'Amore }
1854297a3b0SGarrett D'Amore if (wch < lbound) {
1864297a3b0SGarrett D'Amore /*
1874297a3b0SGarrett D'Amore * Malformed input; redundant encoding.
1884297a3b0SGarrett D'Amore */
1894297a3b0SGarrett D'Amore errno = EILSEQ;
1904297a3b0SGarrett D'Amore return ((size_t)-1);
1914297a3b0SGarrett D'Amore }
1924297a3b0SGarrett D'Amore if (pwc != NULL)
1934297a3b0SGarrett D'Amore *pwc = wch;
1944297a3b0SGarrett D'Amore us->want = 0;
1954297a3b0SGarrett D'Amore return (wch == L'\0' ? 0 : want);
1964297a3b0SGarrett D'Amore }
1974297a3b0SGarrett D'Amore
1984297a3b0SGarrett D'Amore static size_t
_UTF8_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)1994297a3b0SGarrett D'Amore _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src,
2004297a3b0SGarrett D'Amore size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
2014297a3b0SGarrett D'Amore {
2024297a3b0SGarrett D'Amore _UTF8State *us;
2034297a3b0SGarrett D'Amore const char *s;
2044297a3b0SGarrett D'Amore size_t nchr;
2054297a3b0SGarrett D'Amore wchar_t wc;
2064297a3b0SGarrett D'Amore size_t nb;
2074297a3b0SGarrett D'Amore
2084297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
2094297a3b0SGarrett D'Amore
2104297a3b0SGarrett D'Amore s = *src;
2114297a3b0SGarrett D'Amore nchr = 0;
2124297a3b0SGarrett D'Amore
2134297a3b0SGarrett D'Amore if (dst == NULL) {
2144297a3b0SGarrett D'Amore /*
2154297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
2164297a3b0SGarrett D'Amore * character appears as anything but the first byte of a
2174297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
2184297a3b0SGarrett D'Amore */
2194297a3b0SGarrett D'Amore if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
2204297a3b0SGarrett D'Amore errno = EILSEQ;
2214297a3b0SGarrett D'Amore return ((size_t)-1);
2224297a3b0SGarrett D'Amore }
2234297a3b0SGarrett D'Amore for (;;) {
2244297a3b0SGarrett D'Amore if (nms > 0 && (signed char)*s > 0)
2254297a3b0SGarrett D'Amore /*
2264297a3b0SGarrett D'Amore * Fast path for plain ASCII characters
2274297a3b0SGarrett D'Amore * excluding NUL.
2284297a3b0SGarrett D'Amore */
2294297a3b0SGarrett D'Amore nb = 1;
2304297a3b0SGarrett D'Amore else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
2314297a3b0SGarrett D'Amore (size_t)-1)
2324297a3b0SGarrett D'Amore /* Invalid sequence - mbrtowc() sets errno. */
2334297a3b0SGarrett D'Amore return ((size_t)-1);
2344297a3b0SGarrett D'Amore else if (nb == 0 || nb == (size_t)-2)
2354297a3b0SGarrett D'Amore return (nchr);
2364297a3b0SGarrett D'Amore s += nb;
2374297a3b0SGarrett D'Amore nms -= nb;
2384297a3b0SGarrett D'Amore nchr++;
2394297a3b0SGarrett D'Amore }
2404297a3b0SGarrett D'Amore /*NOTREACHED*/
2414297a3b0SGarrett D'Amore }
2424297a3b0SGarrett D'Amore
2434297a3b0SGarrett D'Amore /*
2444297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
2454297a3b0SGarrett D'Amore * character appears as anything but the first byte of a
2464297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
2474297a3b0SGarrett D'Amore */
2484297a3b0SGarrett D'Amore if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
2494297a3b0SGarrett D'Amore errno = EILSEQ;
2504297a3b0SGarrett D'Amore return ((size_t)-1);
2514297a3b0SGarrett D'Amore }
2524297a3b0SGarrett D'Amore while (len-- > 0) {
2534297a3b0SGarrett D'Amore if (nms > 0 && (signed char)*s > 0) {
2544297a3b0SGarrett D'Amore /*
2554297a3b0SGarrett D'Amore * Fast path for plain ASCII characters
2564297a3b0SGarrett D'Amore * excluding NUL.
2574297a3b0SGarrett D'Amore */
2584297a3b0SGarrett D'Amore *dst = (wchar_t)*s;
2594297a3b0SGarrett D'Amore nb = 1;
2604297a3b0SGarrett D'Amore } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
2614297a3b0SGarrett D'Amore (size_t)-1) {
2624297a3b0SGarrett D'Amore *src = s;
2634297a3b0SGarrett D'Amore return ((size_t)-1);
2644297a3b0SGarrett D'Amore } else if (nb == (size_t)-2) {
2654297a3b0SGarrett D'Amore *src = s + nms;
2664297a3b0SGarrett D'Amore return (nchr);
2674297a3b0SGarrett D'Amore } else if (nb == 0) {
2684297a3b0SGarrett D'Amore *src = NULL;
2694297a3b0SGarrett D'Amore return (nchr);
2704297a3b0SGarrett D'Amore }
2714297a3b0SGarrett D'Amore s += nb;
2724297a3b0SGarrett D'Amore nms -= nb;
2734297a3b0SGarrett D'Amore nchr++;
2744297a3b0SGarrett D'Amore dst++;
2754297a3b0SGarrett D'Amore }
2764297a3b0SGarrett D'Amore *src = s;
2774297a3b0SGarrett D'Amore return (nchr);
2784297a3b0SGarrett D'Amore }
2794297a3b0SGarrett D'Amore
2804297a3b0SGarrett D'Amore static size_t
_UTF8_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)2814297a3b0SGarrett D'Amore _UTF8_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps)
2824297a3b0SGarrett D'Amore {
2834297a3b0SGarrett D'Amore _UTF8State *us;
2844297a3b0SGarrett D'Amore unsigned char lead;
2854297a3b0SGarrett D'Amore int i, len;
2864297a3b0SGarrett D'Amore
2874297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
2884297a3b0SGarrett D'Amore
2894297a3b0SGarrett D'Amore if (us->want != 0) {
2904297a3b0SGarrett D'Amore errno = EINVAL;
2914297a3b0SGarrett D'Amore return ((size_t)-1);
2924297a3b0SGarrett D'Amore }
2934297a3b0SGarrett D'Amore
2944297a3b0SGarrett D'Amore if (s == NULL)
2954297a3b0SGarrett D'Amore /* Reset to initial shift state (no-op) */
2964297a3b0SGarrett D'Amore return (1);
2974297a3b0SGarrett D'Amore
2984297a3b0SGarrett D'Amore /*
2994297a3b0SGarrett D'Amore * Determine the number of octets needed to represent this character.
3004297a3b0SGarrett D'Amore * We always output the shortest sequence possible. Also specify the
3014297a3b0SGarrett D'Amore * first few bits of the first octet, which contains the information
3024297a3b0SGarrett D'Amore * about the sequence length.
3034297a3b0SGarrett D'Amore */
3044297a3b0SGarrett D'Amore if ((wc & ~0x7f) == 0) {
305475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */
306475b496bSGarrett D'Amore *s = (char)wc;
307475b496bSGarrett D'Amore return (1);
3084297a3b0SGarrett D'Amore } else if ((wc & ~0x7ff) == 0) {
3094297a3b0SGarrett D'Amore lead = 0xc0;
3104297a3b0SGarrett D'Amore len = 2;
3114297a3b0SGarrett D'Amore } else if ((wc & ~0xffff) == 0) {
3124297a3b0SGarrett D'Amore lead = 0xe0;
3134297a3b0SGarrett D'Amore len = 3;
3144297a3b0SGarrett D'Amore } else if ((wc & ~0x1fffff) == 0) {
3154297a3b0SGarrett D'Amore lead = 0xf0;
3164297a3b0SGarrett D'Amore len = 4;
3174297a3b0SGarrett D'Amore #if 0
3184297a3b0SGarrett D'Amore /* Again, 5 and 6 byte encodings are simply not permitted */
3194297a3b0SGarrett D'Amore } else if ((wc & ~0x3ffffff) == 0) {
3204297a3b0SGarrett D'Amore lead = 0xf8;
3214297a3b0SGarrett D'Amore len = 5;
3224297a3b0SGarrett D'Amore } else if ((wc & ~0x7fffffff) == 0) {
3234297a3b0SGarrett D'Amore lead = 0xfc;
3244297a3b0SGarrett D'Amore len = 6;
3254297a3b0SGarrett D'Amore #endif
3264297a3b0SGarrett D'Amore } else {
3274297a3b0SGarrett D'Amore errno = EILSEQ;
3284297a3b0SGarrett D'Amore return ((size_t)-1);
3294297a3b0SGarrett D'Amore }
3304297a3b0SGarrett D'Amore
3314297a3b0SGarrett D'Amore /*
3324297a3b0SGarrett D'Amore * Output the octets representing the character in chunks
3334297a3b0SGarrett D'Amore * of 6 bits, least significant last. The first octet is
3344297a3b0SGarrett D'Amore * a special case because it contains the sequence length
3354297a3b0SGarrett D'Amore * information.
3364297a3b0SGarrett D'Amore */
3374297a3b0SGarrett D'Amore for (i = len - 1; i > 0; i--) {
3384297a3b0SGarrett D'Amore s[i] = (wc & 0x3f) | 0x80;
3394297a3b0SGarrett D'Amore wc >>= 6;
3404297a3b0SGarrett D'Amore }
3414297a3b0SGarrett D'Amore *s = (wc & 0xff) | lead;
3424297a3b0SGarrett D'Amore
3434297a3b0SGarrett D'Amore return (len);
3444297a3b0SGarrett D'Amore }
3454297a3b0SGarrett D'Amore
3464297a3b0SGarrett D'Amore static size_t
_UTF8_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)3474297a3b0SGarrett D'Amore _UTF8_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
3484297a3b0SGarrett D'Amore size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
3494297a3b0SGarrett D'Amore {
3504297a3b0SGarrett D'Amore _UTF8State *us;
3514297a3b0SGarrett D'Amore char buf[MB_LEN_MAX];
3524297a3b0SGarrett D'Amore const wchar_t *s;
3534297a3b0SGarrett D'Amore size_t nbytes;
3544297a3b0SGarrett D'Amore size_t nb;
3554297a3b0SGarrett D'Amore
3564297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
3574297a3b0SGarrett D'Amore
3584297a3b0SGarrett D'Amore if (us->want != 0) {
3594297a3b0SGarrett D'Amore errno = EINVAL;
3604297a3b0SGarrett D'Amore return ((size_t)-1);
3614297a3b0SGarrett D'Amore }
3624297a3b0SGarrett D'Amore
3634297a3b0SGarrett D'Amore s = *src;
3644297a3b0SGarrett D'Amore nbytes = 0;
3654297a3b0SGarrett D'Amore
3664297a3b0SGarrett D'Amore if (dst == NULL) {
3674297a3b0SGarrett D'Amore while (nwc-- > 0) {
3684297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80)
3694297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */
3704297a3b0SGarrett D'Amore nb = 1;
3714297a3b0SGarrett D'Amore else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
3724297a3b0SGarrett D'Amore (size_t)-1)
3734297a3b0SGarrett D'Amore /* Invalid character - wcrtomb() sets errno. */
3744297a3b0SGarrett D'Amore return ((size_t)-1);
3754297a3b0SGarrett D'Amore if (*s == L'\0')
3764297a3b0SGarrett D'Amore return (nbytes + nb - 1);
3774297a3b0SGarrett D'Amore s++;
3784297a3b0SGarrett D'Amore nbytes += nb;
3794297a3b0SGarrett D'Amore }
3804297a3b0SGarrett D'Amore return (nbytes);
3814297a3b0SGarrett D'Amore }
3824297a3b0SGarrett D'Amore
3834297a3b0SGarrett D'Amore while (len > 0 && nwc-- > 0) {
3844297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80) {
3854297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */
3864297a3b0SGarrett D'Amore nb = 1;
3874297a3b0SGarrett D'Amore *dst = *s;
3884297a3b0SGarrett D'Amore } else if (len > (size_t)MB_CUR_MAX) {
3894297a3b0SGarrett D'Amore /* Enough space to translate in-place. */
3904297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
3914297a3b0SGarrett D'Amore *src = s;
3924297a3b0SGarrett D'Amore return ((size_t)-1);
3934297a3b0SGarrett D'Amore }
3944297a3b0SGarrett D'Amore } else {
3954297a3b0SGarrett D'Amore /*
3964297a3b0SGarrett D'Amore * May not be enough space; use temp. buffer.
3974297a3b0SGarrett D'Amore */
3984297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
3994297a3b0SGarrett D'Amore *src = s;
4004297a3b0SGarrett D'Amore return ((size_t)-1);
4014297a3b0SGarrett D'Amore }
4024297a3b0SGarrett D'Amore if (nb > (int)len)
4034297a3b0SGarrett D'Amore /* MB sequence for character won't fit. */
4044297a3b0SGarrett D'Amore break;
4054297a3b0SGarrett D'Amore (void) memcpy(dst, buf, nb);
4064297a3b0SGarrett D'Amore }
4074297a3b0SGarrett D'Amore if (*s == L'\0') {
4084297a3b0SGarrett D'Amore *src = NULL;
4094297a3b0SGarrett D'Amore return (nbytes + nb - 1);
4104297a3b0SGarrett D'Amore }
4114297a3b0SGarrett D'Amore s++;
4124297a3b0SGarrett D'Amore dst += nb;
4134297a3b0SGarrett D'Amore len -= nb;
4144297a3b0SGarrett D'Amore nbytes += nb;
4154297a3b0SGarrett D'Amore }
4164297a3b0SGarrett D'Amore *src = s;
4174297a3b0SGarrett D'Amore return (nbytes);
4184297a3b0SGarrett D'Amore }
419