14297a3b0SGarrett D'Amore /* 22d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org> 36b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 44297a3b0SGarrett D'Amore * Copyright (c) 2004 Tim J. Robbins. All rights reserved. 54297a3b0SGarrett D'Amore * Copyright (c) 2003 David Xu <davidxu@freebsd.org> 64297a3b0SGarrett D'Amore * All rights reserved. 74297a3b0SGarrett D'Amore * 84297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 94297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 104297a3b0SGarrett D'Amore * are met: 114297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 124297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 134297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 144297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 154297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 164297a3b0SGarrett D'Amore * 174297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 184297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 194297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 204297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 214297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 224297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 234297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 244297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 254297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 264297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 274297a3b0SGarrett D'Amore * SUCH DAMAGE. 284297a3b0SGarrett D'Amore */ 294297a3b0SGarrett D'Amore 304297a3b0SGarrett D'Amore #include "lint.h" 314297a3b0SGarrett D'Amore #include <sys/types.h> 324297a3b0SGarrett D'Amore #include <errno.h> 334297a3b0SGarrett D'Amore #include "runetype.h" 344297a3b0SGarrett D'Amore #include <stdlib.h> 354297a3b0SGarrett D'Amore #include <string.h> 364297a3b0SGarrett D'Amore #include <wchar.h> 374297a3b0SGarrett D'Amore #include "mblocal.h" 382d08521bSGarrett D'Amore #include "lctype.h" 394297a3b0SGarrett D'Amore 404297a3b0SGarrett D'Amore static size_t _GB2312_mbrtowc(wchar_t *_RESTRICT_KYWD, 414297a3b0SGarrett D'Amore const char *_RESTRICT_KYWD, 42*d8e0a9a1SRobert Mustacchi size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 434297a3b0SGarrett D'Amore static int _GB2312_mbsinit(const mbstate_t *); 444297a3b0SGarrett D'Amore static size_t _GB2312_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 454297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD); 462d08521bSGarrett D'Amore static size_t _GB2312_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 472d08521bSGarrett D'Amore const char **_RESTRICT_KYWD, size_t, size_t, 482d08521bSGarrett D'Amore mbstate_t *_RESTRICT_KYWD); 492d08521bSGarrett D'Amore static size_t _GB2312_wcsnrtombs(char *_RESTRICT_KYWD, 502d08521bSGarrett D'Amore const wchar_t **_RESTRICT_KYWD, size_t, size_t, 512d08521bSGarrett D'Amore mbstate_t *_RESTRICT_KYWD); 522d08521bSGarrett D'Amore 532d08521bSGarrett D'Amore void 542d08521bSGarrett D'Amore _GB2312_init(struct lc_ctype *lct) 554297a3b0SGarrett D'Amore { 564297a3b0SGarrett D'Amore 572d08521bSGarrett D'Amore lct->lc_mbrtowc = _GB2312_mbrtowc; 582d08521bSGarrett D'Amore lct->lc_wcrtomb = _GB2312_wcrtomb; 592d08521bSGarrett D'Amore lct->lc_mbsinit = _GB2312_mbsinit; 602d08521bSGarrett D'Amore lct->lc_mbsnrtowcs = _GB2312_mbsnrtowcs; 612d08521bSGarrett D'Amore lct->lc_wcsnrtombs = _GB2312_wcsnrtombs; 622d08521bSGarrett D'Amore lct->lc_max_mblen = 2; 632d08521bSGarrett D'Amore lct->lc_is_ascii = 0; 644297a3b0SGarrett D'Amore } 654297a3b0SGarrett D'Amore 664297a3b0SGarrett D'Amore static int 674297a3b0SGarrett D'Amore _GB2312_mbsinit(const mbstate_t *ps) 684297a3b0SGarrett D'Amore { 694297a3b0SGarrett D'Amore 704297a3b0SGarrett D'Amore return (ps == NULL || ((const _GB2312State *)ps)->count == 0); 714297a3b0SGarrett D'Amore } 724297a3b0SGarrett D'Amore 734297a3b0SGarrett D'Amore static int 744297a3b0SGarrett D'Amore _GB2312_check(const char *str, size_t n) 754297a3b0SGarrett D'Amore { 764297a3b0SGarrett D'Amore const uchar_t *s = (const uchar_t *)str; 774297a3b0SGarrett D'Amore 784297a3b0SGarrett D'Amore if (n == 0) 794297a3b0SGarrett D'Amore /* Incomplete multibyte sequence */ 804297a3b0SGarrett D'Amore return (-2); 814297a3b0SGarrett D'Amore if (s[0] >= 0xa1 && s[0] <= 0xfe) { 824297a3b0SGarrett D'Amore if (n < 2) 834297a3b0SGarrett D'Amore /* Incomplete multibyte sequence */ 844297a3b0SGarrett D'Amore return (-2); 854297a3b0SGarrett D'Amore if (s[1] < 0xa1 || s[1] > 0xfe) 864297a3b0SGarrett D'Amore /* Invalid multibyte sequence */ 874297a3b0SGarrett D'Amore return (-1); 884297a3b0SGarrett D'Amore return (2); 894297a3b0SGarrett D'Amore } else if (s[0] & 0x80) { 904297a3b0SGarrett D'Amore /* Invalid multibyte sequence */ 914297a3b0SGarrett D'Amore return (-1); 924297a3b0SGarrett D'Amore } 934297a3b0SGarrett D'Amore return (1); 944297a3b0SGarrett D'Amore } 954297a3b0SGarrett D'Amore 964297a3b0SGarrett D'Amore static size_t 974297a3b0SGarrett D'Amore _GB2312_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 98*d8e0a9a1SRobert Mustacchi size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 994297a3b0SGarrett D'Amore { 1004297a3b0SGarrett D'Amore _GB2312State *gs; 1014297a3b0SGarrett D'Amore wchar_t wc; 1024297a3b0SGarrett D'Amore int i, len, ocount; 1034297a3b0SGarrett D'Amore size_t ncopy; 1044297a3b0SGarrett D'Amore 1054297a3b0SGarrett D'Amore gs = (_GB2312State *)ps; 1064297a3b0SGarrett D'Amore 1074297a3b0SGarrett D'Amore if (gs->count < 0 || gs->count > sizeof (gs->bytes)) { 1084297a3b0SGarrett D'Amore errno = EINVAL; 1094297a3b0SGarrett D'Amore return ((size_t)-1); 1104297a3b0SGarrett D'Amore } 1114297a3b0SGarrett D'Amore 1124297a3b0SGarrett D'Amore if (s == NULL) { 1134297a3b0SGarrett D'Amore s = ""; 1144297a3b0SGarrett D'Amore n = 1; 1154297a3b0SGarrett D'Amore pwc = NULL; 1164297a3b0SGarrett D'Amore } 1174297a3b0SGarrett D'Amore 1184297a3b0SGarrett D'Amore ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof (gs->bytes) - gs->count); 1194297a3b0SGarrett D'Amore (void) memcpy(gs->bytes + gs->count, s, ncopy); 1204297a3b0SGarrett D'Amore ocount = gs->count; 1214297a3b0SGarrett D'Amore gs->count += ncopy; 1224297a3b0SGarrett D'Amore s = (char *)gs->bytes; 1234297a3b0SGarrett D'Amore n = gs->count; 1244297a3b0SGarrett D'Amore 1254297a3b0SGarrett D'Amore if ((len = _GB2312_check(s, n)) < 0) 1264297a3b0SGarrett D'Amore return ((size_t)len); 1274297a3b0SGarrett D'Amore wc = 0; 1284297a3b0SGarrett D'Amore i = len; 1294297a3b0SGarrett D'Amore while (i-- > 0) 1304297a3b0SGarrett D'Amore wc = (wc << 8) | (unsigned char)*s++; 1314297a3b0SGarrett D'Amore if (pwc != NULL) 1324297a3b0SGarrett D'Amore *pwc = wc; 1334297a3b0SGarrett D'Amore gs->count = 0; 134*d8e0a9a1SRobert Mustacchi if (zero || wc != L'\0') { 135*d8e0a9a1SRobert Mustacchi return (len - ocount); 136*d8e0a9a1SRobert Mustacchi } else { 137*d8e0a9a1SRobert Mustacchi return (0); 138*d8e0a9a1SRobert Mustacchi } 1394297a3b0SGarrett D'Amore } 1404297a3b0SGarrett D'Amore 1414297a3b0SGarrett D'Amore static size_t 1424297a3b0SGarrett D'Amore _GB2312_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 1434297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD ps) 1444297a3b0SGarrett D'Amore { 1454297a3b0SGarrett D'Amore _GB2312State *gs; 1464297a3b0SGarrett D'Amore 1474297a3b0SGarrett D'Amore gs = (_GB2312State *)ps; 1484297a3b0SGarrett D'Amore 1494297a3b0SGarrett D'Amore if (gs->count != 0) { 1504297a3b0SGarrett D'Amore errno = EINVAL; 1514297a3b0SGarrett D'Amore return ((size_t)-1); 1524297a3b0SGarrett D'Amore } 1534297a3b0SGarrett D'Amore 1544297a3b0SGarrett D'Amore if (s == NULL) 1554297a3b0SGarrett D'Amore /* Reset to initial shift state (no-op) */ 1564297a3b0SGarrett D'Amore return (1); 1574297a3b0SGarrett D'Amore if (wc & 0x8000) { 1584297a3b0SGarrett D'Amore *s++ = (wc >> 8) & 0xff; 1594297a3b0SGarrett D'Amore *s = wc & 0xff; 1604297a3b0SGarrett D'Amore return (2); 1614297a3b0SGarrett D'Amore } 1624297a3b0SGarrett D'Amore *s = wc & 0xff; 1634297a3b0SGarrett D'Amore return (1); 1644297a3b0SGarrett D'Amore } 1652d08521bSGarrett D'Amore 1662d08521bSGarrett D'Amore static size_t 1672d08521bSGarrett D'Amore _GB2312_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 1682d08521bSGarrett D'Amore const char **_RESTRICT_KYWD src, size_t nms, size_t len, 1692d08521bSGarrett D'Amore mbstate_t *_RESTRICT_KYWD ps) 1702d08521bSGarrett D'Amore { 1712d08521bSGarrett D'Amore return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc)); 1722d08521bSGarrett D'Amore } 1732d08521bSGarrett D'Amore 1742d08521bSGarrett D'Amore static size_t 1752d08521bSGarrett D'Amore _GB2312_wcsnrtombs(char *_RESTRICT_KYWD dst, 1762d08521bSGarrett D'Amore const wchar_t **_RESTRICT_KYWD src, size_t nwc, size_t len, 1772d08521bSGarrett D'Amore mbstate_t *_RESTRICT_KYWD ps) 1782d08521bSGarrett D'Amore { 1792d08521bSGarrett D'Amore return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb)); 1802d08521bSGarrett D'Amore } 181