/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2002-2004 Tim J. Robbins
 * All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 *
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * PRC National Standard GB 18030-2000 encoding of Chinese text.
 *
 * See gb18030(5) for details.
 */

#include <sys/param.h>
#include <errno.h>
#include <runetype.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "mblocal.h"

/*
 * Forward declarations of the file-local GB 18030 conversion routines,
 * so they can be referenced before their definitions.
 */
static size_t	_GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static int	_GB18030_mbsinit(const mbstate_t *);
static size_t	_GB18030_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_GB18030_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_GB18030_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);

63ca2dae42STim J. Robbins typedef struct {
64ca2dae42STim J. Robbins int count;
65ca2dae42STim J. Robbins u_char bytes[4];
66ca2dae42STim J. Robbins } _GB18030State;
67ca2dae42STim J. Robbins
68a0308108SAndrey A. Chernov int
_GB18030_init(struct xlocale_ctype * l,_RuneLocale * rl)693c87aa1dSDavid Chisnall _GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl)
70a0308108SAndrey A. Chernov {
714f6d4aa3STim J. Robbins
723c87aa1dSDavid Chisnall l->__mbrtowc = _GB18030_mbrtowc;
733c87aa1dSDavid Chisnall l->__wcrtomb = _GB18030_wcrtomb;
743c87aa1dSDavid Chisnall l->__mbsinit = _GB18030_mbsinit;
757b247341SBaptiste Daroussin l->__mbsnrtowcs = _GB18030_mbsnrtowcs;
767b247341SBaptiste Daroussin l->__wcsnrtombs = _GB18030_wcsnrtombs;
773c87aa1dSDavid Chisnall l->runes = rl;
783c87aa1dSDavid Chisnall l->__mb_cur_max = 4;
793c87aa1dSDavid Chisnall l->__mb_sb_limit = 128;
804f6d4aa3STim J. Robbins
81a0308108SAndrey A. Chernov return (0);
82a0308108SAndrey A. Chernov }
83a0308108SAndrey A. Chernov
84e94c6cb4SAlexey Zelkin static int
_GB18030_mbsinit(const mbstate_t * ps)85ca2dae42STim J. Robbins _GB18030_mbsinit(const mbstate_t *ps)
86ca2dae42STim J. Robbins {
87ca2dae42STim J. Robbins
88fa02ee78STim J. Robbins return (ps == NULL || ((const _GB18030State *)ps)->count == 0);
89ca2dae42STim J. Robbins }
90ca2dae42STim J. Robbins
91e94c6cb4SAlexey Zelkin static size_t
_GB18030_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)924f6d4aa3STim J. Robbins _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
93ca2dae42STim J. Robbins size_t n, mbstate_t * __restrict ps)
94a0308108SAndrey A. Chernov {
95ca2dae42STim J. Robbins _GB18030State *gs;
964f6d4aa3STim J. Robbins wchar_t wch;
97ca2dae42STim J. Robbins int ch, len, ocount;
98ca2dae42STim J. Robbins size_t ncopy;
994f6d4aa3STim J. Robbins
100ca2dae42STim J. Robbins gs = (_GB18030State *)ps;
101ca2dae42STim J. Robbins
102fc813796STim J. Robbins if (gs->count < 0 || gs->count > sizeof(gs->bytes)) {
103fc813796STim J. Robbins errno = EINVAL;
104fc813796STim J. Robbins return ((size_t)-1);
105fc813796STim J. Robbins }
106fc813796STim J. Robbins
107ca2dae42STim J. Robbins if (s == NULL) {
108ca2dae42STim J. Robbins s = "";
109ca2dae42STim J. Robbins n = 1;
110ca2dae42STim J. Robbins pwc = NULL;
111ca2dae42STim J. Robbins }
112ca2dae42STim J. Robbins
113ca2dae42STim J. Robbins ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
114ca2dae42STim J. Robbins memcpy(gs->bytes + gs->count, s, ncopy);
115ca2dae42STim J. Robbins ocount = gs->count;
116ca2dae42STim J. Robbins gs->count += ncopy;
117ca2dae42STim J. Robbins s = (char *)gs->bytes;
118ca2dae42STim J. Robbins n = gs->count;
119ca2dae42STim J. Robbins
1204f6d4aa3STim J. Robbins if (n == 0)
1214f6d4aa3STim J. Robbins /* Incomplete multibyte sequence */
1224f6d4aa3STim J. Robbins return ((size_t)-2);
1234f6d4aa3STim J. Robbins
1244f6d4aa3STim J. Robbins /*
1254f6d4aa3STim J. Robbins * Single byte: [00-7f]
1264f6d4aa3STim J. Robbins * Two byte: [81-fe][40-7e,80-fe]
1274f6d4aa3STim J. Robbins * Four byte: [81-fe][30-39][81-fe][30-39]
1284f6d4aa3STim J. Robbins */
1294f6d4aa3STim J. Robbins ch = (unsigned char)*s++;
1304f6d4aa3STim J. Robbins if (ch <= 0x7f) {
1314f6d4aa3STim J. Robbins len = 1;
1324f6d4aa3STim J. Robbins wch = ch;
1334f6d4aa3STim J. Robbins } else if (ch >= 0x81 && ch <= 0xfe) {
1344f6d4aa3STim J. Robbins wch = ch;
1354f6d4aa3STim J. Robbins if (n < 2)
1364f6d4aa3STim J. Robbins return ((size_t)-2);
1374f6d4aa3STim J. Robbins ch = (unsigned char)*s++;
1384f6d4aa3STim J. Robbins if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) {
1394f6d4aa3STim J. Robbins wch = (wch << 8) | ch;
1404f6d4aa3STim J. Robbins len = 2;
1414f6d4aa3STim J. Robbins } else if (ch >= 0x30 && ch <= 0x39) {
1424f6d4aa3STim J. Robbins /*
1434f6d4aa3STim J. Robbins * Strip high bit off the wide character we will
1444f6d4aa3STim J. Robbins * eventually output so that it is positive when
1454f6d4aa3STim J. Robbins * cast to wint_t on 32-bit twos-complement machines.
1464f6d4aa3STim J. Robbins */
1474f6d4aa3STim J. Robbins wch = ((wch & 0x7f) << 8) | ch;
1484f6d4aa3STim J. Robbins if (n < 3)
1494f6d4aa3STim J. Robbins return ((size_t)-2);
1504f6d4aa3STim J. Robbins ch = (unsigned char)*s++;
1514f6d4aa3STim J. Robbins if (ch < 0x81 || ch > 0xfe)
1524f6d4aa3STim J. Robbins goto ilseq;
1534f6d4aa3STim J. Robbins wch = (wch << 8) | ch;
1544f6d4aa3STim J. Robbins if (n < 4)
1554f6d4aa3STim J. Robbins return ((size_t)-2);
1564f6d4aa3STim J. Robbins ch = (unsigned char)*s++;
1574f6d4aa3STim J. Robbins if (ch < 0x30 || ch > 0x39)
1584f6d4aa3STim J. Robbins goto ilseq;
1594f6d4aa3STim J. Robbins wch = (wch << 8) | ch;
1604f6d4aa3STim J. Robbins len = 4;
1614f6d4aa3STim J. Robbins } else
1624f6d4aa3STim J. Robbins goto ilseq;
1634f6d4aa3STim J. Robbins } else
1644f6d4aa3STim J. Robbins goto ilseq;
1654f6d4aa3STim J. Robbins
1664f6d4aa3STim J. Robbins if (pwc != NULL)
1674f6d4aa3STim J. Robbins *pwc = wch;
168ca2dae42STim J. Robbins gs->count = 0;
169ca2dae42STim J. Robbins return (wch == L'\0' ? 0 : len - ocount);
1704f6d4aa3STim J. Robbins ilseq:
1714f6d4aa3STim J. Robbins errno = EILSEQ;
1724f6d4aa3STim J. Robbins return ((size_t)-1);
173a0308108SAndrey A. Chernov }
174a0308108SAndrey A. Chernov
175e94c6cb4SAlexey Zelkin static size_t
_GB18030_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)176fc813796STim J. Robbins _GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
177a0308108SAndrey A. Chernov {
178fc813796STim J. Robbins _GB18030State *gs;
1794f6d4aa3STim J. Robbins size_t len;
1804f6d4aa3STim J. Robbins int c;
181a0308108SAndrey A. Chernov
182fc813796STim J. Robbins gs = (_GB18030State *)ps;
183fc813796STim J. Robbins
184fc813796STim J. Robbins if (gs->count != 0) {
185fc813796STim J. Robbins errno = EINVAL;
186fc813796STim J. Robbins return ((size_t)-1);
187fc813796STim J. Robbins }
188fc813796STim J. Robbins
1894f6d4aa3STim J. Robbins if (s == NULL)
1904f6d4aa3STim J. Robbins /* Reset to initial shift state (no-op) */
191a0308108SAndrey A. Chernov return (1);
1924f6d4aa3STim J. Robbins if ((wc & ~0x7fffffff) != 0)
1934f6d4aa3STim J. Robbins goto ilseq;
1944f6d4aa3STim J. Robbins if (wc & 0x7f000000) {
1954f6d4aa3STim J. Robbins /* Replace high bit that mbrtowc() removed. */
1964f6d4aa3STim J. Robbins wc |= 0x80000000;
1974f6d4aa3STim J. Robbins c = (wc >> 24) & 0xff;
1984f6d4aa3STim J. Robbins if (c < 0x81 || c > 0xfe)
1994f6d4aa3STim J. Robbins goto ilseq;
2004f6d4aa3STim J. Robbins *s++ = c;
2014f6d4aa3STim J. Robbins c = (wc >> 16) & 0xff;
2024f6d4aa3STim J. Robbins if (c < 0x30 || c > 0x39)
2034f6d4aa3STim J. Robbins goto ilseq;
2044f6d4aa3STim J. Robbins *s++ = c;
2054f6d4aa3STim J. Robbins c = (wc >> 8) & 0xff;
2064f6d4aa3STim J. Robbins if (c < 0x81 || c > 0xfe)
2074f6d4aa3STim J. Robbins goto ilseq;
2084f6d4aa3STim J. Robbins *s++ = c;
2094f6d4aa3STim J. Robbins c = wc & 0xff;
2104f6d4aa3STim J. Robbins if (c < 0x30 || c > 0x39)
2114f6d4aa3STim J. Robbins goto ilseq;
2124f6d4aa3STim J. Robbins *s++ = c;
2134f6d4aa3STim J. Robbins len = 4;
2144f6d4aa3STim J. Robbins } else if (wc & 0x00ff0000)
2154f6d4aa3STim J. Robbins goto ilseq;
2164f6d4aa3STim J. Robbins else if (wc & 0x0000ff00) {
2174f6d4aa3STim J. Robbins c = (wc >> 8) & 0xff;
2184f6d4aa3STim J. Robbins if (c < 0x81 || c > 0xfe)
2194f6d4aa3STim J. Robbins goto ilseq;
2204f6d4aa3STim J. Robbins *s++ = c;
2214f6d4aa3STim J. Robbins c = wc & 0xff;
2224f6d4aa3STim J. Robbins if (c < 0x40 || c == 0x7f || c == 0xff)
2234f6d4aa3STim J. Robbins goto ilseq;
2244f6d4aa3STim J. Robbins *s++ = c;
2254f6d4aa3STim J. Robbins len = 2;
2264f6d4aa3STim J. Robbins } else if (wc <= 0x7f) {
2274f6d4aa3STim J. Robbins *s++ = wc;
2284f6d4aa3STim J. Robbins len = 1;
2294f6d4aa3STim J. Robbins } else
2304f6d4aa3STim J. Robbins goto ilseq;
2314f6d4aa3STim J. Robbins
2324f6d4aa3STim J. Robbins return (len);
2334f6d4aa3STim J. Robbins ilseq:
2344f6d4aa3STim J. Robbins errno = EILSEQ;
2354f6d4aa3STim J. Robbins return ((size_t)-1);
236a0308108SAndrey A. Chernov }
2377b247341SBaptiste Daroussin
2387b247341SBaptiste Daroussin static size_t
_GB18030_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)2397b247341SBaptiste Daroussin _GB18030_mbsnrtowcs(wchar_t * __restrict dst,
2407b247341SBaptiste Daroussin const char ** __restrict src, size_t nms, size_t len,
2417b247341SBaptiste Daroussin mbstate_t * __restrict ps)
2427b247341SBaptiste Daroussin {
2437b247341SBaptiste Daroussin return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc));
2447b247341SBaptiste Daroussin }
2457b247341SBaptiste Daroussin
2467b247341SBaptiste Daroussin static size_t
_GB18030_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)2477b247341SBaptiste Daroussin _GB18030_wcsnrtombs(char * __restrict dst,
2487b247341SBaptiste Daroussin const wchar_t ** __restrict src, size_t nwc, size_t len,
2497b247341SBaptiste Daroussin mbstate_t * __restrict ps)
2507b247341SBaptiste Daroussin {
2517b247341SBaptiste Daroussin return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb));
2527b247341SBaptiste Daroussin }
253