1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
5 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
6 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
7 * Copyright (c) 1993
8 * The Regents of the University of California. All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * Paul Borman at Krystal Technologies.
12 *
13 * Copyright (c) 2011 The FreeBSD Foundation
14 *
15 * Portions of this software were developed by David Chisnall
16 * under sponsorship from the FreeBSD Foundation.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42
43 #include <sys/param.h>
44 #include <errno.h>
45 #include <limits.h>
46 #include <runetype.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <wchar.h>
50 #include "mblocal.h"
51
/* NOTE(review): no use of this global is visible in this file - confirm it is still needed. */
extern int __mb_sb_limit;

/*
 * Shared single-character workers.  The four trailing uint8_t arguments
 * are, in order: CS2 lead byte, CS2 width, CS3 lead byte, CS3 width.
 */
static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t,
    mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);

/* Per-locale multibyte -> wide single-character decoders. */
static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict);
static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict);
static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict);
static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict);

/* Per-locale wide -> multibyte single-character encoders. */
static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t,
    mbstate_t * __restrict);
static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t,
    mbstate_t * __restrict);
static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t,
    mbstate_t * __restrict);
static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t,
    mbstate_t * __restrict);

/* Per-locale multibyte -> wide string converters. */
static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict,
    const char ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict,
    const char ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict,
    const char ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict,
    const char ** __restrict, size_t, size_t,
    mbstate_t * __restrict);

/* Per-locale wide -> multibyte string converters. */
static size_t _EUC_CN_wcsnrtombs(char * __restrict,
    const wchar_t ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_JP_wcsnrtombs(char * __restrict,
    const wchar_t ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_KR_wcsnrtombs(char * __restrict,
    const wchar_t ** __restrict, size_t, size_t,
    mbstate_t * __restrict);
static size_t _EUC_TW_wcsnrtombs(char * __restrict,
    const wchar_t ** __restrict, size_t, size_t,
    mbstate_t * __restrict);

/* State query shared by all four EUC locales. */
static int _EUC_mbsinit(const mbstate_t *);
104
/*
 * Conversion state kept inside the caller's mbstate_t; the code in this
 * file accesses it by casting the mbstate_t pointer to _EucState *.
 */
typedef struct {
	wchar_t ch;	/* bytes of a partially decoded character, accumulated so far */
	int set;	/* not read or written by the code visible in this file */
	int want;	/* bytes still needed to finish the pending character; 0 = initial state */
} _EucState;
110
111 static int
_EUC_mbsinit(const mbstate_t * ps)112 _EUC_mbsinit(const mbstate_t *ps)
113 {
114
115 return (ps == NULL || ((const _EucState *)ps)->want == 0);
116 }
117
118 /*
119 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
120 */
121 int
_EUC_CN_init(struct xlocale_ctype * l,_RuneLocale * rl)122 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
123 {
124 l->__mbrtowc = _EUC_CN_mbrtowc;
125 l->__wcrtomb = _EUC_CN_wcrtomb;
126 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
127 l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
128 l->__mbsinit = _EUC_mbsinit;
129
130 l->runes = rl;
131 l->__mb_cur_max = 4;
132 l->__mb_sb_limit = 128;
133 return (0);
134 }
135
136 static size_t
_EUC_CN_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)137 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
138 size_t n, mbstate_t * __restrict ps)
139 {
140 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
141 }
142
143 static size_t
_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)144 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
145 const char ** __restrict src,
146 size_t nms, size_t len, mbstate_t * __restrict ps)
147 {
148 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
149 }
150
151 static size_t
_EUC_CN_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)152 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
153 mbstate_t * __restrict ps)
154 {
155 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
156 }
157
158 static size_t
_EUC_CN_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)159 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
160 size_t nwc, size_t len, mbstate_t * __restrict ps)
161 {
162 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
163 }
164
165 /*
166 * EUC-KR uses only CS0 and CS1.
167 */
168 int
_EUC_KR_init(struct xlocale_ctype * l,_RuneLocale * rl)169 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
170 {
171 l->__mbrtowc = _EUC_KR_mbrtowc;
172 l->__wcrtomb = _EUC_KR_wcrtomb;
173 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
174 l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
175 l->__mbsinit = _EUC_mbsinit;
176
177 l->runes = rl;
178 l->__mb_cur_max = 2;
179 l->__mb_sb_limit = 128;
180 return (0);
181 }
182
/*
 * Decode one EUC-KR character via the shared decoder.  Both the CS2 and
 * CS3 lead bytes and widths are passed as 0.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	size_t consumed;

	consumed = _EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0);
	return (consumed);
}
189
190 static size_t
_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)191 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
192 const char ** __restrict src,
193 size_t nms, size_t len, mbstate_t * __restrict ps)
194 {
195 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
196 }
197
/*
 * Encode one wide character as EUC-KR via the shared encoder.  Both the
 * CS2 and CS3 lead bytes and widths are passed as 0.
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
    mbstate_t * __restrict ps)
{
	size_t produced;

	produced = _EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0);
	return (produced);
}
204
205 static size_t
_EUC_KR_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)206 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
207 size_t nwc, size_t len, mbstate_t * __restrict ps)
208 {
209 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
210 }
211
212 /*
213 * EUC-JP uses CS0, CS1, CS2, and CS3.
214 */
215 int
_EUC_JP_init(struct xlocale_ctype * l,_RuneLocale * rl)216 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
217 {
218 l->__mbrtowc = _EUC_JP_mbrtowc;
219 l->__wcrtomb = _EUC_JP_wcrtomb;
220 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
221 l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
222 l->__mbsinit = _EUC_mbsinit;
223
224 l->runes = rl;
225 l->__mb_cur_max = 3;
226 l->__mb_sb_limit = 128;
227 return (0);
228 }
229
230 static size_t
_EUC_JP_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)231 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
232 size_t n, mbstate_t * __restrict ps)
233 {
234 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
235 }
236
237 static size_t
_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)238 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
239 const char ** __restrict src,
240 size_t nms, size_t len, mbstate_t * __restrict ps)
241 {
242 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
243 }
244
245 static size_t
_EUC_JP_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)246 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
247 mbstate_t * __restrict ps)
248 {
249 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
250 }
251
252 static size_t
_EUC_JP_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)253 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
254 size_t nwc, size_t len, mbstate_t * __restrict ps)
255 {
256 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
257 }
258
259 /*
260 * EUC-TW uses CS0, CS1, and CS2.
261 */
262 int
_EUC_TW_init(struct xlocale_ctype * l,_RuneLocale * rl)263 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
264 {
265 l->__mbrtowc = _EUC_TW_mbrtowc;
266 l->__wcrtomb = _EUC_TW_wcrtomb;
267 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
268 l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
269 l->__mbsinit = _EUC_mbsinit;
270
271 l->runes = rl;
272 l->__mb_cur_max = 4;
273 l->__mb_sb_limit = 128;
274 return (0);
275 }
276
277 static size_t
_EUC_TW_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)278 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
279 size_t n, mbstate_t * __restrict ps)
280 {
281 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
282 }
283
284 static size_t
_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)285 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
286 const char ** __restrict src,
287 size_t nms, size_t len, mbstate_t * __restrict ps)
288 {
289 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
290 }
291
292 static size_t
_EUC_TW_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)293 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
294 mbstate_t * __restrict ps)
295 {
296 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
297 }
298
299 static size_t
_EUC_TW_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)300 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
301 size_t nwc, size_t len, mbstate_t * __restrict ps)
302 {
303 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
304 }
305
306 /*
307 * Common EUC code.
308 */
309
310 static size_t
_EUC_mbrtowc_impl(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)311 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
312 size_t n, mbstate_t * __restrict ps,
313 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
314 {
315 _EucState *es;
316 int i, want;
317 wchar_t wc = 0;
318 unsigned char ch, chs;
319
320 es = (_EucState *)ps;
321
322 if (es->want < 0 || es->want > MB_CUR_MAX) {
323 errno = EINVAL;
324 return ((size_t)-1);
325 }
326
327 if (s == NULL) {
328 s = "";
329 n = 1;
330 pwc = NULL;
331 }
332
333 if (n == 0)
334 /* Incomplete multibyte sequence */
335 return ((size_t)-2);
336
337 if (es->want == 0) {
338 /* Fast path for plain ASCII (CS0) */
339 if (((ch = (unsigned char)*s) & 0x80) == 0) {
340 if (pwc != NULL)
341 *pwc = ch;
342 return (ch != '\0' ? 1 : 0);
343 }
344
345 if (ch >= 0xa1) {
346 /* CS1 */
347 want = 2;
348 } else if (ch == cs2) {
349 want = cs2width;
350 } else if (ch == cs3) {
351 want = cs3width;
352 } else {
353 errno = EILSEQ;
354 return ((size_t)-1);
355 }
356
357
358 es->want = want;
359 es->ch = 0;
360 } else {
361 want = es->want;
362 wc = es->ch;
363 }
364
365 for (i = 0; i < MIN(want, n); i++) {
366 wc <<= 8;
367 chs = *s;
368 wc |= chs;
369 s++;
370 }
371 if (i < want) {
372 /* Incomplete multibyte sequence */
373 es->want = want - i;
374 es->ch = wc;
375 errno = EILSEQ;
376 return ((size_t)-2);
377 }
378 if (pwc != NULL)
379 *pwc = wc;
380 es->want = 0;
381 return (wc == L'\0' ? 0 : want);
382 }
383
384 static size_t
_EUC_wcrtomb_impl(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)385 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
386 mbstate_t * __restrict ps,
387 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
388 {
389 _EucState *es;
390 int i, len;
391 wchar_t nm;
392
393 es = (_EucState *)ps;
394
395 if (es->want != 0) {
396 errno = EINVAL;
397 return ((size_t)-1);
398 }
399
400 if (s == NULL)
401 /* Reset to initial shift state (no-op) */
402 return (1);
403
404 if ((wc & ~0x7f) == 0) {
405 /* Fast path for plain ASCII (CS0) */
406 *s = (char)wc;
407 return (1);
408 }
409
410 /* Determine the "length" */
411 if ((unsigned)wc > 0xffffff) {
412 len = 4;
413 } else if ((unsigned)wc > 0xffff) {
414 len = 3;
415 } else if ((unsigned)wc > 0xff) {
416 len = 2;
417 } else {
418 len = 1;
419 }
420
421 if (len > MB_CUR_MAX) {
422 errno = EILSEQ;
423 return ((size_t)-1);
424 }
425
426 /* This first check excludes CS1, which is implicitly valid. */
427 if ((wc < 0xa100) || (wc > 0xffff)) {
428 /* Check for valid CS2 or CS3 */
429 nm = (wc >> ((len - 1) * 8));
430 if (nm == cs2) {
431 if (len != cs2width) {
432 errno = EILSEQ;
433 return ((size_t)-1);
434 }
435 } else if (nm == cs3) {
436 if (len != cs3width) {
437 errno = EILSEQ;
438 return ((size_t)-1);
439 }
440 } else {
441 errno = EILSEQ;
442 return ((size_t)-1);
443 }
444 }
445
446 /* Stash the bytes, least significant last */
447 for (i = len - 1; i >= 0; i--) {
448 s[i] = (wc & 0xff);
449 wc >>= 8;
450 }
451 return (len);
452 }
453