1 /*
2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include "lint.h"
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <sys/types.h>
43 #include <sys/euc.h>
44 #include "mblocal.h"
45 #include "lctype.h"
46
47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD,
48 const char *_RESTRICT_KYWD,
49 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
50 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t,
51 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
52
53 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD,
54 const char *_RESTRICT_KYWD,
55 size_t, mbstate_t *_RESTRICT_KYWD);
56 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD,
57 const char *_RESTRICT_KYWD,
58 size_t, mbstate_t *_RESTRICT_KYWD);
59 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD,
60 const char *_RESTRICT_KYWD,
61 size_t, mbstate_t *_RESTRICT_KYWD);
62 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD,
63 const char *_RESTRICT_KYWD,
64 size_t, mbstate_t *_RESTRICT_KYWD);
65
66 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
67 mbstate_t *_RESTRICT_KYWD);
68 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
69 mbstate_t *_RESTRICT_KYWD);
70 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
71 mbstate_t *_RESTRICT_KYWD);
72 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
73 mbstate_t *_RESTRICT_KYWD);
74
75 static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
76 const char **_RESTRICT_KYWD, size_t, size_t,
77 mbstate_t *_RESTRICT_KYWD);
78 static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
79 const char **_RESTRICT_KYWD, size_t, size_t,
80 mbstate_t *_RESTRICT_KYWD);
81 static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
82 const char **_RESTRICT_KYWD, size_t, size_t,
83 mbstate_t *_RESTRICT_KYWD);
84 static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
85 const char **_RESTRICT_KYWD, size_t, size_t,
86 mbstate_t *_RESTRICT_KYWD);
87
88 static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD,
89 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
90 mbstate_t *_RESTRICT_KYWD);
91 static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD,
92 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
93 mbstate_t *_RESTRICT_KYWD);
94 static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD,
95 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
96 mbstate_t *_RESTRICT_KYWD);
97 static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD,
98 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
99 mbstate_t *_RESTRICT_KYWD);
100
101 static int _EUC_mbsinit(const mbstate_t *);
102
/*
 * Conversion state used by the EUC converters in this file.  The
 * converters cast the caller's mbstate_t pointer to this type, so the
 * state is stored inside the caller's mbstate_t object.
 */
typedef struct {
	/* Bytes gathered so far for a character that is not yet complete. */
	wchar_t ch;
	/* Not referenced by the code visible in this file. */
	int set;
	/* Bytes still needed to finish the character; 0 is the initial state. */
	int want;
} _EucState;
108
109 int
_EUC_mbsinit(const mbstate_t * ps)110 _EUC_mbsinit(const mbstate_t *ps)
111 {
112
113 return (ps == NULL || ((const _EucState *)ps)->want == 0);
114 }
115
116 /*
117 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
118 */
119 void
_EUC_CN_init(struct lc_ctype * lct)120 _EUC_CN_init(struct lc_ctype *lct)
121 {
122 lct->lc_mbrtowc = _EUC_CN_mbrtowc;
123 lct->lc_wcrtomb = _EUC_CN_wcrtomb;
124 lct->lc_mbsnrtowcs = _EUC_CN_mbsnrtowcs;
125 lct->lc_wcsnrtombs = _EUC_CN_wcsnrtombs;
126 lct->lc_mbsinit = _EUC_mbsinit;
127
128 lct->lc_max_mblen = 4;
129 lct->lc_is_ascii = 0;
130 }
131
132 static size_t
_EUC_CN_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps)133 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
134 size_t n, mbstate_t *_RESTRICT_KYWD ps)
135 {
136 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
137 }
138
139 static size_t
_EUC_CN_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)140 _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
141 const char **_RESTRICT_KYWD src,
142 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
143 {
144 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
145 }
146
147 static size_t
_EUC_CN_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)148 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
149 mbstate_t *_RESTRICT_KYWD ps)
150 {
151 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
152 }
153
154 static size_t
_EUC_CN_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)155 _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
156 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
157 {
158 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
159 }
160
161 /*
162 * EUC-KR uses only CS0 and CS1.
163 */
164 void
_EUC_KR_init(struct lc_ctype * lct)165 _EUC_KR_init(struct lc_ctype *lct)
166 {
167 lct->lc_mbrtowc = _EUC_KR_mbrtowc;
168 lct->lc_wcrtomb = _EUC_KR_wcrtomb;
169 lct->lc_mbsnrtowcs = _EUC_KR_mbsnrtowcs;
170 lct->lc_wcsnrtombs = _EUC_KR_wcsnrtombs;
171 lct->lc_mbsinit = _EUC_mbsinit;
172
173 lct->lc_max_mblen = 2;
174 lct->lc_is_ascii = 0;
175 }
176
177 static size_t
_EUC_KR_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps)178 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
179 size_t n, mbstate_t *_RESTRICT_KYWD ps)
180 {
181 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
182 }
183
184 static size_t
_EUC_KR_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)185 _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
186 const char **_RESTRICT_KYWD src,
187 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
188 {
189 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
190 }
191
192 static size_t
_EUC_KR_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)193 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
194 mbstate_t *_RESTRICT_KYWD ps)
195 {
196 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
197 }
198
199 static size_t
_EUC_KR_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)200 _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
201 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
202 {
203 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
204 }
205
206 /*
207 * EUC-JP uses CS0, CS1, CS2, and CS3.
208 */
209 void
_EUC_JP_init(struct lc_ctype * lct)210 _EUC_JP_init(struct lc_ctype *lct)
211 {
212 lct->lc_mbrtowc = _EUC_JP_mbrtowc;
213 lct->lc_wcrtomb = _EUC_JP_wcrtomb;
214 lct->lc_mbsnrtowcs = _EUC_JP_mbsnrtowcs;
215 lct->lc_wcsnrtombs = _EUC_JP_wcsnrtombs;
216 lct->lc_mbsinit = _EUC_mbsinit;
217
218 lct->lc_max_mblen = 3;
219 lct->lc_is_ascii = 0;
220 }
221
222 static size_t
_EUC_JP_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps)223 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
224 size_t n, mbstate_t *_RESTRICT_KYWD ps)
225 {
226 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
227 }
228
229 static size_t
_EUC_JP_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)230 _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
231 const char **_RESTRICT_KYWD src,
232 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
233 {
234 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
235 }
236
237 static size_t
_EUC_JP_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)238 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
239 mbstate_t *_RESTRICT_KYWD ps)
240 {
241 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
242 }
243
244 static size_t
_EUC_JP_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)245 _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
246 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
247 {
248 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
249 }
250
251 /*
252 * EUC-TW uses CS0, CS1, and CS2.
253 */
254 void
_EUC_TW_init(struct lc_ctype * lct)255 _EUC_TW_init(struct lc_ctype *lct)
256 {
257 lct->lc_mbrtowc = _EUC_TW_mbrtowc;
258 lct->lc_wcrtomb = _EUC_TW_wcrtomb;
259 lct->lc_mbsnrtowcs = _EUC_TW_mbsnrtowcs;
260 lct->lc_wcsnrtombs = _EUC_TW_wcsnrtombs;
261 lct->lc_mbsinit = _EUC_mbsinit;
262
263 lct->lc_max_mblen = 4;
264 lct->lc_is_ascii = 0;
265 }
266
267 static size_t
_EUC_TW_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps)268 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
269 size_t n, mbstate_t *_RESTRICT_KYWD ps)
270 {
271 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
272 }
273
274 static size_t
_EUC_TW_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)275 _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
276 const char **_RESTRICT_KYWD src,
277 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
278 {
279 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
280 }
281
282 static size_t
_EUC_TW_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)283 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
284 mbstate_t *_RESTRICT_KYWD ps)
285 {
286 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
287 }
288
289 static size_t
_EUC_TW_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)290 _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
291 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
292 {
293 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
294 }
295
296 /*
297 * Common EUC code.
298 */
299
300 static size_t
_EUC_mbrtowc_impl(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)301 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
302 size_t n, mbstate_t *_RESTRICT_KYWD ps,
303 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
304 {
305 _EucState *es;
306 int i, want;
307 wchar_t wc;
308 unsigned char ch;
309
310 es = (_EucState *)ps;
311
312 if (es->want < 0 || es->want > MB_CUR_MAX) {
313 errno = EINVAL;
314 return ((size_t)-1);
315 }
316
317 if (s == NULL) {
318 s = "";
319 n = 1;
320 pwc = NULL;
321 }
322
323 if (n == 0)
324 /* Incomplete multibyte sequence */
325 return ((size_t)-2);
326
327 if (es->want == 0) {
328 /* Fast path for plain ASCII (CS0) */
329 if (((ch = (unsigned char)*s) & 0x80) == 0) {
330 if (pwc != NULL)
331 *pwc = ch;
332 return (ch != '\0' ? 1 : 0);
333 }
334
335 if (ch >= 0xa1) {
336 /* CS1 */
337 want = 2;
338 } else if (ch == cs2) {
339 want = cs2width;
340 } else if (ch == cs3) {
341 want = cs3width;
342 } else {
343 errno = EILSEQ;
344 return ((size_t)-1);
345 }
346
347
348 es->want = want;
349 es->ch = 0;
350 } else {
351 want = es->want;
352 wc = es->ch;
353 }
354
355 for (i = 0; i < MIN(want, n); i++) {
356 wc <<= 8;
357 wc |= *s;
358 s++;
359 }
360 if (i < want) {
361 /* Incomplete multibyte sequence */
362 es->want = want - i;
363 es->ch = wc;
364 return ((size_t)-2);
365 }
366 if (pwc != NULL)
367 *pwc = wc;
368 es->want = 0;
369 return (wc == L'\0' ? 0 : want);
370 }
371
372 static size_t
_EUC_wcrtomb_impl(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)373 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc,
374 mbstate_t *_RESTRICT_KYWD ps,
375 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
376 {
377 _EucState *es;
378 int i, len;
379 wchar_t nm;
380
381 es = (_EucState *)ps;
382
383 if (es->want != 0) {
384 errno = EINVAL;
385 return ((size_t)-1);
386 }
387
388 if (s == NULL)
389 /* Reset to initial shift state (no-op) */
390 return (1);
391
392 if ((wc & ~0x7f) == 0) {
393 /* Fast path for plain ASCII (CS0) */
394 *s = (char)wc;
395 return (1);
396 }
397
398 /* Determine the "length" */
399 if ((unsigned)wc > 0xffffff) {
400 len = 4;
401 } else if ((unsigned)wc > 0xffff) {
402 len = 3;
403 } else if ((unsigned)wc > 0xff) {
404 len = 2;
405 } else {
406 len = 1;
407 }
408
409 if (len > MB_CUR_MAX) {
410 errno = EILSEQ;
411 return ((size_t)-1);
412 }
413
414 /* This first check excludes CS1, which is implicitly valid. */
415 if ((wc < 0xa100) || (wc > 0xffff)) {
416 /* Check for valid CS2 or CS3 */
417 nm = (wc >> ((len - 1) * 8));
418 if (nm == cs2) {
419 if (len != cs2width) {
420 errno = EILSEQ;
421 return ((size_t)-1);
422 }
423 } else if (nm == cs3) {
424 if (len != cs3width) {
425 errno = EILSEQ;
426 return ((size_t)-1);
427 }
428 } else {
429 errno = EILSEQ;
430 return ((size_t)-1);
431 }
432 }
433
434 /* Stash the bytes, least significant last */
435 for (i = len - 1; i >= 0; i--) {
436 s[i] = (wc & 0xff);
437 wc >>= 8;
438 }
439 return (len);
440 }
441