1 /*
2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include "lint.h"
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <sys/types.h>
43 #include <sys/euc.h>
44 #include "mblocal.h"
45 #include "lctype.h"
46
47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD,
48 const char *_RESTRICT_KYWD,
49 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t,
50 boolean_t);
51 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t,
52 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
53
54 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD,
55 const char *_RESTRICT_KYWD,
56 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t);
57 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD,
58 const char *_RESTRICT_KYWD,
59 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t);
60 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD,
61 const char *_RESTRICT_KYWD,
62 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t);
63 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD,
64 const char *_RESTRICT_KYWD,
65 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t);
66
67 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
68 mbstate_t *_RESTRICT_KYWD);
69 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
70 mbstate_t *_RESTRICT_KYWD);
71 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
72 mbstate_t *_RESTRICT_KYWD);
73 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
74 mbstate_t *_RESTRICT_KYWD);
75
76 static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
77 const char **_RESTRICT_KYWD, size_t, size_t,
78 mbstate_t *_RESTRICT_KYWD);
79 static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
80 const char **_RESTRICT_KYWD, size_t, size_t,
81 mbstate_t *_RESTRICT_KYWD);
82 static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
83 const char **_RESTRICT_KYWD, size_t, size_t,
84 mbstate_t *_RESTRICT_KYWD);
85 static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
86 const char **_RESTRICT_KYWD, size_t, size_t,
87 mbstate_t *_RESTRICT_KYWD);
88
89 static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD,
90 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
91 mbstate_t *_RESTRICT_KYWD);
92 static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD,
93 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
94 mbstate_t *_RESTRICT_KYWD);
95 static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD,
96 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
97 mbstate_t *_RESTRICT_KYWD);
98 static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD,
99 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
100 mbstate_t *_RESTRICT_KYWD);
101
102 static int _EUC_mbsinit(const mbstate_t *);
103
104 int
_EUC_mbsinit(const mbstate_t * ps)105 _EUC_mbsinit(const mbstate_t *ps)
106 {
107
108 return (ps == NULL || ((const _EucState *)ps)->want == 0);
109 }
110
111 /*
112 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
113 */
114 void
_EUC_CN_init(struct lc_ctype * lct)115 _EUC_CN_init(struct lc_ctype *lct)
116 {
117 lct->lc_mbrtowc = _EUC_CN_mbrtowc;
118 lct->lc_wcrtomb = _EUC_CN_wcrtomb;
119 lct->lc_mbsnrtowcs = _EUC_CN_mbsnrtowcs;
120 lct->lc_wcsnrtombs = _EUC_CN_wcsnrtombs;
121 lct->lc_mbsinit = _EUC_mbsinit;
122
123 lct->lc_max_mblen = 4;
124 lct->lc_is_ascii = 0;
125 }
126
127 static size_t
_EUC_CN_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,boolean_t zero)128 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
129 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero)
130 {
131 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0, zero));
132 }
133
134 static size_t
_EUC_CN_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)135 _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
136 const char **_RESTRICT_KYWD src,
137 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
138 {
139 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
140 }
141
142 static size_t
_EUC_CN_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)143 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
144 mbstate_t *_RESTRICT_KYWD ps)
145 {
146 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
147 }
148
149 static size_t
_EUC_CN_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)150 _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
151 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
152 {
153 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
154 }
155
156 /*
157 * EUC-KR uses only CS0 and CS1.
158 */
159 void
_EUC_KR_init(struct lc_ctype * lct)160 _EUC_KR_init(struct lc_ctype *lct)
161 {
162 lct->lc_mbrtowc = _EUC_KR_mbrtowc;
163 lct->lc_wcrtomb = _EUC_KR_wcrtomb;
164 lct->lc_mbsnrtowcs = _EUC_KR_mbsnrtowcs;
165 lct->lc_wcsnrtombs = _EUC_KR_wcsnrtombs;
166 lct->lc_mbsinit = _EUC_mbsinit;
167
168 lct->lc_max_mblen = 2;
169 lct->lc_is_ascii = 0;
170 }
171
172 static size_t
_EUC_KR_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,boolean_t zero)173 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
174 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero)
175 {
176 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0, zero));
177 }
178
179 static size_t
_EUC_KR_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)180 _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
181 const char **_RESTRICT_KYWD src,
182 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
183 {
184 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
185 }
186
187 static size_t
_EUC_KR_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)188 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
189 mbstate_t *_RESTRICT_KYWD ps)
190 {
191 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
192 }
193
194 static size_t
_EUC_KR_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)195 _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
196 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
197 {
198 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
199 }
200
201 /*
202 * EUC-JP uses CS0, CS1, CS2, and CS3.
203 */
204 void
_EUC_JP_init(struct lc_ctype * lct)205 _EUC_JP_init(struct lc_ctype *lct)
206 {
207 lct->lc_mbrtowc = _EUC_JP_mbrtowc;
208 lct->lc_wcrtomb = _EUC_JP_wcrtomb;
209 lct->lc_mbsnrtowcs = _EUC_JP_mbsnrtowcs;
210 lct->lc_wcsnrtombs = _EUC_JP_wcsnrtombs;
211 lct->lc_mbsinit = _EUC_mbsinit;
212
213 lct->lc_max_mblen = 3;
214 lct->lc_is_ascii = 0;
215 }
216
217 static size_t
_EUC_JP_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,boolean_t zero)218 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
219 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero)
220 {
221 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3, zero));
222 }
223
224 static size_t
_EUC_JP_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)225 _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
226 const char **_RESTRICT_KYWD src,
227 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
228 {
229 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
230 }
231
232 static size_t
_EUC_JP_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)233 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
234 mbstate_t *_RESTRICT_KYWD ps)
235 {
236 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
237 }
238
239 static size_t
_EUC_JP_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)240 _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
241 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
242 {
243 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
244 }
245
246 /*
247 * EUC-TW uses CS0, CS1, and CS2.
248 */
249 void
_EUC_TW_init(struct lc_ctype * lct)250 _EUC_TW_init(struct lc_ctype *lct)
251 {
252 lct->lc_mbrtowc = _EUC_TW_mbrtowc;
253 lct->lc_wcrtomb = _EUC_TW_wcrtomb;
254 lct->lc_mbsnrtowcs = _EUC_TW_mbsnrtowcs;
255 lct->lc_wcsnrtombs = _EUC_TW_wcsnrtombs;
256 lct->lc_mbsinit = _EUC_mbsinit;
257
258 lct->lc_max_mblen = 4;
259 lct->lc_is_ascii = 0;
260 }
261
262 static size_t
_EUC_TW_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,boolean_t zero)263 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
264 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero)
265 {
266 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0, zero));
267 }
268
269 static size_t
_EUC_TW_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)270 _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
271 const char **_RESTRICT_KYWD src,
272 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
273 {
274 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
275 }
276
277 static size_t
_EUC_TW_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)278 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
279 mbstate_t *_RESTRICT_KYWD ps)
280 {
281 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
282 }
283
284 static size_t
_EUC_TW_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)285 _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
286 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
287 {
288 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
289 }
290
291 /*
292 * Common EUC code.
293 */
294
295 static size_t
_EUC_mbrtowc_impl(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width,boolean_t zero)296 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
297 size_t n, mbstate_t *_RESTRICT_KYWD ps,
298 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width,
299 boolean_t zero)
300 {
301 _EucState *es;
302 int i, want;
303 wchar_t wc = 0;
304 unsigned char ch, chs;
305
306 es = (_EucState *)ps;
307
308 if (es->want < 0 || es->want > MB_CUR_MAX) {
309 errno = EINVAL;
310 return ((size_t)-1);
311 }
312
313 if (s == NULL) {
314 s = "";
315 n = 1;
316 pwc = NULL;
317 }
318
319 if (n == 0)
320 /* Incomplete multibyte sequence */
321 return ((size_t)-2);
322
323 if (es->want == 0) {
324 /* Fast path for plain ASCII (CS0) */
325 if (((ch = (unsigned char)*s) & 0x80) == 0) {
326 if (pwc != NULL)
327 *pwc = ch;
328 if (zero || ch != '\0') {
329 return (1);
330 } else {
331 return (0);
332 }
333 }
334
335 if (ch >= 0xa1) {
336 /* CS1 */
337 want = 2;
338 } else if (ch == cs2) {
339 want = cs2width;
340 } else if (ch == cs3) {
341 want = cs3width;
342 } else {
343 errno = EILSEQ;
344 return ((size_t)-1);
345 }
346
347
348 es->want = want;
349 es->ch = 0;
350 } else {
351 want = es->want;
352 wc = es->ch;
353 }
354
355 for (i = 0; i < MIN(want, n); i++) {
356 wc <<= 8;
357 chs = *s;
358 wc |= chs;
359 s++;
360 }
361 if (i < want) {
362 /* Incomplete multibyte sequence */
363 es->want = want - i;
364 es->ch = wc;
365 return ((size_t)-2);
366 }
367 if (pwc != NULL)
368 *pwc = wc;
369 es->want = 0;
370 if (zero || wc != L'\0') {
371 return (want);
372 } else {
373 return (0);
374 }
375 }
376
377 static size_t
_EUC_wcrtomb_impl(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)378 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc,
379 mbstate_t *_RESTRICT_KYWD ps,
380 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
381 {
382 _EucState *es;
383 int i, len;
384 wchar_t nm;
385
386 es = (_EucState *)ps;
387
388 if (es->want != 0) {
389 errno = EINVAL;
390 return ((size_t)-1);
391 }
392
393 if (s == NULL)
394 /* Reset to initial shift state (no-op) */
395 return (1);
396
397 if ((wc & ~0x7f) == 0) {
398 /* Fast path for plain ASCII (CS0) */
399 *s = (char)wc;
400 return (1);
401 }
402
403 /* Determine the "length" */
404 if ((unsigned)wc > 0xffffff) {
405 len = 4;
406 } else if ((unsigned)wc > 0xffff) {
407 len = 3;
408 } else if ((unsigned)wc > 0xff) {
409 len = 2;
410 } else {
411 len = 1;
412 }
413
414 if (len > MB_CUR_MAX) {
415 errno = EILSEQ;
416 return ((size_t)-1);
417 }
418
419 /* This first check excludes CS1, which is implicitly valid. */
420 if ((wc < 0xa100) || (wc > 0xffff)) {
421 /* Check for valid CS2 or CS3 */
422 nm = (wc >> ((len - 1) * 8));
423 if (nm == cs2) {
424 if (len != cs2width) {
425 errno = EILSEQ;
426 return ((size_t)-1);
427 }
428 } else if (nm == cs3) {
429 if (len != cs3width) {
430 errno = EILSEQ;
431 return ((size_t)-1);
432 }
433 } else {
434 errno = EILSEQ;
435 return ((size_t)-1);
436 }
437 }
438
439 /* Stash the bytes, least significant last */
440 for (i = len - 1; i >= 0; i--) {
441 s[i] = (wc & 0xff);
442 wc >>= 8;
443 }
444 return (len);
445 }
446