1 /* 2 * Copyright 2013 Garrett D'Amore <garrett@damore.org> 3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Paul Borman at Krystal Technologies. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include "lint.h" 37 #include <errno.h> 38 #include <limits.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 #include <sys/types.h> 43 #include <sys/euc.h> 44 #include "mblocal.h" 45 #include "lctype.h" 46 47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD, 48 const char *_RESTRICT_KYWD, 49 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t); 50 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t, 51 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t); 52 53 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD, 54 const char *_RESTRICT_KYWD, 55 size_t, mbstate_t *_RESTRICT_KYWD); 56 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD, 57 const char *_RESTRICT_KYWD, 58 size_t, mbstate_t *_RESTRICT_KYWD); 59 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD, 60 const char *_RESTRICT_KYWD, 61 size_t, mbstate_t *_RESTRICT_KYWD); 62 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD, 63 const char *_RESTRICT_KYWD, 64 size_t, mbstate_t *_RESTRICT_KYWD); 65 66 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 67 mbstate_t *_RESTRICT_KYWD); 68 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 69 mbstate_t *_RESTRICT_KYWD); 70 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 71 mbstate_t *_RESTRICT_KYWD); 72 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 73 mbstate_t *_RESTRICT_KYWD); 74 75 static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 76 const char **_RESTRICT_KYWD, size_t, size_t, 77 mbstate_t *_RESTRICT_KYWD); 78 static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 79 const char **_RESTRICT_KYWD, size_t, size_t, 80 mbstate_t *_RESTRICT_KYWD); 81 static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 82 const char **_RESTRICT_KYWD, size_t, size_t, 83 mbstate_t *_RESTRICT_KYWD); 84 static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 85 const char **_RESTRICT_KYWD, size_t, size_t, 86 mbstate_t *_RESTRICT_KYWD); 87 88 static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD, 89 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 90 mbstate_t *_RESTRICT_KYWD); 91 static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD, 92 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 93 mbstate_t *_RESTRICT_KYWD); 94 static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD, 95 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 96 mbstate_t *_RESTRICT_KYWD); 97 static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD, 98 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 99 mbstate_t *_RESTRICT_KYWD); 100 101 static int _EUC_mbsinit(const mbstate_t *); 102 103 typedef struct { 104 wchar_t ch; 105 int set; 106 int want; 107 } _EucState; 108 109 int 110 _EUC_mbsinit(const mbstate_t *ps) 111 { 112 113 return (ps == NULL || ((const _EucState *)ps)->want == 0); 114 } 115 116 /* 117 * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 118 */ 119 void 120 _EUC_CN_init(struct lc_ctype *lct) 121 { 122 lct->lc_mbrtowc = _EUC_CN_mbrtowc; 123 lct->lc_wcrtomb = _EUC_CN_wcrtomb; 124 lct->lc_mbsnrtowcs = _EUC_CN_mbsnrtowcs; 125 lct->lc_wcsnrtombs = _EUC_CN_wcsnrtombs; 126 lct->lc_mbsinit = _EUC_mbsinit; 127 128 lct->lc_max_mblen = 4; 129 lct->lc_is_ascii = 0; 130 } 131 132 static size_t 133 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 134 size_t n, mbstate_t *_RESTRICT_KYWD ps) 135 { 136 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 137 } 138 139 static size_t 140 _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 141 const char **_RESTRICT_KYWD src, 142 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 143 { 144 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); 145 } 146 147 static size_t 148 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 149 mbstate_t *_RESTRICT_KYWD ps) 150 { 151 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 152 } 153 154 static size_t 155 _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 156 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 157 { 158 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); 159 } 160 161 /* 162 * EUC-KR uses only CS0 and CS1. 163 */ 164 void 165 _EUC_KR_init(struct lc_ctype *lct) 166 { 167 lct->lc_mbrtowc = _EUC_KR_mbrtowc; 168 lct->lc_wcrtomb = _EUC_KR_wcrtomb; 169 lct->lc_mbsnrtowcs = _EUC_KR_mbsnrtowcs; 170 lct->lc_wcsnrtombs = _EUC_KR_wcsnrtombs; 171 lct->lc_mbsinit = _EUC_mbsinit; 172 173 lct->lc_max_mblen = 2; 174 lct->lc_is_ascii = 0; 175 } 176 177 static size_t 178 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 179 size_t n, mbstate_t *_RESTRICT_KYWD ps) 180 { 181 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); 182 } 183 184 static size_t 185 _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 186 const char **_RESTRICT_KYWD src, 187 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 188 { 189 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); 190 } 191 192 static size_t 193 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 194 mbstate_t *_RESTRICT_KYWD ps) 195 { 196 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); 197 } 198 199 static size_t 200 _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 201 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 202 { 203 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); 204 } 205 206 /* 207 * EUC-JP uses CS0, CS1, CS2, and CS3. 208 */ 209 void 210 _EUC_JP_init(struct lc_ctype *lct) 211 { 212 lct->lc_mbrtowc = _EUC_JP_mbrtowc; 213 lct->lc_wcrtomb = _EUC_JP_wcrtomb; 214 lct->lc_mbsnrtowcs = _EUC_JP_mbsnrtowcs; 215 lct->lc_wcsnrtombs = _EUC_JP_wcsnrtombs; 216 lct->lc_mbsinit = _EUC_mbsinit; 217 218 lct->lc_max_mblen = 3; 219 lct->lc_is_ascii = 0; 220 } 221 222 static size_t 223 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 224 size_t n, mbstate_t *_RESTRICT_KYWD ps) 225 { 226 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); 227 } 228 229 static size_t 230 _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 231 const char **_RESTRICT_KYWD src, 232 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 233 { 234 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); 235 } 236 237 static size_t 238 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 239 mbstate_t *_RESTRICT_KYWD ps) 240 { 241 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); 242 } 243 244 static size_t 245 _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 246 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 247 { 248 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); 249 } 250 251 /* 252 * EUC-TW uses CS0, CS1, and CS2. 253 */ 254 void 255 _EUC_TW_init(struct lc_ctype *lct) 256 { 257 lct->lc_mbrtowc = _EUC_TW_mbrtowc; 258 lct->lc_wcrtomb = _EUC_TW_wcrtomb; 259 lct->lc_mbsnrtowcs = _EUC_TW_mbsnrtowcs; 260 lct->lc_wcsnrtombs = _EUC_TW_wcsnrtombs; 261 lct->lc_mbsinit = _EUC_mbsinit; 262 263 lct->lc_max_mblen = 4; 264 lct->lc_is_ascii = 0; 265 } 266 267 static size_t 268 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 269 size_t n, mbstate_t *_RESTRICT_KYWD ps) 270 { 271 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 272 } 273 274 static size_t 275 _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 276 const char **_RESTRICT_KYWD src, 277 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 278 { 279 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); 280 } 281 282 static size_t 283 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 284 mbstate_t *_RESTRICT_KYWD ps) 285 { 286 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 287 } 288 289 static size_t 290 _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 291 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 292 { 293 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); 294 } 295 296 /* 297 * Common EUC code. 298 */ 299 300 static size_t 301 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 302 size_t n, mbstate_t *_RESTRICT_KYWD ps, 303 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 304 { 305 _EucState *es; 306 int i, want; 307 wchar_t wc = 0; 308 unsigned char ch, chs; 309 310 es = (_EucState *)ps; 311 312 if (es->want < 0 || es->want > MB_CUR_MAX) { 313 errno = EINVAL; 314 return ((size_t)-1); 315 } 316 317 if (s == NULL) { 318 s = ""; 319 n = 1; 320 pwc = NULL; 321 } 322 323 if (n == 0) 324 /* Incomplete multibyte sequence */ 325 return ((size_t)-2); 326 327 if (es->want == 0) { 328 /* Fast path for plain ASCII (CS0) */ 329 if (((ch = (unsigned char)*s) & 0x80) == 0) { 330 if (pwc != NULL) 331 *pwc = ch; 332 return (ch != '\0' ? 1 : 0); 333 } 334 335 if (ch >= 0xa1) { 336 /* CS1 */ 337 want = 2; 338 } else if (ch == cs2) { 339 want = cs2width; 340 } else if (ch == cs3) { 341 want = cs3width; 342 } else { 343 errno = EILSEQ; 344 return ((size_t)-1); 345 } 346 347 348 es->want = want; 349 es->ch = 0; 350 } else { 351 want = es->want; 352 wc = es->ch; 353 } 354 355 for (i = 0; i < MIN(want, n); i++) { 356 wc <<= 8; 357 chs = *s; 358 wc |= chs; 359 s++; 360 } 361 if (i < want) { 362 /* Incomplete multibyte sequence */ 363 es->want = want - i; 364 es->ch = wc; 365 return ((size_t)-2); 366 } 367 if (pwc != NULL) 368 *pwc = wc; 369 es->want = 0; 370 return (wc == L'\0' ? 0 : want); 371 } 372 373 static size_t 374 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc, 375 mbstate_t *_RESTRICT_KYWD ps, 376 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 377 { 378 _EucState *es; 379 int i, len; 380 wchar_t nm; 381 382 es = (_EucState *)ps; 383 384 if (es->want != 0) { 385 errno = EINVAL; 386 return ((size_t)-1); 387 } 388 389 if (s == NULL) 390 /* Reset to initial shift state (no-op) */ 391 return (1); 392 393 if ((wc & ~0x7f) == 0) { 394 /* Fast path for plain ASCII (CS0) */ 395 *s = (char)wc; 396 return (1); 397 } 398 399 /* Determine the "length" */ 400 if ((unsigned)wc > 0xffffff) { 401 len = 4; 402 } else if ((unsigned)wc > 0xffff) { 403 len = 3; 404 } else if ((unsigned)wc > 0xff) { 405 len = 2; 406 } else { 407 len = 1; 408 } 409 410 if (len > MB_CUR_MAX) { 411 errno = EILSEQ; 412 return ((size_t)-1); 413 } 414 415 /* This first check excludes CS1, which is implicitly valid. */ 416 if ((wc < 0xa100) || (wc > 0xffff)) { 417 /* Check for valid CS2 or CS3 */ 418 nm = (wc >> ((len - 1) * 8)); 419 if (nm == cs2) { 420 if (len != cs2width) { 421 errno = EILSEQ; 422 return ((size_t)-1); 423 } 424 } else if (nm == cs3) { 425 if (len != cs3width) { 426 errno = EILSEQ; 427 return ((size_t)-1); 428 } 429 } else { 430 errno = EILSEQ; 431 return ((size_t)-1); 432 } 433 } 434 435 /* Stash the bytes, least significant last */ 436 for (i = len - 1; i >= 0; i--) { 437 s[i] = (wc & 0xff); 438 wc >>= 8; 439 } 440 return (len); 441 } 442