1 /* 2 * Copyright 2013 Garrett D'Amore <garrett@damore.org> 3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Paul Borman at Krystal Technologies. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include "lint.h" 37 #include <errno.h> 38 #include <limits.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 #include <sys/types.h> 43 #include <sys/euc.h> 44 #include "mblocal.h" 45 #include "lctype.h" 46 47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD, 48 const char *_RESTRICT_KYWD, 49 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t, 50 boolean_t); 51 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t, 52 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t); 53 54 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD, 55 const char *_RESTRICT_KYWD, 56 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 57 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD, 58 const char *_RESTRICT_KYWD, 59 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 60 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD, 61 const char *_RESTRICT_KYWD, 62 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 63 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD, 64 const char *_RESTRICT_KYWD, 65 size_t, mbstate_t *_RESTRICT_KYWD, boolean_t); 66 67 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 68 mbstate_t *_RESTRICT_KYWD); 69 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 70 mbstate_t *_RESTRICT_KYWD); 71 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 72 mbstate_t *_RESTRICT_KYWD); 73 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 74 mbstate_t *_RESTRICT_KYWD); 75 76 static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 77 const char **_RESTRICT_KYWD, size_t, size_t, 78 mbstate_t *_RESTRICT_KYWD); 79 static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 80 const char **_RESTRICT_KYWD, size_t, size_t, 81 mbstate_t *_RESTRICT_KYWD); 82 static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 83 const char **_RESTRICT_KYWD, size_t, size_t, 84 mbstate_t *_RESTRICT_KYWD); 85 static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, 86 const char **_RESTRICT_KYWD, size_t, size_t, 87 mbstate_t *_RESTRICT_KYWD); 88 89 static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD, 90 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 91 mbstate_t *_RESTRICT_KYWD); 92 static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD, 93 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 94 mbstate_t *_RESTRICT_KYWD); 95 static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD, 96 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 97 mbstate_t *_RESTRICT_KYWD); 98 static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD, 99 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 100 mbstate_t *_RESTRICT_KYWD); 101 102 static int _EUC_mbsinit(const mbstate_t *); 103 104 int 105 _EUC_mbsinit(const mbstate_t *ps) 106 { 107 108 return (ps == NULL || ((const _EucState *)ps)->want == 0); 109 } 110 111 /* 112 * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 113 */ 114 void 115 _EUC_CN_init(struct lc_ctype *lct) 116 { 117 lct->lc_mbrtowc = _EUC_CN_mbrtowc; 118 lct->lc_wcrtomb = _EUC_CN_wcrtomb; 119 lct->lc_mbsnrtowcs = _EUC_CN_mbsnrtowcs; 120 lct->lc_wcsnrtombs = _EUC_CN_wcsnrtombs; 121 lct->lc_mbsinit = _EUC_mbsinit; 122 123 lct->lc_max_mblen = 4; 124 lct->lc_is_ascii = 0; 125 } 126 127 static size_t 128 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 129 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 130 { 131 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0, zero)); 132 } 133 134 static size_t 135 _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 136 const char **_RESTRICT_KYWD src, 137 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 138 { 139 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); 140 } 141 142 static size_t 143 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 144 mbstate_t *_RESTRICT_KYWD ps) 145 { 146 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 147 } 148 149 static size_t 150 _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 151 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 152 { 153 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); 154 } 155 156 /* 157 * EUC-KR uses only CS0 and CS1. 158 */ 159 void 160 _EUC_KR_init(struct lc_ctype *lct) 161 { 162 lct->lc_mbrtowc = _EUC_KR_mbrtowc; 163 lct->lc_wcrtomb = _EUC_KR_wcrtomb; 164 lct->lc_mbsnrtowcs = _EUC_KR_mbsnrtowcs; 165 lct->lc_wcsnrtombs = _EUC_KR_wcsnrtombs; 166 lct->lc_mbsinit = _EUC_mbsinit; 167 168 lct->lc_max_mblen = 2; 169 lct->lc_is_ascii = 0; 170 } 171 172 static size_t 173 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 174 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 175 { 176 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0, zero)); 177 } 178 179 static size_t 180 _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 181 const char **_RESTRICT_KYWD src, 182 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 183 { 184 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); 185 } 186 187 static size_t 188 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 189 mbstate_t *_RESTRICT_KYWD ps) 190 { 191 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); 192 } 193 194 static size_t 195 _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 196 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 197 { 198 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); 199 } 200 201 /* 202 * EUC-JP uses CS0, CS1, CS2, and CS3. 203 */ 204 void 205 _EUC_JP_init(struct lc_ctype *lct) 206 { 207 lct->lc_mbrtowc = _EUC_JP_mbrtowc; 208 lct->lc_wcrtomb = _EUC_JP_wcrtomb; 209 lct->lc_mbsnrtowcs = _EUC_JP_mbsnrtowcs; 210 lct->lc_wcsnrtombs = _EUC_JP_wcsnrtombs; 211 lct->lc_mbsinit = _EUC_mbsinit; 212 213 lct->lc_max_mblen = 3; 214 lct->lc_is_ascii = 0; 215 } 216 217 static size_t 218 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 219 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 220 { 221 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3, zero)); 222 } 223 224 static size_t 225 _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 226 const char **_RESTRICT_KYWD src, 227 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 228 { 229 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); 230 } 231 232 static size_t 233 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 234 mbstate_t *_RESTRICT_KYWD ps) 235 { 236 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); 237 } 238 239 static size_t 240 _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 241 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 242 { 243 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); 244 } 245 246 /* 247 * EUC-TW uses CS0, CS1, and CS2. 248 */ 249 void 250 _EUC_TW_init(struct lc_ctype *lct) 251 { 252 lct->lc_mbrtowc = _EUC_TW_mbrtowc; 253 lct->lc_wcrtomb = _EUC_TW_wcrtomb; 254 lct->lc_mbsnrtowcs = _EUC_TW_mbsnrtowcs; 255 lct->lc_wcsnrtombs = _EUC_TW_wcsnrtombs; 256 lct->lc_mbsinit = _EUC_mbsinit; 257 258 lct->lc_max_mblen = 4; 259 lct->lc_is_ascii = 0; 260 } 261 262 static size_t 263 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 264 size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero) 265 { 266 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0, zero)); 267 } 268 269 static size_t 270 _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, 271 const char **_RESTRICT_KYWD src, 272 size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) 273 { 274 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); 275 } 276 277 static size_t 278 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, 279 mbstate_t *_RESTRICT_KYWD ps) 280 { 281 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 282 } 283 284 static size_t 285 _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, 286 size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) 287 { 288 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); 289 } 290 291 /* 292 * Common EUC code. 293 */ 294 295 static size_t 296 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 297 size_t n, mbstate_t *_RESTRICT_KYWD ps, 298 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width, 299 boolean_t zero) 300 { 301 _EucState *es; 302 int i, want; 303 wchar_t wc = 0; 304 unsigned char ch, chs; 305 306 es = (_EucState *)ps; 307 308 if (es->want < 0 || es->want > MB_CUR_MAX) { 309 errno = EINVAL; 310 return ((size_t)-1); 311 } 312 313 if (s == NULL) { 314 s = ""; 315 n = 1; 316 pwc = NULL; 317 } 318 319 if (n == 0) 320 /* Incomplete multibyte sequence */ 321 return ((size_t)-2); 322 323 if (es->want == 0) { 324 /* Fast path for plain ASCII (CS0) */ 325 if (((ch = (unsigned char)*s) & 0x80) == 0) { 326 if (pwc != NULL) 327 *pwc = ch; 328 if (zero || ch != '\0') { 329 return (1); 330 } else { 331 return (0); 332 } 333 } 334 335 if (ch >= 0xa1) { 336 /* CS1 */ 337 want = 2; 338 } else if (ch == cs2) { 339 want = cs2width; 340 } else if (ch == cs3) { 341 want = cs3width; 342 } else { 343 errno = EILSEQ; 344 return ((size_t)-1); 345 } 346 347 348 es->want = want; 349 es->ch = 0; 350 } else { 351 want = es->want; 352 wc = es->ch; 353 } 354 355 for (i = 0; i < MIN(want, n); i++) { 356 wc <<= 8; 357 chs = *s; 358 wc |= chs; 359 s++; 360 } 361 if (i < want) { 362 /* Incomplete multibyte sequence */ 363 es->want = want - i; 364 es->ch = wc; 365 return ((size_t)-2); 366 } 367 if (pwc != NULL) 368 *pwc = wc; 369 es->want = 0; 370 if (zero || wc != L'\0') { 371 return (want); 372 } else { 373 return (0); 374 } 375 } 376 377 static size_t 378 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc, 379 mbstate_t *_RESTRICT_KYWD ps, 380 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 381 { 382 _EucState *es; 383 int i, len; 384 wchar_t nm; 385 386 es = (_EucState *)ps; 387 388 if (es->want != 0) { 389 errno = EINVAL; 390 return ((size_t)-1); 391 } 392 393 if (s == NULL) 394 /* Reset to initial shift state (no-op) */ 395 return (1); 396 397 if ((wc & ~0x7f) == 0) { 398 /* Fast path for plain ASCII (CS0) */ 399 *s = (char)wc; 400 return (1); 401 } 402 403 /* Determine the "length" */ 404 if ((unsigned)wc > 0xffffff) { 405 len = 4; 406 } else if ((unsigned)wc > 0xffff) { 407 len = 3; 408 } else if ((unsigned)wc > 0xff) { 409 len = 2; 410 } else { 411 len = 1; 412 } 413 414 if (len > MB_CUR_MAX) { 415 errno = EILSEQ; 416 return ((size_t)-1); 417 } 418 419 /* This first check excludes CS1, which is implicitly valid. */ 420 if ((wc < 0xa100) || (wc > 0xffff)) { 421 /* Check for valid CS2 or CS3 */ 422 nm = (wc >> ((len - 1) * 8)); 423 if (nm == cs2) { 424 if (len != cs2width) { 425 errno = EILSEQ; 426 return ((size_t)-1); 427 } 428 } else if (nm == cs3) { 429 if (len != cs3width) { 430 errno = EILSEQ; 431 return ((size_t)-1); 432 } 433 } else { 434 errno = EILSEQ; 435 return ((size_t)-1); 436 } 437 } 438 439 /* Stash the bytes, least significant last */ 440 for (i = len - 1; i >= 0; i--) { 441 s[i] = (wc & 0xff); 442 wc >>= 8; 443 } 444 return (len); 445 } 446