1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright 2013 Garrett D'Amore <garrett@damore.org> 5 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 6 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 7 * Copyright (c) 1993 8 * The Regents of the University of California. All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Paul Borman at Krystal Technologies. 12 * 13 * Copyright (c) 2011 The FreeBSD Foundation 14 * All rights reserved. 15 * Portions of this software were developed by David Chisnall 16 * under sponsorship from the FreeBSD Foundation. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 */ 42 43 #if defined(LIBC_SCCS) && !defined(lint) 44 static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93"; 45 #endif /* LIBC_SCCS and not lint */ 46 #include <sys/param.h> 47 __FBSDID("$FreeBSD$"); 48 49 #include <errno.h> 50 #include <limits.h> 51 #include <runetype.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <wchar.h> 55 #include "mblocal.h" 56 57 extern int __mb_sb_limit; 58 59 static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict, 60 size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); 61 static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t, 62 mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); 63 64 static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict, 65 size_t, mbstate_t * __restrict); 66 static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict, 67 size_t, mbstate_t * __restrict); 68 static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict, 69 size_t, mbstate_t * __restrict); 70 static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict, 71 size_t, mbstate_t * __restrict); 72 73 static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t, 74 mbstate_t * __restrict); 75 static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t, 76 mbstate_t * __restrict); 77 static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t, 78 mbstate_t * __restrict); 79 static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t, 80 mbstate_t * __restrict); 81 82 static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict, 83 const char ** __restrict, size_t, size_t, 84 mbstate_t * __restrict); 85 static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict, 86 const char ** __restrict, size_t, size_t, 87 mbstate_t * __restrict); 88 static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict, 89 const char ** __restrict, size_t, size_t, 90 mbstate_t * __restrict); 91 static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict, 92 const char ** __restrict, size_t, size_t, 93 mbstate_t * __restrict); 94 95 static size_t _EUC_CN_wcsnrtombs(char * __restrict, 96 const wchar_t ** __restrict, size_t, size_t, 97 mbstate_t * __restrict); 98 static size_t _EUC_JP_wcsnrtombs(char * __restrict, 99 const wchar_t ** __restrict, size_t, size_t, 100 mbstate_t * __restrict); 101 static size_t _EUC_KR_wcsnrtombs(char * __restrict, 102 const wchar_t ** __restrict, size_t, size_t, 103 mbstate_t * __restrict); 104 static size_t _EUC_TW_wcsnrtombs(char * __restrict, 105 const wchar_t ** __restrict, size_t, size_t, 106 mbstate_t * __restrict); 107 108 static int _EUC_mbsinit(const mbstate_t *); 109 110 typedef struct { 111 wchar_t ch; 112 int set; 113 int want; 114 } _EucState; 115 116 static int 117 _EUC_mbsinit(const mbstate_t *ps) 118 { 119 120 return (ps == NULL || ((const _EucState *)ps)->want == 0); 121 } 122 123 /* 124 * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 125 */ 126 int 127 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl) 128 { 129 l->__mbrtowc = _EUC_CN_mbrtowc; 130 l->__wcrtomb = _EUC_CN_wcrtomb; 131 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs; 132 l->__wcsnrtombs = _EUC_CN_wcsnrtombs; 133 l->__mbsinit = _EUC_mbsinit; 134 135 l->runes = rl; 136 l->__mb_cur_max = 4; 137 l->__mb_sb_limit = 128; 138 return (0); 139 } 140 141 static size_t 142 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 143 size_t n, mbstate_t * __restrict ps) 144 { 145 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 146 } 147 148 static size_t 149 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst, 150 const char ** __restrict src, 151 size_t nms, size_t len, mbstate_t * __restrict ps) 152 { 153 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); 154 } 155 156 static size_t 157 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc, 158 mbstate_t * __restrict ps) 159 { 160 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 161 } 162 163 static size_t 164 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 165 size_t nwc, size_t len, mbstate_t * __restrict ps) 166 { 167 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); 168 } 169 170 /* 171 * EUC-KR uses only CS0 and CS1. 172 */ 173 int 174 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl) 175 { 176 l->__mbrtowc = _EUC_KR_mbrtowc; 177 l->__wcrtomb = _EUC_KR_wcrtomb; 178 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs; 179 l->__wcsnrtombs = _EUC_KR_wcsnrtombs; 180 l->__mbsinit = _EUC_mbsinit; 181 182 l->runes = rl; 183 l->__mb_cur_max = 2; 184 l->__mb_sb_limit = 128; 185 return (0); 186 } 187 188 static size_t 189 _EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 190 size_t n, mbstate_t * __restrict ps) 191 { 192 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); 193 } 194 195 static size_t 196 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst, 197 const char ** __restrict src, 198 size_t nms, size_t len, mbstate_t * __restrict ps) 199 { 200 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); 201 } 202 203 static size_t 204 _EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, 205 mbstate_t * __restrict ps) 206 { 207 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); 208 } 209 210 static size_t 211 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 212 size_t nwc, size_t len, mbstate_t * __restrict ps) 213 { 214 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); 215 } 216 217 /* 218 * EUC-JP uses CS0, CS1, CS2, and CS3. 219 */ 220 int 221 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl) 222 { 223 l->__mbrtowc = _EUC_JP_mbrtowc; 224 l->__wcrtomb = _EUC_JP_wcrtomb; 225 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs; 226 l->__wcsnrtombs = _EUC_JP_wcsnrtombs; 227 l->__mbsinit = _EUC_mbsinit; 228 229 l->runes = rl; 230 l->__mb_cur_max = 3; 231 l->__mb_sb_limit = 128; 232 return (0); 233 } 234 235 static size_t 236 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 237 size_t n, mbstate_t * __restrict ps) 238 { 239 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); 240 } 241 242 static size_t 243 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst, 244 const char ** __restrict src, 245 size_t nms, size_t len, mbstate_t * __restrict ps) 246 { 247 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); 248 } 249 250 static size_t 251 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc, 252 mbstate_t * __restrict ps) 253 { 254 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); 255 } 256 257 static size_t 258 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 259 size_t nwc, size_t len, mbstate_t * __restrict ps) 260 { 261 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); 262 } 263 264 /* 265 * EUC-TW uses CS0, CS1, and CS2. 266 */ 267 int 268 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl) 269 { 270 l->__mbrtowc = _EUC_TW_mbrtowc; 271 l->__wcrtomb = _EUC_TW_wcrtomb; 272 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs; 273 l->__wcsnrtombs = _EUC_TW_wcsnrtombs; 274 l->__mbsinit = _EUC_mbsinit; 275 276 l->runes = rl; 277 l->__mb_cur_max = 4; 278 l->__mb_sb_limit = 128; 279 return (0); 280 } 281 282 static size_t 283 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 284 size_t n, mbstate_t * __restrict ps) 285 { 286 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 287 } 288 289 static size_t 290 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst, 291 const char ** __restrict src, 292 size_t nms, size_t len, mbstate_t * __restrict ps) 293 { 294 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); 295 } 296 297 static size_t 298 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc, 299 mbstate_t * __restrict ps) 300 { 301 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 302 } 303 304 static size_t 305 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 306 size_t nwc, size_t len, mbstate_t * __restrict ps) 307 { 308 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); 309 } 310 311 /* 312 * Common EUC code. 313 */ 314 315 static size_t 316 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s, 317 size_t n, mbstate_t * __restrict ps, 318 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 319 { 320 _EucState *es; 321 int i, want; 322 wchar_t wc = 0; 323 unsigned char ch, chs; 324 325 es = (_EucState *)ps; 326 327 if (es->want < 0 || es->want > MB_CUR_MAX) { 328 errno = EINVAL; 329 return ((size_t)-1); 330 } 331 332 if (s == NULL) { 333 s = ""; 334 n = 1; 335 pwc = NULL; 336 } 337 338 if (n == 0) 339 /* Incomplete multibyte sequence */ 340 return ((size_t)-2); 341 342 if (es->want == 0) { 343 /* Fast path for plain ASCII (CS0) */ 344 if (((ch = (unsigned char)*s) & 0x80) == 0) { 345 if (pwc != NULL) 346 *pwc = ch; 347 return (ch != '\0' ? 1 : 0); 348 } 349 350 if (ch >= 0xa1) { 351 /* CS1 */ 352 want = 2; 353 } else if (ch == cs2) { 354 want = cs2width; 355 } else if (ch == cs3) { 356 want = cs3width; 357 } else { 358 errno = EILSEQ; 359 return ((size_t)-1); 360 } 361 362 363 es->want = want; 364 es->ch = 0; 365 } else { 366 want = es->want; 367 wc = es->ch; 368 } 369 370 for (i = 0; i < MIN(want, n); i++) { 371 wc <<= 8; 372 chs = *s; 373 wc |= chs; 374 s++; 375 } 376 if (i < want) { 377 /* Incomplete multibyte sequence */ 378 es->want = want - i; 379 es->ch = wc; 380 errno = EILSEQ; 381 return ((size_t)-2); 382 } 383 if (pwc != NULL) 384 *pwc = wc; 385 es->want = 0; 386 return (wc == L'\0' ? 0 : want); 387 } 388 389 static size_t 390 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc, 391 mbstate_t * __restrict ps, 392 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 393 { 394 _EucState *es; 395 int i, len; 396 wchar_t nm; 397 398 es = (_EucState *)ps; 399 400 if (es->want != 0) { 401 errno = EINVAL; 402 return ((size_t)-1); 403 } 404 405 if (s == NULL) 406 /* Reset to initial shift state (no-op) */ 407 return (1); 408 409 if ((wc & ~0x7f) == 0) { 410 /* Fast path for plain ASCII (CS0) */ 411 *s = (char)wc; 412 return (1); 413 } 414 415 /* Determine the "length" */ 416 if ((unsigned)wc > 0xffffff) { 417 len = 4; 418 } else if ((unsigned)wc > 0xffff) { 419 len = 3; 420 } else if ((unsigned)wc > 0xff) { 421 len = 2; 422 } else { 423 len = 1; 424 } 425 426 if (len > MB_CUR_MAX) { 427 errno = EILSEQ; 428 return ((size_t)-1); 429 } 430 431 /* This first check excludes CS1, which is implicitly valid. */ 432 if ((wc < 0xa100) || (wc > 0xffff)) { 433 /* Check for valid CS2 or CS3 */ 434 nm = (wc >> ((len - 1) * 8)); 435 if (nm == cs2) { 436 if (len != cs2width) { 437 errno = EILSEQ; 438 return ((size_t)-1); 439 } 440 } else if (nm == cs3) { 441 if (len != cs3width) { 442 errno = EILSEQ; 443 return ((size_t)-1); 444 } 445 } else { 446 errno = EILSEQ; 447 return ((size_t)-1); 448 } 449 } 450 451 /* Stash the bytes, least significant last */ 452 for (i = len - 1; i >= 0; i--) { 453 s[i] = (wc & 0xff); 454 wc >>= 8; 455 } 456 return (len); 457 } 458