1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright 2013 Garrett D'Amore <garrett@damore.org> 5 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 6 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 7 * Copyright (c) 1993 8 * The Regents of the University of California. All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Paul Borman at Krystal Technologies. 12 * 13 * Copyright (c) 2011 The FreeBSD Foundation 14 * 15 * Portions of this software were developed by David Chisnall 16 * under sponsorship from the FreeBSD Foundation. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 */ 42 43 #include <sys/param.h> 44 #include <errno.h> 45 #include <limits.h> 46 #include <runetype.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <wchar.h> 50 #include "mblocal.h" 51 52 extern int __mb_sb_limit; 53 54 static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict, 55 size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); 56 static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t, 57 mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); 58 59 static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict, 60 size_t, mbstate_t * __restrict); 61 static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict, 62 size_t, mbstate_t * __restrict); 63 static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict, 64 size_t, mbstate_t * __restrict); 65 static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict, 66 size_t, mbstate_t * __restrict); 67 68 static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t, 69 mbstate_t * __restrict); 70 static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t, 71 mbstate_t * __restrict); 72 static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t, 73 mbstate_t * __restrict); 74 static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t, 75 mbstate_t * __restrict); 76 77 static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict, 78 const char ** __restrict, size_t, size_t, 79 mbstate_t * __restrict); 80 static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict, 81 const char ** __restrict, size_t, size_t, 82 mbstate_t * __restrict); 83 static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict, 84 const char ** __restrict, size_t, size_t, 85 mbstate_t * __restrict); 86 static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict, 87 const char ** __restrict, size_t, size_t, 88 mbstate_t * __restrict); 89 90 static size_t _EUC_CN_wcsnrtombs(char * __restrict, 91 const wchar_t ** __restrict, size_t, size_t, 92 mbstate_t * __restrict); 93 static size_t _EUC_JP_wcsnrtombs(char * __restrict, 94 const wchar_t ** __restrict, size_t, size_t, 95 mbstate_t * __restrict); 96 static size_t _EUC_KR_wcsnrtombs(char * __restrict, 97 const wchar_t ** __restrict, size_t, size_t, 98 mbstate_t * __restrict); 99 static size_t _EUC_TW_wcsnrtombs(char * __restrict, 100 const wchar_t ** __restrict, size_t, size_t, 101 mbstate_t * __restrict); 102 103 static int _EUC_mbsinit(const mbstate_t *); 104 105 typedef struct { 106 wchar_t ch; 107 int set; 108 int want; 109 } _EucState; 110 111 static int 112 _EUC_mbsinit(const mbstate_t *ps) 113 { 114 115 return (ps == NULL || ((const _EucState *)ps)->want == 0); 116 } 117 118 /* 119 * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 120 */ 121 int 122 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl) 123 { 124 l->__mbrtowc = _EUC_CN_mbrtowc; 125 l->__wcrtomb = _EUC_CN_wcrtomb; 126 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs; 127 l->__wcsnrtombs = _EUC_CN_wcsnrtombs; 128 l->__mbsinit = _EUC_mbsinit; 129 130 l->runes = rl; 131 l->__mb_cur_max = 4; 132 l->__mb_sb_limit = 128; 133 return (0); 134 } 135 136 static size_t 137 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 138 size_t n, mbstate_t * __restrict ps) 139 { 140 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 141 } 142 143 static size_t 144 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst, 145 const char ** __restrict src, 146 size_t nms, size_t len, mbstate_t * __restrict ps) 147 { 148 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); 149 } 150 151 static size_t 152 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc, 153 mbstate_t * __restrict ps) 154 { 155 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 156 } 157 158 static size_t 159 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 160 size_t nwc, size_t len, mbstate_t * __restrict ps) 161 { 162 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); 163 } 164 165 /* 166 * EUC-KR uses only CS0 and CS1. 167 */ 168 int 169 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl) 170 { 171 l->__mbrtowc = _EUC_KR_mbrtowc; 172 l->__wcrtomb = _EUC_KR_wcrtomb; 173 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs; 174 l->__wcsnrtombs = _EUC_KR_wcsnrtombs; 175 l->__mbsinit = _EUC_mbsinit; 176 177 l->runes = rl; 178 l->__mb_cur_max = 2; 179 l->__mb_sb_limit = 128; 180 return (0); 181 } 182 183 static size_t 184 _EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 185 size_t n, mbstate_t * __restrict ps) 186 { 187 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); 188 } 189 190 static size_t 191 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst, 192 const char ** __restrict src, 193 size_t nms, size_t len, mbstate_t * __restrict ps) 194 { 195 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); 196 } 197 198 static size_t 199 _EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, 200 mbstate_t * __restrict ps) 201 { 202 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); 203 } 204 205 static size_t 206 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 207 size_t nwc, size_t len, mbstate_t * __restrict ps) 208 { 209 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); 210 } 211 212 /* 213 * EUC-JP uses CS0, CS1, CS2, and CS3. 214 */ 215 int 216 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl) 217 { 218 l->__mbrtowc = _EUC_JP_mbrtowc; 219 l->__wcrtomb = _EUC_JP_wcrtomb; 220 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs; 221 l->__wcsnrtombs = _EUC_JP_wcsnrtombs; 222 l->__mbsinit = _EUC_mbsinit; 223 224 l->runes = rl; 225 l->__mb_cur_max = 3; 226 l->__mb_sb_limit = 128; 227 return (0); 228 } 229 230 static size_t 231 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 232 size_t n, mbstate_t * __restrict ps) 233 { 234 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); 235 } 236 237 static size_t 238 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst, 239 const char ** __restrict src, 240 size_t nms, size_t len, mbstate_t * __restrict ps) 241 { 242 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); 243 } 244 245 static size_t 246 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc, 247 mbstate_t * __restrict ps) 248 { 249 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); 250 } 251 252 static size_t 253 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 254 size_t nwc, size_t len, mbstate_t * __restrict ps) 255 { 256 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); 257 } 258 259 /* 260 * EUC-TW uses CS0, CS1, and CS2. 261 */ 262 int 263 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl) 264 { 265 l->__mbrtowc = _EUC_TW_mbrtowc; 266 l->__wcrtomb = _EUC_TW_wcrtomb; 267 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs; 268 l->__wcsnrtombs = _EUC_TW_wcsnrtombs; 269 l->__mbsinit = _EUC_mbsinit; 270 271 l->runes = rl; 272 l->__mb_cur_max = 4; 273 l->__mb_sb_limit = 128; 274 return (0); 275 } 276 277 static size_t 278 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 279 size_t n, mbstate_t * __restrict ps) 280 { 281 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); 282 } 283 284 static size_t 285 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst, 286 const char ** __restrict src, 287 size_t nms, size_t len, mbstate_t * __restrict ps) 288 { 289 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); 290 } 291 292 static size_t 293 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc, 294 mbstate_t * __restrict ps) 295 { 296 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); 297 } 298 299 static size_t 300 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, 301 size_t nwc, size_t len, mbstate_t * __restrict ps) 302 { 303 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); 304 } 305 306 /* 307 * Common EUC code. 308 */ 309 310 static size_t 311 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s, 312 size_t n, mbstate_t * __restrict ps, 313 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 314 { 315 _EucState *es; 316 int i, want; 317 wchar_t wc = 0; 318 unsigned char ch, chs; 319 320 es = (_EucState *)ps; 321 322 if (es->want < 0 || es->want > MB_CUR_MAX) { 323 errno = EINVAL; 324 return ((size_t)-1); 325 } 326 327 if (s == NULL) { 328 s = ""; 329 n = 1; 330 pwc = NULL; 331 } 332 333 if (n == 0) 334 /* Incomplete multibyte sequence */ 335 return ((size_t)-2); 336 337 if (es->want == 0) { 338 /* Fast path for plain ASCII (CS0) */ 339 if (((ch = (unsigned char)*s) & 0x80) == 0) { 340 if (pwc != NULL) 341 *pwc = ch; 342 return (ch != '\0' ? 1 : 0); 343 } 344 345 if (ch >= 0xa1) { 346 /* CS1 */ 347 want = 2; 348 } else if (ch == cs2) { 349 want = cs2width; 350 } else if (ch == cs3) { 351 want = cs3width; 352 } else { 353 errno = EILSEQ; 354 return ((size_t)-1); 355 } 356 357 358 es->want = want; 359 es->ch = 0; 360 } else { 361 want = es->want; 362 wc = es->ch; 363 } 364 365 for (i = 0; i < MIN(want, n); i++) { 366 wc <<= 8; 367 chs = *s; 368 wc |= chs; 369 s++; 370 } 371 if (i < want) { 372 /* Incomplete multibyte sequence */ 373 es->want = want - i; 374 es->ch = wc; 375 errno = EILSEQ; 376 return ((size_t)-2); 377 } 378 if (pwc != NULL) 379 *pwc = wc; 380 es->want = 0; 381 return (wc == L'\0' ? 0 : want); 382 } 383 384 static size_t 385 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc, 386 mbstate_t * __restrict ps, 387 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) 388 { 389 _EucState *es; 390 int i, len; 391 wchar_t nm; 392 393 es = (_EucState *)ps; 394 395 if (es->want != 0) { 396 errno = EINVAL; 397 return ((size_t)-1); 398 } 399 400 if (s == NULL) 401 /* Reset to initial shift state (no-op) */ 402 return (1); 403 404 if ((wc & ~0x7f) == 0) { 405 /* Fast path for plain ASCII (CS0) */ 406 *s = (char)wc; 407 return (1); 408 } 409 410 /* Determine the "length" */ 411 if ((unsigned)wc > 0xffffff) { 412 len = 4; 413 } else if ((unsigned)wc > 0xffff) { 414 len = 3; 415 } else if ((unsigned)wc > 0xff) { 416 len = 2; 417 } else { 418 len = 1; 419 } 420 421 if (len > MB_CUR_MAX) { 422 errno = EILSEQ; 423 return ((size_t)-1); 424 } 425 426 /* This first check excludes CS1, which is implicitly valid. */ 427 if ((wc < 0xa100) || (wc > 0xffff)) { 428 /* Check for valid CS2 or CS3 */ 429 nm = (wc >> ((len - 1) * 8)); 430 if (nm == cs2) { 431 if (len != cs2width) { 432 errno = EILSEQ; 433 return ((size_t)-1); 434 } 435 } else if (nm == cs3) { 436 if (len != cs3width) { 437 errno = EILSEQ; 438 return ((size_t)-1); 439 } 440 } else { 441 errno = EILSEQ; 442 return ((size_t)-1); 443 } 444 } 445 446 /* Stash the bytes, least significant last */ 447 for (i = len - 1; i >= 0; i--) { 448 s[i] = (wc & 0xff); 449 wc >>= 8; 450 } 451 return (len); 452 } 453