1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_big5.c,v 1.13 2011/05/23 14:53:46 joerg Exp $ */ 3 4 /*- 5 * Copyright (c)2002, 2006 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /*- 31 * SPDX-License-Identifier: BSD-3-Clause 32 * 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Paul Borman at Krystal Technologies. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include <sys/cdefs.h> 65 #include <sys/queue.h> 66 #include <sys/types.h> 67 68 #include <assert.h> 69 #include <errno.h> 70 #include <limits.h> 71 #include <stddef.h> 72 #include <stdint.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <wchar.h> 77 78 #include "citrus_namespace.h" 79 #include "citrus_prop.h" 80 #include "citrus_types.h" 81 #include "citrus_bcs.h" 82 #include "citrus_module.h" 83 #include "citrus_stdenc.h" 84 #include "citrus_big5.h" 85 86 /* ---------------------------------------------------------------------- 87 * private stuffs used by templates 88 */ 89 90 typedef struct { 91 int chlen; 92 char ch[2]; 93 } _BIG5State; 94 95 typedef struct _BIG5Exclude { 96 TAILQ_ENTRY(_BIG5Exclude) entry; 97 wint_t start; 98 wint_t end; 99 } _BIG5Exclude; 100 101 typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList; 102 103 typedef struct { 104 _BIG5ExcludeList excludes; 105 int cell[0x100]; 106 } _BIG5EncodingInfo; 107 108 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 109 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 110 111 #define _FUNCNAME(m) _citrus_BIG5_##m 112 #define _ENCODING_INFO _BIG5EncodingInfo 113 #define _ENCODING_STATE _BIG5State 114 #define _ENCODING_MB_CUR_MAX(_ei_) 2 115 #define _ENCODING_IS_STATE_DEPENDENT 0 116 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 117 118 119 static __inline void 120 /*ARGSUSED*/ 121 _citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei __unused, 122 _BIG5State * __restrict s) 123 { 124 125 memset(s, 0, sizeof(*s)); 126 } 127 128 #if 0 129 static __inline void 130 /*ARGSUSED*/ 131 _citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei __unused, 132 void * __restrict pspriv, 133 const _BIG5State * __restrict s) 134 { 135 136 memcpy(pspriv, (const void *)s, sizeof(*s)); 137 } 138 139 static __inline void 140 /*ARGSUSED*/ 141 _citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei __unused, 142 _BIG5State * __restrict s, 143 const void * __restrict pspriv) 144 { 145 146 memcpy((void *)s, pspriv, sizeof(*s)); 147 } 148 #endif 149 150 static __inline int 151 _citrus_BIG5_check(_BIG5EncodingInfo *ei, unsigned int c) 152 { 153 154 return ((ei->cell[c & 0xFF] & 0x1) ? 2 : 1); 155 } 156 157 static __inline int 158 _citrus_BIG5_check2(_BIG5EncodingInfo *ei, unsigned int c) 159 { 160 161 return ((ei->cell[c & 0xFF] & 0x2) ? 1 : 0); 162 } 163 164 static __inline int 165 _citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c) 166 { 167 _BIG5Exclude *exclude; 168 169 TAILQ_FOREACH(exclude, &ei->excludes, entry) { 170 if (c >= exclude->start && c <= exclude->end) 171 return (EILSEQ); 172 } 173 return (0); 174 } 175 176 static int 177 _citrus_BIG5_fill_rowcol(void * __restrict ctx, const char * __restrict s, 178 uint64_t start, uint64_t end) 179 { 180 _BIG5EncodingInfo *ei; 181 uint64_t n; 182 int i; 183 184 if (start > 0xFF || end > 0xFF) 185 return (EINVAL); 186 ei = (_BIG5EncodingInfo *)ctx; 187 i = strcmp("row", s) ? 1 : 0; 188 i = 1 << i; 189 for (n = start; n <= end; ++n) 190 ei->cell[n & 0xFF] |= i; 191 return (0); 192 } 193 194 static int 195 /*ARGSUSED*/ 196 _citrus_BIG5_fill_excludes(void * __restrict ctx, 197 const char * __restrict s __unused, uint64_t start, uint64_t end) 198 { 199 _BIG5EncodingInfo *ei; 200 _BIG5Exclude *exclude; 201 202 if (start > 0xFFFF || end > 0xFFFF) 203 return (EINVAL); 204 ei = (_BIG5EncodingInfo *)ctx; 205 exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList); 206 if (exclude != NULL && (wint_t)start <= exclude->end) 207 return (EINVAL); 208 exclude = (void *)malloc(sizeof(*exclude)); 209 if (exclude == NULL) 210 return (ENOMEM); 211 exclude->start = (wint_t)start; 212 exclude->end = (wint_t)end; 213 TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry); 214 215 return (0); 216 } 217 218 static const _citrus_prop_hint_t root_hints[] = { 219 _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol), 220 _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol), 221 _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes), 222 _CITRUS_PROP_HINT_END 223 }; 224 225 static void 226 /*ARGSUSED*/ 227 _citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei) 228 { 229 _BIG5Exclude *exclude; 230 231 while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) { 232 TAILQ_REMOVE(&ei->excludes, exclude, entry); 233 free(exclude); 234 } 235 } 236 237 static int 238 /*ARGSUSED*/ 239 _citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei, 240 const void * __restrict var, size_t lenvar) 241 { 242 const char *s; 243 int err; 244 245 memset((void *)ei, 0, sizeof(*ei)); 246 TAILQ_INIT(&ei->excludes); 247 248 if (lenvar > 0 && var != NULL) { 249 s = _bcs_skip_ws_len((const char *)var, &lenvar); 250 if (lenvar > 0 && *s != '\0') { 251 err = _citrus_prop_parse_variable( 252 root_hints, (void *)ei, s, lenvar); 253 if (err == 0) 254 return (0); 255 256 _citrus_BIG5_encoding_module_uninit(ei); 257 memset((void *)ei, 0, sizeof(*ei)); 258 TAILQ_INIT(&ei->excludes); 259 } 260 } 261 262 /* fallback Big5-1984, for backward compatibility. */ 263 _citrus_BIG5_fill_rowcol(ei, "row", 0xA1, 0xFE); 264 _citrus_BIG5_fill_rowcol(ei, "col", 0x40, 0x7E); 265 _citrus_BIG5_fill_rowcol(ei, "col", 0xA1, 0xFE); 266 267 return (0); 268 } 269 270 static int 271 /*ARGSUSED*/ 272 _citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei, 273 wchar_t * __restrict pwc, 274 char ** __restrict s, size_t n, 275 _BIG5State * __restrict psenc, 276 size_t * __restrict nresult) 277 { 278 wchar_t wchar; 279 char *s0; 280 int c, chlenbak; 281 282 s0 = *s; 283 284 if (s0 == NULL) { 285 _citrus_BIG5_init_state(ei, psenc); 286 *nresult = 0; 287 return (0); 288 } 289 290 chlenbak = psenc->chlen; 291 292 /* make sure we have the first byte in the buffer */ 293 switch (psenc->chlen) { 294 case 0: 295 if (n < 1) 296 goto restart; 297 psenc->ch[0] = *s0++; 298 psenc->chlen = 1; 299 n--; 300 break; 301 case 1: 302 break; 303 default: 304 /* illegal state */ 305 goto ilseq; 306 } 307 308 c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff); 309 if (c == 0) 310 goto ilseq; 311 while (psenc->chlen < c) { 312 if (n < 1) { 313 goto restart; 314 } 315 psenc->ch[psenc->chlen] = *s0++; 316 psenc->chlen++; 317 n--; 318 } 319 320 switch (c) { 321 case 1: 322 wchar = psenc->ch[0] & 0xff; 323 break; 324 case 2: 325 if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff)) 326 goto ilseq; 327 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 328 break; 329 default: 330 /* illegal state */ 331 goto ilseq; 332 } 333 334 if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0) 335 goto ilseq; 336 337 *s = s0; 338 psenc->chlen = 0; 339 if (pwc) 340 *pwc = wchar; 341 *nresult = wchar ? c - chlenbak : 0; 342 343 return (0); 344 345 ilseq: 346 psenc->chlen = 0; 347 *nresult = (size_t)-1; 348 return (EILSEQ); 349 350 restart: 351 *s = s0; 352 *nresult = (size_t)-2; 353 return (0); 354 } 355 356 static int 357 /*ARGSUSED*/ 358 _citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei, 359 char * __restrict s, 360 size_t n, wchar_t wc, _BIG5State * __restrict psenc __unused, 361 size_t * __restrict nresult) 362 { 363 size_t l; 364 int ret; 365 366 /* check invalid sequence */ 367 if (wc & ~0xffff || 368 _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) { 369 ret = EILSEQ; 370 goto err; 371 } 372 373 if (wc & 0x8000) { 374 if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 || 375 !_citrus_BIG5_check2(ei, wc & 0xff)) { 376 ret = EILSEQ; 377 goto err; 378 } 379 l = 2; 380 } else { 381 if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) { 382 ret = EILSEQ; 383 goto err; 384 } 385 l = 1; 386 } 387 388 if (n < l) { 389 /* bound check failure */ 390 ret = E2BIG; 391 goto err; 392 } 393 394 if (l == 2) { 395 s[0] = (wc >> 8) & 0xff; 396 s[1] = wc & 0xff; 397 } else 398 s[0] = wc & 0xff; 399 400 *nresult = l; 401 402 return (0); 403 404 err: 405 *nresult = (size_t)-1; 406 return (ret); 407 } 408 409 static __inline int 410 /*ARGSUSED*/ 411 _citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei __unused, 412 _csid_t * __restrict csid, 413 _index_t * __restrict idx, wchar_t wc) 414 { 415 416 *csid = (wc < 0x100) ? 0 : 1; 417 *idx = (_index_t)wc; 418 419 return (0); 420 } 421 422 static __inline int 423 /*ARGSUSED*/ 424 _citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei __unused, 425 wchar_t * __restrict wc, 426 _csid_t csid, _index_t idx) 427 { 428 429 switch (csid) { 430 case 0: 431 case 1: 432 *wc = (wchar_t)idx; 433 break; 434 default: 435 return (EILSEQ); 436 } 437 438 return (0); 439 } 440 441 static __inline int 442 /*ARGSUSED*/ 443 _citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei __unused, 444 _BIG5State * __restrict psenc, 445 int * __restrict rstate) 446 { 447 448 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 449 _STDENC_SDGEN_INCOMPLETE_CHAR; 450 return (0); 451 } 452 453 /* ---------------------------------------------------------------------- 454 * public interface for stdenc 455 */ 456 457 _CITRUS_STDENC_DECLS(BIG5); 458 _CITRUS_STDENC_DEF_OPS(BIG5); 459 460 #include "citrus_stdenc_template.h" 461