1 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2003 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/types.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdbool.h> 38 #include <stddef.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_gbk2k.h" 50 51 52 /* ---------------------------------------------------------------------- 53 * private stuffs used by templates 54 */ 55 56 typedef struct _GBK2KState { 57 int chlen; 58 char ch[4]; 59 } _GBK2KState; 60 61 typedef struct { 62 int mb_cur_max; 63 } _GBK2KEncodingInfo; 64 65 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 66 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 67 68 #define _FUNCNAME(m) _citrus_GBK2K_##m 69 #define _ENCODING_INFO _GBK2KEncodingInfo 70 #define _ENCODING_STATE _GBK2KState 71 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 72 #define _ENCODING_IS_STATE_DEPENDENT 0 73 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 74 75 static __inline void 76 /*ARGSUSED*/ 77 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused, 78 _GBK2KState * __restrict s) 79 { 80 81 memset(s, 0, sizeof(*s)); 82 } 83 84 #if 0 85 static __inline void 86 /*ARGSUSED*/ 87 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused, 88 void * __restrict pspriv, const _GBK2KState * __restrict s) 89 { 90 91 memcpy(pspriv, (const void *)s, sizeof(*s)); 92 } 93 94 static __inline void 95 /*ARGSUSED*/ 96 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused, 97 _GBK2KState * __restrict s, const void * __restrict pspriv) 98 { 99 100 memcpy((void *)s, pspriv, sizeof(*s)); 101 } 102 #endif 103 104 static __inline bool 105 _mb_singlebyte(int c) 106 { 107 108 return ((c & 0xff) <= 0x7f); 109 } 110 111 static __inline bool 112 _mb_leadbyte(int c) 113 { 114 115 c &= 0xff; 116 return (0x81 <= c && c <= 0xfe); 117 } 118 119 static __inline bool 120 _mb_trailbyte(int c) 121 { 122 123 c &= 0xff; 124 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 125 } 126 127 static __inline bool 128 _mb_surrogate(int c) 129 { 130 131 c &= 0xff; 132 return (0x30 <= c && c <= 0x39); 133 } 134 135 static __inline int 136 _mb_count(wchar_t v) 137 { 138 uint32_t c; 139 140 c = (uint32_t)v; /* XXX */ 141 if (!(c & 0xffffff00)) 142 return (1); 143 if (!(c & 0xffff0000)) 144 return (2); 145 return (4); 146 } 147 148 #define _PSENC (psenc->ch[psenc->chlen - 1]) 149 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 150 151 static int 152 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 153 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 154 _GBK2KState * __restrict psenc, size_t * __restrict nresult) 155 { 156 char *s0, *s1; 157 wchar_t wc; 158 int chlenbak, len; 159 160 s0 = *s; 161 162 if (s0 == NULL) { 163 /* _citrus_GBK2K_init_state(ei, psenc); */ 164 psenc->chlen = 0; 165 *nresult = 0; 166 return (0); 167 } 168 169 chlenbak = psenc->chlen; 170 171 switch (psenc->chlen) { 172 case 3: 173 if (!_mb_leadbyte (_PSENC)) 174 goto invalid; 175 /* FALLTHROUGH */ 176 case 2: 177 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 178 goto invalid; 179 /* FALLTHROUGH */ 180 case 1: 181 if (!_mb_leadbyte (_PSENC)) 182 goto invalid; 183 /* FALLTHOROUGH */ 184 case 0: 185 break; 186 default: 187 goto invalid; 188 } 189 190 for (;;) { 191 if (n-- < 1) 192 goto restart; 193 194 _PUSH_PSENC(*s0++); 195 196 switch (psenc->chlen) { 197 case 1: 198 if (_mb_singlebyte(_PSENC)) 199 goto convert; 200 if (_mb_leadbyte (_PSENC)) 201 continue; 202 goto ilseq; 203 case 2: 204 if (_mb_trailbyte (_PSENC)) 205 goto convert; 206 if (ei->mb_cur_max == 4 && 207 _mb_surrogate (_PSENC)) 208 continue; 209 goto ilseq; 210 case 3: 211 if (_mb_leadbyte (_PSENC)) 212 continue; 213 goto ilseq; 214 case 4: 215 if (_mb_surrogate (_PSENC)) 216 goto convert; 217 goto ilseq; 218 } 219 } 220 221 convert: 222 len = psenc->chlen; 223 s1 = &psenc->ch[0]; 224 wc = 0; 225 while (len-- > 0) 226 wc = (wc << 8) | (*s1++ & 0xff); 227 228 if (pwc != NULL) 229 *pwc = wc; 230 *s = s0; 231 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 232 /* _citrus_GBK2K_init_state(ei, psenc); */ 233 psenc->chlen = 0; 234 235 return (0); 236 237 restart: 238 *s = s0; 239 *nresult = (size_t)-2; 240 241 return (0); 242 243 invalid: 244 return (EINVAL); 245 246 ilseq: 247 *nresult = (size_t)-1; 248 return (EILSEQ); 249 } 250 251 static int 252 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 253 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc, 254 size_t * __restrict nresult) 255 { 256 size_t len; 257 int ret; 258 259 if (psenc->chlen != 0) { 260 ret = EINVAL; 261 goto err; 262 } 263 264 len = _mb_count(wc); 265 if (n < len) { 266 ret = E2BIG; 267 goto err; 268 } 269 270 switch (len) { 271 case 1: 272 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 273 ret = EILSEQ; 274 goto err; 275 } 276 break; 277 case 2: 278 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 279 !_mb_trailbyte (_PUSH_PSENC(wc))) { 280 ret = EILSEQ; 281 goto err; 282 } 283 break; 284 case 4: 285 if (ei->mb_cur_max != 4 || 286 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 287 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 288 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 289 !_mb_surrogate (_PUSH_PSENC(wc))) { 290 ret = EILSEQ; 291 goto err; 292 } 293 break; 294 } 295 296 memcpy(s, psenc->ch, psenc->chlen); 297 *nresult = psenc->chlen; 298 /* _citrus_GBK2K_init_state(ei, psenc); */ 299 psenc->chlen = 0; 300 301 return (0); 302 303 err: 304 *nresult = (size_t)-1; 305 return (ret); 306 } 307 308 static __inline int 309 /*ARGSUSED*/ 310 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused, 311 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 312 { 313 uint8_t ch, cl; 314 315 if ((uint32_t)wc < 0x80) { 316 /* ISO646 */ 317 *csid = 0; 318 *idx = (_index_t)wc; 319 } else if ((uint32_t)wc >= 0x10000) { 320 /* GBKUCS : XXX */ 321 *csid = 3; 322 *idx = (_index_t)wc; 323 } else { 324 ch = (uint8_t)(wc >> 8); 325 cl = (uint8_t)wc; 326 if (ch >= 0xA1 && cl >= 0xA1) { 327 /* EUC G1 */ 328 *csid = 1; 329 *idx = (_index_t)wc & 0x7F7FU; 330 } else { 331 /* extended area (0x8140-) */ 332 *csid = 2; 333 *idx = (_index_t)wc; 334 } 335 } 336 337 return (0); 338 } 339 340 static __inline int 341 /*ARGSUSED*/ 342 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 343 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 344 { 345 346 switch (csid) { 347 case 0: 348 /* ISO646 */ 349 *wc = (wchar_t)idx; 350 break; 351 case 1: 352 /* EUC G1 */ 353 *wc = (wchar_t)idx | 0x8080U; 354 break; 355 case 2: 356 /* extended area */ 357 *wc = (wchar_t)idx; 358 break; 359 case 3: 360 /* GBKUCS : XXX */ 361 if (ei->mb_cur_max != 4) 362 return (EINVAL); 363 *wc = (wchar_t)idx; 364 break; 365 default: 366 return (EILSEQ); 367 } 368 369 return (0); 370 } 371 372 static __inline int 373 /*ARGSUSED*/ 374 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused, 375 _GBK2KState * __restrict psenc, int * __restrict rstate) 376 { 377 378 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 379 _STDENC_SDGEN_INCOMPLETE_CHAR; 380 return (0); 381 } 382 383 static int 384 /*ARGSUSED*/ 385 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 386 const void * __restrict var, size_t lenvar) 387 { 388 const char *p; 389 390 p = var; 391 memset((void *)ei, 0, sizeof(*ei)); 392 ei->mb_cur_max = 4; 393 while (lenvar > 0) { 394 switch (_bcs_tolower(*p)) { 395 case '2': 396 MATCH("2byte", ei->mb_cur_max = 2); 397 break; 398 } 399 p++; 400 lenvar--; 401 } 402 403 return (0); 404 } 405 406 static void 407 /*ARGSUSED*/ 408 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused) 409 { 410 411 } 412 413 /* ---------------------------------------------------------------------- 414 * public interface for stdenc 415 */ 416 417 _CITRUS_STDENC_DECLS(GBK2K); 418 _CITRUS_STDENC_DEF_OPS(GBK2K); 419 420 #include "citrus_stdenc_template.h" 421