1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2003 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <sys/types.h> 32 33 #include <assert.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <stdbool.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_bcs.h" 46 #include "citrus_module.h" 47 #include "citrus_stdenc.h" 48 #include "citrus_gbk2k.h" 49 50 51 /* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55 typedef struct _GBK2KState { 56 int chlen; 57 char ch[4]; 58 } _GBK2KState; 59 60 typedef struct { 61 int mb_cur_max; 62 } _GBK2KEncodingInfo; 63 64 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 65 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 66 67 #define _FUNCNAME(m) _citrus_GBK2K_##m 68 #define _ENCODING_INFO _GBK2KEncodingInfo 69 #define _ENCODING_STATE _GBK2KState 70 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 71 #define _ENCODING_IS_STATE_DEPENDENT 0 72 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 73 74 static __inline void 75 /*ARGSUSED*/ 76 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused, 77 _GBK2KState * __restrict s) 78 { 79 80 memset(s, 0, sizeof(*s)); 81 } 82 83 #if 0 84 static __inline void 85 /*ARGSUSED*/ 86 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused, 87 void * __restrict pspriv, const _GBK2KState * __restrict s) 88 { 89 90 memcpy(pspriv, (const void *)s, sizeof(*s)); 91 } 92 93 static __inline void 94 /*ARGSUSED*/ 95 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused, 96 _GBK2KState * __restrict s, const void * __restrict pspriv) 97 { 98 99 memcpy((void *)s, pspriv, sizeof(*s)); 100 } 101 #endif 102 103 static __inline bool 104 _mb_singlebyte(int c) 105 { 106 107 return ((c & 0xff) <= 0x7f); 108 } 109 110 static __inline bool 111 _mb_leadbyte(int c) 112 { 113 114 c &= 0xff; 115 return (0x81 <= c && c <= 0xfe); 116 } 117 118 static __inline bool 119 _mb_trailbyte(int c) 120 { 121 122 c &= 0xff; 123 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 124 } 125 126 static __inline bool 127 _mb_surrogate(int c) 128 { 129 130 c &= 0xff; 131 return (0x30 <= c && c <= 0x39); 132 } 133 134 static __inline int 135 _mb_count(wchar_t v) 136 { 137 uint32_t c; 138 139 c = (uint32_t)v; /* XXX */ 140 if (!(c & 0xffffff00)) 141 return (1); 142 if (!(c & 0xffff0000)) 143 return (2); 144 return (4); 145 } 146 147 #define _PSENC (psenc->ch[psenc->chlen - 1]) 148 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 149 150 static int 151 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 152 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 153 _GBK2KState * __restrict psenc, size_t * __restrict nresult) 154 { 155 const char *s0, *s1; 156 wchar_t wc; 157 int chlenbak, len; 158 159 s0 = *s; 160 161 if (s0 == NULL) { 162 /* _citrus_GBK2K_init_state(ei, psenc); */ 163 psenc->chlen = 0; 164 *nresult = 0; 165 return (0); 166 } 167 168 chlenbak = psenc->chlen; 169 170 switch (psenc->chlen) { 171 case 3: 172 if (!_mb_leadbyte (_PSENC)) 173 goto invalid; 174 /* FALLTHROUGH */ 175 case 2: 176 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 177 goto invalid; 178 /* FALLTHROUGH */ 179 case 1: 180 if (!_mb_leadbyte (_PSENC)) 181 goto invalid; 182 /* FALLTHOROUGH */ 183 case 0: 184 break; 185 default: 186 goto invalid; 187 } 188 189 for (;;) { 190 if (n-- < 1) 191 goto restart; 192 193 _PUSH_PSENC(*s0++); 194 195 switch (psenc->chlen) { 196 case 1: 197 if (_mb_singlebyte(_PSENC)) 198 goto convert; 199 if (_mb_leadbyte (_PSENC)) 200 continue; 201 goto ilseq; 202 case 2: 203 if (_mb_trailbyte (_PSENC)) 204 goto convert; 205 if (ei->mb_cur_max == 4 && 206 _mb_surrogate (_PSENC)) 207 continue; 208 goto ilseq; 209 case 3: 210 if (_mb_leadbyte (_PSENC)) 211 continue; 212 goto ilseq; 213 case 4: 214 if (_mb_surrogate (_PSENC)) 215 goto convert; 216 goto ilseq; 217 } 218 } 219 220 convert: 221 len = psenc->chlen; 222 s1 = &psenc->ch[0]; 223 wc = 0; 224 while (len-- > 0) 225 wc = (wc << 8) | (*s1++ & 0xff); 226 227 if (pwc != NULL) 228 *pwc = wc; 229 *s = s0; 230 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 231 /* _citrus_GBK2K_init_state(ei, psenc); */ 232 psenc->chlen = 0; 233 234 return (0); 235 236 restart: 237 *s = s0; 238 *nresult = (size_t)-2; 239 240 return (0); 241 242 invalid: 243 return (EINVAL); 244 245 ilseq: 246 *nresult = (size_t)-1; 247 return (EILSEQ); 248 } 249 250 static int 251 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 252 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc, 253 size_t * __restrict nresult) 254 { 255 size_t len; 256 int ret; 257 258 if (psenc->chlen != 0) { 259 ret = EINVAL; 260 goto err; 261 } 262 263 len = _mb_count(wc); 264 if (n < len) { 265 ret = E2BIG; 266 goto err; 267 } 268 269 switch (len) { 270 case 1: 271 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 272 ret = EILSEQ; 273 goto err; 274 } 275 break; 276 case 2: 277 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 278 !_mb_trailbyte (_PUSH_PSENC(wc))) { 279 ret = EILSEQ; 280 goto err; 281 } 282 break; 283 case 4: 284 if (ei->mb_cur_max != 4 || 285 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 286 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 287 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 288 !_mb_surrogate (_PUSH_PSENC(wc))) { 289 ret = EILSEQ; 290 goto err; 291 } 292 break; 293 } 294 295 memcpy(s, psenc->ch, psenc->chlen); 296 *nresult = psenc->chlen; 297 /* _citrus_GBK2K_init_state(ei, psenc); */ 298 psenc->chlen = 0; 299 300 return (0); 301 302 err: 303 *nresult = (size_t)-1; 304 return (ret); 305 } 306 307 static __inline int 308 /*ARGSUSED*/ 309 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused, 310 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 311 { 312 uint8_t ch, cl; 313 314 if ((uint32_t)wc < 0x80) { 315 /* ISO646 */ 316 *csid = 0; 317 *idx = (_index_t)wc; 318 } else if ((uint32_t)wc >= 0x10000) { 319 /* GBKUCS : XXX */ 320 *csid = 3; 321 *idx = (_index_t)wc; 322 } else { 323 ch = (uint8_t)(wc >> 8); 324 cl = (uint8_t)wc; 325 if (ch >= 0xA1 && cl >= 0xA1) { 326 /* EUC G1 */ 327 *csid = 1; 328 *idx = (_index_t)wc & 0x7F7FU; 329 } else { 330 /* extended area (0x8140-) */ 331 *csid = 2; 332 *idx = (_index_t)wc; 333 } 334 } 335 336 return (0); 337 } 338 339 static __inline int 340 /*ARGSUSED*/ 341 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 342 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 343 { 344 345 switch (csid) { 346 case 0: 347 /* ISO646 */ 348 *wc = (wchar_t)idx; 349 break; 350 case 1: 351 /* EUC G1 */ 352 *wc = (wchar_t)idx | 0x8080U; 353 break; 354 case 2: 355 /* extended area */ 356 *wc = (wchar_t)idx; 357 break; 358 case 3: 359 /* GBKUCS : XXX */ 360 if (ei->mb_cur_max != 4) 361 return (EINVAL); 362 *wc = (wchar_t)idx; 363 break; 364 default: 365 return (EILSEQ); 366 } 367 368 return (0); 369 } 370 371 static __inline int 372 /*ARGSUSED*/ 373 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused, 374 _GBK2KState * __restrict psenc, int * __restrict rstate) 375 { 376 377 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 378 _STDENC_SDGEN_INCOMPLETE_CHAR; 379 return (0); 380 } 381 382 static int 383 /*ARGSUSED*/ 384 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 385 const void * __restrict var, size_t lenvar) 386 { 387 const char *p; 388 389 p = var; 390 memset((void *)ei, 0, sizeof(*ei)); 391 ei->mb_cur_max = 4; 392 while (lenvar > 0) { 393 switch (_bcs_tolower(*p)) { 394 case '2': 395 MATCH("2byte", ei->mb_cur_max = 2); 396 break; 397 } 398 p++; 399 lenvar--; 400 } 401 402 return (0); 403 } 404 405 static void 406 /*ARGSUSED*/ 407 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused) 408 { 409 410 } 411 412 /* ---------------------------------------------------------------------- 413 * public interface for stdenc 414 */ 415 416 _CITRUS_STDENC_DECLS(GBK2K); 417 _CITRUS_STDENC_DEF_OPS(GBK2K); 418 419 #include "citrus_stdenc_template.h" 420