1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2003 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/types.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stdbool.h> 39 #include <stddef.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_bcs.h" 48 #include "citrus_module.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_gbk2k.h" 51 52 53 /* ---------------------------------------------------------------------- 54 * private stuffs used by templates 55 */ 56 57 typedef struct _GBK2KState { 58 int chlen; 59 char ch[4]; 60 } _GBK2KState; 61 62 typedef struct { 63 int mb_cur_max; 64 } _GBK2KEncodingInfo; 65 66 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 67 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 68 69 #define _FUNCNAME(m) _citrus_GBK2K_##m 70 #define _ENCODING_INFO _GBK2KEncodingInfo 71 #define _ENCODING_STATE _GBK2KState 72 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 73 #define _ENCODING_IS_STATE_DEPENDENT 0 74 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 75 76 static __inline void 77 /*ARGSUSED*/ 78 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused, 79 _GBK2KState * __restrict s) 80 { 81 82 memset(s, 0, sizeof(*s)); 83 } 84 85 #if 0 86 static __inline void 87 /*ARGSUSED*/ 88 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused, 89 void * __restrict pspriv, const _GBK2KState * __restrict s) 90 { 91 92 memcpy(pspriv, (const void *)s, sizeof(*s)); 93 } 94 95 static __inline void 96 /*ARGSUSED*/ 97 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused, 98 _GBK2KState * __restrict s, const void * __restrict pspriv) 99 { 100 101 memcpy((void *)s, pspriv, sizeof(*s)); 102 } 103 #endif 104 105 static __inline bool 106 _mb_singlebyte(int c) 107 { 108 109 return ((c & 0xff) <= 0x7f); 110 } 111 112 static __inline bool 113 _mb_leadbyte(int c) 114 { 115 116 c &= 0xff; 117 return (0x81 <= c && c <= 0xfe); 118 } 119 120 static __inline bool 121 _mb_trailbyte(int c) 122 { 123 124 c &= 0xff; 125 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 126 } 127 128 static __inline bool 129 _mb_surrogate(int c) 130 { 131 132 c &= 0xff; 133 return (0x30 <= c && c <= 0x39); 134 } 135 136 static __inline int 137 _mb_count(wchar_t v) 138 { 139 uint32_t c; 140 141 c = (uint32_t)v; /* XXX */ 142 if (!(c & 0xffffff00)) 143 return (1); 144 if (!(c & 0xffff0000)) 145 return (2); 146 return (4); 147 } 148 149 #define _PSENC (psenc->ch[psenc->chlen - 1]) 150 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 151 152 static int 153 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 154 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 155 _GBK2KState * __restrict psenc, size_t * __restrict nresult) 156 { 157 char *s0, *s1; 158 wchar_t wc; 159 int chlenbak, len; 160 161 s0 = *s; 162 163 if (s0 == NULL) { 164 /* _citrus_GBK2K_init_state(ei, psenc); */ 165 psenc->chlen = 0; 166 *nresult = 0; 167 return (0); 168 } 169 170 chlenbak = psenc->chlen; 171 172 switch (psenc->chlen) { 173 case 3: 174 if (!_mb_leadbyte (_PSENC)) 175 goto invalid; 176 /* FALLTHROUGH */ 177 case 2: 178 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 179 goto invalid; 180 /* FALLTHROUGH */ 181 case 1: 182 if (!_mb_leadbyte (_PSENC)) 183 goto invalid; 184 /* FALLTHOROUGH */ 185 case 0: 186 break; 187 default: 188 goto invalid; 189 } 190 191 for (;;) { 192 if (n-- < 1) 193 goto restart; 194 195 _PUSH_PSENC(*s0++); 196 197 switch (psenc->chlen) { 198 case 1: 199 if (_mb_singlebyte(_PSENC)) 200 goto convert; 201 if (_mb_leadbyte (_PSENC)) 202 continue; 203 goto ilseq; 204 case 2: 205 if (_mb_trailbyte (_PSENC)) 206 goto convert; 207 if (ei->mb_cur_max == 4 && 208 _mb_surrogate (_PSENC)) 209 continue; 210 goto ilseq; 211 case 3: 212 if (_mb_leadbyte (_PSENC)) 213 continue; 214 goto ilseq; 215 case 4: 216 if (_mb_surrogate (_PSENC)) 217 goto convert; 218 goto ilseq; 219 } 220 } 221 222 convert: 223 len = psenc->chlen; 224 s1 = &psenc->ch[0]; 225 wc = 0; 226 while (len-- > 0) 227 wc = (wc << 8) | (*s1++ & 0xff); 228 229 if (pwc != NULL) 230 *pwc = wc; 231 *s = s0; 232 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 233 /* _citrus_GBK2K_init_state(ei, psenc); */ 234 psenc->chlen = 0; 235 236 return (0); 237 238 restart: 239 *s = s0; 240 *nresult = (size_t)-2; 241 242 return (0); 243 244 invalid: 245 return (EINVAL); 246 247 ilseq: 248 *nresult = (size_t)-1; 249 return (EILSEQ); 250 } 251 252 static int 253 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 254 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc, 255 size_t * __restrict nresult) 256 { 257 size_t len; 258 int ret; 259 260 if (psenc->chlen != 0) { 261 ret = EINVAL; 262 goto err; 263 } 264 265 len = _mb_count(wc); 266 if (n < len) { 267 ret = E2BIG; 268 goto err; 269 } 270 271 switch (len) { 272 case 1: 273 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 274 ret = EILSEQ; 275 goto err; 276 } 277 break; 278 case 2: 279 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 280 !_mb_trailbyte (_PUSH_PSENC(wc))) { 281 ret = EILSEQ; 282 goto err; 283 } 284 break; 285 case 4: 286 if (ei->mb_cur_max != 4 || 287 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 288 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 289 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 290 !_mb_surrogate (_PUSH_PSENC(wc))) { 291 ret = EILSEQ; 292 goto err; 293 } 294 break; 295 } 296 297 memcpy(s, psenc->ch, psenc->chlen); 298 *nresult = psenc->chlen; 299 /* _citrus_GBK2K_init_state(ei, psenc); */ 300 psenc->chlen = 0; 301 302 return (0); 303 304 err: 305 *nresult = (size_t)-1; 306 return (ret); 307 } 308 309 static __inline int 310 /*ARGSUSED*/ 311 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused, 312 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 313 { 314 uint8_t ch, cl; 315 316 if ((uint32_t)wc < 0x80) { 317 /* ISO646 */ 318 *csid = 0; 319 *idx = (_index_t)wc; 320 } else if ((uint32_t)wc >= 0x10000) { 321 /* GBKUCS : XXX */ 322 *csid = 3; 323 *idx = (_index_t)wc; 324 } else { 325 ch = (uint8_t)(wc >> 8); 326 cl = (uint8_t)wc; 327 if (ch >= 0xA1 && cl >= 0xA1) { 328 /* EUC G1 */ 329 *csid = 1; 330 *idx = (_index_t)wc & 0x7F7FU; 331 } else { 332 /* extended area (0x8140-) */ 333 *csid = 2; 334 *idx = (_index_t)wc; 335 } 336 } 337 338 return (0); 339 } 340 341 static __inline int 342 /*ARGSUSED*/ 343 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 344 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 345 { 346 347 switch (csid) { 348 case 0: 349 /* ISO646 */ 350 *wc = (wchar_t)idx; 351 break; 352 case 1: 353 /* EUC G1 */ 354 *wc = (wchar_t)idx | 0x8080U; 355 break; 356 case 2: 357 /* extended area */ 358 *wc = (wchar_t)idx; 359 break; 360 case 3: 361 /* GBKUCS : XXX */ 362 if (ei->mb_cur_max != 4) 363 return (EINVAL); 364 *wc = (wchar_t)idx; 365 break; 366 default: 367 return (EILSEQ); 368 } 369 370 return (0); 371 } 372 373 static __inline int 374 /*ARGSUSED*/ 375 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused, 376 _GBK2KState * __restrict psenc, int * __restrict rstate) 377 { 378 379 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 380 _STDENC_SDGEN_INCOMPLETE_CHAR; 381 return (0); 382 } 383 384 static int 385 /*ARGSUSED*/ 386 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 387 const void * __restrict var, size_t lenvar) 388 { 389 const char *p; 390 391 p = var; 392 memset((void *)ei, 0, sizeof(*ei)); 393 ei->mb_cur_max = 4; 394 while (lenvar > 0) { 395 switch (_bcs_tolower(*p)) { 396 case '2': 397 MATCH("2byte", ei->mb_cur_max = 2); 398 break; 399 } 400 p++; 401 lenvar--; 402 } 403 404 return (0); 405 } 406 407 static void 408 /*ARGSUSED*/ 409 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused) 410 { 411 412 } 413 414 /* ---------------------------------------------------------------------- 415 * public interface for stdenc 416 */ 417 418 _CITRUS_STDENC_DECLS(GBK2K); 419 _CITRUS_STDENC_DEF_OPS(GBK2K); 420 421 #include "citrus_stdenc_template.h" 422