1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2004, 2006 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/types.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdint.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_module.h" 47 #include "citrus_stdenc.h" 48 #include "citrus_zw.h" 49 50 /* ---------------------------------------------------------------------- 51 * private stuffs used by templates 52 */ 53 54 typedef struct { 55 int dummy; 56 } _ZWEncodingInfo; 57 58 typedef enum { 59 NONE, AMBIGIOUS, ASCII, GB2312 60 } _ZWCharset; 61 62 typedef struct { 63 _ZWCharset charset; 64 int chlen; 65 char ch[4]; 66 } _ZWState; 67 68 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 69 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 70 71 #define _FUNCNAME(m) _citrus_ZW_##m 72 #define _ENCODING_INFO _ZWEncodingInfo 73 #define _ENCODING_STATE _ZWState 74 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 75 #define _ENCODING_IS_STATE_DEPENDENT 1 76 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 77 78 static __inline void 79 /*ARGSUSED*/ 80 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused, 81 _ZWState * __restrict psenc) 82 { 83 84 psenc->chlen = 0; 85 psenc->charset = NONE; 86 } 87 88 static __inline void 89 /*ARGSUSED*/ 90 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused, 91 void *__restrict pspriv, const _ZWState * __restrict psenc) 92 { 93 94 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 95 } 96 97 static __inline void 98 /*ARGSUSED*/ 99 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused, 100 _ZWState * __restrict psenc, const void * __restrict pspriv) 101 { 102 103 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 104 } 105 106 static int 107 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 108 wchar_t * __restrict pwc, char **__restrict s, size_t n, 109 _ZWState * __restrict psenc, size_t * __restrict nresult) 110 { 111 char *s0; 112 wchar_t wc; 113 int ch, len; 114 115 if (*s == NULL) { 116 _citrus_ZW_init_state(ei, psenc); 117 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 118 return (0); 119 } 120 s0 = *s; 121 len = 0; 122 123 #define STORE \ 124 do { \ 125 if (n-- < 1) { \ 126 *nresult = (size_t)-2; \ 127 *s = s0; \ 128 return (0); \ 129 } \ 130 ch = (unsigned char)*s0++; \ 131 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 132 goto ilseq; \ 133 psenc->ch[psenc->chlen++] = ch; \ 134 } while (/*CONSTCOND*/0) 135 136 loop: 137 switch (psenc->charset) { 138 case ASCII: 139 switch (psenc->chlen) { 140 case 0: 141 STORE; 142 switch (psenc->ch[0]) { 143 case '\0': case '\n': 144 psenc->charset = NONE; 145 } 146 /*FALLTHROUGH*/ 147 case 1: 148 break; 149 default: 150 return (EINVAL); 151 } 152 ch = (unsigned char)psenc->ch[0]; 153 if (ch > 0x7F) 154 goto ilseq; 155 wc = (wchar_t)ch; 156 psenc->chlen = 0; 157 break; 158 case NONE: 159 if (psenc->chlen != 0) 160 return (EINVAL); 161 STORE; 162 ch = (unsigned char)psenc->ch[0]; 163 if (ch != 'z') { 164 if (ch != '\n' && ch != '\0') 165 psenc->charset = ASCII; 166 wc = (wchar_t)ch; 167 psenc->chlen = 0; 168 break; 169 } 170 psenc->charset = AMBIGIOUS; 171 psenc->chlen = 0; 172 /* FALLTHROUGH */ 173 case AMBIGIOUS: 174 if (psenc->chlen != 0) 175 return (EINVAL); 176 STORE; 177 if (psenc->ch[0] != 'W') { 178 psenc->charset = ASCII; 179 wc = L'z'; 180 break; 181 } 182 psenc->charset = GB2312; 183 psenc->chlen = 0; 184 /* FALLTHROUGH */ 185 case GB2312: 186 switch (psenc->chlen) { 187 case 0: 188 STORE; 189 ch = (unsigned char)psenc->ch[0]; 190 if (ch == '\0') { 191 psenc->charset = NONE; 192 wc = (wchar_t)ch; 193 psenc->chlen = 0; 194 break; 195 } else if (ch == '\n') { 196 psenc->charset = NONE; 197 psenc->chlen = 0; 198 goto loop; 199 } 200 /*FALLTHROUGH*/ 201 case 1: 202 STORE; 203 if (psenc->ch[0] == ' ') { 204 ch = (unsigned char)psenc->ch[1]; 205 wc = (wchar_t)ch; 206 psenc->chlen = 0; 207 break; 208 } else if (psenc->ch[0] == '#') { 209 ch = (unsigned char)psenc->ch[1]; 210 if (ch == '\n') { 211 psenc->charset = NONE; 212 wc = (wchar_t)ch; 213 psenc->chlen = 0; 214 break; 215 } else if (ch == ' ') { 216 wc = (wchar_t)ch; 217 psenc->chlen = 0; 218 break; 219 } 220 } 221 ch = (unsigned char)psenc->ch[0]; 222 if (ch < 0x21 || ch > 0x7E) 223 goto ilseq; 224 wc = (wchar_t)(ch << 8); 225 ch = (unsigned char)psenc->ch[1]; 226 if (ch < 0x21 || ch > 0x7E) { 227 ilseq: 228 *nresult = (size_t)-1; 229 return (EILSEQ); 230 } 231 wc |= (wchar_t)ch; 232 psenc->chlen = 0; 233 break; 234 default: 235 return (EINVAL); 236 } 237 break; 238 default: 239 return (EINVAL); 240 } 241 if (pwc != NULL) 242 *pwc = wc; 243 244 *nresult = (size_t)(wc == 0 ? 0 : len); 245 *s = s0; 246 247 return (0); 248 } 249 250 static int 251 /*ARGSUSED*/ 252 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused, 253 char *__restrict s, size_t n, wchar_t wc, 254 _ZWState * __restrict psenc, size_t * __restrict nresult) 255 { 256 int ch; 257 258 if (psenc->chlen != 0) 259 return (EINVAL); 260 if ((uint32_t)wc <= 0x7F) { 261 ch = (unsigned char)wc; 262 switch (psenc->charset) { 263 case NONE: 264 if (ch == '\0' || ch == '\n') 265 psenc->ch[psenc->chlen++] = ch; 266 else { 267 if (n < 4) 268 return (E2BIG); 269 n -= 4; 270 psenc->ch[psenc->chlen++] = 'z'; 271 psenc->ch[psenc->chlen++] = 'W'; 272 psenc->ch[psenc->chlen++] = ' '; 273 psenc->ch[psenc->chlen++] = ch; 274 psenc->charset = GB2312; 275 } 276 break; 277 case GB2312: 278 if (n < 2) 279 return (E2BIG); 280 n -= 2; 281 if (ch == '\0') { 282 psenc->ch[psenc->chlen++] = '\n'; 283 psenc->ch[psenc->chlen++] = '\0'; 284 psenc->charset = NONE; 285 } else if (ch == '\n') { 286 psenc->ch[psenc->chlen++] = '#'; 287 psenc->ch[psenc->chlen++] = '\n'; 288 psenc->charset = NONE; 289 } else { 290 psenc->ch[psenc->chlen++] = ' '; 291 psenc->ch[psenc->chlen++] = ch; 292 } 293 break; 294 default: 295 return (EINVAL); 296 } 297 } else if ((uint32_t)wc <= 0x7E7E) { 298 switch (psenc->charset) { 299 case NONE: 300 if (n < 2) 301 return (E2BIG); 302 n -= 2; 303 psenc->ch[psenc->chlen++] = 'z'; 304 psenc->ch[psenc->chlen++] = 'W'; 305 psenc->charset = GB2312; 306 /* FALLTHROUGH*/ 307 case GB2312: 308 if (n < 2) 309 return (E2BIG); 310 n -= 2; 311 ch = (wc >> 8) & 0xFF; 312 if (ch < 0x21 || ch > 0x7E) 313 goto ilseq; 314 psenc->ch[psenc->chlen++] = ch; 315 ch = wc & 0xFF; 316 if (ch < 0x21 || ch > 0x7E) 317 goto ilseq; 318 psenc->ch[psenc->chlen++] = ch; 319 break; 320 default: 321 return (EINVAL); 322 } 323 } else { 324 ilseq: 325 *nresult = (size_t)-1; 326 return (EILSEQ); 327 } 328 memcpy(s, psenc->ch, psenc->chlen); 329 *nresult = psenc->chlen; 330 psenc->chlen = 0; 331 332 return (0); 333 } 334 335 static int 336 /*ARGSUSED*/ 337 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused, 338 char * __restrict s, size_t n, _ZWState * __restrict psenc, 339 size_t * __restrict nresult) 340 { 341 342 if (psenc->chlen != 0) 343 return (EINVAL); 344 switch (psenc->charset) { 345 case GB2312: 346 if (n-- < 1) 347 return (E2BIG); 348 psenc->ch[psenc->chlen++] = '\n'; 349 psenc->charset = NONE; 350 /*FALLTHROUGH*/ 351 case NONE: 352 *nresult = psenc->chlen; 353 if (psenc->chlen > 0) { 354 memcpy(s, psenc->ch, psenc->chlen); 355 psenc->chlen = 0; 356 } 357 break; 358 default: 359 return (EINVAL); 360 } 361 362 return (0); 363 } 364 365 static __inline int 366 /*ARGSUSED*/ 367 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused, 368 _ZWState * __restrict psenc, int * __restrict rstate) 369 { 370 371 switch (psenc->charset) { 372 case NONE: 373 if (psenc->chlen != 0) 374 return (EINVAL); 375 *rstate = _STDENC_SDGEN_INITIAL; 376 break; 377 case AMBIGIOUS: 378 if (psenc->chlen != 0) 379 return (EINVAL); 380 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 381 break; 382 case ASCII: 383 case GB2312: 384 switch (psenc->chlen) { 385 case 0: 386 *rstate = _STDENC_SDGEN_STABLE; 387 break; 388 case 1: 389 *rstate = (psenc->ch[0] == '#') ? 390 _STDENC_SDGEN_INCOMPLETE_SHIFT : 391 _STDENC_SDGEN_INCOMPLETE_CHAR; 392 break; 393 default: 394 return (EINVAL); 395 } 396 break; 397 default: 398 return (EINVAL); 399 } 400 return (0); 401 } 402 403 static __inline int 404 /*ARGSUSED*/ 405 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused, 406 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 407 { 408 409 *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1; 410 *idx = (_index_t)wc; 411 412 return (0); 413 } 414 415 static __inline int 416 /*ARGSUSED*/ 417 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused, 418 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 419 { 420 421 switch (csid) { 422 case 0: case 1: 423 break; 424 default: 425 return (EINVAL); 426 } 427 *wc = (wchar_t)idx; 428 429 return (0); 430 } 431 432 static void 433 /*ARGSUSED*/ 434 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused) 435 { 436 437 } 438 439 static int 440 /*ARGSUSED*/ 441 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused, 442 const void *__restrict var __unused, size_t lenvar __unused) 443 { 444 445 return (0); 446 } 447 448 /* ---------------------------------------------------------------------- 449 * public interface for stdenc 450 */ 451 452 _CITRUS_STDENC_DECLS(ZW); 453 _CITRUS_STDENC_DEF_OPS(ZW); 454 455 #include "citrus_stdenc_template.h" 456