1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2004, 2006 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 #include <sys/types.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdint.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_module.h" 47 #include "citrus_stdenc.h" 48 #include "citrus_zw.h" 49 50 /* ---------------------------------------------------------------------- 51 * private stuffs used by templates 52 */ 53 54 typedef struct { 55 int dummy; 56 } _ZWEncodingInfo; 57 58 typedef enum { 59 NONE, AMBIGIOUS, ASCII, GB2312 60 } _ZWCharset; 61 62 typedef struct { 63 _ZWCharset charset; 64 int chlen; 65 char ch[4]; 66 } _ZWState; 67 68 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 69 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 70 71 #define _FUNCNAME(m) _citrus_ZW_##m 72 #define _ENCODING_INFO _ZWEncodingInfo 73 #define _ENCODING_STATE _ZWState 74 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 75 #define _ENCODING_IS_STATE_DEPENDENT 1 76 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 77 78 static __inline void 79 /*ARGSUSED*/ 80 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused, 81 _ZWState * __restrict psenc) 82 { 83 84 psenc->chlen = 0; 85 psenc->charset = NONE; 86 } 87 88 #if 0 89 static __inline void 90 /*ARGSUSED*/ 91 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused, 92 void *__restrict pspriv, const _ZWState * __restrict psenc) 93 { 94 95 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 96 } 97 98 static __inline void 99 /*ARGSUSED*/ 100 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused, 101 _ZWState * __restrict psenc, const void * __restrict pspriv) 102 { 103 104 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 105 } 106 #endif 107 108 static int 109 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 110 wchar_t * __restrict pwc, char **__restrict s, size_t n, 111 _ZWState * __restrict psenc, size_t * __restrict nresult) 112 { 113 char *s0; 114 wchar_t wc; 115 int ch, len; 116 117 if (*s == NULL) { 118 _citrus_ZW_init_state(ei, psenc); 119 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 120 return (0); 121 } 122 s0 = *s; 123 len = 0; 124 125 #define STORE \ 126 do { \ 127 if (n-- < 1) { \ 128 *nresult = (size_t)-2; \ 129 *s = s0; \ 130 return (0); \ 131 } \ 132 ch = (unsigned char)*s0++; \ 133 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 134 goto ilseq; \ 135 psenc->ch[psenc->chlen++] = ch; \ 136 } while (/*CONSTCOND*/0) 137 138 loop: 139 switch (psenc->charset) { 140 case ASCII: 141 switch (psenc->chlen) { 142 case 0: 143 STORE; 144 switch (psenc->ch[0]) { 145 case '\0': case '\n': 146 psenc->charset = NONE; 147 } 148 /*FALLTHROUGH*/ 149 case 1: 150 break; 151 default: 152 return (EINVAL); 153 } 154 ch = (unsigned char)psenc->ch[0]; 155 if (ch > 0x7F) 156 goto ilseq; 157 wc = (wchar_t)ch; 158 psenc->chlen = 0; 159 break; 160 case NONE: 161 if (psenc->chlen != 0) 162 return (EINVAL); 163 STORE; 164 ch = (unsigned char)psenc->ch[0]; 165 if (ch != 'z') { 166 if (ch != '\n' && ch != '\0') 167 psenc->charset = ASCII; 168 wc = (wchar_t)ch; 169 psenc->chlen = 0; 170 break; 171 } 172 psenc->charset = AMBIGIOUS; 173 psenc->chlen = 0; 174 /* FALLTHROUGH */ 175 case AMBIGIOUS: 176 if (psenc->chlen != 0) 177 return (EINVAL); 178 STORE; 179 if (psenc->ch[0] != 'W') { 180 psenc->charset = ASCII; 181 wc = L'z'; 182 break; 183 } 184 psenc->charset = GB2312; 185 psenc->chlen = 0; 186 /* FALLTHROUGH */ 187 case GB2312: 188 switch (psenc->chlen) { 189 case 0: 190 STORE; 191 ch = (unsigned char)psenc->ch[0]; 192 if (ch == '\0') { 193 psenc->charset = NONE; 194 wc = (wchar_t)ch; 195 psenc->chlen = 0; 196 break; 197 } else if (ch == '\n') { 198 psenc->charset = NONE; 199 psenc->chlen = 0; 200 goto loop; 201 } 202 /*FALLTHROUGH*/ 203 case 1: 204 STORE; 205 if (psenc->ch[0] == ' ') { 206 ch = (unsigned char)psenc->ch[1]; 207 wc = (wchar_t)ch; 208 psenc->chlen = 0; 209 break; 210 } else if (psenc->ch[0] == '#') { 211 ch = (unsigned char)psenc->ch[1]; 212 if (ch == '\n') { 213 psenc->charset = NONE; 214 wc = (wchar_t)ch; 215 psenc->chlen = 0; 216 break; 217 } else if (ch == ' ') { 218 wc = (wchar_t)ch; 219 psenc->chlen = 0; 220 break; 221 } 222 } 223 ch = (unsigned char)psenc->ch[0]; 224 if (ch < 0x21 || ch > 0x7E) 225 goto ilseq; 226 wc = (wchar_t)(ch << 8); 227 ch = (unsigned char)psenc->ch[1]; 228 if (ch < 0x21 || ch > 0x7E) { 229 ilseq: 230 *nresult = (size_t)-1; 231 return (EILSEQ); 232 } 233 wc |= (wchar_t)ch; 234 psenc->chlen = 0; 235 break; 236 default: 237 return (EINVAL); 238 } 239 break; 240 default: 241 return (EINVAL); 242 } 243 if (pwc != NULL) 244 *pwc = wc; 245 246 *nresult = (size_t)(wc == 0 ? 0 : len); 247 *s = s0; 248 249 return (0); 250 } 251 252 static int 253 /*ARGSUSED*/ 254 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused, 255 char *__restrict s, size_t n, wchar_t wc, 256 _ZWState * __restrict psenc, size_t * __restrict nresult) 257 { 258 int ch; 259 260 if (psenc->chlen != 0) 261 return (EINVAL); 262 if ((uint32_t)wc <= 0x7F) { 263 ch = (unsigned char)wc; 264 switch (psenc->charset) { 265 case NONE: 266 if (ch == '\0' || ch == '\n') 267 psenc->ch[psenc->chlen++] = ch; 268 else { 269 if (n < 4) 270 return (E2BIG); 271 n -= 4; 272 psenc->ch[psenc->chlen++] = 'z'; 273 psenc->ch[psenc->chlen++] = 'W'; 274 psenc->ch[psenc->chlen++] = ' '; 275 psenc->ch[psenc->chlen++] = ch; 276 psenc->charset = GB2312; 277 } 278 break; 279 case GB2312: 280 if (n < 2) 281 return (E2BIG); 282 n -= 2; 283 if (ch == '\0') { 284 psenc->ch[psenc->chlen++] = '\n'; 285 psenc->ch[psenc->chlen++] = '\0'; 286 psenc->charset = NONE; 287 } else if (ch == '\n') { 288 psenc->ch[psenc->chlen++] = '#'; 289 psenc->ch[psenc->chlen++] = '\n'; 290 psenc->charset = NONE; 291 } else { 292 psenc->ch[psenc->chlen++] = ' '; 293 psenc->ch[psenc->chlen++] = ch; 294 } 295 break; 296 default: 297 return (EINVAL); 298 } 299 } else if ((uint32_t)wc <= 0x7E7E) { 300 switch (psenc->charset) { 301 case NONE: 302 if (n < 2) 303 return (E2BIG); 304 n -= 2; 305 psenc->ch[psenc->chlen++] = 'z'; 306 psenc->ch[psenc->chlen++] = 'W'; 307 psenc->charset = GB2312; 308 /* FALLTHROUGH*/ 309 case GB2312: 310 if (n < 2) 311 return (E2BIG); 312 n -= 2; 313 ch = (wc >> 8) & 0xFF; 314 if (ch < 0x21 || ch > 0x7E) 315 goto ilseq; 316 psenc->ch[psenc->chlen++] = ch; 317 ch = wc & 0xFF; 318 if (ch < 0x21 || ch > 0x7E) 319 goto ilseq; 320 psenc->ch[psenc->chlen++] = ch; 321 break; 322 default: 323 return (EINVAL); 324 } 325 } else { 326 ilseq: 327 *nresult = (size_t)-1; 328 return (EILSEQ); 329 } 330 memcpy(s, psenc->ch, psenc->chlen); 331 *nresult = psenc->chlen; 332 psenc->chlen = 0; 333 334 return (0); 335 } 336 337 static int 338 /*ARGSUSED*/ 339 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused, 340 char * __restrict s, size_t n, _ZWState * __restrict psenc, 341 size_t * __restrict nresult) 342 { 343 344 if (psenc->chlen != 0) 345 return (EINVAL); 346 switch (psenc->charset) { 347 case GB2312: 348 if (n-- < 1) 349 return (E2BIG); 350 psenc->ch[psenc->chlen++] = '\n'; 351 psenc->charset = NONE; 352 /*FALLTHROUGH*/ 353 case NONE: 354 *nresult = psenc->chlen; 355 if (psenc->chlen > 0) { 356 memcpy(s, psenc->ch, psenc->chlen); 357 psenc->chlen = 0; 358 } 359 break; 360 default: 361 return (EINVAL); 362 } 363 364 return (0); 365 } 366 367 static __inline int 368 /*ARGSUSED*/ 369 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused, 370 _ZWState * __restrict psenc, int * __restrict rstate) 371 { 372 373 switch (psenc->charset) { 374 case NONE: 375 if (psenc->chlen != 0) 376 return (EINVAL); 377 *rstate = _STDENC_SDGEN_INITIAL; 378 break; 379 case AMBIGIOUS: 380 if (psenc->chlen != 0) 381 return (EINVAL); 382 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 383 break; 384 case ASCII: 385 case GB2312: 386 switch (psenc->chlen) { 387 case 0: 388 *rstate = _STDENC_SDGEN_STABLE; 389 break; 390 case 1: 391 *rstate = (psenc->ch[0] == '#') ? 392 _STDENC_SDGEN_INCOMPLETE_SHIFT : 393 _STDENC_SDGEN_INCOMPLETE_CHAR; 394 break; 395 default: 396 return (EINVAL); 397 } 398 break; 399 default: 400 return (EINVAL); 401 } 402 return (0); 403 } 404 405 static __inline int 406 /*ARGSUSED*/ 407 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused, 408 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 409 { 410 411 *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1; 412 *idx = (_index_t)wc; 413 414 return (0); 415 } 416 417 static __inline int 418 /*ARGSUSED*/ 419 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused, 420 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 421 { 422 423 switch (csid) { 424 case 0: case 1: 425 break; 426 default: 427 return (EINVAL); 428 } 429 *wc = (wchar_t)idx; 430 431 return (0); 432 } 433 434 static void 435 /*ARGSUSED*/ 436 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused) 437 { 438 439 } 440 441 static int 442 /*ARGSUSED*/ 443 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused, 444 const void *__restrict var __unused, size_t lenvar __unused) 445 { 446 447 return (0); 448 } 449 450 /* ---------------------------------------------------------------------- 451 * public interface for stdenc 452 */ 453 454 _CITRUS_STDENC_DECLS(ZW); 455 _CITRUS_STDENC_DEF_OPS(ZW); 456 457 #include "citrus_stdenc_template.h" 458