1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2004, 2006 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/types.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stddef.h> 39 #include <stdio.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_zw.h" 50 51 /* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55 typedef struct { 56 int dummy; 57 } _ZWEncodingInfo; 58 59 typedef enum { 60 NONE, AMBIGIOUS, ASCII, GB2312 61 } _ZWCharset; 62 63 typedef struct { 64 _ZWCharset charset; 65 int chlen; 66 char ch[4]; 67 } _ZWState; 68 69 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 70 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 71 72 #define _FUNCNAME(m) _citrus_ZW_##m 73 #define _ENCODING_INFO _ZWEncodingInfo 74 #define _ENCODING_STATE _ZWState 75 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 76 #define _ENCODING_IS_STATE_DEPENDENT 1 77 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 78 79 static __inline void 80 /*ARGSUSED*/ 81 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused, 82 _ZWState * __restrict psenc) 83 { 84 85 psenc->chlen = 0; 86 psenc->charset = NONE; 87 } 88 89 #if 0 90 static __inline void 91 /*ARGSUSED*/ 92 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused, 93 void *__restrict pspriv, const _ZWState * __restrict psenc) 94 { 95 96 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 97 } 98 99 static __inline void 100 /*ARGSUSED*/ 101 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused, 102 _ZWState * __restrict psenc, const void * __restrict pspriv) 103 { 104 105 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 106 } 107 #endif 108 109 static int 110 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 111 wchar_t * __restrict pwc, char **__restrict s, size_t n, 112 _ZWState * __restrict psenc, size_t * __restrict nresult) 113 { 114 char *s0; 115 wchar_t wc; 116 int ch, len; 117 118 if (*s == NULL) { 119 _citrus_ZW_init_state(ei, psenc); 120 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 121 return (0); 122 } 123 s0 = *s; 124 len = 0; 125 126 #define STORE \ 127 do { \ 128 if (n-- < 1) { \ 129 *nresult = (size_t)-2; \ 130 *s = s0; \ 131 return (0); \ 132 } \ 133 ch = (unsigned char)*s0++; \ 134 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 135 goto ilseq; \ 136 psenc->ch[psenc->chlen++] = ch; \ 137 } while (/*CONSTCOND*/0) 138 139 loop: 140 switch (psenc->charset) { 141 case ASCII: 142 switch (psenc->chlen) { 143 case 0: 144 STORE; 145 switch (psenc->ch[0]) { 146 case '\0': case '\n': 147 psenc->charset = NONE; 148 } 149 /*FALLTHROUGH*/ 150 case 1: 151 break; 152 default: 153 return (EINVAL); 154 } 155 ch = (unsigned char)psenc->ch[0]; 156 if (ch > 0x7F) 157 goto ilseq; 158 wc = (wchar_t)ch; 159 psenc->chlen = 0; 160 break; 161 case NONE: 162 if (psenc->chlen != 0) 163 return (EINVAL); 164 STORE; 165 ch = (unsigned char)psenc->ch[0]; 166 if (ch != 'z') { 167 if (ch != '\n' && ch != '\0') 168 psenc->charset = ASCII; 169 wc = (wchar_t)ch; 170 psenc->chlen = 0; 171 break; 172 } 173 psenc->charset = AMBIGIOUS; 174 psenc->chlen = 0; 175 /* FALLTHROUGH */ 176 case AMBIGIOUS: 177 if (psenc->chlen != 0) 178 return (EINVAL); 179 STORE; 180 if (psenc->ch[0] != 'W') { 181 psenc->charset = ASCII; 182 wc = L'z'; 183 break; 184 } 185 psenc->charset = GB2312; 186 psenc->chlen = 0; 187 /* FALLTHROUGH */ 188 case GB2312: 189 switch (psenc->chlen) { 190 case 0: 191 STORE; 192 ch = (unsigned char)psenc->ch[0]; 193 if (ch == '\0') { 194 psenc->charset = NONE; 195 wc = (wchar_t)ch; 196 psenc->chlen = 0; 197 break; 198 } else if (ch == '\n') { 199 psenc->charset = NONE; 200 psenc->chlen = 0; 201 goto loop; 202 } 203 /*FALLTHROUGH*/ 204 case 1: 205 STORE; 206 if (psenc->ch[0] == ' ') { 207 ch = (unsigned char)psenc->ch[1]; 208 wc = (wchar_t)ch; 209 psenc->chlen = 0; 210 break; 211 } else if (psenc->ch[0] == '#') { 212 ch = (unsigned char)psenc->ch[1]; 213 if (ch == '\n') { 214 psenc->charset = NONE; 215 wc = (wchar_t)ch; 216 psenc->chlen = 0; 217 break; 218 } else if (ch == ' ') { 219 wc = (wchar_t)ch; 220 psenc->chlen = 0; 221 break; 222 } 223 } 224 ch = (unsigned char)psenc->ch[0]; 225 if (ch < 0x21 || ch > 0x7E) 226 goto ilseq; 227 wc = (wchar_t)(ch << 8); 228 ch = (unsigned char)psenc->ch[1]; 229 if (ch < 0x21 || ch > 0x7E) { 230 ilseq: 231 *nresult = (size_t)-1; 232 return (EILSEQ); 233 } 234 wc |= (wchar_t)ch; 235 psenc->chlen = 0; 236 break; 237 default: 238 return (EINVAL); 239 } 240 break; 241 default: 242 return (EINVAL); 243 } 244 if (pwc != NULL) 245 *pwc = wc; 246 247 *nresult = (size_t)(wc == 0 ? 0 : len); 248 *s = s0; 249 250 return (0); 251 } 252 253 static int 254 /*ARGSUSED*/ 255 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused, 256 char *__restrict s, size_t n, wchar_t wc, 257 _ZWState * __restrict psenc, size_t * __restrict nresult) 258 { 259 int ch; 260 261 if (psenc->chlen != 0) 262 return (EINVAL); 263 if ((uint32_t)wc <= 0x7F) { 264 ch = (unsigned char)wc; 265 switch (psenc->charset) { 266 case NONE: 267 if (ch == '\0' || ch == '\n') 268 psenc->ch[psenc->chlen++] = ch; 269 else { 270 if (n < 4) 271 return (E2BIG); 272 n -= 4; 273 psenc->ch[psenc->chlen++] = 'z'; 274 psenc->ch[psenc->chlen++] = 'W'; 275 psenc->ch[psenc->chlen++] = ' '; 276 psenc->ch[psenc->chlen++] = ch; 277 psenc->charset = GB2312; 278 } 279 break; 280 case GB2312: 281 if (n < 2) 282 return (E2BIG); 283 n -= 2; 284 if (ch == '\0') { 285 psenc->ch[psenc->chlen++] = '\n'; 286 psenc->ch[psenc->chlen++] = '\0'; 287 psenc->charset = NONE; 288 } else if (ch == '\n') { 289 psenc->ch[psenc->chlen++] = '#'; 290 psenc->ch[psenc->chlen++] = '\n'; 291 psenc->charset = NONE; 292 } else { 293 psenc->ch[psenc->chlen++] = ' '; 294 psenc->ch[psenc->chlen++] = ch; 295 } 296 break; 297 default: 298 return (EINVAL); 299 } 300 } else if ((uint32_t)wc <= 0x7E7E) { 301 switch (psenc->charset) { 302 case NONE: 303 if (n < 2) 304 return (E2BIG); 305 n -= 2; 306 psenc->ch[psenc->chlen++] = 'z'; 307 psenc->ch[psenc->chlen++] = 'W'; 308 psenc->charset = GB2312; 309 /* FALLTHROUGH*/ 310 case GB2312: 311 if (n < 2) 312 return (E2BIG); 313 n -= 2; 314 ch = (wc >> 8) & 0xFF; 315 if (ch < 0x21 || ch > 0x7E) 316 goto ilseq; 317 psenc->ch[psenc->chlen++] = ch; 318 ch = wc & 0xFF; 319 if (ch < 0x21 || ch > 0x7E) 320 goto ilseq; 321 psenc->ch[psenc->chlen++] = ch; 322 break; 323 default: 324 return (EINVAL); 325 } 326 } else { 327 ilseq: 328 *nresult = (size_t)-1; 329 return (EILSEQ); 330 } 331 memcpy(s, psenc->ch, psenc->chlen); 332 *nresult = psenc->chlen; 333 psenc->chlen = 0; 334 335 return (0); 336 } 337 338 static int 339 /*ARGSUSED*/ 340 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused, 341 char * __restrict s, size_t n, _ZWState * __restrict psenc, 342 size_t * __restrict nresult) 343 { 344 345 if (psenc->chlen != 0) 346 return (EINVAL); 347 switch (psenc->charset) { 348 case GB2312: 349 if (n-- < 1) 350 return (E2BIG); 351 psenc->ch[psenc->chlen++] = '\n'; 352 psenc->charset = NONE; 353 /*FALLTHROUGH*/ 354 case NONE: 355 *nresult = psenc->chlen; 356 if (psenc->chlen > 0) { 357 memcpy(s, psenc->ch, psenc->chlen); 358 psenc->chlen = 0; 359 } 360 break; 361 default: 362 return (EINVAL); 363 } 364 365 return (0); 366 } 367 368 static __inline int 369 /*ARGSUSED*/ 370 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused, 371 _ZWState * __restrict psenc, int * __restrict rstate) 372 { 373 374 switch (psenc->charset) { 375 case NONE: 376 if (psenc->chlen != 0) 377 return (EINVAL); 378 *rstate = _STDENC_SDGEN_INITIAL; 379 break; 380 case AMBIGIOUS: 381 if (psenc->chlen != 0) 382 return (EINVAL); 383 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 384 break; 385 case ASCII: 386 case GB2312: 387 switch (psenc->chlen) { 388 case 0: 389 *rstate = _STDENC_SDGEN_STABLE; 390 break; 391 case 1: 392 *rstate = (psenc->ch[0] == '#') ? 393 _STDENC_SDGEN_INCOMPLETE_SHIFT : 394 _STDENC_SDGEN_INCOMPLETE_CHAR; 395 break; 396 default: 397 return (EINVAL); 398 } 399 break; 400 default: 401 return (EINVAL); 402 } 403 return (0); 404 } 405 406 static __inline int 407 /*ARGSUSED*/ 408 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused, 409 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 410 { 411 412 *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1; 413 *idx = (_index_t)wc; 414 415 return (0); 416 } 417 418 static __inline int 419 /*ARGSUSED*/ 420 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused, 421 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 422 { 423 424 switch (csid) { 425 case 0: case 1: 426 break; 427 default: 428 return (EINVAL); 429 } 430 *wc = (wchar_t)idx; 431 432 return (0); 433 } 434 435 static void 436 /*ARGSUSED*/ 437 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused) 438 { 439 440 } 441 442 static int 443 /*ARGSUSED*/ 444 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused, 445 const void *__restrict var __unused, size_t lenvar __unused) 446 { 447 448 return (0); 449 } 450 451 /* ---------------------------------------------------------------------- 452 * public interface for stdenc 453 */ 454 455 _CITRUS_STDENC_DECLS(ZW); 456 _CITRUS_STDENC_DEF_OPS(ZW); 457 458 #include "citrus_stdenc_template.h" 459