1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2004, 2006 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 #include <sys/types.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stddef.h> 40 #include <stdio.h> 41 #include <stdint.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <wchar.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_module.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_zw.h" 51 52 /* ---------------------------------------------------------------------- 53 * private stuffs used by templates 54 */ 55 56 typedef struct { 57 int dummy; 58 } _ZWEncodingInfo; 59 60 typedef enum { 61 NONE, AMBIGIOUS, ASCII, GB2312 62 } _ZWCharset; 63 64 typedef struct { 65 _ZWCharset charset; 66 int chlen; 67 char ch[4]; 68 } _ZWState; 69 70 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 71 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 72 73 #define _FUNCNAME(m) _citrus_ZW_##m 74 #define _ENCODING_INFO _ZWEncodingInfo 75 #define _ENCODING_STATE _ZWState 76 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 77 #define _ENCODING_IS_STATE_DEPENDENT 1 78 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 79 80 static __inline void 81 /*ARGSUSED*/ 82 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused, 83 _ZWState * __restrict psenc) 84 { 85 86 psenc->chlen = 0; 87 psenc->charset = NONE; 88 } 89 90 #if 0 91 static __inline void 92 /*ARGSUSED*/ 93 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused, 94 void *__restrict pspriv, const _ZWState * __restrict psenc) 95 { 96 97 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 98 } 99 100 static __inline void 101 /*ARGSUSED*/ 102 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused, 103 _ZWState * __restrict psenc, const void * __restrict pspriv) 104 { 105 106 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 107 } 108 #endif 109 110 static int 111 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 112 wchar_t * __restrict pwc, char **__restrict s, size_t n, 113 _ZWState * __restrict psenc, size_t * __restrict nresult) 114 { 115 char *s0; 116 wchar_t wc; 117 int ch, len; 118 119 if (*s == NULL) { 120 _citrus_ZW_init_state(ei, psenc); 121 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 122 return (0); 123 } 124 s0 = *s; 125 len = 0; 126 127 #define STORE \ 128 do { \ 129 if (n-- < 1) { \ 130 *nresult = (size_t)-2; \ 131 *s = s0; \ 132 return (0); \ 133 } \ 134 ch = (unsigned char)*s0++; \ 135 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 136 goto ilseq; \ 137 psenc->ch[psenc->chlen++] = ch; \ 138 } while (/*CONSTCOND*/0) 139 140 loop: 141 switch (psenc->charset) { 142 case ASCII: 143 switch (psenc->chlen) { 144 case 0: 145 STORE; 146 switch (psenc->ch[0]) { 147 case '\0': case '\n': 148 psenc->charset = NONE; 149 } 150 /*FALLTHROUGH*/ 151 case 1: 152 break; 153 default: 154 return (EINVAL); 155 } 156 ch = (unsigned char)psenc->ch[0]; 157 if (ch > 0x7F) 158 goto ilseq; 159 wc = (wchar_t)ch; 160 psenc->chlen = 0; 161 break; 162 case NONE: 163 if (psenc->chlen != 0) 164 return (EINVAL); 165 STORE; 166 ch = (unsigned char)psenc->ch[0]; 167 if (ch != 'z') { 168 if (ch != '\n' && ch != '\0') 169 psenc->charset = ASCII; 170 wc = (wchar_t)ch; 171 psenc->chlen = 0; 172 break; 173 } 174 psenc->charset = AMBIGIOUS; 175 psenc->chlen = 0; 176 /* FALLTHROUGH */ 177 case AMBIGIOUS: 178 if (psenc->chlen != 0) 179 return (EINVAL); 180 STORE; 181 if (psenc->ch[0] != 'W') { 182 psenc->charset = ASCII; 183 wc = L'z'; 184 break; 185 } 186 psenc->charset = GB2312; 187 psenc->chlen = 0; 188 /* FALLTHROUGH */ 189 case GB2312: 190 switch (psenc->chlen) { 191 case 0: 192 STORE; 193 ch = (unsigned char)psenc->ch[0]; 194 if (ch == '\0') { 195 psenc->charset = NONE; 196 wc = (wchar_t)ch; 197 psenc->chlen = 0; 198 break; 199 } else if (ch == '\n') { 200 psenc->charset = NONE; 201 psenc->chlen = 0; 202 goto loop; 203 } 204 /*FALLTHROUGH*/ 205 case 1: 206 STORE; 207 if (psenc->ch[0] == ' ') { 208 ch = (unsigned char)psenc->ch[1]; 209 wc = (wchar_t)ch; 210 psenc->chlen = 0; 211 break; 212 } else if (psenc->ch[0] == '#') { 213 ch = (unsigned char)psenc->ch[1]; 214 if (ch == '\n') { 215 psenc->charset = NONE; 216 wc = (wchar_t)ch; 217 psenc->chlen = 0; 218 break; 219 } else if (ch == ' ') { 220 wc = (wchar_t)ch; 221 psenc->chlen = 0; 222 break; 223 } 224 } 225 ch = (unsigned char)psenc->ch[0]; 226 if (ch < 0x21 || ch > 0x7E) 227 goto ilseq; 228 wc = (wchar_t)(ch << 8); 229 ch = (unsigned char)psenc->ch[1]; 230 if (ch < 0x21 || ch > 0x7E) { 231 ilseq: 232 *nresult = (size_t)-1; 233 return (EILSEQ); 234 } 235 wc |= (wchar_t)ch; 236 psenc->chlen = 0; 237 break; 238 default: 239 return (EINVAL); 240 } 241 break; 242 default: 243 return (EINVAL); 244 } 245 if (pwc != NULL) 246 *pwc = wc; 247 248 *nresult = (size_t)(wc == 0 ? 0 : len); 249 *s = s0; 250 251 return (0); 252 } 253 254 static int 255 /*ARGSUSED*/ 256 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused, 257 char *__restrict s, size_t n, wchar_t wc, 258 _ZWState * __restrict psenc, size_t * __restrict nresult) 259 { 260 int ch; 261 262 if (psenc->chlen != 0) 263 return (EINVAL); 264 if ((uint32_t)wc <= 0x7F) { 265 ch = (unsigned char)wc; 266 switch (psenc->charset) { 267 case NONE: 268 if (ch == '\0' || ch == '\n') 269 psenc->ch[psenc->chlen++] = ch; 270 else { 271 if (n < 4) 272 return (E2BIG); 273 n -= 4; 274 psenc->ch[psenc->chlen++] = 'z'; 275 psenc->ch[psenc->chlen++] = 'W'; 276 psenc->ch[psenc->chlen++] = ' '; 277 psenc->ch[psenc->chlen++] = ch; 278 psenc->charset = GB2312; 279 } 280 break; 281 case GB2312: 282 if (n < 2) 283 return (E2BIG); 284 n -= 2; 285 if (ch == '\0') { 286 psenc->ch[psenc->chlen++] = '\n'; 287 psenc->ch[psenc->chlen++] = '\0'; 288 psenc->charset = NONE; 289 } else if (ch == '\n') { 290 psenc->ch[psenc->chlen++] = '#'; 291 psenc->ch[psenc->chlen++] = '\n'; 292 psenc->charset = NONE; 293 } else { 294 psenc->ch[psenc->chlen++] = ' '; 295 psenc->ch[psenc->chlen++] = ch; 296 } 297 break; 298 default: 299 return (EINVAL); 300 } 301 } else if ((uint32_t)wc <= 0x7E7E) { 302 switch (psenc->charset) { 303 case NONE: 304 if (n < 2) 305 return (E2BIG); 306 n -= 2; 307 psenc->ch[psenc->chlen++] = 'z'; 308 psenc->ch[psenc->chlen++] = 'W'; 309 psenc->charset = GB2312; 310 /* FALLTHROUGH*/ 311 case GB2312: 312 if (n < 2) 313 return (E2BIG); 314 n -= 2; 315 ch = (wc >> 8) & 0xFF; 316 if (ch < 0x21 || ch > 0x7E) 317 goto ilseq; 318 psenc->ch[psenc->chlen++] = ch; 319 ch = wc & 0xFF; 320 if (ch < 0x21 || ch > 0x7E) 321 goto ilseq; 322 psenc->ch[psenc->chlen++] = ch; 323 break; 324 default: 325 return (EINVAL); 326 } 327 } else { 328 ilseq: 329 *nresult = (size_t)-1; 330 return (EILSEQ); 331 } 332 memcpy(s, psenc->ch, psenc->chlen); 333 *nresult = psenc->chlen; 334 psenc->chlen = 0; 335 336 return (0); 337 } 338 339 static int 340 /*ARGSUSED*/ 341 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused, 342 char * __restrict s, size_t n, _ZWState * __restrict psenc, 343 size_t * __restrict nresult) 344 { 345 346 if (psenc->chlen != 0) 347 return (EINVAL); 348 switch (psenc->charset) { 349 case GB2312: 350 if (n-- < 1) 351 return (E2BIG); 352 psenc->ch[psenc->chlen++] = '\n'; 353 psenc->charset = NONE; 354 /*FALLTHROUGH*/ 355 case NONE: 356 *nresult = psenc->chlen; 357 if (psenc->chlen > 0) { 358 memcpy(s, psenc->ch, psenc->chlen); 359 psenc->chlen = 0; 360 } 361 break; 362 default: 363 return (EINVAL); 364 } 365 366 return (0); 367 } 368 369 static __inline int 370 /*ARGSUSED*/ 371 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused, 372 _ZWState * __restrict psenc, int * __restrict rstate) 373 { 374 375 switch (psenc->charset) { 376 case NONE: 377 if (psenc->chlen != 0) 378 return (EINVAL); 379 *rstate = _STDENC_SDGEN_INITIAL; 380 break; 381 case AMBIGIOUS: 382 if (psenc->chlen != 0) 383 return (EINVAL); 384 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 385 break; 386 case ASCII: 387 case GB2312: 388 switch (psenc->chlen) { 389 case 0: 390 *rstate = _STDENC_SDGEN_STABLE; 391 break; 392 case 1: 393 *rstate = (psenc->ch[0] == '#') ? 394 _STDENC_SDGEN_INCOMPLETE_SHIFT : 395 _STDENC_SDGEN_INCOMPLETE_CHAR; 396 break; 397 default: 398 return (EINVAL); 399 } 400 break; 401 default: 402 return (EINVAL); 403 } 404 return (0); 405 } 406 407 static __inline int 408 /*ARGSUSED*/ 409 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused, 410 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 411 { 412 413 *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1; 414 *idx = (_index_t)wc; 415 416 return (0); 417 } 418 419 static __inline int 420 /*ARGSUSED*/ 421 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused, 422 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 423 { 424 425 switch (csid) { 426 case 0: case 1: 427 break; 428 default: 429 return (EINVAL); 430 } 431 *wc = (wchar_t)idx; 432 433 return (0); 434 } 435 436 static void 437 /*ARGSUSED*/ 438 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused) 439 { 440 441 } 442 443 static int 444 /*ARGSUSED*/ 445 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused, 446 const void *__restrict var __unused, size_t lenvar __unused) 447 { 448 449 return (0); 450 } 451 452 /* ---------------------------------------------------------------------- 453 * public interface for stdenc 454 */ 455 456 _CITRUS_STDENC_DECLS(ZW); 457 _CITRUS_STDENC_DEF_OPS(ZW); 458 459 #include "citrus_stdenc_template.h" 460