1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2007 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 #include <sys/types.h> 31 32 #include <assert.h> 33 #include <errno.h> 34 #include <limits.h> 35 #include <stddef.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 42 #include "citrus_namespace.h" 43 #include "citrus_types.h" 44 #include "citrus_bcs.h" 45 #include "citrus_module.h" 46 #include "citrus_stdenc.h" 47 #include "citrus_dechanyu.h" 48 49 /* ---------------------------------------------------------------------- 50 * private stuffs used by templates 51 */ 52 53 typedef struct { 54 size_t chlen; 55 char ch[4]; 56 } _DECHanyuState; 57 58 typedef struct { 59 int dummy; 60 } _DECHanyuEncodingInfo; 61 62 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 63 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 64 65 #define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 66 #define _ENCODING_INFO _DECHanyuEncodingInfo 67 #define _ENCODING_STATE _DECHanyuState 68 #define _ENCODING_MB_CUR_MAX(_ei_) 4 69 #define _ENCODING_IS_STATE_DEPENDENT 0 70 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 71 72 static __inline void 73 /*ARGSUSED*/ 74 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 75 _DECHanyuState * __restrict psenc) 76 { 77 78 psenc->chlen = 0; 79 } 80 81 #if 0 82 static __inline void 83 /*ARGSUSED*/ 84 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 85 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 86 { 87 88 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 89 } 90 91 static __inline void 92 /*ARGSUSED*/ 93 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 94 _DECHanyuState * __restrict psenc, 95 const void * __restrict pspriv) 96 { 97 98 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 99 } 100 #endif 101 102 static void 103 /*ARGSUSED*/ 104 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 105 { 106 107 /* ei may be null */ 108 } 109 110 static int 111 /*ARGSUSED*/ 112 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 113 const void * __restrict var __unused, size_t lenvar __unused) 114 { 115 116 /* ei may be null */ 117 return (0); 118 } 119 120 static __inline bool 121 is_singlebyte(int c) 122 { 123 124 return (c <= 0x7F); 125 } 126 127 static __inline bool 128 is_leadbyte(int c) 129 { 130 131 return (c >= 0xA1 && c <= 0xFE); 132 } 133 134 static __inline bool 135 is_trailbyte(int c) 136 { 137 138 c &= ~0x80; 139 return (c >= 0x21 && c <= 0x7E); 140 } 141 142 static __inline bool 143 is_hanyu1(int c) 144 { 145 146 return (c == 0xC2); 147 } 148 149 static __inline bool 150 is_hanyu2(int c) 151 { 152 153 return (c == 0xCB); 154 } 155 156 #define HANYUBIT 0xC2CB0000 157 158 static __inline bool 159 is_94charset(int c) 160 { 161 162 return (c >= 0x21 && c <= 0x7E); 163 } 164 165 static int 166 /*ARGSUSED*/ 167 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 168 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 169 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 170 { 171 const char *s0; 172 wchar_t wc; 173 int ch; 174 175 if (*s == NULL) { 176 _citrus_DECHanyu_init_state(ei, psenc); 177 *nresult = _ENCODING_IS_STATE_DEPENDENT; 178 return (0); 179 } 180 s0 = *s; 181 182 wc = (wchar_t)0; 183 switch (psenc->chlen) { 184 case 0: 185 if (n-- < 1) 186 goto restart; 187 ch = *s0++ & 0xFF; 188 if (is_singlebyte(ch)) { 189 if (pwc != NULL) 190 *pwc = (wchar_t)ch; 191 *nresult = (size_t)((ch == 0) ? 0 : 1); 192 *s = s0; 193 return (0); 194 } 195 if (!is_leadbyte(ch)) 196 goto ilseq; 197 psenc->ch[psenc->chlen++] = ch; 198 break; 199 case 1: 200 ch = psenc->ch[0] & 0xFF; 201 if (!is_leadbyte(ch)) 202 return (EINVAL); 203 break; 204 case 2: case 3: 205 ch = psenc->ch[0] & 0xFF; 206 if (is_hanyu1(ch)) { 207 ch = psenc->ch[1] & 0xFF; 208 if (is_hanyu2(ch)) { 209 wc |= (wchar_t)HANYUBIT; 210 break; 211 } 212 } 213 /*FALLTHROUGH*/ 214 default: 215 return (EINVAL); 216 } 217 218 switch (psenc->chlen) { 219 case 1: 220 if (is_hanyu1(ch)) { 221 if (n-- < 1) 222 goto restart; 223 ch = *s0++ & 0xFF; 224 if (!is_hanyu2(ch)) 225 goto ilseq; 226 psenc->ch[psenc->chlen++] = ch; 227 wc |= (wchar_t)HANYUBIT; 228 if (n-- < 1) 229 goto restart; 230 ch = *s0++ & 0xFF; 231 if (!is_leadbyte(ch)) 232 goto ilseq; 233 psenc->ch[psenc->chlen++] = ch; 234 } 235 break; 236 case 2: 237 if (n-- < 1) 238 goto restart; 239 ch = *s0++ & 0xFF; 240 if (!is_leadbyte(ch)) 241 goto ilseq; 242 psenc->ch[psenc->chlen++] = ch; 243 break; 244 case 3: 245 ch = psenc->ch[2] & 0xFF; 246 if (!is_leadbyte(ch)) 247 return (EINVAL); 248 } 249 if (n-- < 1) 250 goto restart; 251 wc |= (wchar_t)(ch << 8); 252 ch = *s0++ & 0xFF; 253 if (!is_trailbyte(ch)) 254 goto ilseq; 255 wc |= (wchar_t)ch; 256 if (pwc != NULL) 257 *pwc = wc; 258 *nresult = (size_t)(s0 - *s); 259 *s = s0; 260 psenc->chlen = 0; 261 262 return (0); 263 264 restart: 265 *nresult = (size_t)-2; 266 *s = s0; 267 return (0); 268 269 ilseq: 270 *nresult = (size_t)-1; 271 return (EILSEQ); 272 } 273 274 static int 275 /*ARGSUSED*/ 276 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 277 char * __restrict s, size_t n, wchar_t wc, 278 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 279 { 280 int ch; 281 282 if (psenc->chlen != 0) 283 return (EINVAL); 284 285 /* XXX: assume wchar_t as int */ 286 if ((uint32_t)wc <= 0x7F) { 287 ch = wc & 0xFF; 288 } else { 289 if ((uint32_t)wc > 0xFFFF) { 290 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 291 goto ilseq; 292 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 293 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 294 wc &= 0xFFFF; 295 } 296 ch = (wc >> 8) & 0xFF; 297 if (!is_leadbyte(ch)) 298 goto ilseq; 299 psenc->ch[psenc->chlen++] = ch; 300 ch = wc & 0xFF; 301 if (!is_trailbyte(ch)) 302 goto ilseq; 303 } 304 psenc->ch[psenc->chlen++] = ch; 305 if (n < psenc->chlen) { 306 *nresult = (size_t)-1; 307 return (E2BIG); 308 } 309 memcpy(s, psenc->ch, psenc->chlen); 310 *nresult = psenc->chlen; 311 psenc->chlen = 0; 312 313 return (0); 314 315 ilseq: 316 *nresult = (size_t)-1; 317 return (EILSEQ); 318 } 319 320 static __inline int 321 /*ARGSUSED*/ 322 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 323 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 324 { 325 wchar_t mask; 326 int plane; 327 328 plane = 0; 329 mask = 0x7F; 330 /* XXX: assume wchar_t as int */ 331 if ((uint32_t)wc > 0x7F) { 332 if ((uint32_t)wc > 0xFFFF) { 333 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 334 return (EILSEQ); 335 plane += 2; 336 } 337 if (!is_leadbyte((wc >> 8) & 0xFF) || 338 !is_trailbyte(wc & 0xFF)) 339 return (EILSEQ); 340 plane += (wc & 0x80) ? 1 : 2; 341 mask |= 0x7F00; 342 } 343 *csid = plane; 344 *idx = (_index_t)(wc & mask); 345 346 return (0); 347 } 348 349 static __inline int 350 /*ARGSUSED*/ 351 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 352 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 353 { 354 355 if (csid == 0) { 356 if (idx > 0x7F) 357 return (EILSEQ); 358 } else if (csid <= 4) { 359 if (!is_94charset(idx >> 8)) 360 return (EILSEQ); 361 if (!is_94charset(idx & 0xFF)) 362 return (EILSEQ); 363 if (csid % 2) 364 idx |= 0x80; 365 idx |= 0x8000; 366 if (csid > 2) 367 idx |= HANYUBIT; 368 } else 369 return (EILSEQ); 370 *wc = (wchar_t)idx; 371 return (0); 372 } 373 374 static __inline int 375 /*ARGSUSED*/ 376 _citrus_DECHanyu_stdenc_get_state_desc_generic( 377 _DECHanyuEncodingInfo * __restrict ei __unused, 378 _DECHanyuState * __restrict psenc, int * __restrict rstate) 379 { 380 381 *rstate = (psenc->chlen == 0) 382 ? _STDENC_SDGEN_INITIAL 383 : _STDENC_SDGEN_INCOMPLETE_CHAR; 384 return (0); 385 } 386 387 /* ---------------------------------------------------------------------- 388 * public interface for stdenc 389 */ 390 391 _CITRUS_STDENC_DECLS(DECHanyu); 392 _CITRUS_STDENC_DEF_OPS(DECHanyu); 393 394 #include "citrus_stdenc_template.h" 395