1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_dechanyu.c,v 1.3 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2007 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 #include <sys/types.h> 31 32 #include <assert.h> 33 #include <errno.h> 34 #include <limits.h> 35 #include <stddef.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 42 #include "citrus_namespace.h" 43 #include "citrus_types.h" 44 #include "citrus_bcs.h" 45 #include "citrus_module.h" 46 #include "citrus_stdenc.h" 47 #include "citrus_dechanyu.h" 48 49 /* ---------------------------------------------------------------------- 50 * private stuffs used by templates 51 */ 52 53 typedef struct { 54 size_t chlen; 55 char ch[4]; 56 } _DECHanyuState; 57 58 typedef struct { 59 int dummy; 60 } _DECHanyuEncodingInfo; 61 62 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 63 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 64 65 #define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 66 #define _ENCODING_INFO _DECHanyuEncodingInfo 67 #define _ENCODING_STATE _DECHanyuState 68 #define _ENCODING_MB_CUR_MAX(_ei_) 4 69 #define _ENCODING_IS_STATE_DEPENDENT 0 70 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 71 72 static __inline void 73 /*ARGSUSED*/ 74 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 75 _DECHanyuState * __restrict psenc) 76 { 77 78 psenc->chlen = 0; 79 } 80 81 static __inline void 82 /*ARGSUSED*/ 83 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 84 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 85 { 86 87 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 88 } 89 90 static __inline void 91 /*ARGSUSED*/ 92 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 93 _DECHanyuState * __restrict psenc, 94 const void * __restrict pspriv) 95 { 96 97 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 98 } 99 100 static void 101 /*ARGSUSED*/ 102 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 103 { 104 105 /* ei may be null */ 106 } 107 108 static int 109 /*ARGSUSED*/ 110 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 111 const void * __restrict var __unused, size_t lenvar __unused) 112 { 113 114 /* ei may be null */ 115 return (0); 116 } 117 118 static __inline bool 119 is_singlebyte(int c) 120 { 121 122 return (c <= 0x7F); 123 } 124 125 static __inline bool 126 is_leadbyte(int c) 127 { 128 129 return (c >= 0xA1 && c <= 0xFE); 130 } 131 132 static __inline bool 133 is_trailbyte(int c) 134 { 135 136 c &= ~0x80; 137 return (c >= 0x21 && c <= 0x7E); 138 } 139 140 static __inline bool 141 is_hanyu1(int c) 142 { 143 144 return (c == 0xC2); 145 } 146 147 static __inline bool 148 is_hanyu2(int c) 149 { 150 151 return (c == 0xCB); 152 } 153 154 #define HANYUBIT 0xC2CB0000 155 156 static __inline bool 157 is_94charset(int c) 158 { 159 160 return (c >= 0x21 && c <= 0x7E); 161 } 162 163 static int 164 /*ARGSUSED*/ 165 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 166 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 167 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 168 { 169 const char *s0; 170 wchar_t wc; 171 int ch; 172 173 if (*s == NULL) { 174 _citrus_DECHanyu_init_state(ei, psenc); 175 *nresult = _ENCODING_IS_STATE_DEPENDENT; 176 return (0); 177 } 178 s0 = *s; 179 180 wc = (wchar_t)0; 181 switch (psenc->chlen) { 182 case 0: 183 if (n-- < 1) 184 goto restart; 185 ch = *s0++ & 0xFF; 186 if (is_singlebyte(ch)) { 187 if (pwc != NULL) 188 *pwc = (wchar_t)ch; 189 *nresult = (size_t)((ch == 0) ? 0 : 1); 190 *s = s0; 191 return (0); 192 } 193 if (!is_leadbyte(ch)) 194 goto ilseq; 195 psenc->ch[psenc->chlen++] = ch; 196 break; 197 case 1: 198 ch = psenc->ch[0] & 0xFF; 199 if (!is_leadbyte(ch)) 200 return (EINVAL); 201 break; 202 case 2: case 3: 203 ch = psenc->ch[0] & 0xFF; 204 if (is_hanyu1(ch)) { 205 ch = psenc->ch[1] & 0xFF; 206 if (is_hanyu2(ch)) { 207 wc |= (wchar_t)HANYUBIT; 208 break; 209 } 210 } 211 /*FALLTHROUGH*/ 212 default: 213 return (EINVAL); 214 } 215 216 switch (psenc->chlen) { 217 case 1: 218 if (is_hanyu1(ch)) { 219 if (n-- < 1) 220 goto restart; 221 ch = *s0++ & 0xFF; 222 if (!is_hanyu2(ch)) 223 goto ilseq; 224 psenc->ch[psenc->chlen++] = ch; 225 wc |= (wchar_t)HANYUBIT; 226 if (n-- < 1) 227 goto restart; 228 ch = *s0++ & 0xFF; 229 if (!is_leadbyte(ch)) 230 goto ilseq; 231 psenc->ch[psenc->chlen++] = ch; 232 } 233 break; 234 case 2: 235 if (n-- < 1) 236 goto restart; 237 ch = *s0++ & 0xFF; 238 if (!is_leadbyte(ch)) 239 goto ilseq; 240 psenc->ch[psenc->chlen++] = ch; 241 break; 242 case 3: 243 ch = psenc->ch[2] & 0xFF; 244 if (!is_leadbyte(ch)) 245 return (EINVAL); 246 } 247 if (n-- < 1) 248 goto restart; 249 wc |= (wchar_t)(ch << 8); 250 ch = *s0++ & 0xFF; 251 if (!is_trailbyte(ch)) 252 goto ilseq; 253 wc |= (wchar_t)ch; 254 if (pwc != NULL) 255 *pwc = wc; 256 *nresult = (size_t)(s0 - *s); 257 *s = s0; 258 psenc->chlen = 0; 259 260 return (0); 261 262 restart: 263 *nresult = (size_t)-2; 264 *s = s0; 265 return (0); 266 267 ilseq: 268 *nresult = (size_t)-1; 269 return (EILSEQ); 270 } 271 272 static int 273 /*ARGSUSED*/ 274 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 275 char * __restrict s, size_t n, wchar_t wc, 276 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 277 { 278 int ch; 279 280 if (psenc->chlen != 0) 281 return (EINVAL); 282 283 /* XXX: assume wchar_t as int */ 284 if ((uint32_t)wc <= 0x7F) { 285 ch = wc & 0xFF; 286 } else { 287 if ((uint32_t)wc > 0xFFFF) { 288 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 289 goto ilseq; 290 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 291 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 292 wc &= 0xFFFF; 293 } 294 ch = (wc >> 8) & 0xFF; 295 if (!is_leadbyte(ch)) 296 goto ilseq; 297 psenc->ch[psenc->chlen++] = ch; 298 ch = wc & 0xFF; 299 if (!is_trailbyte(ch)) 300 goto ilseq; 301 } 302 psenc->ch[psenc->chlen++] = ch; 303 if (n < psenc->chlen) { 304 *nresult = (size_t)-1; 305 return (E2BIG); 306 } 307 memcpy(s, psenc->ch, psenc->chlen); 308 *nresult = psenc->chlen; 309 psenc->chlen = 0; 310 311 return (0); 312 313 ilseq: 314 *nresult = (size_t)-1; 315 return (EILSEQ); 316 } 317 318 static __inline int 319 /*ARGSUSED*/ 320 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 321 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 322 { 323 wchar_t mask; 324 int plane; 325 326 plane = 0; 327 mask = 0x7F; 328 /* XXX: assume wchar_t as int */ 329 if ((uint32_t)wc > 0x7F) { 330 if ((uint32_t)wc > 0xFFFF) { 331 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 332 return (EILSEQ); 333 plane += 2; 334 } 335 if (!is_leadbyte((wc >> 8) & 0xFF) || 336 !is_trailbyte(wc & 0xFF)) 337 return (EILSEQ); 338 plane += (wc & 0x80) ? 1 : 2; 339 mask |= 0x7F00; 340 } 341 *csid = plane; 342 *idx = (_index_t)(wc & mask); 343 344 return (0); 345 } 346 347 static __inline int 348 /*ARGSUSED*/ 349 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 350 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 351 { 352 353 if (csid == 0) { 354 if (idx > 0x7F) 355 return (EILSEQ); 356 } else if (csid <= 4) { 357 if (!is_94charset(idx >> 8)) 358 return (EILSEQ); 359 if (!is_94charset(idx & 0xFF)) 360 return (EILSEQ); 361 if (csid % 2) 362 idx |= 0x80; 363 idx |= 0x8000; 364 if (csid > 2) 365 idx |= HANYUBIT; 366 } else 367 return (EILSEQ); 368 *wc = (wchar_t)idx; 369 return (0); 370 } 371 372 static __inline int 373 /*ARGSUSED*/ 374 _citrus_DECHanyu_stdenc_get_state_desc_generic( 375 _DECHanyuEncodingInfo * __restrict ei __unused, 376 _DECHanyuState * __restrict psenc, int * __restrict rstate) 377 { 378 379 *rstate = (psenc->chlen == 0) 380 ? _STDENC_SDGEN_INITIAL 381 : _STDENC_SDGEN_INCOMPLETE_CHAR; 382 return (0); 383 } 384 385 /* ---------------------------------------------------------------------- 386 * public interface for stdenc 387 */ 388 389 _CITRUS_STDENC_DECLS(DECHanyu); 390 _CITRUS_STDENC_DEF_OPS(DECHanyu); 391 392 #include "citrus_stdenc_template.h" 393