1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2007 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 #include <sys/cdefs.h> 32 #include <sys/types.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stddef.h> 38 #include <stdint.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_dechanyu.h" 50 51 /* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55 typedef struct { 56 size_t chlen; 57 char ch[4]; 58 } _DECHanyuState; 59 60 typedef struct { 61 int dummy; 62 } _DECHanyuEncodingInfo; 63 64 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 65 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 66 67 #define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 68 #define _ENCODING_INFO _DECHanyuEncodingInfo 69 #define _ENCODING_STATE _DECHanyuState 70 #define _ENCODING_MB_CUR_MAX(_ei_) 4 71 #define _ENCODING_IS_STATE_DEPENDENT 0 72 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 73 74 static __inline void 75 /*ARGSUSED*/ 76 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 77 _DECHanyuState * __restrict psenc) 78 { 79 80 psenc->chlen = 0; 81 } 82 83 #if 0 84 static __inline void 85 /*ARGSUSED*/ 86 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 87 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 88 { 89 90 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 91 } 92 93 static __inline void 94 /*ARGSUSED*/ 95 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 96 _DECHanyuState * __restrict psenc, 97 const void * __restrict pspriv) 98 { 99 100 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 101 } 102 #endif 103 104 static void 105 /*ARGSUSED*/ 106 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 107 { 108 109 /* ei may be null */ 110 } 111 112 static int 113 /*ARGSUSED*/ 114 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 115 const void * __restrict var __unused, size_t lenvar __unused) 116 { 117 118 /* ei may be null */ 119 return (0); 120 } 121 122 static __inline bool 123 is_singlebyte(int c) 124 { 125 126 return (c <= 0x7F); 127 } 128 129 static __inline bool 130 is_leadbyte(int c) 131 { 132 133 return (c >= 0xA1 && c <= 0xFE); 134 } 135 136 static __inline bool 137 is_trailbyte(int c) 138 { 139 140 c &= ~0x80; 141 return (c >= 0x21 && c <= 0x7E); 142 } 143 144 static __inline bool 145 is_hanyu1(int c) 146 { 147 148 return (c == 0xC2); 149 } 150 151 static __inline bool 152 is_hanyu2(int c) 153 { 154 155 return (c == 0xCB); 156 } 157 158 #define HANYUBIT 0xC2CB0000 159 160 static __inline bool 161 is_94charset(int c) 162 { 163 164 return (c >= 0x21 && c <= 0x7E); 165 } 166 167 static int 168 /*ARGSUSED*/ 169 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 170 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 171 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 172 { 173 char *s0; 174 wchar_t wc; 175 int ch; 176 177 if (*s == NULL) { 178 _citrus_DECHanyu_init_state(ei, psenc); 179 *nresult = _ENCODING_IS_STATE_DEPENDENT; 180 return (0); 181 } 182 s0 = *s; 183 184 wc = (wchar_t)0; 185 switch (psenc->chlen) { 186 case 0: 187 if (n-- < 1) 188 goto restart; 189 ch = *s0++ & 0xFF; 190 if (is_singlebyte(ch)) { 191 if (pwc != NULL) 192 *pwc = (wchar_t)ch; 193 *nresult = (size_t)((ch == 0) ? 0 : 1); 194 *s = s0; 195 return (0); 196 } 197 if (!is_leadbyte(ch)) 198 goto ilseq; 199 psenc->ch[psenc->chlen++] = ch; 200 break; 201 case 1: 202 ch = psenc->ch[0] & 0xFF; 203 if (!is_leadbyte(ch)) 204 return (EINVAL); 205 break; 206 case 2: case 3: 207 ch = psenc->ch[0] & 0xFF; 208 if (is_hanyu1(ch)) { 209 ch = psenc->ch[1] & 0xFF; 210 if (is_hanyu2(ch)) { 211 wc |= (wchar_t)HANYUBIT; 212 break; 213 } 214 } 215 /*FALLTHROUGH*/ 216 default: 217 return (EINVAL); 218 } 219 220 switch (psenc->chlen) { 221 case 1: 222 if (is_hanyu1(ch)) { 223 if (n-- < 1) 224 goto restart; 225 ch = *s0++ & 0xFF; 226 if (!is_hanyu2(ch)) 227 goto ilseq; 228 psenc->ch[psenc->chlen++] = ch; 229 wc |= (wchar_t)HANYUBIT; 230 if (n-- < 1) 231 goto restart; 232 ch = *s0++ & 0xFF; 233 if (!is_leadbyte(ch)) 234 goto ilseq; 235 psenc->ch[psenc->chlen++] = ch; 236 } 237 break; 238 case 2: 239 if (n-- < 1) 240 goto restart; 241 ch = *s0++ & 0xFF; 242 if (!is_leadbyte(ch)) 243 goto ilseq; 244 psenc->ch[psenc->chlen++] = ch; 245 break; 246 case 3: 247 ch = psenc->ch[2] & 0xFF; 248 if (!is_leadbyte(ch)) 249 return (EINVAL); 250 } 251 if (n-- < 1) 252 goto restart; 253 wc |= (wchar_t)(ch << 8); 254 ch = *s0++ & 0xFF; 255 if (!is_trailbyte(ch)) 256 goto ilseq; 257 wc |= (wchar_t)ch; 258 if (pwc != NULL) 259 *pwc = wc; 260 *nresult = (size_t)(s0 - *s); 261 *s = s0; 262 psenc->chlen = 0; 263 264 return (0); 265 266 restart: 267 *nresult = (size_t)-2; 268 *s = s0; 269 return (0); 270 271 ilseq: 272 *nresult = (size_t)-1; 273 return (EILSEQ); 274 } 275 276 static int 277 /*ARGSUSED*/ 278 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 279 char * __restrict s, size_t n, wchar_t wc, 280 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 281 { 282 int ch; 283 284 if (psenc->chlen != 0) 285 return (EINVAL); 286 287 /* XXX: assume wchar_t as int */ 288 if ((uint32_t)wc <= 0x7F) { 289 ch = wc & 0xFF; 290 } else { 291 if ((uint32_t)wc > 0xFFFF) { 292 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 293 goto ilseq; 294 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 295 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 296 wc &= 0xFFFF; 297 } 298 ch = (wc >> 8) & 0xFF; 299 if (!is_leadbyte(ch)) 300 goto ilseq; 301 psenc->ch[psenc->chlen++] = ch; 302 ch = wc & 0xFF; 303 if (!is_trailbyte(ch)) 304 goto ilseq; 305 } 306 psenc->ch[psenc->chlen++] = ch; 307 if (n < psenc->chlen) { 308 *nresult = (size_t)-1; 309 return (E2BIG); 310 } 311 memcpy(s, psenc->ch, psenc->chlen); 312 *nresult = psenc->chlen; 313 psenc->chlen = 0; 314 315 return (0); 316 317 ilseq: 318 *nresult = (size_t)-1; 319 return (EILSEQ); 320 } 321 322 static __inline int 323 /*ARGSUSED*/ 324 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 325 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 326 { 327 wchar_t mask; 328 int plane; 329 330 plane = 0; 331 mask = 0x7F; 332 /* XXX: assume wchar_t as int */ 333 if ((uint32_t)wc > 0x7F) { 334 if ((uint32_t)wc > 0xFFFF) { 335 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 336 return (EILSEQ); 337 plane += 2; 338 } 339 if (!is_leadbyte((wc >> 8) & 0xFF) || 340 !is_trailbyte(wc & 0xFF)) 341 return (EILSEQ); 342 plane += (wc & 0x80) ? 1 : 2; 343 mask |= 0x7F00; 344 } 345 *csid = plane; 346 *idx = (_index_t)(wc & mask); 347 348 return (0); 349 } 350 351 static __inline int 352 /*ARGSUSED*/ 353 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 354 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 355 { 356 357 if (csid == 0) { 358 if (idx > 0x7F) 359 return (EILSEQ); 360 } else if (csid <= 4) { 361 if (!is_94charset(idx >> 8)) 362 return (EILSEQ); 363 if (!is_94charset(idx & 0xFF)) 364 return (EILSEQ); 365 if (csid % 2) 366 idx |= 0x80; 367 idx |= 0x8000; 368 if (csid > 2) 369 idx |= HANYUBIT; 370 } else 371 return (EILSEQ); 372 *wc = (wchar_t)idx; 373 return (0); 374 } 375 376 static __inline int 377 /*ARGSUSED*/ 378 _citrus_DECHanyu_stdenc_get_state_desc_generic( 379 _DECHanyuEncodingInfo * __restrict ei __unused, 380 _DECHanyuState * __restrict psenc, int * __restrict rstate) 381 { 382 383 *rstate = (psenc->chlen == 0) 384 ? _STDENC_SDGEN_INITIAL 385 : _STDENC_SDGEN_INCOMPLETE_CHAR; 386 return (0); 387 } 388 389 /* ---------------------------------------------------------------------- 390 * public interface for stdenc 391 */ 392 393 _CITRUS_STDENC_DECLS(DECHanyu); 394 _CITRUS_STDENC_DEF_OPS(DECHanyu); 395 396 #include "citrus_stdenc_template.h" 397