1 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2007 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/types.h> 32 33 #include <assert.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <stddef.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_bcs.h" 46 #include "citrus_module.h" 47 #include "citrus_stdenc.h" 48 #include "citrus_dechanyu.h" 49 50 /* ---------------------------------------------------------------------- 51 * private stuffs used by templates 52 */ 53 54 typedef struct { 55 size_t chlen; 56 char ch[4]; 57 } _DECHanyuState; 58 59 typedef struct { 60 int dummy; 61 } _DECHanyuEncodingInfo; 62 63 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 64 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 65 66 #define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 67 #define _ENCODING_INFO _DECHanyuEncodingInfo 68 #define _ENCODING_STATE _DECHanyuState 69 #define _ENCODING_MB_CUR_MAX(_ei_) 4 70 #define _ENCODING_IS_STATE_DEPENDENT 0 71 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 72 73 static __inline void 74 /*ARGSUSED*/ 75 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 76 _DECHanyuState * __restrict psenc) 77 { 78 79 psenc->chlen = 0; 80 } 81 82 #if 0 83 static __inline void 84 /*ARGSUSED*/ 85 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 86 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 87 { 88 89 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 90 } 91 92 static __inline void 93 /*ARGSUSED*/ 94 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 95 _DECHanyuState * __restrict psenc, 96 const void * __restrict pspriv) 97 { 98 99 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 100 } 101 #endif 102 103 static void 104 /*ARGSUSED*/ 105 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 106 { 107 108 /* ei may be null */ 109 } 110 111 static int 112 /*ARGSUSED*/ 113 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 114 const void * __restrict var __unused, size_t lenvar __unused) 115 { 116 117 /* ei may be null */ 118 return (0); 119 } 120 121 static __inline bool 122 is_singlebyte(int c) 123 { 124 125 return (c <= 0x7F); 126 } 127 128 static __inline bool 129 is_leadbyte(int c) 130 { 131 132 return (c >= 0xA1 && c <= 0xFE); 133 } 134 135 static __inline bool 136 is_trailbyte(int c) 137 { 138 139 c &= ~0x80; 140 return (c >= 0x21 && c <= 0x7E); 141 } 142 143 static __inline bool 144 is_hanyu1(int c) 145 { 146 147 return (c == 0xC2); 148 } 149 150 static __inline bool 151 is_hanyu2(int c) 152 { 153 154 return (c == 0xCB); 155 } 156 157 #define HANYUBIT 0xC2CB0000 158 159 static __inline bool 160 is_94charset(int c) 161 { 162 163 return (c >= 0x21 && c <= 0x7E); 164 } 165 166 static int 167 /*ARGSUSED*/ 168 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 169 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 170 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 171 { 172 char *s0; 173 wchar_t wc; 174 int ch; 175 176 if (*s == NULL) { 177 _citrus_DECHanyu_init_state(ei, psenc); 178 *nresult = _ENCODING_IS_STATE_DEPENDENT; 179 return (0); 180 } 181 s0 = *s; 182 183 wc = (wchar_t)0; 184 switch (psenc->chlen) { 185 case 0: 186 if (n-- < 1) 187 goto restart; 188 ch = *s0++ & 0xFF; 189 if (is_singlebyte(ch)) { 190 if (pwc != NULL) 191 *pwc = (wchar_t)ch; 192 *nresult = (size_t)((ch == 0) ? 0 : 1); 193 *s = s0; 194 return (0); 195 } 196 if (!is_leadbyte(ch)) 197 goto ilseq; 198 psenc->ch[psenc->chlen++] = ch; 199 break; 200 case 1: 201 ch = psenc->ch[0] & 0xFF; 202 if (!is_leadbyte(ch)) 203 return (EINVAL); 204 break; 205 case 2: case 3: 206 ch = psenc->ch[0] & 0xFF; 207 if (is_hanyu1(ch)) { 208 ch = psenc->ch[1] & 0xFF; 209 if (is_hanyu2(ch)) { 210 wc |= (wchar_t)HANYUBIT; 211 break; 212 } 213 } 214 /*FALLTHROUGH*/ 215 default: 216 return (EINVAL); 217 } 218 219 switch (psenc->chlen) { 220 case 1: 221 if (is_hanyu1(ch)) { 222 if (n-- < 1) 223 goto restart; 224 ch = *s0++ & 0xFF; 225 if (!is_hanyu2(ch)) 226 goto ilseq; 227 psenc->ch[psenc->chlen++] = ch; 228 wc |= (wchar_t)HANYUBIT; 229 if (n-- < 1) 230 goto restart; 231 ch = *s0++ & 0xFF; 232 if (!is_leadbyte(ch)) 233 goto ilseq; 234 psenc->ch[psenc->chlen++] = ch; 235 } 236 break; 237 case 2: 238 if (n-- < 1) 239 goto restart; 240 ch = *s0++ & 0xFF; 241 if (!is_leadbyte(ch)) 242 goto ilseq; 243 psenc->ch[psenc->chlen++] = ch; 244 break; 245 case 3: 246 ch = psenc->ch[2] & 0xFF; 247 if (!is_leadbyte(ch)) 248 return (EINVAL); 249 } 250 if (n-- < 1) 251 goto restart; 252 wc |= (wchar_t)(ch << 8); 253 ch = *s0++ & 0xFF; 254 if (!is_trailbyte(ch)) 255 goto ilseq; 256 wc |= (wchar_t)ch; 257 if (pwc != NULL) 258 *pwc = wc; 259 *nresult = (size_t)(s0 - *s); 260 *s = s0; 261 psenc->chlen = 0; 262 263 return (0); 264 265 restart: 266 *nresult = (size_t)-2; 267 *s = s0; 268 return (0); 269 270 ilseq: 271 *nresult = (size_t)-1; 272 return (EILSEQ); 273 } 274 275 static int 276 /*ARGSUSED*/ 277 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 278 char * __restrict s, size_t n, wchar_t wc, 279 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 280 { 281 int ch; 282 283 if (psenc->chlen != 0) 284 return (EINVAL); 285 286 /* XXX: assume wchar_t as int */ 287 if ((uint32_t)wc <= 0x7F) { 288 ch = wc & 0xFF; 289 } else { 290 if ((uint32_t)wc > 0xFFFF) { 291 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 292 goto ilseq; 293 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 294 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 295 wc &= 0xFFFF; 296 } 297 ch = (wc >> 8) & 0xFF; 298 if (!is_leadbyte(ch)) 299 goto ilseq; 300 psenc->ch[psenc->chlen++] = ch; 301 ch = wc & 0xFF; 302 if (!is_trailbyte(ch)) 303 goto ilseq; 304 } 305 psenc->ch[psenc->chlen++] = ch; 306 if (n < psenc->chlen) { 307 *nresult = (size_t)-1; 308 return (E2BIG); 309 } 310 memcpy(s, psenc->ch, psenc->chlen); 311 *nresult = psenc->chlen; 312 psenc->chlen = 0; 313 314 return (0); 315 316 ilseq: 317 *nresult = (size_t)-1; 318 return (EILSEQ); 319 } 320 321 static __inline int 322 /*ARGSUSED*/ 323 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 324 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 325 { 326 wchar_t mask; 327 int plane; 328 329 plane = 0; 330 mask = 0x7F; 331 /* XXX: assume wchar_t as int */ 332 if ((uint32_t)wc > 0x7F) { 333 if ((uint32_t)wc > 0xFFFF) { 334 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 335 return (EILSEQ); 336 plane += 2; 337 } 338 if (!is_leadbyte((wc >> 8) & 0xFF) || 339 !is_trailbyte(wc & 0xFF)) 340 return (EILSEQ); 341 plane += (wc & 0x80) ? 1 : 2; 342 mask |= 0x7F00; 343 } 344 *csid = plane; 345 *idx = (_index_t)(wc & mask); 346 347 return (0); 348 } 349 350 static __inline int 351 /*ARGSUSED*/ 352 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 353 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 354 { 355 356 if (csid == 0) { 357 if (idx > 0x7F) 358 return (EILSEQ); 359 } else if (csid <= 4) { 360 if (!is_94charset(idx >> 8)) 361 return (EILSEQ); 362 if (!is_94charset(idx & 0xFF)) 363 return (EILSEQ); 364 if (csid % 2) 365 idx |= 0x80; 366 idx |= 0x8000; 367 if (csid > 2) 368 idx |= HANYUBIT; 369 } else 370 return (EILSEQ); 371 *wc = (wchar_t)idx; 372 return (0); 373 } 374 375 static __inline int 376 /*ARGSUSED*/ 377 _citrus_DECHanyu_stdenc_get_state_desc_generic( 378 _DECHanyuEncodingInfo * __restrict ei __unused, 379 _DECHanyuState * __restrict psenc, int * __restrict rstate) 380 { 381 382 *rstate = (psenc->chlen == 0) 383 ? _STDENC_SDGEN_INITIAL 384 : _STDENC_SDGEN_INCOMPLETE_CHAR; 385 return (0); 386 } 387 388 /* ---------------------------------------------------------------------- 389 * public interface for stdenc 390 */ 391 392 _CITRUS_STDENC_DECLS(DECHanyu); 393 _CITRUS_STDENC_DEF_OPS(DECHanyu); 394 395 #include "citrus_stdenc_template.h" 396