1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_viqr.c,v 1.5 2011/11/19 18:20:13 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2006 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 #include <sys/queue.h> 35 #include <sys/types.h> 36 37 #include <assert.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <stddef.h> 41 #include <stdint.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <wchar.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_bcs.h" 49 #include "citrus_module.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_viqr.h" 52 53 #define ESCAPE '\\' 54 55 /* 56 * this table generated from RFC 1456. 57 */ 58 static const char *mnemonic_rfc1456[0x100] = { 59 NULL , NULL , "A(?", NULL , NULL , "A(~", "A^~", NULL , 60 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 61 NULL , NULL , NULL , NULL , "Y?" , NULL , NULL , NULL , 62 NULL , "Y~" , NULL , NULL , NULL , NULL , "Y." , NULL , 63 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 64 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 65 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 66 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 67 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 68 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 69 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 70 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 71 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 72 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 73 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 74 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 75 "A." , "A('", "A(`", "A(.", "A^'", "A^`", "A^?", "A^.", 76 "E~" , "E." , "E^'", "E^`", "E^?", "E^~", "E^.", "O^'", 77 "O^`", "O^?", "O^~", "O^.", "O+.", "O+'", "O+`", "O+?", 78 "I." , "O?" , "O." , "I?" , "U?" , "U~" , "U." , "Y`" , 79 "O~" , "a('", "a(`", "a(.", "a^'", "a^`", "a^?", "a^.", 80 "e~" , "e." , "e^'", "e^`", "e^?", "e^~", "e^.", "o^'", 81 "o^`", "o^?", "o^~", "O+~", "O+" , "o^.", "o+`", "o+?", 82 "i." , "U+.", "U+'", "U+`", "U+?", "o+" , "o+'", "U+" , 83 "A`" , "A'" , "A^" , "A~" , "A?" , "A(" , "a(?", "a(~", 84 "E`" , "E'" , "E^" , "E?" , "I`" , "I'" , "I~" , "y`" , 85 "DD" , "u+'", "O`" , "O'" , "O^" , "a." , "y?" , "u+`", 86 "u+?", "U`" , "U'" , "y~" , "y." , "Y'" , "o+~", "u+" , 87 "a`" , "a'" , "a^" , "a~" , "a?" , "a(" , "u+~", "a^~", 88 "e`" , "e'" , "e^" , "e?" , "i`" , "i'" , "i~" , "i?" , 89 "dd" , "u+.", "o`" , "o'" , "o^" , "o~" , "o?" , "o." , 90 "u." , "u`" , "u'" , "u~" , "u?" , "y'" , "o+.", "U+~", 91 }; 92 93 typedef struct { 94 const char *name; 95 wchar_t value; 96 } mnemonic_def_t; 97 98 static const mnemonic_def_t mnemonic_ext[] = { 99 /* add extra mnemonic here (should be sorted by wchar_t order). */ 100 }; 101 static const size_t mnemonic_ext_size = 102 sizeof(mnemonic_ext) / sizeof(mnemonic_def_t); 103 104 static const char * 105 mnemonic_ext_find(wchar_t wc, const mnemonic_def_t *head, size_t n) 106 { 107 const mnemonic_def_t *mid; 108 109 for (; n > 0; n >>= 1) { 110 mid = head + (n >> 1); 111 if (mid->value == wc) 112 return (mid->name); 113 else if (mid->value < wc) { 114 head = mid + 1; 115 --n; 116 } 117 } 118 return (NULL); 119 } 120 121 struct mnemonic_t; 122 typedef TAILQ_HEAD(mnemonic_list_t, mnemonic_t) mnemonic_list_t; 123 typedef struct mnemonic_t { 124 TAILQ_ENTRY(mnemonic_t) entry; 125 struct mnemonic_t *parent; 126 mnemonic_list_t child; 127 wchar_t value; 128 int ascii; 129 } mnemonic_t; 130 131 static mnemonic_t * 132 mnemonic_list_find(mnemonic_list_t *ml, int ch) 133 { 134 mnemonic_t *m; 135 136 TAILQ_FOREACH(m, ml, entry) { 137 if (m->ascii == ch) 138 return (m); 139 } 140 141 return (NULL); 142 } 143 144 static mnemonic_t * 145 mnemonic_create(mnemonic_t *parent, int ascii, wchar_t value) 146 { 147 mnemonic_t *m; 148 149 m = malloc(sizeof(*m)); 150 if (m != NULL) { 151 m->parent = parent; 152 m->ascii = ascii; 153 m->value = value; 154 TAILQ_INIT(&m->child); 155 } 156 157 return (m); 158 } 159 160 static int 161 mnemonic_append_child(mnemonic_t *m, const char *s, 162 wchar_t value, wchar_t invalid) 163 { 164 mnemonic_t *m0; 165 int ch; 166 167 ch = (unsigned char)*s++; 168 if (ch == '\0') 169 return (EINVAL); 170 m0 = mnemonic_list_find(&m->child, ch); 171 if (m0 == NULL) { 172 m0 = mnemonic_create(m, ch, (wchar_t)ch); 173 if (m0 == NULL) 174 return (ENOMEM); 175 TAILQ_INSERT_TAIL(&m->child, m0, entry); 176 } 177 m = m0; 178 for (m0 = NULL; (ch = (unsigned char)*s) != '\0'; ++s) { 179 m0 = mnemonic_list_find(&m->child, ch); 180 if (m0 == NULL) { 181 m0 = mnemonic_create(m, ch, invalid); 182 if (m0 == NULL) 183 return (ENOMEM); 184 TAILQ_INSERT_TAIL(&m->child, m0, entry); 185 } 186 m = m0; 187 } 188 if (m0 == NULL) 189 return (EINVAL); 190 m0->value = value; 191 192 return (0); 193 } 194 195 static void 196 mnemonic_destroy(mnemonic_t *m) 197 { 198 mnemonic_t *m0; 199 200 TAILQ_FOREACH(m0, &m->child, entry) 201 mnemonic_destroy(m0); 202 free(m); 203 } 204 205 typedef struct { 206 mnemonic_t *mroot; 207 wchar_t invalid; 208 size_t mb_cur_max; 209 } _VIQREncodingInfo; 210 211 typedef struct { 212 int chlen; 213 char ch[MB_LEN_MAX]; 214 } _VIQRState; 215 216 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 217 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 218 219 #define _FUNCNAME(m) _citrus_VIQR_##m 220 #define _ENCODING_INFO _VIQREncodingInfo 221 #define _ENCODING_STATE _VIQRState 222 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 223 #define _ENCODING_IS_STATE_DEPENDENT 1 224 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 225 226 static __inline void 227 /*ARGSUSED*/ 228 _citrus_VIQR_init_state(_VIQREncodingInfo * __restrict ei __unused, 229 _VIQRState * __restrict psenc) 230 { 231 232 psenc->chlen = 0; 233 } 234 235 #if 0 236 static __inline void 237 /*ARGSUSED*/ 238 _citrus_VIQR_pack_state(_VIQREncodingInfo * __restrict ei __unused, 239 void *__restrict pspriv, const _VIQRState * __restrict psenc) 240 { 241 242 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 243 } 244 245 static __inline void 246 /*ARGSUSED*/ 247 _citrus_VIQR_unpack_state(_VIQREncodingInfo * __restrict ei __unused, 248 _VIQRState * __restrict psenc, const void * __restrict pspriv) 249 { 250 251 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 252 } 253 #endif 254 255 static int 256 _citrus_VIQR_mbrtowc_priv(_VIQREncodingInfo * __restrict ei, 257 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 258 _VIQRState * __restrict psenc, size_t * __restrict nresult) 259 { 260 mnemonic_t *m, *m0; 261 char *s0; 262 wchar_t wc; 263 ssize_t i; 264 int ch, escape; 265 266 if (*s == NULL) { 267 _citrus_VIQR_init_state(ei, psenc); 268 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 269 return (0); 270 } 271 s0 = *s; 272 273 i = 0; 274 m = ei->mroot; 275 for (escape = 0;;) { 276 if (psenc->chlen == i) { 277 if (n-- < 1) { 278 *s = s0; 279 *nresult = (size_t)-2; 280 return (0); 281 } 282 psenc->ch[psenc->chlen++] = *s0++; 283 } 284 ch = (unsigned char)psenc->ch[i++]; 285 if (ch == ESCAPE) { 286 if (m != ei->mroot) 287 break; 288 escape = 1; 289 continue; 290 } 291 if (escape != 0) 292 break; 293 m0 = mnemonic_list_find(&m->child, ch); 294 if (m0 == NULL) 295 break; 296 m = m0; 297 } 298 while (m != ei->mroot) { 299 --i; 300 if (m->value != ei->invalid) 301 break; 302 m = m->parent; 303 } 304 if (ch == ESCAPE && m != ei->mroot) 305 ++i; 306 psenc->chlen -= i; 307 memmove(&psenc->ch[0], &psenc->ch[i], psenc->chlen); 308 wc = (m == ei->mroot) ? (wchar_t)ch : m->value; 309 if (pwc != NULL) 310 *pwc = wc; 311 *nresult = (size_t)(wc == 0 ? 0 : s0 - *s); 312 *s = s0; 313 314 return (0); 315 } 316 317 static int 318 _citrus_VIQR_wcrtomb_priv(_VIQREncodingInfo * __restrict ei, 319 char * __restrict s, size_t n, wchar_t wc, 320 _VIQRState * __restrict psenc, size_t * __restrict nresult) 321 { 322 mnemonic_t *m; 323 const char *p; 324 int ch = 0; 325 326 switch (psenc->chlen) { 327 case 0: case 1: 328 break; 329 default: 330 return (EINVAL); 331 } 332 m = NULL; 333 if ((uint32_t)wc <= 0xFF) { 334 p = mnemonic_rfc1456[wc & 0xFF]; 335 if (p != NULL) 336 goto mnemonic_found; 337 if (n-- < 1) 338 goto e2big; 339 ch = (unsigned int)wc; 340 m = ei->mroot; 341 if (psenc->chlen > 0) { 342 m = mnemonic_list_find(&m->child, psenc->ch[0]); 343 if (m == NULL) 344 return (EINVAL); 345 psenc->ch[0] = ESCAPE; 346 } 347 if (mnemonic_list_find(&m->child, ch) == NULL) { 348 psenc->chlen = 0; 349 m = NULL; 350 } 351 psenc->ch[psenc->chlen++] = ch; 352 } else { 353 p = mnemonic_ext_find(wc, &mnemonic_ext[0], mnemonic_ext_size); 354 if (p == NULL) { 355 *nresult = (size_t)-1; 356 return (EILSEQ); 357 } else { 358 mnemonic_found: 359 psenc->chlen = 0; 360 while (*p != '\0') { 361 if (n-- < 1) 362 goto e2big; 363 psenc->ch[psenc->chlen++] = *p++; 364 } 365 } 366 } 367 memcpy(s, psenc->ch, psenc->chlen); 368 *nresult = psenc->chlen; 369 if (m == ei->mroot) { 370 psenc->ch[0] = ch; 371 psenc->chlen = 1; 372 } else 373 psenc->chlen = 0; 374 375 return (0); 376 377 e2big: 378 *nresult = (size_t)-1; 379 return (E2BIG); 380 } 381 382 static int 383 /* ARGSUSED */ 384 _citrus_VIQR_put_state_reset(_VIQREncodingInfo * __restrict ei __unused, 385 char * __restrict s __unused, size_t n __unused, 386 _VIQRState * __restrict psenc, size_t * __restrict nresult) 387 { 388 389 switch (psenc->chlen) { 390 case 0: case 1: 391 break; 392 default: 393 return (EINVAL); 394 } 395 *nresult = 0; 396 psenc->chlen = 0; 397 398 return (0); 399 } 400 401 static __inline int 402 /*ARGSUSED*/ 403 _citrus_VIQR_stdenc_wctocs(_VIQREncodingInfo * __restrict ei __unused, 404 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 405 { 406 407 *csid = 0; 408 *idx = (_index_t)wc; 409 410 return (0); 411 } 412 413 static __inline int 414 /*ARGSUSED*/ 415 _citrus_VIQR_stdenc_cstowc(_VIQREncodingInfo * __restrict ei __unused, 416 wchar_t * __restrict pwc, _csid_t csid, _index_t idx) 417 { 418 419 if (csid != 0) 420 return (EILSEQ); 421 *pwc = (wchar_t)idx; 422 423 return (0); 424 } 425 426 static void 427 _citrus_VIQR_encoding_module_uninit(_VIQREncodingInfo *ei) 428 { 429 430 mnemonic_destroy(ei->mroot); 431 } 432 433 static int 434 /*ARGSUSED*/ 435 _citrus_VIQR_encoding_module_init(_VIQREncodingInfo * __restrict ei, 436 const void * __restrict var __unused, size_t lenvar __unused) 437 { 438 const char *s; 439 size_t i, n; 440 int errnum; 441 442 ei->mb_cur_max = 1; 443 ei->invalid = (wchar_t)-1; 444 ei->mroot = mnemonic_create(NULL, '\0', ei->invalid); 445 if (ei->mroot == NULL) 446 return (ENOMEM); 447 for (i = 0; i < sizeof(mnemonic_rfc1456) / sizeof(const char *); ++i) { 448 s = mnemonic_rfc1456[i]; 449 if (s == NULL) 450 continue; 451 n = strlen(s); 452 if (ei->mb_cur_max < n) 453 ei->mb_cur_max = n; 454 errnum = mnemonic_append_child(ei->mroot, 455 s, (wchar_t)i, ei->invalid); 456 if (errnum != 0) { 457 _citrus_VIQR_encoding_module_uninit(ei); 458 return (errnum); 459 } 460 } 461 /* a + 1 < b + 1 here to silence gcc warning about unsigned < 0. */ 462 for (i = 0; i + 1 < mnemonic_ext_size + 1; ++i) { 463 const mnemonic_def_t *p; 464 465 p = &mnemonic_ext[i]; 466 n = strlen(p->name); 467 if (ei->mb_cur_max < n) 468 ei->mb_cur_max = n; 469 errnum = mnemonic_append_child(ei->mroot, 470 p->name, p->value, ei->invalid); 471 if (errnum != 0) { 472 _citrus_VIQR_encoding_module_uninit(ei); 473 return (errnum); 474 } 475 } 476 477 return (0); 478 } 479 480 static __inline int 481 /*ARGSUSED*/ 482 _citrus_VIQR_stdenc_get_state_desc_generic(_VIQREncodingInfo * __restrict ei __unused, 483 _VIQRState * __restrict psenc, int * __restrict rstate) 484 { 485 486 *rstate = (psenc->chlen == 0) ? 487 _STDENC_SDGEN_INITIAL : 488 _STDENC_SDGEN_INCOMPLETE_CHAR; 489 490 return (0); 491 } 492 493 /* ---------------------------------------------------------------------- 494 * public interface for stdenc 495 */ 496 497 _CITRUS_STDENC_DECLS(VIQR); 498 _CITRUS_STDENC_DEF_OPS(VIQR); 499 500 #include "citrus_stdenc_template.h" 501