1 /* $NetBSD: citrus_viqr.c,v 1.5 2011/11/19 18:20:13 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2006 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/queue.h> 34 #include <sys/types.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stddef.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_bcs.h" 48 #include "citrus_module.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_viqr.h" 51 52 #define ESCAPE '\\' 53 54 /* 55 * this table generated from RFC 1456. 56 */ 57 static const char *mnemonic_rfc1456[0x100] = { 58 NULL , NULL , "A(?", NULL , NULL , "A(~", "A^~", NULL , 59 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 60 NULL , NULL , NULL , NULL , "Y?" , NULL , NULL , NULL , 61 NULL , "Y~" , NULL , NULL , NULL , NULL , "Y." , NULL , 62 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 63 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 64 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 65 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 66 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 67 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 68 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 69 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 70 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 71 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 72 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 73 NULL , NULL , NULL , NULL , NULL , NULL , NULL , NULL , 74 "A." , "A('", "A(`", "A(.", "A^'", "A^`", "A^?", "A^.", 75 "E~" , "E." , "E^'", "E^`", "E^?", "E^~", "E^.", "O^'", 76 "O^`", "O^?", "O^~", "O^.", "O+.", "O+'", "O+`", "O+?", 77 "I." , "O?" , "O." , "I?" , "U?" , "U~" , "U." , "Y`" , 78 "O~" , "a('", "a(`", "a(.", "a^'", "a^`", "a^?", "a^.", 79 "e~" , "e." , "e^'", "e^`", "e^?", "e^~", "e^.", "o^'", 80 "o^`", "o^?", "o^~", "O+~", "O+" , "o^.", "o+`", "o+?", 81 "i." , "U+.", "U+'", "U+`", "U+?", "o+" , "o+'", "U+" , 82 "A`" , "A'" , "A^" , "A~" , "A?" , "A(" , "a(?", "a(~", 83 "E`" , "E'" , "E^" , "E?" , "I`" , "I'" , "I~" , "y`" , 84 "DD" , "u+'", "O`" , "O'" , "O^" , "a." , "y?" , "u+`", 85 "u+?", "U`" , "U'" , "y~" , "y." , "Y'" , "o+~", "u+" , 86 "a`" , "a'" , "a^" , "a~" , "a?" , "a(" , "u+~", "a^~", 87 "e`" , "e'" , "e^" , "e?" , "i`" , "i'" , "i~" , "i?" , 88 "dd" , "u+.", "o`" , "o'" , "o^" , "o~" , "o?" , "o." , 89 "u." , "u`" , "u'" , "u~" , "u?" , "y'" , "o+.", "U+~", 90 }; 91 92 typedef struct { 93 const char *name; 94 wchar_t value; 95 } mnemonic_def_t; 96 97 static const mnemonic_def_t mnemonic_ext[] = { 98 /* add extra mnemonic here (should be sorted by wchar_t order). */ 99 }; 100 static const size_t mnemonic_ext_size = 101 sizeof(mnemonic_ext) / sizeof(mnemonic_def_t); 102 103 static const char * 104 mnemonic_ext_find(wchar_t wc, const mnemonic_def_t *head, size_t n) 105 { 106 const mnemonic_def_t *mid; 107 108 for (; n > 0; n >>= 1) { 109 mid = head + (n >> 1); 110 if (mid->value == wc) 111 return (mid->name); 112 else if (mid->value < wc) { 113 head = mid + 1; 114 --n; 115 } 116 } 117 return (NULL); 118 } 119 120 struct mnemonic_t; 121 typedef TAILQ_HEAD(mnemonic_list_t, mnemonic_t) mnemonic_list_t; 122 typedef struct mnemonic_t { 123 TAILQ_ENTRY(mnemonic_t) entry; 124 struct mnemonic_t *parent; 125 mnemonic_list_t child; 126 wchar_t value; 127 int ascii; 128 } mnemonic_t; 129 130 static mnemonic_t * 131 mnemonic_list_find(mnemonic_list_t *ml, int ch) 132 { 133 mnemonic_t *m; 134 135 TAILQ_FOREACH(m, ml, entry) { 136 if (m->ascii == ch) 137 return (m); 138 } 139 140 return (NULL); 141 } 142 143 static mnemonic_t * 144 mnemonic_create(mnemonic_t *parent, int ascii, wchar_t value) 145 { 146 mnemonic_t *m; 147 148 m = malloc(sizeof(*m)); 149 if (m != NULL) { 150 m->parent = parent; 151 m->ascii = ascii; 152 m->value = value; 153 TAILQ_INIT(&m->child); 154 } 155 156 return (m); 157 } 158 159 static int 160 mnemonic_append_child(mnemonic_t *m, const char *s, 161 wchar_t value, wchar_t invalid) 162 { 163 mnemonic_t *m0; 164 int ch; 165 166 ch = (unsigned char)*s++; 167 if (ch == '\0') 168 return (EINVAL); 169 m0 = mnemonic_list_find(&m->child, ch); 170 if (m0 == NULL) { 171 m0 = mnemonic_create(m, ch, (wchar_t)ch); 172 if (m0 == NULL) 173 return (ENOMEM); 174 TAILQ_INSERT_TAIL(&m->child, m0, entry); 175 } 176 m = m0; 177 for (m0 = NULL; (ch = (unsigned char)*s) != '\0'; ++s) { 178 m0 = mnemonic_list_find(&m->child, ch); 179 if (m0 == NULL) { 180 m0 = mnemonic_create(m, ch, invalid); 181 if (m0 == NULL) 182 return (ENOMEM); 183 TAILQ_INSERT_TAIL(&m->child, m0, entry); 184 } 185 m = m0; 186 } 187 if (m0 == NULL) 188 return (EINVAL); 189 m0->value = value; 190 191 return (0); 192 } 193 194 static void 195 mnemonic_destroy(mnemonic_t *m) 196 { 197 mnemonic_t *m0, *n; 198 199 TAILQ_FOREACH_SAFE(m0, &m->child, entry, n) 200 mnemonic_destroy(m0); 201 free(m); 202 } 203 204 typedef struct { 205 mnemonic_t *mroot; 206 wchar_t invalid; 207 size_t mb_cur_max; 208 } _VIQREncodingInfo; 209 210 typedef struct { 211 int chlen; 212 char ch[MB_LEN_MAX]; 213 } _VIQRState; 214 215 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 216 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 217 218 #define _FUNCNAME(m) _citrus_VIQR_##m 219 #define _ENCODING_INFO _VIQREncodingInfo 220 #define _ENCODING_STATE _VIQRState 221 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 222 #define _ENCODING_IS_STATE_DEPENDENT 1 223 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 224 225 static __inline void 226 /*ARGSUSED*/ 227 _citrus_VIQR_init_state(_VIQREncodingInfo * __restrict ei __unused, 228 _VIQRState * __restrict psenc) 229 { 230 231 psenc->chlen = 0; 232 } 233 234 #if 0 235 static __inline void 236 /*ARGSUSED*/ 237 _citrus_VIQR_pack_state(_VIQREncodingInfo * __restrict ei __unused, 238 void *__restrict pspriv, const _VIQRState * __restrict psenc) 239 { 240 241 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 242 } 243 244 static __inline void 245 /*ARGSUSED*/ 246 _citrus_VIQR_unpack_state(_VIQREncodingInfo * __restrict ei __unused, 247 _VIQRState * __restrict psenc, const void * __restrict pspriv) 248 { 249 250 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 251 } 252 #endif 253 254 static int 255 _citrus_VIQR_mbrtowc_priv(_VIQREncodingInfo * __restrict ei, 256 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 257 _VIQRState * __restrict psenc, size_t * __restrict nresult) 258 { 259 mnemonic_t *m, *m0; 260 char *s0; 261 wchar_t wc; 262 ssize_t i; 263 int ch, escape; 264 265 if (*s == NULL) { 266 _citrus_VIQR_init_state(ei, psenc); 267 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 268 return (0); 269 } 270 s0 = *s; 271 272 i = 0; 273 m = ei->mroot; 274 for (escape = 0;;) { 275 if (psenc->chlen == i) { 276 if (n-- < 1) { 277 *s = s0; 278 *nresult = (size_t)-2; 279 return (0); 280 } 281 psenc->ch[psenc->chlen++] = *s0++; 282 } 283 ch = (unsigned char)psenc->ch[i++]; 284 if (ch == ESCAPE) { 285 if (m != ei->mroot) 286 break; 287 escape = 1; 288 continue; 289 } 290 if (escape != 0) 291 break; 292 m0 = mnemonic_list_find(&m->child, ch); 293 if (m0 == NULL) 294 break; 295 m = m0; 296 } 297 while (m != ei->mroot) { 298 --i; 299 if (m->value != ei->invalid) 300 break; 301 m = m->parent; 302 } 303 if (ch == ESCAPE && m != ei->mroot) 304 ++i; 305 psenc->chlen -= i; 306 memmove(&psenc->ch[0], &psenc->ch[i], psenc->chlen); 307 wc = (m == ei->mroot) ? (wchar_t)ch : m->value; 308 if (pwc != NULL) 309 *pwc = wc; 310 *nresult = (size_t)(wc == 0 ? 0 : s0 - *s); 311 *s = s0; 312 313 return (0); 314 } 315 316 static int 317 _citrus_VIQR_wcrtomb_priv(_VIQREncodingInfo * __restrict ei, 318 char * __restrict s, size_t n, wchar_t wc, 319 _VIQRState * __restrict psenc, size_t * __restrict nresult) 320 { 321 mnemonic_t *m; 322 const char *p; 323 int ch = 0; 324 325 switch (psenc->chlen) { 326 case 0: case 1: 327 break; 328 default: 329 return (EINVAL); 330 } 331 m = NULL; 332 if ((uint32_t)wc <= 0xFF) { 333 p = mnemonic_rfc1456[wc & 0xFF]; 334 if (p != NULL) 335 goto mnemonic_found; 336 if (n-- < 1) 337 goto e2big; 338 ch = (unsigned int)wc; 339 m = ei->mroot; 340 if (psenc->chlen > 0) { 341 m = mnemonic_list_find(&m->child, psenc->ch[0]); 342 if (m == NULL) 343 return (EINVAL); 344 psenc->ch[0] = ESCAPE; 345 } 346 if (mnemonic_list_find(&m->child, ch) == NULL) { 347 psenc->chlen = 0; 348 m = NULL; 349 } 350 psenc->ch[psenc->chlen++] = ch; 351 } else { 352 p = mnemonic_ext_find(wc, &mnemonic_ext[0], mnemonic_ext_size); 353 if (p == NULL) { 354 *nresult = (size_t)-1; 355 return (EILSEQ); 356 } else { 357 mnemonic_found: 358 psenc->chlen = 0; 359 while (*p != '\0') { 360 if (n-- < 1) 361 goto e2big; 362 psenc->ch[psenc->chlen++] = *p++; 363 } 364 } 365 } 366 memcpy(s, psenc->ch, psenc->chlen); 367 *nresult = psenc->chlen; 368 if (m == ei->mroot) { 369 psenc->ch[0] = ch; 370 psenc->chlen = 1; 371 } else 372 psenc->chlen = 0; 373 374 return (0); 375 376 e2big: 377 *nresult = (size_t)-1; 378 return (E2BIG); 379 } 380 381 static int 382 /* ARGSUSED */ 383 _citrus_VIQR_put_state_reset(_VIQREncodingInfo * __restrict ei __unused, 384 char * __restrict s __unused, size_t n __unused, 385 _VIQRState * __restrict psenc, size_t * __restrict nresult) 386 { 387 388 switch (psenc->chlen) { 389 case 0: case 1: 390 break; 391 default: 392 return (EINVAL); 393 } 394 *nresult = 0; 395 psenc->chlen = 0; 396 397 return (0); 398 } 399 400 static __inline int 401 /*ARGSUSED*/ 402 _citrus_VIQR_stdenc_wctocs(_VIQREncodingInfo * __restrict ei __unused, 403 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 404 { 405 406 *csid = 0; 407 *idx = (_index_t)wc; 408 409 return (0); 410 } 411 412 static __inline int 413 /*ARGSUSED*/ 414 _citrus_VIQR_stdenc_cstowc(_VIQREncodingInfo * __restrict ei __unused, 415 wchar_t * __restrict pwc, _csid_t csid, _index_t idx) 416 { 417 418 if (csid != 0) 419 return (EILSEQ); 420 *pwc = (wchar_t)idx; 421 422 return (0); 423 } 424 425 static void 426 _citrus_VIQR_encoding_module_uninit(_VIQREncodingInfo *ei) 427 { 428 429 mnemonic_destroy(ei->mroot); 430 } 431 432 static int 433 /*ARGSUSED*/ 434 _citrus_VIQR_encoding_module_init(_VIQREncodingInfo * __restrict ei, 435 const void * __restrict var __unused, size_t lenvar __unused) 436 { 437 const char *s; 438 size_t i, n; 439 int errnum; 440 441 ei->mb_cur_max = 1; 442 ei->invalid = (wchar_t)-1; 443 ei->mroot = mnemonic_create(NULL, '\0', ei->invalid); 444 if (ei->mroot == NULL) 445 return (ENOMEM); 446 for (i = 0; i < sizeof(mnemonic_rfc1456) / sizeof(const char *); ++i) { 447 s = mnemonic_rfc1456[i]; 448 if (s == NULL) 449 continue; 450 n = strlen(s); 451 if (ei->mb_cur_max < n) 452 ei->mb_cur_max = n; 453 errnum = mnemonic_append_child(ei->mroot, 454 s, (wchar_t)i, ei->invalid); 455 if (errnum != 0) { 456 _citrus_VIQR_encoding_module_uninit(ei); 457 return (errnum); 458 } 459 } 460 /* a + 1 < b + 1 here to silence gcc warning about unsigned < 0. */ 461 for (i = 0; i + 1 < mnemonic_ext_size + 1; ++i) { 462 const mnemonic_def_t *p; 463 464 p = &mnemonic_ext[i]; 465 n = strlen(p->name); 466 if (ei->mb_cur_max < n) 467 ei->mb_cur_max = n; 468 errnum = mnemonic_append_child(ei->mroot, 469 p->name, p->value, ei->invalid); 470 if (errnum != 0) { 471 _citrus_VIQR_encoding_module_uninit(ei); 472 return (errnum); 473 } 474 } 475 476 return (0); 477 } 478 479 static __inline int 480 /*ARGSUSED*/ 481 _citrus_VIQR_stdenc_get_state_desc_generic(_VIQREncodingInfo * __restrict ei __unused, 482 _VIQRState * __restrict psenc, int * __restrict rstate) 483 { 484 485 *rstate = (psenc->chlen == 0) ? 486 _STDENC_SDGEN_INITIAL : 487 _STDENC_SDGEN_INCOMPLETE_CHAR; 488 489 return (0); 490 } 491 492 /* ---------------------------------------------------------------------- 493 * public interface for stdenc 494 */ 495 496 _CITRUS_STDENC_DECLS(VIQR); 497 _CITRUS_STDENC_DEF_OPS(VIQR); 498 499 #include "citrus_stdenc_template.h" 500