1 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2004, 2006 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32 #include <sys/cdefs.h>
33 #include <sys/queue.h>
34 #include <sys/types.h>
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stddef.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_bcs.h"
48 #include "citrus_module.h"
49 #include "citrus_stdenc.h"
50
51 #include "citrus_hz.h"
52 #include "citrus_prop.h"
53
54 /*
55 * wchar_t mapping:
56 *
57 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx
58 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx
59 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
60 */
61
/* Byte that introduces every two-byte HZ escape sequence. */
#define ESCAPE_CHAR	'~'
63
/*
 * Character set classes known to this module.  The numeric values are
 * used as indices into the ranges[] table, so they must stay dense and
 * in this order.
 */
typedef enum {
	CTRL	= 0,
	ASCII	= 1,
	GB2312	= 2,
	CS94	= 3,
	CS96	= 4
} charset_t;
67
/* Inclusive range of byte values accepted for one character set class. */
typedef struct {
	int	start;	/* lowest valid byte value */
	int	end;	/* highest valid byte value */
	int	width;	/* number of values in the range: end - start + 1 */
} range_t;
73
74 static const range_t ranges[] = {
75 #define RANGE(start, end) { start, end, (end - start) + 1 }
76 /* CTRL */ RANGE(0x00, 0x1F),
77 /* ASCII */ RANGE(0x20, 0x7F),
78 /* GB2312 */ RANGE(0x21, 0x7E),
79 /* CS94 */ RANGE(0x21, 0x7E),
80 /* CS96 */ RANGE(0x20, 0x7F),
81 #undef RANGE
82 };
83
84 typedef struct escape_t escape_t;
85 typedef struct {
86 charset_t charset;
87 escape_t *escape;
88 ssize_t length;
89 #define ROWCOL_MAX 3
90 } graphic_t;
91
92 typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
93 struct escape_t {
94 TAILQ_ENTRY(escape_t) entry;
95 escape_list *set;
96 graphic_t *left;
97 graphic_t *right;
98 int ch;
99 };
100
/* Field accessors for an escape_t pointer. */
#define GL(escape)	((escape)->left)	/* GL graphic set */
#define GR(escape)	((escape)->right)	/* GR graphic set */
#define SET(escape)	((escape)->set)		/* owning escape list */
#define ESC(escape)	((escape)->ch)		/* escape character */
/* First (initial) escape of the list that `escape' belongs to. */
#define INIT(escape)	(TAILQ_FIRST(SET(escape)))
106
107 static __inline escape_t *
find_escape(escape_list * set,int ch)108 find_escape(escape_list *set, int ch)
109 {
110 escape_t *escape;
111
112 TAILQ_FOREACH(escape, set, entry) {
113 if (ESC(escape) == ch)
114 break;
115 }
116
117 return (escape);
118 }
119
120 typedef struct {
121 escape_list e0;
122 escape_list e1;
123 graphic_t *ascii;
124 graphic_t *gb2312;
125 } _HZEncodingInfo;
126
/* Escape lists of an encoding-info object, and the first entry of each. */
#define E0SET(ei)	(&(ei)->e0)
#define E1SET(ei)	(&(ei)->e1)
#define INIT0(ei)	(TAILQ_FIRST(E0SET(ei)))
#define INIT1(ei)	(TAILQ_FIRST(E1SET(ei)))
131
132 typedef struct {
133 escape_t *inuse;
134 int chlen;
135 char ch[ROWCOL_MAX];
136 } _HZState;
137
/* Accessors used on the ctype encoding-info wrapper object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Names and properties of this encoding, defined before the template include. */
#define _FUNCNAME(m)			_citrus_HZ_##m
#define _ENCODING_INFO			_HZEncodingInfo
#define _ENCODING_STATE			_HZState
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* A state whose `inuse' escape is still NULL has not been initialised. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->inuse == NULL)
147
148 static __inline void
_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,_HZState * __restrict psenc)149 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
150 _HZState * __restrict psenc)
151 {
152
153 psenc->chlen = 0;
154 psenc->inuse = INIT0(ei);
155 }
156
#if 0
/* Disabled: copy the state object into the opaque buffer `pspriv'. */
static __inline void
/*ARGSUSED*/
_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _HZState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

/* Disabled: restore the state object from the opaque buffer `pspriv'. */
static __inline void
/*ARGSUSED*/
_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
    _HZState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
176
177 static int
_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_HZState * __restrict psenc,size_t * __restrict nresult)178 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
179 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
180 _HZState * __restrict psenc, size_t * __restrict nresult)
181 {
182 escape_t *candidate, *init;
183 graphic_t *graphic;
184 const range_t *range;
185 char *s0;
186 wchar_t wc;
187 int bit, ch, head, len, tail;
188
189 if (*s == NULL) {
190 _citrus_HZ_init_state(ei, psenc);
191 *nresult = 1;
192 return (0);
193 }
194 s0 = *s;
195 if (psenc->chlen < 0 || psenc->inuse == NULL)
196 return (EINVAL);
197
198 wc = (wchar_t)0;
199 bit = head = tail = 0;
200 graphic = NULL;
201 for (len = 0; len <= MB_LEN_MAX;) {
202 if (psenc->chlen == tail) {
203 if (n-- < 1) {
204 *s = s0;
205 *nresult = (size_t)-2;
206 return (0);
207 }
208 psenc->ch[psenc->chlen++] = *s0++;
209 ++len;
210 }
211 ch = (unsigned char)psenc->ch[tail++];
212 if (tail == 1) {
213 if ((ch & ~0x80) <= 0x1F) {
214 if (psenc->inuse != INIT0(ei))
215 break;
216 wc = (wchar_t)ch;
217 goto done;
218 }
219 if (ch & 0x80) {
220 graphic = GR(psenc->inuse);
221 bit = 0x80;
222 ch &= ~0x80;
223 } else {
224 graphic = GL(psenc->inuse);
225 if (ch == ESCAPE_CHAR)
226 continue;
227 bit = 0x0;
228 }
229 if (graphic == NULL)
230 break;
231 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
232 if (tail < psenc->chlen)
233 return (EINVAL);
234 if (ch == ESCAPE_CHAR) {
235 ++head;
236 } else if (ch == '\n') {
237 if (psenc->inuse != INIT0(ei))
238 break;
239 tail = psenc->chlen = 0;
240 continue;
241 } else {
242 candidate = NULL;
243 init = INIT0(ei);
244 if (psenc->inuse == init) {
245 init = INIT1(ei);
246 } else if (INIT(psenc->inuse) == init) {
247 if (ESC(init) != ch)
248 break;
249 candidate = init;
250 }
251 if (candidate == NULL) {
252 candidate = find_escape(
253 SET(psenc->inuse), ch);
254 if (candidate == NULL) {
255 if (init == NULL ||
256 ESC(init) != ch)
257 break;
258 candidate = init;
259 }
260 }
261 psenc->inuse = candidate;
262 tail = psenc->chlen = 0;
263 continue;
264 }
265 } else if (ch & 0x80) {
266 if (graphic != GR(psenc->inuse))
267 break;
268 ch &= ~0x80;
269 } else {
270 if (graphic != GL(psenc->inuse))
271 break;
272 }
273 range = &ranges[(size_t)graphic->charset];
274 if (range->start > ch || range->end < ch)
275 break;
276 wc <<= 8;
277 wc |= ch;
278 if (graphic->length == (tail - head)) {
279 if (graphic->charset > GB2312)
280 bit |= ESC(psenc->inuse) << 24;
281 wc |= bit;
282 goto done;
283 }
284 }
285 *nresult = (size_t)-1;
286 return (EILSEQ);
287 done:
288 if (tail < psenc->chlen)
289 return (EINVAL);
290 *s = s0;
291 if (pwc != NULL)
292 *pwc = wc;
293 psenc->chlen = 0;
294 *nresult = (wc == 0) ? 0 : len;
295
296 return (0);
297 }
298
299 static int
_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_HZState * __restrict psenc,size_t * __restrict nresult)300 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
301 char * __restrict s, size_t n, wchar_t wc,
302 _HZState * __restrict psenc, size_t * __restrict nresult)
303 {
304 escape_t *candidate, *init;
305 graphic_t *graphic;
306 const range_t *range;
307 size_t len;
308 int bit, ch;
309
310 if (psenc->chlen != 0 || psenc->inuse == NULL)
311 return (EINVAL);
312 if (wc & 0x80) {
313 bit = 0x80;
314 wc &= ~0x80;
315 } else {
316 bit = 0x0;
317 }
318 if ((uint32_t)wc <= 0x1F) {
319 candidate = INIT0(ei);
320 graphic = (bit == 0) ? candidate->left : candidate->right;
321 if (graphic == NULL)
322 goto ilseq;
323 range = &ranges[(size_t)CTRL];
324 len = 1;
325 } else if ((uint32_t)wc <= 0x7F) {
326 graphic = ei->ascii;
327 if (graphic == NULL)
328 goto ilseq;
329 candidate = graphic->escape;
330 range = &ranges[(size_t)graphic->charset];
331 len = graphic->length;
332 } else if ((uint32_t)wc <= 0x7F7F) {
333 graphic = ei->gb2312;
334 if (graphic == NULL)
335 goto ilseq;
336 candidate = graphic->escape;
337 range = &ranges[(size_t)graphic->charset];
338 len = graphic->length;
339 } else {
340 ch = (wc >> 24) & 0xFF;
341 candidate = find_escape(E0SET(ei), ch);
342 if (candidate == NULL) {
343 candidate = find_escape(E1SET(ei), ch);
344 if (candidate == NULL)
345 goto ilseq;
346 }
347 wc &= ~0xFF000000;
348 graphic = (bit == 0) ? candidate->left : candidate->right;
349 if (graphic == NULL)
350 goto ilseq;
351 range = &ranges[(size_t)graphic->charset];
352 len = graphic->length;
353 }
354 if (psenc->inuse != candidate) {
355 init = INIT0(ei);
356 if (SET(psenc->inuse) == SET(candidate)) {
357 if (INIT(psenc->inuse) != init ||
358 psenc->inuse == init || candidate == init)
359 init = NULL;
360 } else if (candidate == (init = INIT(candidate))) {
361 init = NULL;
362 }
363 if (init != NULL) {
364 if (n < 2)
365 return (E2BIG);
366 n -= 2;
367 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
368 psenc->ch[psenc->chlen++] = ESC(init);
369 }
370 if (n < 2)
371 return (E2BIG);
372 n -= 2;
373 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
374 psenc->ch[psenc->chlen++] = ESC(candidate);
375 psenc->inuse = candidate;
376 }
377 if (n < len)
378 return (E2BIG);
379 while (len-- > 0) {
380 ch = (wc >> (len * 8)) & 0xFF;
381 if (range->start > ch || range->end < ch)
382 goto ilseq;
383 psenc->ch[psenc->chlen++] = ch | bit;
384 }
385 memcpy(s, psenc->ch, psenc->chlen);
386 *nresult = psenc->chlen;
387 psenc->chlen = 0;
388
389 return (0);
390
391 ilseq:
392 *nresult = (size_t)-1;
393 return (EILSEQ);
394 }
395
396 static __inline int
_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,char * __restrict s,size_t n,_HZState * __restrict psenc,size_t * __restrict nresult)397 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
398 char * __restrict s, size_t n, _HZState * __restrict psenc,
399 size_t * __restrict nresult)
400 {
401 escape_t *candidate;
402
403 if (psenc->chlen != 0 || psenc->inuse == NULL)
404 return (EINVAL);
405 candidate = INIT0(ei);
406 if (psenc->inuse != candidate) {
407 if (n < 2)
408 return (E2BIG);
409 n -= 2;
410 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
411 psenc->ch[psenc->chlen++] = ESC(candidate);
412 }
413 if (n < 1)
414 return (E2BIG);
415 if (psenc->chlen > 0)
416 memcpy(s, psenc->ch, psenc->chlen);
417 *nresult = psenc->chlen;
418 _citrus_HZ_init_state(ei, psenc);
419
420 return (0);
421 }
422
423 static __inline int
_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,_HZState * __restrict psenc,int * __restrict rstate)424 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
425 _HZState * __restrict psenc, int * __restrict rstate)
426 {
427
428 if (psenc->chlen < 0 || psenc->inuse == NULL)
429 return (EINVAL);
430 *rstate = (psenc->chlen == 0)
431 ? ((psenc->inuse == INIT0(ei))
432 ? _STDENC_SDGEN_INITIAL
433 : _STDENC_SDGEN_STABLE)
434 : ((psenc->ch[0] == ESCAPE_CHAR)
435 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
436 : _STDENC_SDGEN_INCOMPLETE_CHAR);
437
438 return (0);
439 }
440
441 static __inline int
442 /*ARGSUSED*/
_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)443 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
444 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
445 {
446 int bit;
447
448 if (wc & 0x80) {
449 bit = 0x80;
450 wc &= ~0x80;
451 } else
452 bit = 0x0;
453 if ((uint32_t)wc <= 0x7F) {
454 *csid = (_csid_t)bit;
455 *idx = (_index_t)wc;
456 } else if ((uint32_t)wc <= 0x7F7F) {
457 *csid = (_csid_t)(bit | 0x8000);
458 *idx = (_index_t)wc;
459 } else {
460 *csid = (_index_t)(wc & ~0x00FFFF7F);
461 *idx = (_csid_t)(wc & 0x00FFFF7F);
462 }
463
464 return (0);
465 }
466
467 static __inline int
468 /*ARGSUSED*/
_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,wchar_t * __restrict wc,_csid_t csid,_index_t idx)469 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
470 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
471 {
472
473 *wc = (wchar_t)idx;
474 switch (csid) {
475 case 0x80:
476 case 0x8080:
477 *wc |= (wchar_t)0x80;
478 /*FALLTHROUGH*/
479 case 0x0:
480 case 0x8000:
481 break;
482 default:
483 *wc |= (wchar_t)csid;
484 }
485
486 return (0);
487 }
488
489 static void
_citrus_HZ_encoding_module_uninit(_HZEncodingInfo * ei)490 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
491 {
492 escape_t *escape;
493
494 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
495 TAILQ_REMOVE(E0SET(ei), escape, entry);
496 free(GL(escape));
497 free(GR(escape));
498 free(escape);
499 }
500 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
501 TAILQ_REMOVE(E1SET(ei), escape, entry);
502 free(GL(escape));
503 free(GR(escape));
504 free(escape);
505 }
506 }
507
508 static int
_citrus_HZ_parse_char(void * context,const char * name __unused,const char * s)509 _citrus_HZ_parse_char(void *context, const char *name __unused, const char *s)
510 {
511 escape_t *escape;
512 void **p;
513
514 p = (void **)context;
515 escape = (escape_t *)p[0];
516 if (escape->ch != '\0')
517 return (EINVAL);
518 escape->ch = *s++;
519 if (escape->ch == ESCAPE_CHAR || *s != '\0')
520 return (EINVAL);
521
522 return (0);
523 }
524
525 static int
_citrus_HZ_parse_graphic(void * context,const char * name,const char * s)526 _citrus_HZ_parse_graphic(void *context, const char *name, const char *s)
527 {
528 _HZEncodingInfo *ei;
529 escape_t *escape;
530 graphic_t *graphic;
531 void **p;
532
533 p = (void **)context;
534 escape = (escape_t *)p[0];
535 ei = (_HZEncodingInfo *)p[1];
536 graphic = calloc(1, sizeof(*graphic));
537 if (graphic == NULL)
538 return (ENOMEM);
539 if (strcmp("GL", name) == 0) {
540 if (GL(escape) != NULL)
541 goto release;
542 GL(escape) = graphic;
543 } else if (strcmp("GR", name) == 0) {
544 if (GR(escape) != NULL)
545 goto release;
546 GR(escape) = graphic;
547 } else {
548 release:
549 free(graphic);
550 return (EINVAL);
551 }
552 graphic->escape = escape;
553 if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
554 if (s[5] != '\0')
555 return (EINVAL);
556 graphic->charset = ASCII;
557 graphic->length = 1;
558 ei->ascii = graphic;
559 return (0);
560 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
561 if (s[6] != '\0')
562 return (EINVAL);
563 graphic->charset = GB2312;
564 graphic->length = 2;
565 ei->gb2312 = graphic;
566 return (0);
567 } else if (strncmp("94*", s, 3) == 0)
568 graphic->charset = CS94;
569 else if (strncmp("96*", s, 3) == 0)
570 graphic->charset = CS96;
571 else
572 return (EINVAL);
573 s += 3;
574 switch(*s) {
575 case '1': case '2': case '3':
576 graphic->length = (size_t)(*s - '0');
577 if (*++s == '\0')
578 break;
579 /*FALLTHROUGH*/
580 default:
581 return (EINVAL);
582 }
583 return (0);
584 }
585
586 static const _citrus_prop_hint_t escape_hints[] = {
587 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
588 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
589 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
590 _CITRUS_PROP_HINT_END
591 };
592
593 static int
_citrus_HZ_parse_escape(void * context,const char * name,const char * s)594 _citrus_HZ_parse_escape(void *context, const char *name, const char *s)
595 {
596 _HZEncodingInfo *ei;
597 escape_t *escape;
598 void *p[2];
599
600 ei = (_HZEncodingInfo *)context;
601 escape = calloc(1, sizeof(*escape));
602 if (escape == NULL)
603 return (EINVAL);
604 if (strcmp("0", name) == 0) {
605 escape->set = E0SET(ei);
606 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
607 } else if (strcmp("1", name) == 0) {
608 escape->set = E1SET(ei);
609 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
610 } else {
611 free(escape);
612 return (EINVAL);
613 }
614 p[0] = (void *)escape;
615 p[1] = (void *)ei;
616 return (_citrus_prop_parse_variable(
617 escape_hints, (void *)&p[0], s, strlen(s)));
618 }
619
620 static const _citrus_prop_hint_t root_hints[] = {
621 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
622 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
623 _CITRUS_PROP_HINT_END
624 };
625
626 static int
_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)627 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
628 const void * __restrict var, size_t lenvar)
629 {
630 int errnum;
631
632 memset(ei, 0, sizeof(*ei));
633 TAILQ_INIT(E0SET(ei));
634 TAILQ_INIT(E1SET(ei));
635 errnum = _citrus_prop_parse_variable(
636 root_hints, (void *)ei, var, lenvar);
637 if (errnum != 0)
638 _citrus_HZ_encoding_module_uninit(ei);
639 return (errnum);
640 }
641
642 /* ----------------------------------------------------------------------
643 * public interface for stdenc
644 */
645
646 _CITRUS_STDENC_DECLS(HZ);
647 _CITRUS_STDENC_DEF_OPS(HZ);
648
649 #include "citrus_stdenc_template.h"
650