/*	$Id: chars.c,v 1.54 2013/06/20 22:39:30 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1895c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H
1995c635efSGarrett D'Amore #include "config.h"
2095c635efSGarrett D'Amore #endif
2195c635efSGarrett D'Amore
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
2495c635efSGarrett D'Amore #include <stdlib.h>
2595c635efSGarrett D'Amore #include <string.h>
2695c635efSGarrett D'Amore
2795c635efSGarrett D'Amore #include "mandoc.h"
2895c635efSGarrett D'Amore #include "libmandoc.h"
2995c635efSGarrett D'Amore
/*
 * Inclusive bounds of the printable ASCII range.  The first byte of a
 * lookup key must fall inside it; the bucket index is that byte minus
 * PRINT_LO.
 */
#define	PRINT_LO	 32	/* ' ' */
#define	PRINT_HI	 126	/* '~' */
3295c635efSGarrett D'Amore
/*
 * One row of the special-character table.  Rows that land in the same
 * hash bucket are linked through `next'.  The member order matters:
 * the CHAR() initializer fills the members positionally.
 */
struct	ln {
	struct ln	 *next;		/* next row in the same bucket */
	const char	 *code;		/* lookup key matched by find() */
	const char	 *ascii;	/* string handed out by mchars_spec2str() */
	int		  unicode;	/* value handed out by mchars_spec2cp() */
};
3995c635efSGarrett D'Amore
40*698f87a4SGarrett D'Amore #define LINES_MAX 329
4195c635efSGarrett D'Amore
4295c635efSGarrett D'Amore #define CHAR(in, ch, code) \
4395c635efSGarrett D'Amore { NULL, (in), (ch), (code) },
4495c635efSGarrett D'Amore
4595c635efSGarrett D'Amore #define CHAR_TBL_START static struct ln lines[LINES_MAX] = {
4695c635efSGarrett D'Amore #define CHAR_TBL_END };
4795c635efSGarrett D'Amore
4895c635efSGarrett D'Amore #include "chars.in"
4995c635efSGarrett D'Amore
/*
 * Lookup handle returned by mchars_alloc(): an array of bucket heads,
 * indexed by the first byte of a key minus PRINT_LO.
 */
struct	mchars {
	struct ln	**htab;
};
5395c635efSGarrett D'Amore
5495c635efSGarrett D'Amore static const struct ln *find(const struct mchars *,
5595c635efSGarrett D'Amore const char *, size_t);
5695c635efSGarrett D'Amore
5795c635efSGarrett D'Amore void
mchars_free(struct mchars * arg)5895c635efSGarrett D'Amore mchars_free(struct mchars *arg)
5995c635efSGarrett D'Amore {
6095c635efSGarrett D'Amore
6195c635efSGarrett D'Amore free(arg->htab);
6295c635efSGarrett D'Amore free(arg);
6395c635efSGarrett D'Amore }
6495c635efSGarrett D'Amore
6595c635efSGarrett D'Amore struct mchars *
mchars_alloc(void)6695c635efSGarrett D'Amore mchars_alloc(void)
6795c635efSGarrett D'Amore {
6895c635efSGarrett D'Amore struct mchars *tab;
6995c635efSGarrett D'Amore struct ln **htab;
7095c635efSGarrett D'Amore struct ln *pp;
7195c635efSGarrett D'Amore int i, hash;
7295c635efSGarrett D'Amore
7395c635efSGarrett D'Amore /*
7495c635efSGarrett D'Amore * Constructs a very basic chaining hashtable. The hash routine
7595c635efSGarrett D'Amore * is simply the integral value of the first character.
7695c635efSGarrett D'Amore * Subsequent entries are chained in the order they're processed.
7795c635efSGarrett D'Amore */
7895c635efSGarrett D'Amore
7995c635efSGarrett D'Amore tab = mandoc_malloc(sizeof(struct mchars));
80*698f87a4SGarrett D'Amore htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
8195c635efSGarrett D'Amore
8295c635efSGarrett D'Amore for (i = 0; i < LINES_MAX; i++) {
8395c635efSGarrett D'Amore hash = (int)lines[i].code[0] - PRINT_LO;
8495c635efSGarrett D'Amore
8595c635efSGarrett D'Amore if (NULL == (pp = htab[hash])) {
8695c635efSGarrett D'Amore htab[hash] = &lines[i];
8795c635efSGarrett D'Amore continue;
8895c635efSGarrett D'Amore }
8995c635efSGarrett D'Amore
9095c635efSGarrett D'Amore for ( ; pp->next; pp = pp->next)
9195c635efSGarrett D'Amore /* Scan ahead. */ ;
9295c635efSGarrett D'Amore pp->next = &lines[i];
9395c635efSGarrett D'Amore }
9495c635efSGarrett D'Amore
9595c635efSGarrett D'Amore tab->htab = htab;
9695c635efSGarrett D'Amore return(tab);
9795c635efSGarrett D'Amore }
9895c635efSGarrett D'Amore
9995c635efSGarrett D'Amore int
mchars_spec2cp(const struct mchars * arg,const char * p,size_t sz)10095c635efSGarrett D'Amore mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
10195c635efSGarrett D'Amore {
10295c635efSGarrett D'Amore const struct ln *ln;
10395c635efSGarrett D'Amore
10495c635efSGarrett D'Amore ln = find(arg, p, sz);
10595c635efSGarrett D'Amore if (NULL == ln)
10695c635efSGarrett D'Amore return(-1);
10795c635efSGarrett D'Amore return(ln->unicode);
10895c635efSGarrett D'Amore }
10995c635efSGarrett D'Amore
/*
 * Convert the first sz bytes of p with mandoc_strntoi() in base 10 and
 * return the result as a character.  Anything outside 1..255, or
 * anything isprint() rejects, yields '\0'.
 */
char
mchars_num2char(const char *p, size_t sz)
{
	int	 num;

	num = mandoc_strntoi(p, sz, 10);

	/* Also covers a negative result from the conversion. */
	if (num <= 0 || num >= 256)
		return('\0');
	if ( ! isprint(num))
		return('\0');

	return(/* LINTED */ num);
}
12095c635efSGarrett D'Amore
/*
 * Convert the first sz bytes of p with mandoc_strntoi() in base 16 and
 * return the result as a Unicode codepoint.
 *
 * Returns '\0' (zero) when the conversion reports a negative value,
 * when the value lies outside the accepted range 0x81..0x10FFFF, or
 * when it is a UTF-16 surrogate.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int	 i;

	if ((i = mandoc_strntoi(p, sz, 16)) < 0)
		return('\0');

	/*
	 * U+D800..U+DFFF are reserved for UTF-16 surrogate pairs; they
	 * are not Unicode scalar values and cannot be encoded as a
	 * character on their own, so refuse them.
	 */
	if (i >= 0xD800 && i <= 0xDFFF)
		return('\0');

	/*
	 * NOTE(review): the lower bound is strict, so 0x80 itself is
	 * rejected.  Kept unchanged; confirm whether that is intended.
	 */
	return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
}
13195c635efSGarrett D'Amore
13295c635efSGarrett D'Amore const char *
mchars_spec2str(const struct mchars * arg,const char * p,size_t sz,size_t * rsz)13395c635efSGarrett D'Amore mchars_spec2str(const struct mchars *arg,
13495c635efSGarrett D'Amore const char *p, size_t sz, size_t *rsz)
13595c635efSGarrett D'Amore {
13695c635efSGarrett D'Amore const struct ln *ln;
13795c635efSGarrett D'Amore
13895c635efSGarrett D'Amore ln = find(arg, p, sz);
13995c635efSGarrett D'Amore if (NULL == ln) {
14095c635efSGarrett D'Amore *rsz = 1;
14195c635efSGarrett D'Amore return(NULL);
14295c635efSGarrett D'Amore }
14395c635efSGarrett D'Amore
14495c635efSGarrett D'Amore *rsz = strlen(ln->ascii);
14595c635efSGarrett D'Amore return(ln->ascii);
14695c635efSGarrett D'Amore }
14795c635efSGarrett D'Amore
14895c635efSGarrett D'Amore static const struct ln *
find(const struct mchars * tab,const char * p,size_t sz)14995c635efSGarrett D'Amore find(const struct mchars *tab, const char *p, size_t sz)
15095c635efSGarrett D'Amore {
15195c635efSGarrett D'Amore const struct ln *pp;
15295c635efSGarrett D'Amore int hash;
15395c635efSGarrett D'Amore
15495c635efSGarrett D'Amore assert(p);
15595c635efSGarrett D'Amore
15695c635efSGarrett D'Amore if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
15795c635efSGarrett D'Amore return(NULL);
15895c635efSGarrett D'Amore
15995c635efSGarrett D'Amore hash = (int)p[0] - PRINT_LO;
16095c635efSGarrett D'Amore
16195c635efSGarrett D'Amore for (pp = tab->htab[hash]; pp; pp = pp->next)
16295c635efSGarrett D'Amore if (0 == strncmp(pp->code, p, sz) &&
16395c635efSGarrett D'Amore '\0' == pp->code[(int)sz])
16495c635efSGarrett D'Amore return(pp);
16595c635efSGarrett D'Amore
16695c635efSGarrett D'Amore return(NULL);
16795c635efSGarrett D'Amore }
168