xref: /titanic_50/usr/src/cmd/mandoc/chars.c (revision 260e9a87725c090ba5835b1f9f0b62fa2f96036f)
1*260e9a87SYuri Pankov /*	$Id: chars.c,v 1.66 2015/02/17 20:37:16 schwarze Exp $ */
295c635efSGarrett D'Amore /*
395c635efSGarrett D'Amore  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*260e9a87SYuri Pankov  * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org>
595c635efSGarrett D'Amore  *
695c635efSGarrett D'Amore  * Permission to use, copy, modify, and distribute this software for any
795c635efSGarrett D'Amore  * purpose with or without fee is hereby granted, provided that the above
895c635efSGarrett D'Amore  * copyright notice and this permission notice appear in all copies.
995c635efSGarrett D'Amore  *
1095c635efSGarrett D'Amore  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1195c635efSGarrett D'Amore  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1295c635efSGarrett D'Amore  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1395c635efSGarrett D'Amore  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1495c635efSGarrett D'Amore  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1595c635efSGarrett D'Amore  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1695c635efSGarrett D'Amore  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1795c635efSGarrett D'Amore  */
1895c635efSGarrett D'Amore #include "config.h"
19*260e9a87SYuri Pankov 
20*260e9a87SYuri Pankov #include <sys/types.h>
2195c635efSGarrett D'Amore 
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
2495c635efSGarrett D'Amore #include <stdlib.h>
2595c635efSGarrett D'Amore #include <string.h>
2695c635efSGarrett D'Amore 
2795c635efSGarrett D'Amore #include "mandoc.h"
28*260e9a87SYuri Pankov #include "mandoc_aux.h"
2995c635efSGarrett D'Amore #include "libmandoc.h"
3095c635efSGarrett D'Amore 
/*
 * Inclusive bounds of the printable ASCII range.  The first byte of
 * an escape name has to fall inside them; offset by PRINT_LO, that
 * byte selects the hash bucket.
 */
#define	PRINT_HI	 126
#define	PRINT_LO	 32
3395c635efSGarrett D'Amore 
/*
 * One special character.  Entries are stored in the static lines[]
 * table and, once hashed, are linked into per-bucket chains.
 */
struct	ln {
	struct ln	 *next;		/* next entry in the same bucket */
	const char	 *code;		/* escape name, the lookup key */
	const char	 *ascii;	/* ASCII rendering */
	int		  unicode;	/* Unicode codepoint */
};
4095c635efSGarrett D'Amore 
/*
 * Number of elements in lines[].
 * NOTE(review): presumably equal to the number of CHAR() entries in
 * chars.in.  mchars_alloc() reads the code of all LINES_MAX slots,
 * so an unfilled slot would be a NULL dereference; confirm whenever
 * chars.in changes.
 */
#define	LINES_MAX	  332

/* One table row: the chain link starts out empty. */
#define CHAR(in, ch, code)	{ NULL, (in), (ch), (code) },

/* Opening and closing of the table definition wrapped around the rows. */
#define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
#define	CHAR_TBL_END	  };
4895c635efSGarrett D'Amore 
4995c635efSGarrett D'Amore #include "chars.in"
5095c635efSGarrett D'Amore 
/*
 * Character table handle handed out by mchars_alloc().
 */
struct	mchars {
	struct ln	**htab;		/* bucket heads, indexed by hash */
};
5495c635efSGarrett D'Amore 
/* Hash lookup of an escape name; defined later in this file. */
static	const struct ln	 *find(const struct mchars *, const char *, size_t);
5795c635efSGarrett D'Amore 
58*260e9a87SYuri Pankov 
5995c635efSGarrett D'Amore void
mchars_free(struct mchars * arg)6095c635efSGarrett D'Amore mchars_free(struct mchars *arg)
6195c635efSGarrett D'Amore {
6295c635efSGarrett D'Amore 
6395c635efSGarrett D'Amore 	free(arg->htab);
6495c635efSGarrett D'Amore 	free(arg);
6595c635efSGarrett D'Amore }
6695c635efSGarrett D'Amore 
6795c635efSGarrett D'Amore struct mchars *
mchars_alloc(void)6895c635efSGarrett D'Amore mchars_alloc(void)
6995c635efSGarrett D'Amore {
7095c635efSGarrett D'Amore 	struct mchars	 *tab;
7195c635efSGarrett D'Amore 	struct ln	**htab;
7295c635efSGarrett D'Amore 	struct ln	 *pp;
7395c635efSGarrett D'Amore 	int		  i, hash;
7495c635efSGarrett D'Amore 
7595c635efSGarrett D'Amore 	/*
7695c635efSGarrett D'Amore 	 * Constructs a very basic chaining hashtable.  The hash routine
7795c635efSGarrett D'Amore 	 * is simply the integral value of the first character.
7895c635efSGarrett D'Amore 	 * Subsequent entries are chained in the order they're processed.
7995c635efSGarrett D'Amore 	 */
8095c635efSGarrett D'Amore 
8195c635efSGarrett D'Amore 	tab = mandoc_malloc(sizeof(struct mchars));
82698f87a4SGarrett D'Amore 	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
8395c635efSGarrett D'Amore 
8495c635efSGarrett D'Amore 	for (i = 0; i < LINES_MAX; i++) {
8595c635efSGarrett D'Amore 		hash = (int)lines[i].code[0] - PRINT_LO;
8695c635efSGarrett D'Amore 
8795c635efSGarrett D'Amore 		if (NULL == (pp = htab[hash])) {
8895c635efSGarrett D'Amore 			htab[hash] = &lines[i];
8995c635efSGarrett D'Amore 			continue;
9095c635efSGarrett D'Amore 		}
9195c635efSGarrett D'Amore 
9295c635efSGarrett D'Amore 		for ( ; pp->next; pp = pp->next)
9395c635efSGarrett D'Amore 			/* Scan ahead. */ ;
9495c635efSGarrett D'Amore 		pp->next = &lines[i];
9595c635efSGarrett D'Amore 	}
9695c635efSGarrett D'Amore 
9795c635efSGarrett D'Amore 	tab->htab = htab;
9895c635efSGarrett D'Amore 	return(tab);
9995c635efSGarrett D'Amore }
10095c635efSGarrett D'Amore 
10195c635efSGarrett D'Amore int
mchars_spec2cp(const struct mchars * arg,const char * p,size_t sz)10295c635efSGarrett D'Amore mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
10395c635efSGarrett D'Amore {
10495c635efSGarrett D'Amore 	const struct ln	*ln;
10595c635efSGarrett D'Amore 
10695c635efSGarrett D'Amore 	ln = find(arg, p, sz);
107*260e9a87SYuri Pankov 	return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1);
10895c635efSGarrett D'Amore }
10995c635efSGarrett D'Amore 
/*
 * Convert the first sz bytes of p, read as a decimal number by
 * mandoc_strntoi(), into a character code.
 * Returns the value if it lies within 0..255, otherwise -1.
 */
int
mchars_num2char(const char *p, size_t sz)
{
	int	  val;

	val = mandoc_strntoi(p, sz, 10);
	if (val < 0 || val > 255)
		return(-1);
	return(val);
}
11895c635efSGarrett D'Amore 
/*
 * Convert the first sz bytes of p, read as a hexadecimal number by
 * mandoc_strntoi(), into a Unicode codepoint.
 * The result is asserted to lie within 0..0x10FFFF, so a value
 * outside that range trips the assertion instead of being reported
 * to the caller.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	const int	 i = mandoc_strntoi(p, sz, 16);

	assert(i >= 0 && i <= 0x10FFFF);
	return(i);
}
12895c635efSGarrett D'Amore 
12995c635efSGarrett D'Amore const char *
mchars_spec2str(const struct mchars * arg,const char * p,size_t sz,size_t * rsz)13095c635efSGarrett D'Amore mchars_spec2str(const struct mchars *arg,
13195c635efSGarrett D'Amore 		const char *p, size_t sz, size_t *rsz)
13295c635efSGarrett D'Amore {
13395c635efSGarrett D'Amore 	const struct ln	*ln;
13495c635efSGarrett D'Amore 
13595c635efSGarrett D'Amore 	ln = find(arg, p, sz);
136*260e9a87SYuri Pankov 	if (ln == NULL) {
13795c635efSGarrett D'Amore 		*rsz = 1;
138*260e9a87SYuri Pankov 		return(sz == 1 ? p : NULL);
13995c635efSGarrett D'Amore 	}
14095c635efSGarrett D'Amore 
14195c635efSGarrett D'Amore 	*rsz = strlen(ln->ascii);
14295c635efSGarrett D'Amore 	return(ln->ascii);
14395c635efSGarrett D'Amore }
14495c635efSGarrett D'Amore 
145*260e9a87SYuri Pankov const char *
mchars_uc2str(int uc)146*260e9a87SYuri Pankov mchars_uc2str(int uc)
147*260e9a87SYuri Pankov {
148*260e9a87SYuri Pankov 	int	 i;
149*260e9a87SYuri Pankov 
150*260e9a87SYuri Pankov 	for (i = 0; i < LINES_MAX; i++)
151*260e9a87SYuri Pankov 		if (uc == lines[i].unicode)
152*260e9a87SYuri Pankov 			return(lines[i].ascii);
153*260e9a87SYuri Pankov 	return("<?>");
154*260e9a87SYuri Pankov }
155*260e9a87SYuri Pankov 
15695c635efSGarrett D'Amore static const struct ln *
find(const struct mchars * tab,const char * p,size_t sz)15795c635efSGarrett D'Amore find(const struct mchars *tab, const char *p, size_t sz)
15895c635efSGarrett D'Amore {
15995c635efSGarrett D'Amore 	const struct ln	 *pp;
16095c635efSGarrett D'Amore 	int		  hash;
16195c635efSGarrett D'Amore 
16295c635efSGarrett D'Amore 	assert(p);
16395c635efSGarrett D'Amore 
16495c635efSGarrett D'Amore 	if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
16595c635efSGarrett D'Amore 		return(NULL);
16695c635efSGarrett D'Amore 
16795c635efSGarrett D'Amore 	hash = (int)p[0] - PRINT_LO;
16895c635efSGarrett D'Amore 
16995c635efSGarrett D'Amore 	for (pp = tab->htab[hash]; pp; pp = pp->next)
17095c635efSGarrett D'Amore 		if (0 == strncmp(pp->code, p, sz) &&
17195c635efSGarrett D'Amore 		    '\0' == pp->code[(int)sz])
17295c635efSGarrett D'Amore 			return(pp);
17395c635efSGarrett D'Amore 
17495c635efSGarrett D'Amore 	return(NULL);
17595c635efSGarrett D'Amore }
176