1*260e9a87SYuri Pankov /* $Id: chars.c,v 1.66 2015/02/17 20:37:16 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 395c635efSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*260e9a87SYuri Pankov * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org> 595c635efSGarrett D'Amore * 695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 995c635efSGarrett D'Amore * 1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1795c635efSGarrett D'Amore */ 1895c635efSGarrett D'Amore #include "config.h" 19*260e9a87SYuri Pankov 20*260e9a87SYuri Pankov #include <sys/types.h> 2195c635efSGarrett D'Amore 2295c635efSGarrett D'Amore #include <assert.h> 2395c635efSGarrett D'Amore #include <ctype.h> 2495c635efSGarrett D'Amore #include <stdlib.h> 2595c635efSGarrett D'Amore #include <string.h> 2695c635efSGarrett D'Amore 2795c635efSGarrett D'Amore #include "mandoc.h" 28*260e9a87SYuri Pankov #include "mandoc_aux.h" 2995c635efSGarrett D'Amore #include "libmandoc.h" 3095c635efSGarrett D'Amore 3195c635efSGarrett D'Amore #define PRINT_HI 126 3295c635efSGarrett D'Amore #define PRINT_LO 32 3395c635efSGarrett D'Amore 3495c635efSGarrett D'Amore struct ln { 3595c635efSGarrett D'Amore struct ln *next; 3695c635efSGarrett D'Amore const char *code; 3795c635efSGarrett D'Amore const char *ascii; 3895c635efSGarrett D'Amore int unicode; 3995c635efSGarrett D'Amore }; 4095c635efSGarrett D'Amore 41*260e9a87SYuri Pankov #define LINES_MAX 332 4295c635efSGarrett D'Amore 4395c635efSGarrett D'Amore #define CHAR(in, ch, code) \ 4495c635efSGarrett D'Amore { NULL, (in), (ch), (code) }, 4595c635efSGarrett D'Amore 4695c635efSGarrett D'Amore #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 4795c635efSGarrett D'Amore #define CHAR_TBL_END }; 4895c635efSGarrett D'Amore 4995c635efSGarrett D'Amore #include "chars.in" 5095c635efSGarrett D'Amore 5195c635efSGarrett D'Amore struct mchars { 5295c635efSGarrett D'Amore struct ln **htab; 5395c635efSGarrett D'Amore }; 5495c635efSGarrett D'Amore 5595c635efSGarrett D'Amore static const struct ln *find(const struct mchars *, 5695c635efSGarrett D'Amore const char *, size_t); 5795c635efSGarrett D'Amore 58*260e9a87SYuri Pankov 5995c635efSGarrett D'Amore void 6095c635efSGarrett D'Amore mchars_free(struct mchars *arg) 6195c635efSGarrett D'Amore { 6295c635efSGarrett D'Amore 6395c635efSGarrett D'Amore free(arg->htab); 6495c635efSGarrett D'Amore free(arg); 6595c635efSGarrett D'Amore } 6695c635efSGarrett D'Amore 6795c635efSGarrett D'Amore struct mchars * 6895c635efSGarrett D'Amore mchars_alloc(void) 6995c635efSGarrett D'Amore { 7095c635efSGarrett D'Amore struct mchars *tab; 7195c635efSGarrett D'Amore struct ln **htab; 7295c635efSGarrett D'Amore struct ln *pp; 7395c635efSGarrett D'Amore int i, hash; 7495c635efSGarrett D'Amore 7595c635efSGarrett D'Amore /* 7695c635efSGarrett D'Amore * Constructs a very basic chaining hashtable. The hash routine 7795c635efSGarrett D'Amore * is simply the integral value of the first character. 7895c635efSGarrett D'Amore * Subsequent entries are chained in the order they're processed. 7995c635efSGarrett D'Amore */ 8095c635efSGarrett D'Amore 8195c635efSGarrett D'Amore tab = mandoc_malloc(sizeof(struct mchars)); 82698f87a4SGarrett D'Amore htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *)); 8395c635efSGarrett D'Amore 8495c635efSGarrett D'Amore for (i = 0; i < LINES_MAX; i++) { 8595c635efSGarrett D'Amore hash = (int)lines[i].code[0] - PRINT_LO; 8695c635efSGarrett D'Amore 8795c635efSGarrett D'Amore if (NULL == (pp = htab[hash])) { 8895c635efSGarrett D'Amore htab[hash] = &lines[i]; 8995c635efSGarrett D'Amore continue; 9095c635efSGarrett D'Amore } 9195c635efSGarrett D'Amore 9295c635efSGarrett D'Amore for ( ; pp->next; pp = pp->next) 9395c635efSGarrett D'Amore /* Scan ahead. */ ; 9495c635efSGarrett D'Amore pp->next = &lines[i]; 9595c635efSGarrett D'Amore } 9695c635efSGarrett D'Amore 9795c635efSGarrett D'Amore tab->htab = htab; 9895c635efSGarrett D'Amore return(tab); 9995c635efSGarrett D'Amore } 10095c635efSGarrett D'Amore 10195c635efSGarrett D'Amore int 10295c635efSGarrett D'Amore mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) 10395c635efSGarrett D'Amore { 10495c635efSGarrett D'Amore const struct ln *ln; 10595c635efSGarrett D'Amore 10695c635efSGarrett D'Amore ln = find(arg, p, sz); 107*260e9a87SYuri Pankov return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1); 10895c635efSGarrett D'Amore } 10995c635efSGarrett D'Amore 110*260e9a87SYuri Pankov int 11195c635efSGarrett D'Amore mchars_num2char(const char *p, size_t sz) 11295c635efSGarrett D'Amore { 11395c635efSGarrett D'Amore int i; 11495c635efSGarrett D'Amore 115*260e9a87SYuri Pankov i = mandoc_strntoi(p, sz, 10); 116*260e9a87SYuri Pankov return(i >= 0 && i < 256 ? i : -1); 11795c635efSGarrett D'Amore } 11895c635efSGarrett D'Amore 11995c635efSGarrett D'Amore int 12095c635efSGarrett D'Amore mchars_num2uc(const char *p, size_t sz) 12195c635efSGarrett D'Amore { 12295c635efSGarrett D'Amore int i; 12395c635efSGarrett D'Amore 124*260e9a87SYuri Pankov i = mandoc_strntoi(p, sz, 16); 125*260e9a87SYuri Pankov assert(i >= 0 && i <= 0x10FFFF); 126*260e9a87SYuri Pankov return(i); 12795c635efSGarrett D'Amore } 12895c635efSGarrett D'Amore 12995c635efSGarrett D'Amore const char * 13095c635efSGarrett D'Amore mchars_spec2str(const struct mchars *arg, 13195c635efSGarrett D'Amore const char *p, size_t sz, size_t *rsz) 13295c635efSGarrett D'Amore { 13395c635efSGarrett D'Amore const struct ln *ln; 13495c635efSGarrett D'Amore 13595c635efSGarrett D'Amore ln = find(arg, p, sz); 136*260e9a87SYuri Pankov if (ln == NULL) { 13795c635efSGarrett D'Amore *rsz = 1; 138*260e9a87SYuri Pankov return(sz == 1 ? p : NULL); 13995c635efSGarrett D'Amore } 14095c635efSGarrett D'Amore 14195c635efSGarrett D'Amore *rsz = strlen(ln->ascii); 14295c635efSGarrett D'Amore return(ln->ascii); 14395c635efSGarrett D'Amore } 14495c635efSGarrett D'Amore 145*260e9a87SYuri Pankov const char * 146*260e9a87SYuri Pankov mchars_uc2str(int uc) 147*260e9a87SYuri Pankov { 148*260e9a87SYuri Pankov int i; 149*260e9a87SYuri Pankov 150*260e9a87SYuri Pankov for (i = 0; i < LINES_MAX; i++) 151*260e9a87SYuri Pankov if (uc == lines[i].unicode) 152*260e9a87SYuri Pankov return(lines[i].ascii); 153*260e9a87SYuri Pankov return("<?>"); 154*260e9a87SYuri Pankov } 155*260e9a87SYuri Pankov 15695c635efSGarrett D'Amore static const struct ln * 15795c635efSGarrett D'Amore find(const struct mchars *tab, const char *p, size_t sz) 15895c635efSGarrett D'Amore { 15995c635efSGarrett D'Amore const struct ln *pp; 16095c635efSGarrett D'Amore int hash; 16195c635efSGarrett D'Amore 16295c635efSGarrett D'Amore assert(p); 16395c635efSGarrett D'Amore 16495c635efSGarrett D'Amore if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) 16595c635efSGarrett D'Amore return(NULL); 16695c635efSGarrett D'Amore 16795c635efSGarrett D'Amore hash = (int)p[0] - PRINT_LO; 16895c635efSGarrett D'Amore 16995c635efSGarrett D'Amore for (pp = tab->htab[hash]; pp; pp = pp->next) 17095c635efSGarrett D'Amore if (0 == strncmp(pp->code, p, sz) && 17195c635efSGarrett D'Amore '\0' == pp->code[(int)sz]) 17295c635efSGarrett D'Amore return(pp); 17395c635efSGarrett D'Amore 17495c635efSGarrett D'Amore return(NULL); 17595c635efSGarrett D'Amore } 176