/* $Id: chars.c,v 1.52 2011/11/08 00:15:23 kristaps Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*95c635efSGarrett D'Amore */ 18*95c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H 19*95c635efSGarrett D'Amore #include "config.h" 20*95c635efSGarrett D'Amore #endif 21*95c635efSGarrett D'Amore 22*95c635efSGarrett D'Amore #include <assert.h> 23*95c635efSGarrett D'Amore #include <ctype.h> 24*95c635efSGarrett D'Amore #include <stdlib.h> 25*95c635efSGarrett D'Amore #include <string.h> 26*95c635efSGarrett D'Amore 27*95c635efSGarrett D'Amore #include "mandoc.h" 28*95c635efSGarrett D'Amore #include "libmandoc.h" 29*95c635efSGarrett D'Amore 30*95c635efSGarrett D'Amore #define PRINT_HI 126 31*95c635efSGarrett D'Amore #define PRINT_LO 32 32*95c635efSGarrett D'Amore 33*95c635efSGarrett D'Amore struct ln { 34*95c635efSGarrett D'Amore struct ln *next; 35*95c635efSGarrett D'Amore const char *code; 36*95c635efSGarrett D'Amore const char *ascii; 37*95c635efSGarrett D'Amore int unicode; 38*95c635efSGarrett D'Amore }; 39*95c635efSGarrett D'Amore 40*95c635efSGarrett D'Amore #define LINES_MAX 328 41*95c635efSGarrett D'Amore 42*95c635efSGarrett D'Amore #define CHAR(in, ch, code) \ 43*95c635efSGarrett D'Amore { NULL, (in), (ch), (code) }, 44*95c635efSGarrett D'Amore 45*95c635efSGarrett D'Amore #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 46*95c635efSGarrett D'Amore #define CHAR_TBL_END }; 47*95c635efSGarrett D'Amore 48*95c635efSGarrett D'Amore #include "chars.in" 49*95c635efSGarrett D'Amore 50*95c635efSGarrett D'Amore struct mchars { 51*95c635efSGarrett D'Amore struct ln **htab; 52*95c635efSGarrett D'Amore }; 53*95c635efSGarrett D'Amore 54*95c635efSGarrett D'Amore static const struct ln *find(const struct mchars *, 55*95c635efSGarrett D'Amore const char *, size_t); 56*95c635efSGarrett D'Amore 57*95c635efSGarrett D'Amore void 58*95c635efSGarrett D'Amore mchars_free(struct mchars *arg) 59*95c635efSGarrett D'Amore { 60*95c635efSGarrett D'Amore 61*95c635efSGarrett D'Amore free(arg->htab); 62*95c635efSGarrett D'Amore free(arg); 63*95c635efSGarrett D'Amore } 64*95c635efSGarrett 
D'Amore 65*95c635efSGarrett D'Amore struct mchars * 66*95c635efSGarrett D'Amore mchars_alloc(void) 67*95c635efSGarrett D'Amore { 68*95c635efSGarrett D'Amore struct mchars *tab; 69*95c635efSGarrett D'Amore struct ln **htab; 70*95c635efSGarrett D'Amore struct ln *pp; 71*95c635efSGarrett D'Amore int i, hash; 72*95c635efSGarrett D'Amore 73*95c635efSGarrett D'Amore /* 74*95c635efSGarrett D'Amore * Constructs a very basic chaining hashtable. The hash routine 75*95c635efSGarrett D'Amore * is simply the integral value of the first character. 76*95c635efSGarrett D'Amore * Subsequent entries are chained in the order they're processed. 77*95c635efSGarrett D'Amore */ 78*95c635efSGarrett D'Amore 79*95c635efSGarrett D'Amore tab = mandoc_malloc(sizeof(struct mchars)); 80*95c635efSGarrett D'Amore htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); 81*95c635efSGarrett D'Amore 82*95c635efSGarrett D'Amore for (i = 0; i < LINES_MAX; i++) { 83*95c635efSGarrett D'Amore hash = (int)lines[i].code[0] - PRINT_LO; 84*95c635efSGarrett D'Amore 85*95c635efSGarrett D'Amore if (NULL == (pp = htab[hash])) { 86*95c635efSGarrett D'Amore htab[hash] = &lines[i]; 87*95c635efSGarrett D'Amore continue; 88*95c635efSGarrett D'Amore } 89*95c635efSGarrett D'Amore 90*95c635efSGarrett D'Amore for ( ; pp->next; pp = pp->next) 91*95c635efSGarrett D'Amore /* Scan ahead. 
*/ ; 92*95c635efSGarrett D'Amore pp->next = &lines[i]; 93*95c635efSGarrett D'Amore } 94*95c635efSGarrett D'Amore 95*95c635efSGarrett D'Amore tab->htab = htab; 96*95c635efSGarrett D'Amore return(tab); 97*95c635efSGarrett D'Amore } 98*95c635efSGarrett D'Amore 99*95c635efSGarrett D'Amore int 100*95c635efSGarrett D'Amore mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) 101*95c635efSGarrett D'Amore { 102*95c635efSGarrett D'Amore const struct ln *ln; 103*95c635efSGarrett D'Amore 104*95c635efSGarrett D'Amore ln = find(arg, p, sz); 105*95c635efSGarrett D'Amore if (NULL == ln) 106*95c635efSGarrett D'Amore return(-1); 107*95c635efSGarrett D'Amore return(ln->unicode); 108*95c635efSGarrett D'Amore } 109*95c635efSGarrett D'Amore 110*95c635efSGarrett D'Amore char 111*95c635efSGarrett D'Amore mchars_num2char(const char *p, size_t sz) 112*95c635efSGarrett D'Amore { 113*95c635efSGarrett D'Amore int i; 114*95c635efSGarrett D'Amore 115*95c635efSGarrett D'Amore if ((i = mandoc_strntoi(p, sz, 10)) < 0) 116*95c635efSGarrett D'Amore return('\0'); 117*95c635efSGarrett D'Amore return(i > 0 && i < 256 && isprint(i) ? 118*95c635efSGarrett D'Amore /* LINTED */ i : '\0'); 119*95c635efSGarrett D'Amore } 120*95c635efSGarrett D'Amore 121*95c635efSGarrett D'Amore int 122*95c635efSGarrett D'Amore mchars_num2uc(const char *p, size_t sz) 123*95c635efSGarrett D'Amore { 124*95c635efSGarrett D'Amore int i; 125*95c635efSGarrett D'Amore 126*95c635efSGarrett D'Amore if ((i = mandoc_strntoi(p, sz, 16)) < 0) 127*95c635efSGarrett D'Amore return('\0'); 128*95c635efSGarrett D'Amore /* FIXME: make sure we're not in a bogus range. */ 129*95c635efSGarrett D'Amore return(i > 0x80 && i <= 0x10FFFF ? 
i : '\0'); 130*95c635efSGarrett D'Amore } 131*95c635efSGarrett D'Amore 132*95c635efSGarrett D'Amore const char * 133*95c635efSGarrett D'Amore mchars_spec2str(const struct mchars *arg, 134*95c635efSGarrett D'Amore const char *p, size_t sz, size_t *rsz) 135*95c635efSGarrett D'Amore { 136*95c635efSGarrett D'Amore const struct ln *ln; 137*95c635efSGarrett D'Amore 138*95c635efSGarrett D'Amore ln = find(arg, p, sz); 139*95c635efSGarrett D'Amore if (NULL == ln) { 140*95c635efSGarrett D'Amore *rsz = 1; 141*95c635efSGarrett D'Amore return(NULL); 142*95c635efSGarrett D'Amore } 143*95c635efSGarrett D'Amore 144*95c635efSGarrett D'Amore *rsz = strlen(ln->ascii); 145*95c635efSGarrett D'Amore return(ln->ascii); 146*95c635efSGarrett D'Amore } 147*95c635efSGarrett D'Amore 148*95c635efSGarrett D'Amore static const struct ln * 149*95c635efSGarrett D'Amore find(const struct mchars *tab, const char *p, size_t sz) 150*95c635efSGarrett D'Amore { 151*95c635efSGarrett D'Amore const struct ln *pp; 152*95c635efSGarrett D'Amore int hash; 153*95c635efSGarrett D'Amore 154*95c635efSGarrett D'Amore assert(p); 155*95c635efSGarrett D'Amore 156*95c635efSGarrett D'Amore if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) 157*95c635efSGarrett D'Amore return(NULL); 158*95c635efSGarrett D'Amore 159*95c635efSGarrett D'Amore hash = (int)p[0] - PRINT_LO; 160*95c635efSGarrett D'Amore 161*95c635efSGarrett D'Amore for (pp = tab->htab[hash]; pp; pp = pp->next) 162*95c635efSGarrett D'Amore if (0 == strncmp(pp->code, p, sz) && 163*95c635efSGarrett D'Amore '\0' == pp->code[(int)sz]) 164*95c635efSGarrett D'Amore return(pp); 165*95c635efSGarrett D'Amore 166*95c635efSGarrett D'Amore return(NULL); 167*95c635efSGarrett D'Amore } 168