1*61d06d6bSBaptiste Daroussin /* $Id: chars.c,v 1.73 2017/08/23 13:01:29 schwarze Exp $ */ 2*61d06d6bSBaptiste Daroussin /* 3*61d06d6bSBaptiste Daroussin * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*61d06d6bSBaptiste Daroussin * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> 5*61d06d6bSBaptiste Daroussin * 6*61d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 7*61d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 8*61d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 9*61d06d6bSBaptiste Daroussin * 10*61d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11*61d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*61d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13*61d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*61d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*61d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*61d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17*61d06d6bSBaptiste Daroussin */ 18*61d06d6bSBaptiste Daroussin #include "config.h" 19*61d06d6bSBaptiste Daroussin 20*61d06d6bSBaptiste Daroussin #include <sys/types.h> 21*61d06d6bSBaptiste Daroussin 22*61d06d6bSBaptiste Daroussin #include <assert.h> 23*61d06d6bSBaptiste Daroussin #include <ctype.h> 24*61d06d6bSBaptiste Daroussin #include <stddef.h> 25*61d06d6bSBaptiste Daroussin #include <stdint.h> 26*61d06d6bSBaptiste Daroussin #include <stdlib.h> 27*61d06d6bSBaptiste Daroussin #include <string.h> 28*61d06d6bSBaptiste Daroussin 29*61d06d6bSBaptiste Daroussin #include "mandoc.h" 30*61d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 31*61d06d6bSBaptiste Daroussin #include "mandoc_ohash.h" 32*61d06d6bSBaptiste Daroussin #include "libmandoc.h" 33*61d06d6bSBaptiste Daroussin 34*61d06d6bSBaptiste Daroussin struct ln { 35*61d06d6bSBaptiste Daroussin const char roffcode[16]; 36*61d06d6bSBaptiste Daroussin const char *ascii; 37*61d06d6bSBaptiste Daroussin int unicode; 38*61d06d6bSBaptiste Daroussin }; 39*61d06d6bSBaptiste Daroussin 40*61d06d6bSBaptiste Daroussin /* Special break control characters. */ 41*61d06d6bSBaptiste Daroussin static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; 42*61d06d6bSBaptiste Daroussin static const char ascii_break[2] = { ASCII_BREAK, '\0' }; 43*61d06d6bSBaptiste Daroussin 44*61d06d6bSBaptiste Daroussin static struct ln lines[] = { 45*61d06d6bSBaptiste Daroussin 46*61d06d6bSBaptiste Daroussin /* Spacing. */ 47*61d06d6bSBaptiste Daroussin { " ", ascii_nbrsp, 0x00a0 }, 48*61d06d6bSBaptiste Daroussin { "~", ascii_nbrsp, 0x00a0 }, 49*61d06d6bSBaptiste Daroussin { "0", " ", 0x2002 }, 50*61d06d6bSBaptiste Daroussin { "|", "", 0 }, 51*61d06d6bSBaptiste Daroussin { "^", "", 0 }, 52*61d06d6bSBaptiste Daroussin { "&", "", 0 }, 53*61d06d6bSBaptiste Daroussin { "%", "", 0 }, 54*61d06d6bSBaptiste Daroussin { ":", ascii_break, 0 }, 55*61d06d6bSBaptiste Daroussin /* XXX The following three do not really belong here. */ 56*61d06d6bSBaptiste Daroussin { "t", "", 0 }, 57*61d06d6bSBaptiste Daroussin { "c", "", 0 }, 58*61d06d6bSBaptiste Daroussin { "}", "", 0 }, 59*61d06d6bSBaptiste Daroussin 60*61d06d6bSBaptiste Daroussin /* Lines. */ 61*61d06d6bSBaptiste Daroussin { "ba", "|", 0x007c }, 62*61d06d6bSBaptiste Daroussin { "br", "|", 0x2502 }, 63*61d06d6bSBaptiste Daroussin { "ul", "_", 0x005f }, 64*61d06d6bSBaptiste Daroussin { "ru", "_", 0x005f }, 65*61d06d6bSBaptiste Daroussin { "rn", "-", 0x203e }, 66*61d06d6bSBaptiste Daroussin { "bb", "|", 0x00a6 }, 67*61d06d6bSBaptiste Daroussin { "sl", "/", 0x002f }, 68*61d06d6bSBaptiste Daroussin { "rs", "\\", 0x005c }, 69*61d06d6bSBaptiste Daroussin 70*61d06d6bSBaptiste Daroussin /* Text markers. */ 71*61d06d6bSBaptiste Daroussin { "ci", "O", 0x25cb }, 72*61d06d6bSBaptiste Daroussin { "bu", "+\bo", 0x2022 }, 73*61d06d6bSBaptiste Daroussin { "dd", "<**>", 0x2021 }, 74*61d06d6bSBaptiste Daroussin { "dg", "<*>", 0x2020 }, 75*61d06d6bSBaptiste Daroussin { "lz", "<>", 0x25ca }, 76*61d06d6bSBaptiste Daroussin { "sq", "[]", 0x25a1 }, 77*61d06d6bSBaptiste Daroussin { "ps", "<paragraph>", 0x00b6 }, 78*61d06d6bSBaptiste Daroussin { "sc", "<section>", 0x00a7 }, 79*61d06d6bSBaptiste Daroussin { "lh", "<=", 0x261c }, 80*61d06d6bSBaptiste Daroussin { "rh", "=>", 0x261e }, 81*61d06d6bSBaptiste Daroussin { "at", "@", 0x0040 }, 82*61d06d6bSBaptiste Daroussin { "sh", "#", 0x0023 }, 83*61d06d6bSBaptiste Daroussin { "CR", "<cr>", 0x21b5 }, 84*61d06d6bSBaptiste Daroussin { "OK", "\\/", 0x2713 }, 85*61d06d6bSBaptiste Daroussin { "CL", "<club>", 0x2663 }, 86*61d06d6bSBaptiste Daroussin { "SP", "<spade>", 0x2660 }, 87*61d06d6bSBaptiste Daroussin { "HE", "<heart>", 0x2665 }, 88*61d06d6bSBaptiste Daroussin { "DI", "<diamond>", 0x2666 }, 89*61d06d6bSBaptiste Daroussin 90*61d06d6bSBaptiste Daroussin /* Legal symbols. */ 91*61d06d6bSBaptiste Daroussin { "co", "(C)", 0x00a9 }, 92*61d06d6bSBaptiste Daroussin { "rg", "(R)", 0x00ae }, 93*61d06d6bSBaptiste Daroussin { "tm", "tm", 0x2122 }, 94*61d06d6bSBaptiste Daroussin 95*61d06d6bSBaptiste Daroussin /* Punctuation. */ 96*61d06d6bSBaptiste Daroussin { "em", "--", 0x2014 }, 97*61d06d6bSBaptiste Daroussin { "en", "-", 0x2013 }, 98*61d06d6bSBaptiste Daroussin { "hy", "-", 0x2010 }, 99*61d06d6bSBaptiste Daroussin { "e", "\\", 0x005c }, 100*61d06d6bSBaptiste Daroussin { ".", ".", 0x002e }, 101*61d06d6bSBaptiste Daroussin { "r!", "!", 0x00a1 }, 102*61d06d6bSBaptiste Daroussin { "r?", "?", 0x00bf }, 103*61d06d6bSBaptiste Daroussin 104*61d06d6bSBaptiste Daroussin /* Quotes. */ 105*61d06d6bSBaptiste Daroussin { "Bq", ",,", 0x201e }, 106*61d06d6bSBaptiste Daroussin { "bq", ",", 0x201a }, 107*61d06d6bSBaptiste Daroussin { "lq", "\"", 0x201c }, 108*61d06d6bSBaptiste Daroussin { "rq", "\"", 0x201d }, 109*61d06d6bSBaptiste Daroussin { "Lq", "\"", 0x201c }, 110*61d06d6bSBaptiste Daroussin { "Rq", "\"", 0x201d }, 111*61d06d6bSBaptiste Daroussin { "oq", "`", 0x2018 }, 112*61d06d6bSBaptiste Daroussin { "cq", "\'", 0x2019 }, 113*61d06d6bSBaptiste Daroussin { "aq", "\'", 0x0027 }, 114*61d06d6bSBaptiste Daroussin { "dq", "\"", 0x0022 }, 115*61d06d6bSBaptiste Daroussin { "Fo", "<<", 0x00ab }, 116*61d06d6bSBaptiste Daroussin { "Fc", ">>", 0x00bb }, 117*61d06d6bSBaptiste Daroussin { "fo", "<", 0x2039 }, 118*61d06d6bSBaptiste Daroussin { "fc", ">", 0x203a }, 119*61d06d6bSBaptiste Daroussin 120*61d06d6bSBaptiste Daroussin /* Brackets. */ 121*61d06d6bSBaptiste Daroussin { "lB", "[", 0x005b }, 122*61d06d6bSBaptiste Daroussin { "rB", "]", 0x005d }, 123*61d06d6bSBaptiste Daroussin { "lC", "{", 0x007b }, 124*61d06d6bSBaptiste Daroussin { "rC", "}", 0x007d }, 125*61d06d6bSBaptiste Daroussin { "la", "<", 0x27e8 }, 126*61d06d6bSBaptiste Daroussin { "ra", ">", 0x27e9 }, 127*61d06d6bSBaptiste Daroussin { "bv", "|", 0x23aa }, 128*61d06d6bSBaptiste Daroussin { "braceex", "|", 0x23aa }, 129*61d06d6bSBaptiste Daroussin { "bracketlefttp", "|", 0x23a1 }, 130*61d06d6bSBaptiste Daroussin { "bracketleftbt", "|", 0x23a3 }, 131*61d06d6bSBaptiste Daroussin { "bracketleftex", "|", 0x23a2 }, 132*61d06d6bSBaptiste Daroussin { "bracketrighttp", "|", 0x23a4 }, 133*61d06d6bSBaptiste Daroussin { "bracketrightbt", "|", 0x23a6 }, 134*61d06d6bSBaptiste Daroussin { "bracketrightex", "|", 0x23a5 }, 135*61d06d6bSBaptiste Daroussin { "lt", ",-", 0x23a7 }, 136*61d06d6bSBaptiste Daroussin { "bracelefttp", ",-", 0x23a7 }, 137*61d06d6bSBaptiste Daroussin { "lk", "{", 0x23a8 }, 138*61d06d6bSBaptiste Daroussin { "braceleftmid", "{", 0x23a8 }, 139*61d06d6bSBaptiste Daroussin { "lb", "`-", 0x23a9 }, 140*61d06d6bSBaptiste Daroussin { "braceleftbt", "`-", 0x23a9 }, 141*61d06d6bSBaptiste Daroussin { "braceleftex", "|", 0x23aa }, 142*61d06d6bSBaptiste Daroussin { "rt", "-.", 0x23ab }, 143*61d06d6bSBaptiste Daroussin { "bracerighttp", "-.", 0x23ab }, 144*61d06d6bSBaptiste Daroussin { "rk", "}", 0x23ac }, 145*61d06d6bSBaptiste Daroussin { "bracerightmid", "}", 0x23ac }, 146*61d06d6bSBaptiste Daroussin { "rb", "-\'", 0x23ad }, 147*61d06d6bSBaptiste Daroussin { "bracerightbt", "-\'", 0x23ad }, 148*61d06d6bSBaptiste Daroussin { "bracerightex", "|", 0x23aa }, 149*61d06d6bSBaptiste Daroussin { "parenlefttp", "/", 0x239b }, 150*61d06d6bSBaptiste Daroussin { "parenleftbt", "\\", 0x239d }, 151*61d06d6bSBaptiste Daroussin { "parenleftex", "|", 0x239c }, 152*61d06d6bSBaptiste Daroussin { "parenrighttp", "\\", 0x239e }, 153*61d06d6bSBaptiste Daroussin { "parenrightbt", "/", 0x23a0 }, 154*61d06d6bSBaptiste Daroussin { "parenrightex", "|", 0x239f }, 155*61d06d6bSBaptiste Daroussin 156*61d06d6bSBaptiste Daroussin /* Arrows and lines. */ 157*61d06d6bSBaptiste Daroussin { "<-", "<-", 0x2190 }, 158*61d06d6bSBaptiste Daroussin { "->", "->", 0x2192 }, 159*61d06d6bSBaptiste Daroussin { "<>", "<->", 0x2194 }, 160*61d06d6bSBaptiste Daroussin { "da", "|\bv", 0x2193 }, 161*61d06d6bSBaptiste Daroussin { "ua", "|\b^", 0x2191 }, 162*61d06d6bSBaptiste Daroussin { "va", "^v", 0x2195 }, 163*61d06d6bSBaptiste Daroussin { "lA", "<=", 0x21d0 }, 164*61d06d6bSBaptiste Daroussin { "rA", "=>", 0x21d2 }, 165*61d06d6bSBaptiste Daroussin { "hA", "<=>", 0x21d4 }, 166*61d06d6bSBaptiste Daroussin { "uA", "=\b^", 0x21d1 }, 167*61d06d6bSBaptiste Daroussin { "dA", "=\bv", 0x21d3 }, 168*61d06d6bSBaptiste Daroussin { "vA", "^=v", 0x21d5 }, 169*61d06d6bSBaptiste Daroussin { "an", "-", 0x23af }, 170*61d06d6bSBaptiste Daroussin 171*61d06d6bSBaptiste Daroussin /* Logic. */ 172*61d06d6bSBaptiste Daroussin { "AN", "^", 0x2227 }, 173*61d06d6bSBaptiste Daroussin { "OR", "v", 0x2228 }, 174*61d06d6bSBaptiste Daroussin { "no", "~", 0x00ac }, 175*61d06d6bSBaptiste Daroussin { "tno", "~", 0x00ac }, 176*61d06d6bSBaptiste Daroussin { "te", "<there\037exists>", 0x2203 }, 177*61d06d6bSBaptiste Daroussin { "fa", "<for\037all>", 0x2200 }, 178*61d06d6bSBaptiste Daroussin { "st", "<such\037that>", 0x220b }, 179*61d06d6bSBaptiste Daroussin { "tf", "<therefore>", 0x2234 }, 180*61d06d6bSBaptiste Daroussin { "3d", "<therefore>", 0x2234 }, 181*61d06d6bSBaptiste Daroussin { "or", "|", 0x007c }, 182*61d06d6bSBaptiste Daroussin 183*61d06d6bSBaptiste Daroussin /* Mathematicals. */ 184*61d06d6bSBaptiste Daroussin { "pl", "+", 0x002b }, 185*61d06d6bSBaptiste Daroussin { "mi", "-", 0x2212 }, 186*61d06d6bSBaptiste Daroussin { "-", "-", 0x002d }, 187*61d06d6bSBaptiste Daroussin { "-+", "-+", 0x2213 }, 188*61d06d6bSBaptiste Daroussin { "+-", "+-", 0x00b1 }, 189*61d06d6bSBaptiste Daroussin { "t+-", "+-", 0x00b1 }, 190*61d06d6bSBaptiste Daroussin { "pc", ".", 0x00b7 }, 191*61d06d6bSBaptiste Daroussin { "md", ".", 0x22c5 }, 192*61d06d6bSBaptiste Daroussin { "mu", "x", 0x00d7 }, 193*61d06d6bSBaptiste Daroussin { "tmu", "x", 0x00d7 }, 194*61d06d6bSBaptiste Daroussin { "c*", "O\bx", 0x2297 }, 195*61d06d6bSBaptiste Daroussin { "c+", "O\b+", 0x2295 }, 196*61d06d6bSBaptiste Daroussin { "di", "/", 0x00f7 }, 197*61d06d6bSBaptiste Daroussin { "tdi", "/", 0x00f7 }, 198*61d06d6bSBaptiste Daroussin { "f/", "/", 0x2044 }, 199*61d06d6bSBaptiste Daroussin { "**", "*", 0x2217 }, 200*61d06d6bSBaptiste Daroussin { "<=", "<=", 0x2264 }, 201*61d06d6bSBaptiste Daroussin { ">=", ">=", 0x2265 }, 202*61d06d6bSBaptiste Daroussin { "<<", "<<", 0x226a }, 203*61d06d6bSBaptiste Daroussin { ">>", ">>", 0x226b }, 204*61d06d6bSBaptiste Daroussin { "eq", "=", 0x003d }, 205*61d06d6bSBaptiste Daroussin { "!=", "!=", 0x2260 }, 206*61d06d6bSBaptiste Daroussin { "==", "==", 0x2261 }, 207*61d06d6bSBaptiste Daroussin { "ne", "!==", 0x2262 }, 208*61d06d6bSBaptiste Daroussin { "ap", "~", 0x223c }, 209*61d06d6bSBaptiste Daroussin { "|=", "-~", 0x2243 }, 210*61d06d6bSBaptiste Daroussin { "=~", "=~", 0x2245 }, 211*61d06d6bSBaptiste Daroussin { "~~", "~~", 0x2248 }, 212*61d06d6bSBaptiste Daroussin { "~=", "~=", 0x2248 }, 213*61d06d6bSBaptiste Daroussin { "pt", "<proportional\037to>", 0x221d }, 214*61d06d6bSBaptiste Daroussin { "es", "{}", 0x2205 }, 215*61d06d6bSBaptiste Daroussin { "mo", "<element\037of>", 0x2208 }, 216*61d06d6bSBaptiste Daroussin { "nm", "<not\037element\037of>", 0x2209 }, 217*61d06d6bSBaptiste Daroussin { "sb", "<proper\037subset>", 0x2282 }, 218*61d06d6bSBaptiste Daroussin { "nb", "<not\037subset>", 0x2284 }, 219*61d06d6bSBaptiste Daroussin { "sp", "<proper\037superset>", 0x2283 }, 220*61d06d6bSBaptiste Daroussin { "nc", "<not\037superset>", 0x2285 }, 221*61d06d6bSBaptiste Daroussin { "ib", "<subset\037or\037equal>", 0x2286 }, 222*61d06d6bSBaptiste Daroussin { "ip", "<superset\037or\037equal>", 0x2287 }, 223*61d06d6bSBaptiste Daroussin { "ca", "<intersection>", 0x2229 }, 224*61d06d6bSBaptiste Daroussin { "cu", "<union>", 0x222a }, 225*61d06d6bSBaptiste Daroussin { "/_", "<angle>", 0x2220 }, 226*61d06d6bSBaptiste Daroussin { "pp", "<perpendicular>", 0x22a5 }, 227*61d06d6bSBaptiste Daroussin { "is", "<integral>", 0x222b }, 228*61d06d6bSBaptiste Daroussin { "integral", "<integral>", 0x222b }, 229*61d06d6bSBaptiste Daroussin { "sum", "<sum>", 0x2211 }, 230*61d06d6bSBaptiste Daroussin { "product", "<product>", 0x220f }, 231*61d06d6bSBaptiste Daroussin { "coproduct", "<coproduct>", 0x2210 }, 232*61d06d6bSBaptiste Daroussin { "gr", "<nabla>", 0x2207 }, 233*61d06d6bSBaptiste Daroussin { "sr", "<sqrt>", 0x221a }, 234*61d06d6bSBaptiste Daroussin { "sqrt", "<sqrt>", 0x221a }, 235*61d06d6bSBaptiste Daroussin { "lc", "|~", 0x2308 }, 236*61d06d6bSBaptiste Daroussin { "rc", "~|", 0x2309 }, 237*61d06d6bSBaptiste Daroussin { "lf", "|_", 0x230a }, 238*61d06d6bSBaptiste Daroussin { "rf", "_|", 0x230b }, 239*61d06d6bSBaptiste Daroussin { "if", "<infinity>", 0x221e }, 240*61d06d6bSBaptiste Daroussin { "Ah", "<Aleph>", 0x2135 }, 241*61d06d6bSBaptiste Daroussin { "Im", "<Im>", 0x2111 }, 242*61d06d6bSBaptiste Daroussin { "Re", "<Re>", 0x211c }, 243*61d06d6bSBaptiste Daroussin { "wp", "P", 0x2118 }, 244*61d06d6bSBaptiste Daroussin { "pd", "<del>", 0x2202 }, 245*61d06d6bSBaptiste Daroussin { "-h", "/h", 0x210f }, 246*61d06d6bSBaptiste Daroussin { "hbar", "/h", 0x210f }, 247*61d06d6bSBaptiste Daroussin { "12", "1/2", 0x00bd }, 248*61d06d6bSBaptiste Daroussin { "14", "1/4", 0x00bc }, 249*61d06d6bSBaptiste Daroussin { "34", "3/4", 0x00be }, 250*61d06d6bSBaptiste Daroussin { "18", "1/8", 0x215B }, 251*61d06d6bSBaptiste Daroussin { "38", "3/8", 0x215C }, 252*61d06d6bSBaptiste Daroussin { "58", "5/8", 0x215D }, 253*61d06d6bSBaptiste Daroussin { "78", "7/8", 0x215E }, 254*61d06d6bSBaptiste Daroussin { "S1", "^1", 0x00B9 }, 255*61d06d6bSBaptiste Daroussin { "S2", "^2", 0x00B2 }, 256*61d06d6bSBaptiste Daroussin { "S3", "^3", 0x00B3 }, 257*61d06d6bSBaptiste Daroussin 258*61d06d6bSBaptiste Daroussin /* Ligatures. */ 259*61d06d6bSBaptiste Daroussin { "ff", "ff", 0xfb00 }, 260*61d06d6bSBaptiste Daroussin { "fi", "fi", 0xfb01 }, 261*61d06d6bSBaptiste Daroussin { "fl", "fl", 0xfb02 }, 262*61d06d6bSBaptiste Daroussin { "Fi", "ffi", 0xfb03 }, 263*61d06d6bSBaptiste Daroussin { "Fl", "ffl", 0xfb04 }, 264*61d06d6bSBaptiste Daroussin { "AE", "AE", 0x00c6 }, 265*61d06d6bSBaptiste Daroussin { "ae", "ae", 0x00e6 }, 266*61d06d6bSBaptiste Daroussin { "OE", "OE", 0x0152 }, 267*61d06d6bSBaptiste Daroussin { "oe", "oe", 0x0153 }, 268*61d06d6bSBaptiste Daroussin { "ss", "ss", 0x00df }, 269*61d06d6bSBaptiste Daroussin { "IJ", "IJ", 0x0132 }, 270*61d06d6bSBaptiste Daroussin { "ij", "ij", 0x0133 }, 271*61d06d6bSBaptiste Daroussin 272*61d06d6bSBaptiste Daroussin /* Accents. */ 273*61d06d6bSBaptiste Daroussin { "a\"", "\"", 0x02dd }, 274*61d06d6bSBaptiste Daroussin { "a-", "-", 0x00af }, 275*61d06d6bSBaptiste Daroussin { "a.", ".", 0x02d9 }, 276*61d06d6bSBaptiste Daroussin { "a^", "^", 0x005e }, 277*61d06d6bSBaptiste Daroussin { "aa", "\'", 0x00b4 }, 278*61d06d6bSBaptiste Daroussin { "\'", "\'", 0x00b4 }, 279*61d06d6bSBaptiste Daroussin { "ga", "`", 0x0060 }, 280*61d06d6bSBaptiste Daroussin { "`", "`", 0x0060 }, 281*61d06d6bSBaptiste Daroussin { "ab", "'\b`", 0x02d8 }, 282*61d06d6bSBaptiste Daroussin { "ac", ",", 0x00b8 }, 283*61d06d6bSBaptiste Daroussin { "ad", "\"", 0x00a8 }, 284*61d06d6bSBaptiste Daroussin { "ah", "v", 0x02c7 }, 285*61d06d6bSBaptiste Daroussin { "ao", "o", 0x02da }, 286*61d06d6bSBaptiste Daroussin { "a~", "~", 0x007e }, 287*61d06d6bSBaptiste Daroussin { "ho", ",", 0x02db }, 288*61d06d6bSBaptiste Daroussin { "ha", "^", 0x005e }, 289*61d06d6bSBaptiste Daroussin { "ti", "~", 0x007e }, 290*61d06d6bSBaptiste Daroussin 291*61d06d6bSBaptiste Daroussin /* Accented letters. */ 292*61d06d6bSBaptiste Daroussin { "'A", "'\bA", 0x00c1 }, 293*61d06d6bSBaptiste Daroussin { "'E", "'\bE", 0x00c9 }, 294*61d06d6bSBaptiste Daroussin { "'I", "'\bI", 0x00cd }, 295*61d06d6bSBaptiste Daroussin { "'O", "'\bO", 0x00d3 }, 296*61d06d6bSBaptiste Daroussin { "'U", "'\bU", 0x00da }, 297*61d06d6bSBaptiste Daroussin { "'a", "'\ba", 0x00e1 }, 298*61d06d6bSBaptiste Daroussin { "'e", "'\be", 0x00e9 }, 299*61d06d6bSBaptiste Daroussin { "'i", "'\bi", 0x00ed }, 300*61d06d6bSBaptiste Daroussin { "'o", "'\bo", 0x00f3 }, 301*61d06d6bSBaptiste Daroussin { "'u", "'\bu", 0x00fa }, 302*61d06d6bSBaptiste Daroussin { "`A", "`\bA", 0x00c0 }, 303*61d06d6bSBaptiste Daroussin { "`E", "`\bE", 0x00c8 }, 304*61d06d6bSBaptiste Daroussin { "`I", "`\bI", 0x00cc }, 305*61d06d6bSBaptiste Daroussin { "`O", "`\bO", 0x00d2 }, 306*61d06d6bSBaptiste Daroussin { "`U", "`\bU", 0x00d9 }, 307*61d06d6bSBaptiste Daroussin { "`a", "`\ba", 0x00e0 }, 308*61d06d6bSBaptiste Daroussin { "`e", "`\be", 0x00e8 }, 309*61d06d6bSBaptiste Daroussin { "`i", "`\bi", 0x00ec }, 310*61d06d6bSBaptiste Daroussin { "`o", "`\bo", 0x00f2 }, 311*61d06d6bSBaptiste Daroussin { "`u", "`\bu", 0x00f9 }, 312*61d06d6bSBaptiste Daroussin { "~A", "~\bA", 0x00c3 }, 313*61d06d6bSBaptiste Daroussin { "~N", "~\bN", 0x00d1 }, 314*61d06d6bSBaptiste Daroussin { "~O", "~\bO", 0x00d5 }, 315*61d06d6bSBaptiste Daroussin { "~a", "~\ba", 0x00e3 }, 316*61d06d6bSBaptiste Daroussin { "~n", "~\bn", 0x00f1 }, 317*61d06d6bSBaptiste Daroussin { "~o", "~\bo", 0x00f5 }, 318*61d06d6bSBaptiste Daroussin { ":A", "\"\bA", 0x00c4 }, 319*61d06d6bSBaptiste Daroussin { ":E", "\"\bE", 0x00cb }, 320*61d06d6bSBaptiste Daroussin { ":I", "\"\bI", 0x00cf }, 321*61d06d6bSBaptiste Daroussin { ":O", "\"\bO", 0x00d6 }, 322*61d06d6bSBaptiste Daroussin { ":U", "\"\bU", 0x00dc }, 323*61d06d6bSBaptiste Daroussin { ":a", "\"\ba", 0x00e4 }, 324*61d06d6bSBaptiste Daroussin { ":e", "\"\be", 0x00eb }, 325*61d06d6bSBaptiste Daroussin { ":i", "\"\bi", 0x00ef }, 326*61d06d6bSBaptiste Daroussin { ":o", "\"\bo", 0x00f6 }, 327*61d06d6bSBaptiste Daroussin { ":u", "\"\bu", 0x00fc }, 328*61d06d6bSBaptiste Daroussin { ":y", "\"\by", 0x00ff }, 329*61d06d6bSBaptiste Daroussin { "^A", "^\bA", 0x00c2 }, 330*61d06d6bSBaptiste Daroussin { "^E", "^\bE", 0x00ca }, 331*61d06d6bSBaptiste Daroussin { "^I", "^\bI", 0x00ce }, 332*61d06d6bSBaptiste Daroussin { "^O", "^\bO", 0x00d4 }, 333*61d06d6bSBaptiste Daroussin { "^U", "^\bU", 0x00db }, 334*61d06d6bSBaptiste Daroussin { "^a", "^\ba", 0x00e2 }, 335*61d06d6bSBaptiste Daroussin { "^e", "^\be", 0x00ea }, 336*61d06d6bSBaptiste Daroussin { "^i", "^\bi", 0x00ee }, 337*61d06d6bSBaptiste Daroussin { "^o", "^\bo", 0x00f4 }, 338*61d06d6bSBaptiste Daroussin { "^u", "^\bu", 0x00fb }, 339*61d06d6bSBaptiste Daroussin { ",C", ",\bC", 0x00c7 }, 340*61d06d6bSBaptiste Daroussin { ",c", ",\bc", 0x00e7 }, 341*61d06d6bSBaptiste Daroussin { "/L", "/\bL", 0x0141 }, 342*61d06d6bSBaptiste Daroussin { "/l", "/\bl", 0x0142 }, 343*61d06d6bSBaptiste Daroussin { "/O", "/\bO", 0x00d8 }, 344*61d06d6bSBaptiste Daroussin { "/o", "/\bo", 0x00f8 }, 345*61d06d6bSBaptiste Daroussin { "oA", "o\bA", 0x00c5 }, 346*61d06d6bSBaptiste Daroussin { "oa", "o\ba", 0x00e5 }, 347*61d06d6bSBaptiste Daroussin 348*61d06d6bSBaptiste Daroussin /* Special letters. */ 349*61d06d6bSBaptiste Daroussin { "-D", "Dh", 0x00d0 }, 350*61d06d6bSBaptiste Daroussin { "Sd", "dh", 0x00f0 }, 351*61d06d6bSBaptiste Daroussin { "TP", "Th", 0x00de }, 352*61d06d6bSBaptiste Daroussin { "Tp", "th", 0x00fe }, 353*61d06d6bSBaptiste Daroussin { ".i", "i", 0x0131 }, 354*61d06d6bSBaptiste Daroussin { ".j", "j", 0x0237 }, 355*61d06d6bSBaptiste Daroussin 356*61d06d6bSBaptiste Daroussin /* Currency. */ 357*61d06d6bSBaptiste Daroussin { "Do", "$", 0x0024 }, 358*61d06d6bSBaptiste Daroussin { "ct", "/\bc", 0x00a2 }, 359*61d06d6bSBaptiste Daroussin { "Eu", "EUR", 0x20ac }, 360*61d06d6bSBaptiste Daroussin { "eu", "EUR", 0x20ac }, 361*61d06d6bSBaptiste Daroussin { "Ye", "=\bY", 0x00a5 }, 362*61d06d6bSBaptiste Daroussin { "Po", "GBP", 0x00a3 }, 363*61d06d6bSBaptiste Daroussin { "Cs", "o\bx", 0x00a4 }, 364*61d06d6bSBaptiste Daroussin { "Fn", ",\bf", 0x0192 }, 365*61d06d6bSBaptiste Daroussin 366*61d06d6bSBaptiste Daroussin /* Units. */ 367*61d06d6bSBaptiste Daroussin { "de", "<degree>", 0x00b0 }, 368*61d06d6bSBaptiste Daroussin { "%0", "<permille>", 0x2030 }, 369*61d06d6bSBaptiste Daroussin { "fm", "\'", 0x2032 }, 370*61d06d6bSBaptiste Daroussin { "sd", "''", 0x2033 }, 371*61d06d6bSBaptiste Daroussin { "mc", "<micro>", 0x00b5 }, 372*61d06d6bSBaptiste Daroussin { "Of", "_\ba", 0x00aa }, 373*61d06d6bSBaptiste Daroussin { "Om", "_\bo", 0x00ba }, 374*61d06d6bSBaptiste Daroussin 375*61d06d6bSBaptiste Daroussin /* Greek characters. */ 376*61d06d6bSBaptiste Daroussin { "*A", "A", 0x0391 }, 377*61d06d6bSBaptiste Daroussin { "*B", "B", 0x0392 }, 378*61d06d6bSBaptiste Daroussin { "*G", "<Gamma>", 0x0393 }, 379*61d06d6bSBaptiste Daroussin { "*D", "<Delta>", 0x0394 }, 380*61d06d6bSBaptiste Daroussin { "*E", "E", 0x0395 }, 381*61d06d6bSBaptiste Daroussin { "*Z", "Z", 0x0396 }, 382*61d06d6bSBaptiste Daroussin { "*Y", "H", 0x0397 }, 383*61d06d6bSBaptiste Daroussin { "*H", "<Theta>", 0x0398 }, 384*61d06d6bSBaptiste Daroussin { "*I", "I", 0x0399 }, 385*61d06d6bSBaptiste Daroussin { "*K", "K", 0x039a }, 386*61d06d6bSBaptiste Daroussin { "*L", "<Lambda>", 0x039b }, 387*61d06d6bSBaptiste Daroussin { "*M", "M", 0x039c }, 388*61d06d6bSBaptiste Daroussin { "*N", "N", 0x039d }, 389*61d06d6bSBaptiste Daroussin { "*C", "<Xi>", 0x039e }, 390*61d06d6bSBaptiste Daroussin { "*O", "O", 0x039f }, 391*61d06d6bSBaptiste Daroussin { "*P", "<Pi>", 0x03a0 }, 392*61d06d6bSBaptiste Daroussin { "*R", "P", 0x03a1 }, 393*61d06d6bSBaptiste Daroussin { "*S", "<Sigma>", 0x03a3 }, 394*61d06d6bSBaptiste Daroussin { "*T", "T", 0x03a4 }, 395*61d06d6bSBaptiste Daroussin { "*U", "Y", 0x03a5 }, 396*61d06d6bSBaptiste Daroussin { "*F", "<Phi>", 0x03a6 }, 397*61d06d6bSBaptiste Daroussin { "*X", "X", 0x03a7 }, 398*61d06d6bSBaptiste Daroussin { "*Q", "<Psi>", 0x03a8 }, 399*61d06d6bSBaptiste Daroussin { "*W", "<Omega>", 0x03a9 }, 400*61d06d6bSBaptiste Daroussin { "*a", "<alpha>", 0x03b1 }, 401*61d06d6bSBaptiste Daroussin { "*b", "<beta>", 0x03b2 }, 402*61d06d6bSBaptiste Daroussin { "*g", "<gamma>", 0x03b3 }, 403*61d06d6bSBaptiste Daroussin { "*d", "<delta>", 0x03b4 }, 404*61d06d6bSBaptiste Daroussin { "*e", "<epsilon>", 0x03b5 }, 405*61d06d6bSBaptiste Daroussin { "*z", "<zeta>", 0x03b6 }, 406*61d06d6bSBaptiste Daroussin { "*y", "<eta>", 0x03b7 }, 407*61d06d6bSBaptiste Daroussin { "*h", "<theta>", 0x03b8 }, 408*61d06d6bSBaptiste Daroussin { "*i", "<iota>", 0x03b9 }, 409*61d06d6bSBaptiste Daroussin { "*k", "<kappa>", 0x03ba }, 410*61d06d6bSBaptiste Daroussin { "*l", "<lambda>", 0x03bb }, 411*61d06d6bSBaptiste Daroussin { "*m", "<mu>", 0x03bc }, 412*61d06d6bSBaptiste Daroussin { "*n", "<nu>", 0x03bd }, 413*61d06d6bSBaptiste Daroussin { "*c", "<xi>", 0x03be }, 414*61d06d6bSBaptiste Daroussin { "*o", "o", 0x03bf }, 415*61d06d6bSBaptiste Daroussin { "*p", "<pi>", 0x03c0 }, 416*61d06d6bSBaptiste Daroussin { "*r", "<rho>", 0x03c1 }, 417*61d06d6bSBaptiste Daroussin { "*s", "<sigma>", 0x03c3 }, 418*61d06d6bSBaptiste Daroussin { "*t", "<tau>", 0x03c4 }, 419*61d06d6bSBaptiste Daroussin { "*u", "<upsilon>", 0x03c5 }, 420*61d06d6bSBaptiste Daroussin { "*f", "<phi>", 0x03d5 }, 421*61d06d6bSBaptiste Daroussin { "*x", "<chi>", 0x03c7 }, 422*61d06d6bSBaptiste Daroussin { "*q", "<psi>", 0x03c8 }, 423*61d06d6bSBaptiste Daroussin { "*w", "<omega>", 0x03c9 }, 424*61d06d6bSBaptiste Daroussin { "+h", "<theta>", 0x03d1 }, 425*61d06d6bSBaptiste Daroussin { "+f", "<phi>", 0x03c6 }, 426*61d06d6bSBaptiste Daroussin { "+p", "<pi>", 0x03d6 }, 427*61d06d6bSBaptiste Daroussin { "+e", "<epsilon>", 0x03f5 }, 428*61d06d6bSBaptiste Daroussin { "ts", "<sigma>", 0x03c2 }, 429*61d06d6bSBaptiste Daroussin }; 430*61d06d6bSBaptiste Daroussin 431*61d06d6bSBaptiste Daroussin static struct ohash mchars; 432*61d06d6bSBaptiste Daroussin 433*61d06d6bSBaptiste Daroussin 434*61d06d6bSBaptiste Daroussin void 435*61d06d6bSBaptiste Daroussin mchars_free(void) 436*61d06d6bSBaptiste Daroussin { 437*61d06d6bSBaptiste Daroussin 438*61d06d6bSBaptiste Daroussin ohash_delete(&mchars); 439*61d06d6bSBaptiste Daroussin } 440*61d06d6bSBaptiste Daroussin 441*61d06d6bSBaptiste Daroussin void 442*61d06d6bSBaptiste Daroussin mchars_alloc(void) 443*61d06d6bSBaptiste Daroussin { 444*61d06d6bSBaptiste Daroussin size_t i; 445*61d06d6bSBaptiste Daroussin unsigned int slot; 446*61d06d6bSBaptiste Daroussin 447*61d06d6bSBaptiste Daroussin mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode)); 448*61d06d6bSBaptiste Daroussin for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) { 449*61d06d6bSBaptiste Daroussin slot = ohash_qlookup(&mchars, lines[i].roffcode); 450*61d06d6bSBaptiste Daroussin assert(ohash_find(&mchars, slot) == NULL); 451*61d06d6bSBaptiste Daroussin ohash_insert(&mchars, slot, lines + i); 452*61d06d6bSBaptiste Daroussin } 453*61d06d6bSBaptiste Daroussin } 454*61d06d6bSBaptiste Daroussin 455*61d06d6bSBaptiste Daroussin int 456*61d06d6bSBaptiste Daroussin mchars_spec2cp(const char *p, size_t sz) 457*61d06d6bSBaptiste Daroussin { 458*61d06d6bSBaptiste Daroussin const struct ln *ln; 459*61d06d6bSBaptiste Daroussin const char *end; 460*61d06d6bSBaptiste Daroussin 461*61d06d6bSBaptiste Daroussin end = p + sz; 462*61d06d6bSBaptiste Daroussin ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); 463*61d06d6bSBaptiste Daroussin return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1; 464*61d06d6bSBaptiste Daroussin } 465*61d06d6bSBaptiste Daroussin 466*61d06d6bSBaptiste Daroussin int 467*61d06d6bSBaptiste Daroussin mchars_num2char(const char *p, size_t sz) 468*61d06d6bSBaptiste Daroussin { 469*61d06d6bSBaptiste Daroussin int i; 470*61d06d6bSBaptiste Daroussin 471*61d06d6bSBaptiste Daroussin i = mandoc_strntoi(p, sz, 10); 472*61d06d6bSBaptiste Daroussin return i >= 0 && i < 256 ? i : -1; 473*61d06d6bSBaptiste Daroussin } 474*61d06d6bSBaptiste Daroussin 475*61d06d6bSBaptiste Daroussin int 476*61d06d6bSBaptiste Daroussin mchars_num2uc(const char *p, size_t sz) 477*61d06d6bSBaptiste Daroussin { 478*61d06d6bSBaptiste Daroussin int i; 479*61d06d6bSBaptiste Daroussin 480*61d06d6bSBaptiste Daroussin i = mandoc_strntoi(p, sz, 16); 481*61d06d6bSBaptiste Daroussin assert(i >= 0 && i <= 0x10FFFF); 482*61d06d6bSBaptiste Daroussin return i; 483*61d06d6bSBaptiste Daroussin } 484*61d06d6bSBaptiste Daroussin 485*61d06d6bSBaptiste Daroussin const char * 486*61d06d6bSBaptiste Daroussin mchars_spec2str(const char *p, size_t sz, size_t *rsz) 487*61d06d6bSBaptiste Daroussin { 488*61d06d6bSBaptiste Daroussin const struct ln *ln; 489*61d06d6bSBaptiste Daroussin const char *end; 490*61d06d6bSBaptiste Daroussin 491*61d06d6bSBaptiste Daroussin end = p + sz; 492*61d06d6bSBaptiste Daroussin ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); 493*61d06d6bSBaptiste Daroussin if (ln == NULL) { 494*61d06d6bSBaptiste Daroussin *rsz = 1; 495*61d06d6bSBaptiste Daroussin return sz == 1 ? p : NULL; 496*61d06d6bSBaptiste Daroussin } 497*61d06d6bSBaptiste Daroussin 498*61d06d6bSBaptiste Daroussin *rsz = strlen(ln->ascii); 499*61d06d6bSBaptiste Daroussin return ln->ascii; 500*61d06d6bSBaptiste Daroussin } 501*61d06d6bSBaptiste Daroussin 502*61d06d6bSBaptiste Daroussin const char * 503*61d06d6bSBaptiste Daroussin mchars_uc2str(int uc) 504*61d06d6bSBaptiste Daroussin { 505*61d06d6bSBaptiste Daroussin size_t i; 506*61d06d6bSBaptiste Daroussin 507*61d06d6bSBaptiste Daroussin for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) 508*61d06d6bSBaptiste Daroussin if (uc == lines[i].unicode) 509*61d06d6bSBaptiste Daroussin return lines[i].ascii; 510*61d06d6bSBaptiste Daroussin return "<?>"; 511*61d06d6bSBaptiste Daroussin } 512